/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		PACKET - implements raw packet sockets.
 *
 * Authors:	Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *
 * Fixes:
 *		Alan Cox	:	verify_area() now used correctly
 *		Alan Cox	:	new skbuff lists, look ma no backlogs!
 *		Alan Cox	:	tidied skbuff lists.
 *		Alan Cox	:	Now uses generic datagram routines I
 *					added. Also fixed the peek/read crash
 *					from all old Linux datagram code.
 *		Alan Cox	:	Uses the improved datagram code.
 *		Alan Cox	:	Added NULL's for socket options.
 *		Alan Cox	:	Re-commented the code.
 *		Alan Cox	:	Use new kernel side addressing
 *		Rob Janssen	:	Correct MTU usage.
 *		Dave Platt	:	Counter leaks caused by incorrect
 *					interrupt locking and some slightly
 *					dubious gcc output. Can you read
 *					compiler: it said _VOLATILE_
 *		Richard Kooijman:	Timestamp fixes.
 *		Alan Cox	:	New buffers. Use sk->mac.raw.
 *		Alan Cox	:	sendmsg/recvmsg support.
 *		Alan Cox	:	Protocol setting support
 *		Alexey Kuznetsov:	Untied from IPv4 stack.
 *		Cyrus Durgin	:	Fixed kerneld for kmod.
 *		Michal Ostrowski:	Module initialization cleanup.
 *		Ulises Alonso	:	Frame number limit removal and
 *					packet_set_ring memory leak.
 *		Eric Biederman	:	Allow for > 8 byte hardware addresses.
 *					The convention is that longer addresses
 *					will simply extend the hardware address
 *					byte arrays at the end of sockaddr_ll
 *					and packet_mreq.
 *		Johann Baudy	:	Added TX RING.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 */
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/wireless.h>
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <net/net_namespace.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
#include <asm/cacheflush.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/poll.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/if_vlan.h>
#include <linux/virtio_net.h>
#include <linux/errqueue.h>
#include <linux/net_tstamp.h>
#include <net/inet_common.h>
/*
   Assumptions:
   - if the device has no dev->hard_header routine, it adds and removes
     the ll header inside itself. In this case the ll header is invisible
     outside of the device, but higher levels still should reserve
     dev->hard_header_len. Some devices are clever enough to reallocate
     the skb when the header will not fit into the reserved space
     (tunnels); others are silly (PPP).
   - a packet socket receives packets with the ll header pulled,
     so SOCK_RAW should push it back.

On receive:
-----------

Incoming, dev->hard_header != NULL
   mac_header -> ll header
   data       -> data

Outgoing, dev->hard_header != NULL
   mac_header -> ll header
   data       -> ll header

Incoming, dev->hard_header == NULL
   mac_header -> UNKNOWN position. It is very likely that it points to the
		 ll header. PPP does this, which is wrong, because it
		 introduces asymmetry between the rx and tx paths.
   data       -> data

Outgoing, dev->hard_header == NULL
   mac_header -> data. The ll header is still not built!
   data       -> data

Resume
  If dev->hard_header == NULL we are unlikely to restore a sensible ll header.


On transmit:
------------

dev->hard_header != NULL
   mac_header -> ll header
   data       -> ll header

dev->hard_header == NULL (ll header is added by the device, we cannot control it)
   mac_header -> data
   data       -> data

   We should set nh.raw on output to the correct position;
   the packet classifier depends on it.
 */
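/*
 * Illustration (not part of the kernel build): a minimal userspace sketch
 * of the header convention described above, assuming a host with
 * CAP_NET_RAW. With SOCK_RAW the ll header is part of the data handed to
 * the user; with SOCK_DGRAM it has already been pulled:
 *
 *	int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
 *	unsigned char buf[2048];
 *	ssize_t n = recvfrom(fd, buf, sizeof(buf), 0, NULL, NULL);
 *	// On an Ethernet device buf[0..13] is the Ethernet header;
 *	// with SOCK_DGRAM, buf would start at the network header.
 */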
/* Private packet socket structures. */

struct packet_mclist {
	struct packet_mclist	*next;
	int			ifindex;
	int			count;
	unsigned short		type;
	unsigned short		alen;
	unsigned char		addr[MAX_ADDR_LEN];
};

/* identical to struct packet_mreq except it has
 * a longer address field.
 */
struct packet_mreq_max {
	int		mr_ifindex;
	unsigned short	mr_type;
	unsigned short	mr_alen;
	unsigned char	mr_address[MAX_ADDR_LEN];
};

static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
			   int closing, int tx_ring);
struct pgv {
	char *buffer;
};

struct packet_ring_buffer {
	struct pgv		*pg_vec;
	unsigned int		head;
	unsigned int		frames_per_block;
	unsigned int		frame_size;
	unsigned int		frame_max;

	unsigned int		pg_vec_order;
	unsigned int		pg_vec_pages;
	unsigned int		pg_vec_len;

	atomic_t		pending;
};

struct packet_sock;
static int tpacket_snd(struct packet_sock *po, struct msghdr *msg);

static void packet_flush_mclist(struct sock *sk);
struct packet_sock {
	/* struct sock has to be the first member of packet_sock */
	struct sock		sk;
	struct tpacket_stats	stats;
	struct packet_ring_buffer	rx_ring;
	struct packet_ring_buffer	tx_ring;
	int			copy_thresh;
	spinlock_t		bind_lock;
	struct mutex		pg_vec_lock;
	unsigned int		running:1,	/* prot_hook is attached */
				auxdata:1,
				origdev:1,
				has_vnet_hdr:1;
	int			ifindex;	/* bound device		*/
	__be16			num;
	struct packet_mclist	*mclist;
	atomic_t		mapped;
	enum tpacket_versions	tp_version;
	unsigned int		tp_hdrlen;
	unsigned int		tp_reserve;
	unsigned int		tp_loss:1;
	unsigned int		tp_tstamp;
	struct packet_type	prot_hook ____cacheline_aligned_in_smp;
};
struct packet_skb_cb {
	unsigned int origlen;
	union {
		struct sockaddr_pkt pkt;
		struct sockaddr_ll ll;
	} sa;
};

#define PACKET_SKB_CB(__skb)	((struct packet_skb_cb *)((__skb)->cb))
static inline __pure struct page *pgv_to_page(void *addr)
{
	if (is_vmalloc_addr(addr))
		return vmalloc_to_page(addr);
	return virt_to_page(addr);
}
static void __packet_set_status(struct packet_sock *po, void *frame, int status)
{
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;

	h.raw = frame;
	switch (po->tp_version) {
	case TPACKET_V1:
		h.h1->tp_status = status;
		flush_dcache_page(pgv_to_page(&h.h1->tp_status));
		break;
	case TPACKET_V2:
		h.h2->tp_status = status;
		flush_dcache_page(pgv_to_page(&h.h2->tp_status));
		break;
	default:
		pr_err("TPACKET version not supported\n");
		BUG();
	}

	smp_wmb();
}
static int __packet_get_status(struct packet_sock *po, void *frame)
{
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;

	smp_rmb();

	h.raw = frame;
	switch (po->tp_version) {
	case TPACKET_V1:
		flush_dcache_page(pgv_to_page(&h.h1->tp_status));
		return h.h1->tp_status;
	case TPACKET_V2:
		flush_dcache_page(pgv_to_page(&h.h2->tp_status));
		return h.h2->tp_status;
	default:
		pr_err("TPACKET version not supported\n");
		BUG();
		return 0;
	}
}
static void *packet_lookup_frame(struct packet_sock *po,
				 struct packet_ring_buffer *rb,
				 unsigned int position,
				 int status)
{
	unsigned int pg_vec_pos, frame_offset;
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;

	pg_vec_pos = position / rb->frames_per_block;
	frame_offset = position % rb->frames_per_block;

	h.raw = rb->pg_vec[pg_vec_pos].buffer +
		(frame_offset * rb->frame_size);

	if (status != __packet_get_status(po, h.raw))
		return NULL;

	return h.raw;
}
static inline void *packet_current_frame(struct packet_sock *po,
					 struct packet_ring_buffer *rb,
					 int status)
{
	return packet_lookup_frame(po, rb, rb->head, status);
}
static inline void *packet_previous_frame(struct packet_sock *po,
					  struct packet_ring_buffer *rb,
					  int status)
{
	unsigned int previous = rb->head ? rb->head - 1 : rb->frame_max;
	return packet_lookup_frame(po, rb, previous, status);
}
static inline void packet_increment_head(struct packet_ring_buffer *buff)
{
	buff->head = buff->head != buff->frame_max ? buff->head+1 : 0;
}
static inline struct packet_sock *pkt_sk(struct sock *sk)
{
	return (struct packet_sock *)sk;
}
static void packet_sock_destruct(struct sock *sk)
{
	skb_queue_purge(&sk->sk_error_queue);

	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
	WARN_ON(atomic_read(&sk->sk_wmem_alloc));

	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_err("Attempt to release alive packet socket: %p\n", sk);
		return;
	}

	sk_refcnt_debug_dec(sk);
}
static const struct proto_ops packet_ops;

static const struct proto_ops packet_ops_spkt;
static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,
			   struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct sockaddr_pkt *spkt;

	/*
	 *	When we registered the protocol we saved the socket in the data
	 *	field for just this event.
	 */

	sk = pt->af_packet_priv;

	/*
	 *	Yank back the headers [hope the device set this
	 *	right or kerboom...]
	 *
	 *	Incoming packets have the ll header pulled, push it back.
	 *
	 *	For outgoing ones skb->data == skb_mac_header(skb)
	 *	so that this procedure is a noop.
	 */

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto out;

	if (!net_eq(dev_net(dev), sock_net(sk)))
		goto out;

	skb = skb_share_check(skb, GFP_ATOMIC);
	if (skb == NULL)
		goto oom;

	/* drop any routing info */
	skb_dst_drop(skb);

	/* drop conntrack reference */
	nf_reset(skb);

	spkt = &PACKET_SKB_CB(skb)->sa.pkt;

	skb_push(skb, skb->data - skb_mac_header(skb));

	/*
	 *	The SOCK_PACKET socket receives _all_ frames.
	 */

	spkt->spkt_family = dev->type;
	strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
	spkt->spkt_protocol = skb->protocol;

	/*
	 *	Charge the memory to the socket. This is done specifically
	 *	to prevent sockets using all the memory up.
	 */

	if (sock_queue_rcv_skb(sk, skb) == 0)
		return 0;

out:
	kfree_skb(skb);
oom:
	return 0;
}
/*
 *	Output a raw packet to a device layer. This bypasses all the other
 *	protocol layers and you must therefore supply it with a complete frame
 */

static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
			       struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_pkt *saddr = (struct sockaddr_pkt *)msg->msg_name;
	struct sk_buff *skb = NULL;
	struct net_device *dev;
	__be16 proto = 0;
	int err;

	/*
	 *	Get and verify the address.
	 */

	if (saddr) {
		if (msg->msg_namelen < sizeof(struct sockaddr))
			return -EINVAL;
		if (msg->msg_namelen == sizeof(struct sockaddr_pkt))
			proto = saddr->spkt_protocol;
	} else
		return -ENOTCONN;	/* SOCK_PACKET must be sent giving an address */

	/*
	 *	Find the device first to size check it
	 */

	saddr->spkt_device[13] = 0;
retry:
	rcu_read_lock();
	dev = dev_get_by_name_rcu(sock_net(sk), saddr->spkt_device);
	err = -ENODEV;
	if (dev == NULL)
		goto out_unlock;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_unlock;

	/*
	 * You may not queue a frame bigger than the mtu. This is the lowest level
	 * raw protocol and you must do your own fragmentation at this level.
	 */

	err = -EMSGSIZE;
	if (len > dev->mtu + dev->hard_header_len)
		goto out_unlock;

	if (!skb) {
		size_t reserved = LL_RESERVED_SPACE(dev);
		unsigned int hhlen = dev->header_ops ? dev->hard_header_len : 0;

		rcu_read_unlock();
		skb = sock_wmalloc(sk, len + reserved, 0, GFP_KERNEL);
		if (skb == NULL)
			return -ENOBUFS;
		/* FIXME: Save some space for broken drivers that write a hard
		 * header at transmission time by themselves. PPP is the notable
		 * one here. This should really be fixed at the driver level.
		 */
		skb_reserve(skb, reserved);
		skb_reset_network_header(skb);

		/* Try to align data part correctly */
		if (hhlen) {
			skb->data -= hhlen;
			skb->tail -= hhlen;
			if (len < hhlen)
				skb_reset_network_header(skb);
		}
		err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
		if (err)
			goto out_free;
		goto retry;
	}

	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;
	err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
	if (err < 0)
		goto out_unlock;

	dev_queue_xmit(skb);
	rcu_read_unlock();
	return len;

out_unlock:
	rcu_read_unlock();
out_free:
	kfree_skb(skb);
	return err;
}
static inline unsigned int run_filter(const struct sk_buff *skb,
				      const struct sock *sk,
				      unsigned int res)
{
	struct sk_filter *filter;

	rcu_read_lock();
	filter = rcu_dereference(sk->sk_filter);
	if (filter != NULL)
		res = sk_run_filter(skb, filter->insns);
	rcu_read_unlock();

	return res;
}
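/*
 * Illustration (not part of the kernel build): run_filter() above executes
 * a socket filter that userspace would typically attach with
 * SO_ATTACH_FILTER; a minimal sketch, assuming a one-instruction classic
 * BPF program that accepts every packet:
 *
 *	struct sock_filter code[] = {
 *		{ 0x06, 0, 0, 0x0000ffff },	// BPF_RET|BPF_K: accept
 *	};
 *	struct sock_fprog prog = { .len = 1, .filter = code };
 *	setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog));
 */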
/*
 * This function makes lazy skb cloning in the hope that most packets
 * are discarded by BPF.
 *
 * Note the tricky part: we DO mangle shared skb! skb->data, skb->len
 * and skb->cb are mangled. It works because (and until) packets
 * falling here are owned by the current CPU. Output packets are cloned
 * by dev_queue_xmit_nit(), input packets are processed by net_bh
 * sequentially, so that if we return the skb to its original state on
 * exit, we will not harm anyone.
 */
static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
		      struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct sockaddr_ll *sll;
	struct packet_sock *po;
	u8 *skb_head = skb->data;
	int skb_len = skb->len;
	unsigned int snaplen, res;

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = pt->af_packet_priv;
	po = pkt_sk(sk);

	if (!net_eq(dev_net(dev), sock_net(sk)))
		goto drop;

	skb->dev = dev;

	if (dev->header_ops) {
		/* The device has an explicit notion of ll header,
		 * exported to higher levels.
		 *
		 * Otherwise, the device hides details of its frame
		 * structure, so that the corresponding packet head is
		 * never delivered to the user.
		 */
		if (sk->sk_type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb_mac_header(skb));
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb_network_offset(skb));
		}
	}

	snaplen = skb->len;

	res = run_filter(skb, sk, snaplen);
	if (!res)
		goto drop_n_restore;
	if (snaplen > res)
		snaplen = res;

	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
	    (unsigned)sk->sk_rcvbuf)
		goto drop_n_acct;

	if (skb_shared(skb)) {
		struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
		if (nskb == NULL)
			goto drop_n_acct;

		if (skb_head != skb->data) {
			skb->data = skb_head;
			skb->len = skb_len;
		}
		kfree_skb(skb);
		skb = nskb;
	}

	BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
		     sizeof(skb->cb));

	sll = &PACKET_SKB_CB(skb)->sa.ll;
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	if (unlikely(po->origdev))
		sll->sll_ifindex = orig_dev->ifindex;
	else
		sll->sll_ifindex = dev->ifindex;

	sll->sll_halen = dev_parse_header(skb, sll->sll_addr);

	PACKET_SKB_CB(skb)->origlen = skb->len;

	if (pskb_trim(skb, snaplen))
		goto drop_n_acct;

	skb_set_owner_r(skb, sk);
	skb->dev = NULL;
	skb_dst_drop(skb);

	/* drop conntrack reference */
	nf_reset(skb);

	spin_lock(&sk->sk_receive_queue.lock);
	po->stats.tp_packets++;
	skb->dropcount = atomic_read(&sk->sk_drops);
	__skb_queue_tail(&sk->sk_receive_queue, skb);
	spin_unlock(&sk->sk_receive_queue.lock);
	sk->sk_data_ready(sk, skb->len);
	return 0;

drop_n_acct:
	po->stats.tp_drops = atomic_inc_return(&sk->sk_drops);

drop_n_restore:
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb_len;
	}
drop:
	consume_skb(skb);
	return 0;
}
static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
		       struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct packet_sock *po;
	struct sockaddr_ll *sll;
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;
	u8 *skb_head = skb->data;
	int skb_len = skb->len;
	unsigned int snaplen, res;
	unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
	unsigned short macoff, netoff, hdrlen;
	struct sk_buff *copy_skb = NULL;
	struct timeval tv;
	struct timespec ts;
	struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = pt->af_packet_priv;
	po = pkt_sk(sk);

	if (!net_eq(dev_net(dev), sock_net(sk)))
		goto drop;

	if (dev->header_ops) {
		if (sk->sk_type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb_mac_header(skb));
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb_network_offset(skb));
		}
	}

	if (skb->ip_summed == CHECKSUM_PARTIAL)
		status |= TP_STATUS_CSUMNOTREADY;

	snaplen = skb->len;

	res = run_filter(skb, sk, snaplen);
	if (!res)
		goto drop_n_restore;
	if (snaplen > res)
		snaplen = res;

	if (sk->sk_type == SOCK_DGRAM) {
		macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 +
				  po->tp_reserve;
	} else {
		unsigned maclen = skb_network_offset(skb);
		netoff = TPACKET_ALIGN(po->tp_hdrlen +
				       (maclen < 16 ? 16 : maclen)) +
			po->tp_reserve;
		macoff = netoff - maclen;
	}

	if (macoff + snaplen > po->rx_ring.frame_size) {
		if (po->copy_thresh &&
		    atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
		    (unsigned)sk->sk_rcvbuf) {
			if (skb_shared(skb)) {
				copy_skb = skb_clone(skb, GFP_ATOMIC);
			} else {
				copy_skb = skb_get(skb);
				skb_head = skb->data;
			}
			if (copy_skb)
				skb_set_owner_r(copy_skb, sk);
		}
		snaplen = po->rx_ring.frame_size - macoff;
		if ((int)snaplen < 0)
			snaplen = 0;
	}

	spin_lock(&sk->sk_receive_queue.lock);
	h.raw = packet_current_frame(po, &po->rx_ring, TP_STATUS_KERNEL);
	if (!h.raw)
		goto ring_is_full;
	packet_increment_head(&po->rx_ring);
	po->stats.tp_packets++;
	if (copy_skb) {
		status |= TP_STATUS_COPY;
		__skb_queue_tail(&sk->sk_receive_queue, copy_skb);
	}
	if (!po->stats.tp_drops)
		status &= ~TP_STATUS_LOSING;
	spin_unlock(&sk->sk_receive_queue.lock);

	skb_copy_bits(skb, 0, h.raw + macoff, snaplen);

	switch (po->tp_version) {
	case TPACKET_V1:
		h.h1->tp_len = skb->len;
		h.h1->tp_snaplen = snaplen;
		h.h1->tp_mac = macoff;
		h.h1->tp_net = netoff;
		if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
				&& shhwtstamps->syststamp.tv64)
			tv = ktime_to_timeval(shhwtstamps->syststamp);
		else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
				&& shhwtstamps->hwtstamp.tv64)
			tv = ktime_to_timeval(shhwtstamps->hwtstamp);
		else if (skb->tstamp.tv64)
			tv = ktime_to_timeval(skb->tstamp);
		else
			do_gettimeofday(&tv);
		h.h1->tp_sec = tv.tv_sec;
		h.h1->tp_usec = tv.tv_usec;
		hdrlen = sizeof(*h.h1);
		break;
	case TPACKET_V2:
		h.h2->tp_len = skb->len;
		h.h2->tp_snaplen = snaplen;
		h.h2->tp_mac = macoff;
		h.h2->tp_net = netoff;
		if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
				&& shhwtstamps->syststamp.tv64)
			ts = ktime_to_timespec(shhwtstamps->syststamp);
		else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
				&& shhwtstamps->hwtstamp.tv64)
			ts = ktime_to_timespec(shhwtstamps->hwtstamp);
		else if (skb->tstamp.tv64)
			ts = ktime_to_timespec(skb->tstamp);
		else
			getnstimeofday(&ts);
		h.h2->tp_sec = ts.tv_sec;
		h.h2->tp_nsec = ts.tv_nsec;
		h.h2->tp_vlan_tci = vlan_tx_tag_get(skb);
		hdrlen = sizeof(*h.h2);
		break;
	default:
		BUG();
	}

	sll = h.raw + TPACKET_ALIGN(hdrlen);
	sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	if (unlikely(po->origdev))
		sll->sll_ifindex = orig_dev->ifindex;
	else
		sll->sll_ifindex = dev->ifindex;

	__packet_set_status(po, h.raw, status);
	smp_mb();
#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
	{
		u8 *start, *end;

		end = (u8 *)PAGE_ALIGN((unsigned long)h.raw + macoff + snaplen);
		for (start = h.raw; start < end; start += PAGE_SIZE)
			flush_dcache_page(pgv_to_page(start));
	}
#endif

	sk->sk_data_ready(sk, 0);

drop_n_restore:
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb_len;
	}
drop:
	kfree_skb(skb);
	return 0;

ring_is_full:
	po->stats.tp_drops++;
	spin_unlock(&sk->sk_receive_queue.lock);

	sk->sk_data_ready(sk, 0);
	kfree_skb(copy_skb);
	goto drop_n_restore;
}
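/*
 * Illustration (not part of the kernel build): userspace consumes the RX
 * ring filled by tpacket_rcv() above by polling each frame's tp_status
 * word; a minimal sketch, assuming a TPACKET_V2 ring already configured
 * and mmap()ed, with `frame` pointing at one slot:
 *
 *	struct tpacket2_hdr *hdr = (struct tpacket2_hdr *)frame;
 *	if (hdr->tp_status & TP_STATUS_USER) {
 *		unsigned char *mac = (unsigned char *)frame + hdr->tp_mac;
 *		// process hdr->tp_snaplen bytes starting at mac ...
 *		hdr->tp_status = TP_STATUS_KERNEL;	// hand frame back
 *	}
 */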
static void tpacket_destruct_skb(struct sk_buff *skb)
{
	struct packet_sock *po = pkt_sk(skb->sk);
	void *ph;

	if (likely(po->tx_ring.pg_vec)) {
		ph = skb_shinfo(skb)->destructor_arg;
		BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING);
		BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
		atomic_dec(&po->tx_ring.pending);
		__packet_set_status(po, ph, TP_STATUS_AVAILABLE);
	}

	sock_wfree(skb);
}
static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
		void *frame, struct net_device *dev, int size_max,
		__be16 proto, unsigned char *addr)
{
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} ph;
	int to_write, offset, len, tp_len, nr_frags, len_max;
	struct socket *sock = po->sk.sk_socket;
	struct page *page;
	void *data;
	int err;

	ph.raw = frame;

	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = po->sk.sk_priority;
	skb->mark = po->sk.sk_mark;
	skb_shinfo(skb)->destructor_arg = ph.raw;

	switch (po->tp_version) {
	case TPACKET_V2:
		tp_len = ph.h2->tp_len;
		break;
	default:
		tp_len = ph.h1->tp_len;
		break;
	}
	if (unlikely(tp_len > size_max)) {
		pr_err("packet size is too long (%d > %d)\n", tp_len, size_max);
		return -EMSGSIZE;
	}

	skb_reserve(skb, LL_RESERVED_SPACE(dev));
	skb_reset_network_header(skb);

	data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll);
	to_write = tp_len;

	if (sock->type == SOCK_DGRAM) {
		err = dev_hard_header(skb, dev, ntohs(proto), addr,
				NULL, tp_len);
		if (unlikely(err < 0))
			return -EINVAL;
	} else if (dev->hard_header_len) {
		/* net device doesn't like empty head */
		if (unlikely(tp_len <= dev->hard_header_len)) {
			pr_err("packet size is too short (%d < %d)\n",
			       tp_len, dev->hard_header_len);
			return -EINVAL;
		}

		skb_push(skb, dev->hard_header_len);
		err = skb_store_bits(skb, 0, data,
				dev->hard_header_len);
		if (unlikely(err))
			return err;

		data += dev->hard_header_len;
		to_write -= dev->hard_header_len;
	}

	err = -EFAULT;
	offset = offset_in_page(data);
	len_max = PAGE_SIZE - offset;
	len = ((to_write > len_max) ? len_max : to_write);

	skb->data_len = to_write;
	skb->len += to_write;
	skb->truesize += to_write;
	atomic_add(to_write, &po->sk.sk_wmem_alloc);

	while (likely(to_write)) {
		nr_frags = skb_shinfo(skb)->nr_frags;

		if (unlikely(nr_frags >= MAX_SKB_FRAGS)) {
			pr_err("Packet exceed the number of skb frags(%lu)\n",
			       MAX_SKB_FRAGS);
			return -EFAULT;
		}

		page = pgv_to_page(data);
		data += len;
		flush_dcache_page(page);
		get_page(page);
		skb_fill_page_desc(skb, nr_frags, page, offset, len);
		to_write -= len;
		offset = 0;
		len_max = PAGE_SIZE;
		len = ((to_write > len_max) ? len_max : to_write);
	}

	return tp_len;
}
static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
{
	struct sk_buff *skb;
	struct net_device *dev;
	__be16 proto;
	int ifindex, err, reserve = 0;
	void *ph;
	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
	int tp_len, size_max;
	unsigned char *addr;
	int len_sum = 0;
	int status = 0;
	struct socket *sock;

	sock = po->sk.sk_socket;

	mutex_lock(&po->pg_vec_lock);

	err = -EBUSY;
	if (saddr == NULL) {
		ifindex	= po->ifindex;
		proto	= po->num;
		addr	= NULL;
	} else {
		err = -EINVAL;
		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
			goto out;
		if (msg->msg_namelen < (saddr->sll_halen
					+ offsetof(struct sockaddr_ll,
						sll_addr)))
			goto out;
		ifindex	= saddr->sll_ifindex;
		proto	= saddr->sll_protocol;
		addr	= saddr->sll_addr;
	}

	dev = dev_get_by_index(sock_net(&po->sk), ifindex);
	err = -ENXIO;
	if (unlikely(dev == NULL))
		goto out;

	reserve = dev->hard_header_len;

	err = -ENETDOWN;
	if (unlikely(!(dev->flags & IFF_UP)))
		goto out_put;

	size_max = po->tx_ring.frame_size
		- (po->tp_hdrlen - sizeof(struct sockaddr_ll));

	if (size_max > dev->mtu + reserve)
		size_max = dev->mtu + reserve;

	do {
		ph = packet_current_frame(po, &po->tx_ring,
				TP_STATUS_SEND_REQUEST);

		if (unlikely(ph == NULL)) {
			schedule();
			continue;
		}

		status = TP_STATUS_SEND_REQUEST;
		skb = sock_alloc_send_skb(&po->sk,
				LL_ALLOCATED_SPACE(dev)
				+ sizeof(struct sockaddr_ll),
				0, &err);

		if (unlikely(skb == NULL))
			goto out_status;

		tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
				addr);

		if (unlikely(tp_len < 0)) {
			if (po->tp_loss) {
				__packet_set_status(po, ph,
						TP_STATUS_AVAILABLE);
				packet_increment_head(&po->tx_ring);
				kfree_skb(skb);
				continue;
			} else {
				status = TP_STATUS_WRONG_FORMAT;
				err = tp_len;
				goto out_status;
			}
		}

		skb->destructor = tpacket_destruct_skb;
		__packet_set_status(po, ph, TP_STATUS_SENDING);
		atomic_inc(&po->tx_ring.pending);

		status = TP_STATUS_SEND_REQUEST;
		err = dev_queue_xmit(skb);
		if (unlikely(err > 0)) {
			err = net_xmit_errno(err);
			if (err && __packet_get_status(po, ph) ==
				   TP_STATUS_AVAILABLE) {
				/* skb was destructed already */
				skb = NULL;
				goto out_status;
			}
			/*
			 * skb was dropped but not destructed yet;
			 * let's treat it like congestion or err < 0
			 */
			err = 0;
		}
		packet_increment_head(&po->tx_ring);
		len_sum += tp_len;
	} while (likely((ph != NULL) ||
			((!(msg->msg_flags & MSG_DONTWAIT)) &&
			 (atomic_read(&po->tx_ring.pending))))
		);

	err = len_sum;
	goto out_put;

out_status:
	__packet_set_status(po, ph, status);
	kfree_skb(skb);
out_put:
	dev_put(dev);
out:
	mutex_unlock(&po->pg_vec_lock);
	return err;
}
static inline struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad,
					       size_t reserve, size_t len,
					       size_t linear, int noblock,
					       int *err)
{
	struct sk_buff *skb;

	/* Under a page?  Don't bother with paged skb. */
	if (prepad + len < PAGE_SIZE || !linear)
		linear = len;

	skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
				   err);
	if (!skb)
		return NULL;

	skb_reserve(skb, reserve);
	skb_put(skb, linear);
	skb->data_len = len - linear;
	skb->len += len - linear;

	return skb;
}
static int packet_snd(struct socket *sock,
		      struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
	struct sk_buff *skb;
	struct net_device *dev;
	__be16 proto;
	unsigned char *addr;
	int ifindex, err, reserve = 0;
	struct virtio_net_hdr vnet_hdr = { 0 };
	int offset = 0;
	int vnet_hdr_len;
	struct packet_sock *po = pkt_sk(sk);
	unsigned short gso_type = 0;

	/*
	 *	Get and verify the address.
	 */

	if (saddr == NULL) {
		ifindex	= po->ifindex;
		proto	= po->num;
		addr	= NULL;
	} else {
		err = -EINVAL;
		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
			goto out;
		if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
			goto out;
		ifindex	= saddr->sll_ifindex;
		proto	= saddr->sll_protocol;
		addr	= saddr->sll_addr;
	}

	dev = dev_get_by_index(sock_net(sk), ifindex);
	err = -ENXIO;
	if (dev == NULL)
		goto out_unlock;
	if (sock->type == SOCK_RAW)
		reserve = dev->hard_header_len;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_unlock;

	if (po->has_vnet_hdr) {
		vnet_hdr_len = sizeof(vnet_hdr);

		err = -EINVAL;
		if (len < vnet_hdr_len)
			goto out_unlock;

		len -= vnet_hdr_len;

		err = memcpy_fromiovec((void *)&vnet_hdr, msg->msg_iov,
				       vnet_hdr_len);
		if (err < 0)
			goto out_unlock;

		if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
		    (vnet_hdr.csum_start + vnet_hdr.csum_offset + 2 >
		      vnet_hdr.hdr_len))
			vnet_hdr.hdr_len = vnet_hdr.csum_start +
						 vnet_hdr.csum_offset + 2;

		err = -EINVAL;
		if (vnet_hdr.hdr_len > len)
			goto out_unlock;

		if (vnet_hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
			switch (vnet_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
			case VIRTIO_NET_HDR_GSO_TCPV4:
				gso_type = SKB_GSO_TCPV4;
				break;
			case VIRTIO_NET_HDR_GSO_TCPV6:
				gso_type = SKB_GSO_TCPV6;
				break;
			case VIRTIO_NET_HDR_GSO_UDP:
				gso_type = SKB_GSO_UDP;
				break;
			default:
				goto out_unlock;
			}

			if (vnet_hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
				gso_type |= SKB_GSO_TCP_ECN;

			if (vnet_hdr.gso_size == 0)
				goto out_unlock;

		}
	}

	err = -EMSGSIZE;
	if (!gso_type && (len > dev->mtu+reserve))
		goto out_unlock;

	err = -ENOBUFS;
	skb = packet_alloc_skb(sk, LL_ALLOCATED_SPACE(dev),
			       LL_RESERVED_SPACE(dev), len, vnet_hdr.hdr_len,
			       msg->msg_flags & MSG_DONTWAIT, &err);
	if (skb == NULL)
		goto out_unlock;

	skb_set_network_header(skb, reserve);

	err = -EINVAL;
	if (sock->type == SOCK_DGRAM &&
	    (offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len)) < 0)
		goto out_free;

	/* Returns -EFAULT on error */
	err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len);
	if (err)
		goto out_free;
	err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
	if (err < 0)
		goto out_free;

	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	if (po->has_vnet_hdr) {
		if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
			if (!skb_partial_csum_set(skb, vnet_hdr.csum_start,
						  vnet_hdr.csum_offset)) {
				err = -EINVAL;
				goto out_free;
			}
		}

		skb_shinfo(skb)->gso_size = vnet_hdr.gso_size;
		skb_shinfo(skb)->gso_type = gso_type;

		/* Header must be checked, and gso_segs computed. */
		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
		skb_shinfo(skb)->gso_segs = 0;

		len += vnet_hdr_len;
	}

	/*
	 *	Now send it
	 */

	err = dev_queue_xmit(skb);
	if (err > 0 && (err = net_xmit_errno(err)) != 0)
		goto out_unlock;

	dev_put(dev);

	return len;

out_free:
	kfree_skb(skb);
out_unlock:
	if (dev)
		dev_put(dev);
out:
	return err;
}
static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
		struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);

	if (po->tx_ring.pg_vec)
		return tpacket_snd(po, msg);
	else
		return packet_snd(sock, msg, len);
}
/*
 *	Close a PACKET socket. This is fairly simple. We immediately go
 *	to 'closed' state and remove our protocol entry in the device list.
 */

static int packet_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po;
	struct net *net;
	struct tpacket_req req;

	if (!sk)
		return 0;

	net = sock_net(sk);
	po = pkt_sk(sk);

	spin_lock_bh(&net->packet.sklist_lock);
	sk_del_node_init_rcu(sk);
	sock_prot_inuse_add(net, sk->sk_prot, -1);
	spin_unlock_bh(&net->packet.sklist_lock);

	spin_lock(&po->bind_lock);
	if (po->running) {
		/*
		 *	Remove from protocol table
		 */
		po->running = 0;
		po->num = 0;
		__dev_remove_pack(&po->prot_hook);
		__sock_put(sk);
	}
	spin_unlock(&po->bind_lock);

	packet_flush_mclist(sk);

	memset(&req, 0, sizeof(req));

	if (po->rx_ring.pg_vec)
		packet_set_ring(sk, &req, 1, 0);

	if (po->tx_ring.pg_vec)
		packet_set_ring(sk, &req, 1, 1);

	synchronize_net();
	/*
	 *	Now the socket is dead. No more input will appear.
	 */
	sock_orphan(sk);
	sock->sk = NULL;

	/* Purge queues */

	skb_queue_purge(&sk->sk_receive_queue);
	sk_refcnt_debug_release(sk);

	sock_put(sk);
	return 0;
}
/*
 *	Attach a packet hook.
 */

static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
{
	struct packet_sock *po = pkt_sk(sk);

	/*
	 *	Detach an existing hook if present.
	 */

	lock_sock(sk);

	spin_lock(&po->bind_lock);
	if (po->running) {
		__sock_put(sk);
		po->running = 0;
		po->num = 0;
		spin_unlock(&po->bind_lock);
		dev_remove_pack(&po->prot_hook);
		spin_lock(&po->bind_lock);
	}

	po->num = protocol;
	po->prot_hook.type = protocol;
	po->prot_hook.dev = dev;

	po->ifindex = dev ? dev->ifindex : 0;

	if (protocol == 0)
		goto out_unlock;

	if (!dev || (dev->flags & IFF_UP)) {
		dev_add_pack(&po->prot_hook);
		sock_hold(sk);
		po->running = 1;
	} else {
		sk->sk_err = ENETDOWN;
		if (!sock_flag(sk, SOCK_DEAD))
			sk->sk_error_report(sk);
	}

out_unlock:
	spin_unlock(&po->bind_lock);
	release_sock(sk);
	return 0;
}
/*
 *	Bind a packet socket to a device
 */

static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
			    int addr_len)
{
	struct sock *sk = sock->sk;
	char name[15];
	struct net_device *dev;
	int err = -ENODEV;

	/*
	 *	Check legality
	 */

	if (addr_len != sizeof(struct sockaddr))
		return -EINVAL;
	strlcpy(name, uaddr->sa_data, sizeof(name));

	dev = dev_get_by_name(sock_net(sk), name);
	if (dev) {
		err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
		dev_put(dev);
	}
	return err;
}
static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
	struct sock *sk = sock->sk;
	struct net_device *dev = NULL;
	int err;

	/*
	 *	Check legality
	 */

	if (addr_len < sizeof(struct sockaddr_ll))
		return -EINVAL;
	if (sll->sll_family != AF_PACKET)
		return -EINVAL;

	if (sll->sll_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex);
		if (dev == NULL)
			goto out;
	}
	err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
	if (dev)
		dev_put(dev);

out:
	return err;
}
static struct proto packet_proto = {
	.name	  = "PACKET",
	.owner	  = THIS_MODULE,
	.obj_size = sizeof(struct packet_sock),
};
/*
 *	Create a packet of type SOCK_PACKET.
 */

static int packet_create(struct net *net, struct socket *sock, int protocol,
			 int kern)
{
	struct sock *sk;
	struct packet_sock *po;
	__be16 proto = (__force __be16)protocol; /* weird, but documented */
	int err;

	if (!capable(CAP_NET_RAW))
		return -EPERM;
	if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
	    sock->type != SOCK_PACKET)
		return -ESOCKTNOSUPPORT;

	sock->state = SS_UNCONNECTED;

	err = -ENOBUFS;
	sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
	if (sk == NULL)
		goto out;

	sock->ops = &packet_ops;
	if (sock->type == SOCK_PACKET)
		sock->ops = &packet_ops_spkt;

	sock_init_data(sock, sk);

	po = pkt_sk(sk);
	sk->sk_family = PF_PACKET;
	po->num = proto;

	sk->sk_destruct = packet_sock_destruct;
	sk_refcnt_debug_inc(sk);

	/*
	 *	Attach a protocol block
	 */

	spin_lock_init(&po->bind_lock);
	mutex_init(&po->pg_vec_lock);
	po->prot_hook.func = packet_rcv;

	if (sock->type == SOCK_PACKET)
		po->prot_hook.func = packet_rcv_spkt;

	po->prot_hook.af_packet_priv = sk;

	if (proto) {
		po->prot_hook.type = proto;
		dev_add_pack(&po->prot_hook);
		sock_hold(sk);
		po->running = 1;
	}

	spin_lock_bh(&net->packet.sklist_lock);
	sk_add_node_rcu(sk, &net->packet.sklist);
	sock_prot_inuse_add(net, &packet_proto, 1);
	spin_unlock_bh(&net->packet.sklist_lock);

	return 0;
out:
	return err;
}
static int packet_recv_error(struct sock *sk, struct msghdr *msg, int len)
{
	struct sock_exterr_skb *serr;
	struct sk_buff *skb, *skb2;
	int copied, err;

	err = -EAGAIN;
	skb = skb_dequeue(&sk->sk_error_queue);
	if (skb == NULL)
		goto out;

	copied = skb->len;
	if (copied > len) {
		msg->msg_flags |= MSG_TRUNC;
		copied = len;
	}
	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
	if (err)
		goto out_free_skb;

	sock_recv_timestamp(msg, sk, skb);

	serr = SKB_EXT_ERR(skb);
	put_cmsg(msg, SOL_PACKET, PACKET_TX_TIMESTAMP,
		 sizeof(serr->ee), &serr->ee);

	msg->msg_flags |= MSG_ERRQUEUE;
	err = copied;

	/* Reset and regenerate socket error */
	spin_lock_bh(&sk->sk_error_queue.lock);
	sk->sk_err = 0;
	if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
		sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
		spin_unlock_bh(&sk->sk_error_queue.lock);
		sk->sk_error_report(sk);
	} else
		spin_unlock_bh(&sk->sk_error_queue.lock);

out_free_skb:
	kfree_skb(skb);
out:
	return err;
}
/*
 *	Pull a packet from our receive queue and hand it to the user.
 *	If necessary we block.
 */

static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
			  struct msghdr *msg, size_t len, int flags)
{
	struct sock *sk = sock->sk;
	struct sk_buff *skb;
	int copied, err;
	struct sockaddr_ll *sll;
	int vnet_hdr_len = 0;

	err = -EINVAL;
	if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT|MSG_ERRQUEUE))
		goto out;

#if 0
	/* What error should we return now? EUNATTACH? */
	if (pkt_sk(sk)->ifindex < 0)
		return -ENODEV;
#endif

	if (flags & MSG_ERRQUEUE) {
		err = packet_recv_error(sk, msg, len);
		goto out;
	}

	/*
	 *	Call the generic datagram receiver. This handles all sorts
	 *	of horrible races and re-entrancy so we can forget about it
	 *	in the protocol layers.
	 *
	 *	Now it will return ENETDOWN, if the device has just gone down,
	 *	but then it will block.
	 */

	skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);

	/*
	 *	An error occurred so return it. Because skb_recv_datagram()
	 *	handles the blocking we don't see and worry about blocking
	 *	retries.
	 */

	if (skb == NULL)
		goto out;

	if (pkt_sk(sk)->has_vnet_hdr) {
		struct virtio_net_hdr vnet_hdr = { 0 };

		err = -EINVAL;
		vnet_hdr_len = sizeof(vnet_hdr);
		if (len < vnet_hdr_len)
			goto out_free;

		len -= vnet_hdr_len;

		if (skb_is_gso(skb)) {
			struct skb_shared_info *sinfo = skb_shinfo(skb);

			/* This is a hint as to how much should be linear. */
			vnet_hdr.hdr_len = skb_headlen(skb);
			vnet_hdr.gso_size = sinfo->gso_size;
			if (sinfo->gso_type & SKB_GSO_TCPV4)
				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
			else if (sinfo->gso_type & SKB_GSO_TCPV6)
				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
			else if (sinfo->gso_type & SKB_GSO_UDP)
				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP;
			else if (sinfo->gso_type & SKB_GSO_FCOE)
				goto out_free;
			else
				BUG();
			if (sinfo->gso_type & SKB_GSO_TCP_ECN)
				vnet_hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
		} else
			vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;

		if (skb->ip_summed == CHECKSUM_PARTIAL) {
			vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
			vnet_hdr.csum_start = skb_checksum_start_offset(skb);
			vnet_hdr.csum_offset = skb->csum_offset;
		} /* else everything is zero */

		err = memcpy_toiovec(msg->msg_iov, (void *)&vnet_hdr,
				     vnet_hdr_len);
		if (err < 0)
			goto out_free;
	}

	/*
	 *	If the address length field is there to be filled in, we fill
	 *	it in now.
	 */

	sll = &PACKET_SKB_CB(skb)->sa.ll;
	if (sock->type == SOCK_PACKET)
		msg->msg_namelen = sizeof(struct sockaddr_pkt);
	else
		msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);

	/*
	 *	You lose any data beyond the buffer you gave. If it worries a
	 *	user program they can ask the device for its MTU anyway.
	 */

	copied = skb->len;
	if (copied > len) {
		copied = len;
		msg->msg_flags |= MSG_TRUNC;
	}

	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
	if (err)
		goto out_free;

	sock_recv_ts_and_drops(msg, sk, skb);

	if (msg->msg_name)
		memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
		       msg->msg_namelen);

	if (pkt_sk(sk)->auxdata) {
		struct tpacket_auxdata aux;

		aux.tp_status = TP_STATUS_USER;
		if (skb->ip_summed == CHECKSUM_PARTIAL)
			aux.tp_status |= TP_STATUS_CSUMNOTREADY;
		aux.tp_len = PACKET_SKB_CB(skb)->origlen;
		aux.tp_snaplen = skb->len;
		aux.tp_mac = 0;
		aux.tp_net = skb_network_offset(skb);
		aux.tp_vlan_tci = vlan_tx_tag_get(skb);

		put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
	}

	/*
	 *	Free or return the buffer as appropriate. Again this
	 *	hides all the races and re-entrancy issues from us.
	 */
	err = vnet_hdr_len + ((flags&MSG_TRUNC) ? skb->len : copied);

out_free:
	skb_free_datagram(sk, skb);
out:
	return err;
}
static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
			       int *uaddr_len, int peer)
{
	struct net_device *dev;
	struct sock *sk	= sock->sk;

	if (peer)
		return -EOPNOTSUPP;

	uaddr->sa_family = AF_PACKET;
	rcu_read_lock();
	dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
	if (dev)
		strncpy(uaddr->sa_data, dev->name, 14);
	else
		memset(uaddr->sa_data, 0, 14);
	rcu_read_unlock();
	*uaddr_len = sizeof(*uaddr);

	return 0;
}
static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
			  int *uaddr_len, int peer)
{
	struct net_device *dev;
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr);

	if (peer)
		return -EOPNOTSUPP;

	sll->sll_family = AF_PACKET;
	sll->sll_ifindex = po->ifindex;
	sll->sll_protocol = po->num;
	sll->sll_pkttype = 0;
	rcu_read_lock();
	dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex);
	if (dev) {
		sll->sll_hatype = dev->type;
		sll->sll_halen = dev->addr_len;
		memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
	} else {
		sll->sll_hatype = 0;	/* Bad: we have no ARPHRD_UNSPEC */
		sll->sll_halen = 0;
	}
	rcu_read_unlock();
	*uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;

	return 0;
}
static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
			 int what)
{
	switch (i->type) {
	case PACKET_MR_MULTICAST:
		if (i->alen != dev->addr_len)
			return -EINVAL;
		if (what > 0)
			return dev_mc_add(dev, i->addr);
		else
			return dev_mc_del(dev, i->addr);
		break;
	case PACKET_MR_PROMISC:
		return dev_set_promiscuity(dev, what);
		break;
	case PACKET_MR_ALLMULTI:
		return dev_set_allmulti(dev, what);
		break;
	case PACKET_MR_UNICAST:
		if (i->alen != dev->addr_len)
			return -EINVAL;
		if (what > 0)
			return dev_uc_add(dev, i->addr);
		else
			return dev_uc_del(dev, i->addr);
		break;
	default:
		break;
	}
	return 0;
}
static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
{
	for ( ; i; i = i->next) {
		if (i->ifindex == dev->ifindex)
			packet_dev_mc(dev, i, what);
	}
}
static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
{
	struct packet_sock *po = pkt_sk(sk);
	struct packet_mclist *ml, *i;
	struct net_device *dev;
	int err;

	rtnl_lock();

	err = -ENODEV;
	dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex);
	if (!dev)
		goto done;

	err = -EINVAL;
	if (mreq->mr_alen > dev->addr_len)
		goto done;

	err = -ENOBUFS;
	i = kmalloc(sizeof(*i), GFP_KERNEL);
	if (i == NULL)
		goto done;

	err = 0;
	for (ml = po->mclist; ml; ml = ml->next) {
		if (ml->ifindex == mreq->mr_ifindex &&
		    ml->type == mreq->mr_type &&
		    ml->alen == mreq->mr_alen &&
		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
			ml->count++;
			/* Free the new element ... */
			kfree(i);
			goto done;
		}
	}

	i->type = mreq->mr_type;
	i->ifindex = mreq->mr_ifindex;
	i->alen = mreq->mr_alen;
	memcpy(i->addr, mreq->mr_address, i->alen);
	i->count = 1;
	i->next = po->mclist;
	po->mclist = i;
	err = packet_dev_mc(dev, i, 1);
	if (err) {
		po->mclist = i->next;
		kfree(i);
	}

done:
	rtnl_unlock();
	return err;
}
static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
{
	struct packet_mclist *ml, **mlp;

	rtnl_lock();

	for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
		if (ml->ifindex == mreq->mr_ifindex &&
		    ml->type == mreq->mr_type &&
		    ml->alen == mreq->mr_alen &&
		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
			if (--ml->count == 0) {
				struct net_device *dev;
				*mlp = ml->next;
				dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
				if (dev)
					packet_dev_mc(dev, ml, -1);
				kfree(ml);
			}
			rtnl_unlock();
			return 0;
		}
	}
	rtnl_unlock();
	return -EADDRNOTAVAIL;
}
static void packet_flush_mclist(struct sock *sk)
{
	struct packet_sock *po = pkt_sk(sk);
	struct packet_mclist *ml;

	if (!po->mclist)
		return;

	rtnl_lock();
	while ((ml = po->mclist) != NULL) {
		struct net_device *dev;

		po->mclist = ml->next;
		dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
		if (dev != NULL)
			packet_dev_mc(dev, ml, -1);
		kfree(ml);
	}
	rtnl_unlock();
}
static int
packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	int ret;

	if (level != SOL_PACKET)
		return -ENOPROTOOPT;

	switch (optname) {
	case PACKET_ADD_MEMBERSHIP:
	case PACKET_DROP_MEMBERSHIP:
	{
		struct packet_mreq_max mreq;
		int len = optlen;
		memset(&mreq, 0, sizeof(mreq));
		if (len < sizeof(struct packet_mreq))
			return -EINVAL;
		if (len > sizeof(mreq))
			len = sizeof(mreq);
		if (copy_from_user(&mreq, optval, len))
			return -EFAULT;
		if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
			return -EINVAL;
		if (optname == PACKET_ADD_MEMBERSHIP)
			ret = packet_mc_add(sk, &mreq);
		else
			ret = packet_mc_drop(sk, &mreq);
		return ret;
	}

	case PACKET_RX_RING:
	case PACKET_TX_RING:
	{
		struct tpacket_req req;

		if (optlen < sizeof(req))
			return -EINVAL;
		if (pkt_sk(sk)->has_vnet_hdr)
			return -EINVAL;
		if (copy_from_user(&req, optval, sizeof(req)))
			return -EFAULT;
		return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING);
	}
	case PACKET_COPY_THRESH:
	{
		int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		pkt_sk(sk)->copy_thresh = val;
		return 0;
	}
	case PACKET_VERSION:
	{
		int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;
		switch (val) {
		case TPACKET_V1:
		case TPACKET_V2:
			po->tp_version = val;
			return 0;
		default:
			return -EINVAL;
		}
	}
	case PACKET_RESERVE:
	{
		unsigned int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;
		po->tp_reserve = val;
		return 0;
	}
	case PACKET_LOSS:
	{
		unsigned int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;
		po->tp_loss = !!val;
		return 0;
	}
	case PACKET_AUXDATA:
	{
		int val;

		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->auxdata = !!val;
		return 0;
	}
	case PACKET_ORIGDEV:
	{
		int val;

		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->origdev = !!val;
		return 0;
	}
	case PACKET_VNET_HDR:
	{
		int val;

		if (sock->type != SOCK_RAW)
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->has_vnet_hdr = !!val;
		return 0;
	}
	case PACKET_TIMESTAMP:
	{
		int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->tp_tstamp = val;
		return 0;
	}
	default:
		return -ENOPROTOOPT;
	}
}
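/*
 * Illustration (not part of the kernel build): the ring-related options
 * above must be set before PACKET_RX_RING/PACKET_TX_RING, since tp_version
 * and tp_reserve are rejected with -EBUSY once a ring exists.  A minimal
 * userspace sketch:
 *
 *	int ver = TPACKET_V2;
 *	setsockopt(fd, SOL_PACKET, PACKET_VERSION, &ver, sizeof(ver));
 *	struct tpacket_req req = {
 *		.tp_block_size = 4096, .tp_block_nr = 64,
 *		.tp_frame_size = 2048, .tp_frame_nr  = 128,
 *	};
 *	setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));
 */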
static int packet_getsockopt(struct socket *sock, int level, int optname,
			     char __user *optval, int __user *optlen)
{
	int len;
	int val;
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	void *data;
	struct tpacket_stats st;

	if (level != SOL_PACKET)
		return -ENOPROTOOPT;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < 0)
		return -EINVAL;

	switch (optname) {
	case PACKET_STATISTICS:
		if (len > sizeof(struct tpacket_stats))
			len = sizeof(struct tpacket_stats);
		spin_lock_bh(&sk->sk_receive_queue.lock);
		st = po->stats;
		memset(&po->stats, 0, sizeof(st));
		spin_unlock_bh(&sk->sk_receive_queue.lock);
		st.tp_packets += st.tp_drops;

		data = &st;
		break;
	case PACKET_AUXDATA:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->auxdata;

		data = &val;
		break;
	case PACKET_ORIGDEV:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->origdev;

		data = &val;
		break;
	case PACKET_VNET_HDR:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->has_vnet_hdr;

		data = &val;
		break;
	case PACKET_VERSION:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->tp_version;
		data = &val;
		break;
	case PACKET_HDRLEN:
		if (len > sizeof(int))
			len = sizeof(int);
		if (copy_from_user(&val, optval, len))
			return -EFAULT;
		switch (val) {
		case TPACKET_V1:
			val = sizeof(struct tpacket_hdr);
			break;
		case TPACKET_V2:
			val = sizeof(struct tpacket2_hdr);
			break;
		default:
			return -EINVAL;
		}
		data = &val;
		break;
	case PACKET_RESERVE:
		if (len > sizeof(unsigned int))
			len = sizeof(unsigned int);
		val = po->tp_reserve;
		data = &val;
		break;
	case PACKET_LOSS:
		if (len > sizeof(unsigned int))
			len = sizeof(unsigned int);
		val = po->tp_loss;
		data = &val;
		break;
	case PACKET_TIMESTAMP:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->tp_tstamp;
		data = &val;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (put_user(len, optlen))
		return -EFAULT;
	if (copy_to_user(optval, data, len))
		return -EFAULT;
	return 0;
}
static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
{
	struct sock *sk;
	struct hlist_node *node;
	struct net_device *dev = data;
	struct net *net = dev_net(dev);

	rcu_read_lock();
	sk_for_each_rcu(sk, node, &net->packet.sklist) {
		struct packet_sock *po = pkt_sk(sk);

		switch (msg) {
		case NETDEV_UNREGISTER:
			if (po->mclist)
				packet_dev_mclist(dev, po->mclist, -1);
			/* fallthrough */

		case NETDEV_DOWN:
			if (dev->ifindex == po->ifindex) {
				spin_lock(&po->bind_lock);
				if (po->running) {
					__dev_remove_pack(&po->prot_hook);
					__sock_put(sk);
					po->running = 0;
					sk->sk_err = ENETDOWN;
					if (!sock_flag(sk, SOCK_DEAD))
						sk->sk_error_report(sk);
				}
				if (msg == NETDEV_UNREGISTER) {
					po->ifindex = -1;
					po->prot_hook.dev = NULL;
				}
				spin_unlock(&po->bind_lock);
			}
			break;
		case NETDEV_UP:
			if (dev->ifindex == po->ifindex) {
				spin_lock(&po->bind_lock);
				if (po->num && !po->running) {
					dev_add_pack(&po->prot_hook);
					sock_hold(sk);
					po->running = 1;
				}
				spin_unlock(&po->bind_lock);
			}
			break;
		}
	}
	rcu_read_unlock();
	return NOTIFY_DONE;
}
static int packet_ioctl(struct socket *sock, unsigned int cmd,
			unsigned long arg)
{
	struct sock *sk = sock->sk;

	switch (cmd) {
	case SIOCOUTQ:
	{
		int amount = sk_wmem_alloc_get(sk);

		return put_user(amount, (int __user *)arg);
	}
	case SIOCINQ:
	{
		struct sk_buff *skb;
		int amount = 0;

		spin_lock_bh(&sk->sk_receive_queue.lock);
		skb = skb_peek(&sk->sk_receive_queue);
		if (skb)
			amount = skb->len;
		spin_unlock_bh(&sk->sk_receive_queue.lock);
		return put_user(amount, (int __user *)arg);
	}
	case SIOCGSTAMP:
		return sock_get_timestamp(sk, (struct timeval __user *)arg);
	case SIOCGSTAMPNS:
		return sock_get_timestampns(sk, (struct timespec __user *)arg);

#ifdef CONFIG_INET
	case SIOCGIFBRDADDR:
	case SIOCSIFBRDADDR:
	case SIOCGIFNETMASK:
	case SIOCSIFNETMASK:
	case SIOCGIFDSTADDR:
	case SIOCSIFDSTADDR:
		return inet_dgram_ops.ioctl(sock, cmd, arg);
#endif

	default:
		return -ENOIOCTLCMD;
	}
	return 0;
}
static unsigned int packet_poll(struct file *file, struct socket *sock,
				poll_table *wait)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	unsigned int mask = datagram_poll(file, sock, wait);

	spin_lock_bh(&sk->sk_receive_queue.lock);
	if (po->rx_ring.pg_vec) {
		if (!packet_previous_frame(po, &po->rx_ring, TP_STATUS_KERNEL))
			mask |= POLLIN | POLLRDNORM;
	}
	spin_unlock_bh(&sk->sk_receive_queue.lock);
	spin_lock_bh(&sk->sk_write_queue.lock);
	if (po->tx_ring.pg_vec) {
		if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE))
			mask |= POLLOUT | POLLWRNORM;
	}
	spin_unlock_bh(&sk->sk_write_queue.lock);
	return mask;
}
/* Dirty? Well, I still did not learn a better way to account
 * for user mmaps.
 */

static void packet_mm_open(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct socket *sock = file->private_data;
	struct sock *sk = sock->sk;

	if (sk)
		atomic_inc(&pkt_sk(sk)->mapped);
}

static void packet_mm_close(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct socket *sock = file->private_data;
	struct sock *sk = sock->sk;

	if (sk)
		atomic_dec(&pkt_sk(sk)->mapped);
}
static const struct vm_operations_struct packet_mmap_ops = {
	.open	=	packet_mm_open,
	.close	=	packet_mm_close,
};
static void free_pg_vec(struct pgv *pg_vec, unsigned int order,
			unsigned int len)
{
	int i;

	for (i = 0; i < len; i++) {
		if (likely(pg_vec[i].buffer)) {
			if (is_vmalloc_addr(pg_vec[i].buffer))
				vfree(pg_vec[i].buffer);
			else
				free_pages((unsigned long)pg_vec[i].buffer,
					   order);
			pg_vec[i].buffer = NULL;
		}
	}
	kfree(pg_vec);
}
static inline char *alloc_one_pg_vec_page(unsigned long order)
{
	char *buffer = NULL;
	gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP |
			  __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;

	buffer = (char *) __get_free_pages(gfp_flags, order);

	if (buffer)
		return buffer;

	/*
	 * __get_free_pages failed, fall back to vmalloc
	 */
	buffer = vzalloc((1 << order) * PAGE_SIZE);

	if (buffer)
		return buffer;

	/*
	 * vmalloc failed, lets dig into swap here
	 */
	gfp_flags &= ~__GFP_NORETRY;
	buffer = (char *)__get_free_pages(gfp_flags, order);
	if (buffer)
		return buffer;

	/*
	 * complete and utter failure
	 */
	return NULL;
}
static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order)
{
	unsigned int block_nr = req->tp_block_nr;
	struct pgv *pg_vec;
	int i;

	pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL);
	if (unlikely(!pg_vec))
		goto out;

	for (i = 0; i < block_nr; i++) {
		pg_vec[i].buffer = alloc_one_pg_vec_page(order);
		if (unlikely(!pg_vec[i].buffer))
			goto out_free_pgvec;
	}

out:
	return pg_vec;

out_free_pgvec:
	free_pg_vec(pg_vec, order, block_nr);
	pg_vec = NULL;
	goto out;
}
static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
		int closing, int tx_ring)
{
	struct pgv *pg_vec = NULL;
	struct packet_sock *po = pkt_sk(sk);
	int was_running, order = 0;
	struct packet_ring_buffer *rb;
	struct sk_buff_head *rb_queue;
	__be16 num;
	int err;

	rb = tx_ring ? &po->tx_ring : &po->rx_ring;
	rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;

	err = -EBUSY;
	if (!closing) {
		if (atomic_read(&po->mapped))
			goto out;
		if (atomic_read(&rb->pending))
			goto out;
	}

	if (req->tp_block_nr) {
		/* Sanity tests and some calculations */
		err = -EBUSY;
		if (unlikely(rb->pg_vec))
			goto out;

		switch (po->tp_version) {
		case TPACKET_V1:
			po->tp_hdrlen = TPACKET_HDRLEN;
			break;
		case TPACKET_V2:
			po->tp_hdrlen = TPACKET2_HDRLEN;
			break;
		}

		err = -EINVAL;
		if (unlikely((int)req->tp_block_size <= 0))
			goto out;
		if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
			goto out;
		if (unlikely(req->tp_frame_size < po->tp_hdrlen +
					po->tp_reserve))
			goto out;
		if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
			goto out;

		rb->frames_per_block = req->tp_block_size/req->tp_frame_size;
		if (unlikely(rb->frames_per_block <= 0))
			goto out;
		if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
					req->tp_frame_nr))
			goto out;

		err = -ENOMEM;
		order = get_order(req->tp_block_size);
		pg_vec = alloc_pg_vec(req, order);
		if (unlikely(!pg_vec))
			goto out;
	}
	/* Done */
	else {
		err = -EINVAL;
		if (unlikely(req->tp_frame_nr))
			goto out;
	}

	lock_sock(sk);

	/* Detach socket from network */
	spin_lock(&po->bind_lock);
	was_running = po->running;
	num = po->num;
	if (was_running) {
		__dev_remove_pack(&po->prot_hook);
		po->num = 0;
		po->running = 0;
		__sock_put(sk);
	}
	spin_unlock(&po->bind_lock);

	synchronize_net();

	err = -EBUSY;
	mutex_lock(&po->pg_vec_lock);
	if (closing || atomic_read(&po->mapped) == 0) {
		err = 0;
		spin_lock_bh(&rb_queue->lock);
		swap(rb->pg_vec, pg_vec);
		rb->frame_max = (req->tp_frame_nr - 1);
		rb->head = 0;
		rb->frame_size = req->tp_frame_size;
		spin_unlock_bh(&rb_queue->lock);

		swap(rb->pg_vec_order, order);
		swap(rb->pg_vec_len, req->tp_block_nr);

		rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
		po->prot_hook.func = (po->rx_ring.pg_vec) ?
						tpacket_rcv : packet_rcv;
		skb_queue_purge(rb_queue);
		if (atomic_read(&po->mapped))
			pr_err("packet_mmap: vma is busy: %d\n",
			       atomic_read(&po->mapped));
	}
	mutex_unlock(&po->pg_vec_lock);

	spin_lock(&po->bind_lock);
	if (was_running && !po->running) {
		sock_hold(sk);
		po->running = 1;
		po->num = num;
		dev_add_pack(&po->prot_hook);
	}
	spin_unlock(&po->bind_lock);

	release_sock(sk);

	if (pg_vec)
		free_pg_vec(pg_vec, order, req->tp_block_nr);
out:
	return err;
}
static int packet_mmap(struct file *file, struct socket *sock,
		struct vm_area_struct *vma)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	unsigned long size, expected_size;
	struct packet_ring_buffer *rb;
	unsigned long start;
	int err = -EINVAL;
	int i;

	if (vma->vm_pgoff)
		return -EINVAL;

	mutex_lock(&po->pg_vec_lock);

	expected_size = 0;
	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
		if (rb->pg_vec) {
			expected_size += rb->pg_vec_len
						* rb->pg_vec_pages
						* PAGE_SIZE;
		}
	}

	if (expected_size == 0)
		goto out;

	size = vma->vm_end - vma->vm_start;
	if (size != expected_size)
		goto out;

	start = vma->vm_start;
	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
		if (rb->pg_vec == NULL)
			continue;

		for (i = 0; i < rb->pg_vec_len; i++) {
			struct page *page;
			void *kaddr = rb->pg_vec[i].buffer;
			int pg_num;

			for (pg_num = 0; pg_num < rb->pg_vec_pages; pg_num++) {
				page = pgv_to_page(kaddr);
				err = vm_insert_page(vma, start, page);
				if (unlikely(err))
					goto out;
				start += PAGE_SIZE;
				kaddr += PAGE_SIZE;
			}
		}
	}

	atomic_inc(&po->mapped);
	vma->vm_ops = &packet_mmap_ops;
	err = 0;

out:
	mutex_unlock(&po->pg_vec_lock);
	return err;
}
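/*
 * Illustration (not part of the kernel build): after packet_set_ring()
 * succeeds, userspace maps the ring(s) with a single mmap() whose length
 * matches the expected_size check above (rx followed by tx when both are
 * configured); a minimal sketch for an RX-only ring:
 *
 *	size_t len = req.tp_block_size * req.tp_block_nr;
 *	void *ring = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *			  MAP_SHARED, fd, 0);
 */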
static const struct proto_ops packet_ops_spkt = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind_spkt,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname_spkt,
	.poll =		datagram_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	packet_sendmsg_spkt,
	.recvmsg =	packet_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};
static const struct proto_ops packet_ops = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname,
	.poll =		packet_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	packet_setsockopt,
	.getsockopt =	packet_getsockopt,
	.sendmsg =	packet_sendmsg,
	.recvmsg =	packet_recvmsg,
	.mmap =		packet_mmap,
	.sendpage =	sock_no_sendpage,
};
static const struct net_proto_family packet_family_ops = {
	.family =	PF_PACKET,
	.create =	packet_create,
	.owner	=	THIS_MODULE,
};
static struct notifier_block packet_netdev_notifier = {
	.notifier_call =	packet_notifier,
};
#ifdef CONFIG_PROC_FS

static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	struct net *net = seq_file_net(seq);

	rcu_read_lock();
	return seq_hlist_start_head_rcu(&net->packet.sklist, *pos);
}

static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct net *net = seq_file_net(seq);
	return seq_hlist_next_rcu(v, &net->packet.sklist, pos);
}

static void packet_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}

static int packet_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "sk       RefCnt Type Proto  Iface R Rmem   User   Inode\n");
	else {
		struct sock *s = sk_entry(v);
		const struct packet_sock *po = pkt_sk(s);

		seq_printf(seq,
			   "%p %-6d %-4d %04x   %-5d %1d %-6u %-6u %-6lu\n",
			   s,
			   atomic_read(&s->sk_refcnt),
			   s->sk_type,
			   ntohs(po->num),
			   po->ifindex,
			   po->running,
			   atomic_read(&s->sk_rmem_alloc),
			   sock_i_uid(s),
			   sock_i_ino(s));
	}

	return 0;
}

static const struct seq_operations packet_seq_ops = {
	.start	= packet_seq_start,
	.next	= packet_seq_next,
	.stop	= packet_seq_stop,
	.show	= packet_seq_show,
};

static int packet_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &packet_seq_ops,
			    sizeof(struct seq_net_private));
}

static const struct file_operations packet_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= packet_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};

#endif
static int __net_init packet_net_init(struct net *net)
{
	spin_lock_init(&net->packet.sklist_lock);
	INIT_HLIST_HEAD(&net->packet.sklist);

	if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
		return -ENOMEM;

	return 0;
}

static void __net_exit packet_net_exit(struct net *net)
{
	proc_net_remove(net, "packet");
}

static struct pernet_operations packet_net_ops = {
	.init = packet_net_init,
	.exit = packet_net_exit,
};
static void __exit packet_exit(void)
{
	unregister_netdevice_notifier(&packet_netdev_notifier);
	unregister_pernet_subsys(&packet_net_ops);
	sock_unregister(PF_PACKET);
	proto_unregister(&packet_proto);
}

static int __init packet_init(void)
{
	int rc = proto_register(&packet_proto, 0);

	if (rc != 0)
		goto out;

	sock_register(&packet_family_ops);
	register_pernet_subsys(&packet_net_ops);
	register_netdevice_notifier(&packet_netdev_notifier);
out:
	return rc;
}

module_init(packet_init);
module_exit(packet_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_PACKET);