make TLLAO option for NA packets configurable
[deliverable/linux.git] / net/packet/af_packet.c
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		PACKET - implements raw packet sockets.
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *
 * Fixes:
 *		Alan Cox	:	verify_area() now used correctly
 *		Alan Cox	:	new skbuff lists, look ma no backlogs!
 *		Alan Cox	:	tidied skbuff lists.
 *		Alan Cox	:	Now uses generic datagram routines I
 *					added. Also fixed the peek/read crash
 *					from all old Linux datagram code.
 *		Alan Cox	:	Uses the improved datagram code.
 *		Alan Cox	:	Added NULL's for socket options.
 *		Alan Cox	:	Re-commented the code.
 *		Alan Cox	:	Use new kernel side addressing
 *		Rob Janssen	:	Correct MTU usage.
 *		Dave Platt	:	Counter leaks caused by incorrect
 *					interrupt locking and some slightly
 *					dubious gcc output. Can you read
 *					compiler: it said _VOLATILE_
 *		Richard Kooijman:	Timestamp fixes.
 *		Alan Cox	:	New buffers. Use sk->mac.raw.
 *		Alan Cox	:	sendmsg/recvmsg support.
 *		Alan Cox	:	Protocol setting support
 *		Alexey Kuznetsov:	Untied from IPv4 stack.
 *		Cyrus Durgin	:	Fixed kerneld for kmod.
 *		Michal Ostrowski:	Module initialization cleanup.
 *		Ulises Alonso	:	Frame number limit removal and
 *					packet_set_ring memory leak.
 *		Eric Biederman	:	Allow for > 8 byte hardware addresses.
 *					The convention is that longer addresses
 *					will simply extend the hardware address
 *					byte arrays at the end of sockaddr_ll
 *					and packet_mreq.
 *		Johann Baudy	:	Added TX RING.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 */

#include <linux/types.h>
#include <linux/mm.h>
#include <linux/capability.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/wireless.h>
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
#include <asm/page.h>
#include <asm/cacheflush.h>
#include <asm/io.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/poll.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/mutex.h>

#ifdef CONFIG_INET
#include <net/inet_common.h>
#endif

/*
   Assumptions:
   - if the device has no dev->hard_header routine, it adds and removes the ll
     header inside itself. In this case the ll header is invisible outside of
     the device, but higher levels still should reserve dev->hard_header_len.
     Some devices are clever enough to reallocate the skb when the header
     will not fit in the reserved space (tunnel), others are silly
     (PPP).
   - a packet socket receives packets with the ll header pulled,
     so that SOCK_RAW should push it back.

On receive:
-----------

Incoming, dev->hard_header!=NULL
   mac_header -> ll header
   data       -> data

Outgoing, dev->hard_header!=NULL
   mac_header -> ll header
   data       -> ll header

Incoming, dev->hard_header==NULL
   mac_header -> UNKNOWN position. It is very likely that it points to the ll
		 header. PPP does this, which is wrong, because it introduces
		 asymmetry between the rx and tx paths.
   data       -> data

Outgoing, dev->hard_header==NULL
   mac_header -> data. ll header is still not built!
   data       -> data

Summary:
   If dev->hard_header==NULL we are unlikely to restore a sensible ll header.


On transmit:
------------

dev->hard_header != NULL
   mac_header -> ll header
   data       -> ll header

dev->hard_header == NULL (ll header is added by device, we cannot control it)
   mac_header -> data
   data       -> data

   We should set nh.raw on output to the correct position,
   the packet classifier depends on it.
 */

/* Private packet socket structures. */

struct packet_mclist {
	struct packet_mclist	*next;
	int			ifindex;
	int			count;
	unsigned short		type;
	unsigned short		alen;
	unsigned char		addr[MAX_ADDR_LEN];
};
/* identical to struct packet_mreq except it has
 * a longer address field.
 */
struct packet_mreq_max {
	int		mr_ifindex;
	unsigned short	mr_type;
	unsigned short	mr_alen;
	unsigned char	mr_address[MAX_ADDR_LEN];
};

#ifdef CONFIG_PACKET_MMAP
static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
		int closing, int tx_ring);

struct packet_ring_buffer {
	char			**pg_vec;
	unsigned int		head;
	unsigned int		frames_per_block;
	unsigned int		frame_size;
	unsigned int		frame_max;

	unsigned int		pg_vec_order;
	unsigned int		pg_vec_pages;
	unsigned int		pg_vec_len;

	atomic_t		pending;
};

struct packet_sock;
static int tpacket_snd(struct packet_sock *po, struct msghdr *msg);
#endif

static void packet_flush_mclist(struct sock *sk);

struct packet_sock {
	/* struct sock has to be the first member of packet_sock */
	struct sock		sk;
	struct tpacket_stats	stats;
#ifdef CONFIG_PACKET_MMAP
	struct packet_ring_buffer	rx_ring;
	struct packet_ring_buffer	tx_ring;
	int			copy_thresh;
#endif
	struct packet_type	prot_hook;
	spinlock_t		bind_lock;
	struct mutex		pg_vec_lock;
	unsigned int		running:1,	/* prot_hook is attached */
				auxdata:1,
				origdev:1;
	int			ifindex;	/* bound device */
	__be16			num;
	struct packet_mclist	*mclist;
#ifdef CONFIG_PACKET_MMAP
	atomic_t		mapped;
	enum tpacket_versions	tp_version;
	unsigned int		tp_hdrlen;
	unsigned int		tp_reserve;
	unsigned int		tp_loss:1;
#endif
};

struct packet_skb_cb {
	unsigned int origlen;
	union {
		struct sockaddr_pkt pkt;
		struct sockaddr_ll ll;
	} sa;
};

#define PACKET_SKB_CB(__skb)	((struct packet_skb_cb *)((__skb)->cb))
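
/*
 * PACKET_SKB_CB overlays address metadata on the skb->cb[] scratch
 * area while the skb sits in the receive queue: sa.pkt/sa.ll are
 * copied out as msg_name by packet_recvmsg(), and origlen feeds the
 * PACKET_AUXDATA cmsg.  The BUILD_BUG_ON() in packet_rcv() checks
 * that the largest layout still fits in cb[].
 */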

#ifdef CONFIG_PACKET_MMAP

static void __packet_set_status(struct packet_sock *po, void *frame, int status)
{
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;

	h.raw = frame;
	switch (po->tp_version) {
	case TPACKET_V1:
		h.h1->tp_status = status;
		flush_dcache_page(virt_to_page(&h.h1->tp_status));
		break;
	case TPACKET_V2:
		h.h2->tp_status = status;
		flush_dcache_page(virt_to_page(&h.h2->tp_status));
		break;
	default:
		pr_err("TPACKET version not supported\n");
		BUG();
	}

	smp_wmb();
}

static int __packet_get_status(struct packet_sock *po, void *frame)
{
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;

	smp_rmb();

	h.raw = frame;
	switch (po->tp_version) {
	case TPACKET_V1:
		flush_dcache_page(virt_to_page(&h.h1->tp_status));
		return h.h1->tp_status;
	case TPACKET_V2:
		flush_dcache_page(virt_to_page(&h.h2->tp_status));
		return h.h2->tp_status;
	default:
		pr_err("TPACKET version not supported\n");
		BUG();
		return 0;
	}
}

static void *packet_lookup_frame(struct packet_sock *po,
		struct packet_ring_buffer *rb,
		unsigned int position,
		int status)
{
	unsigned int pg_vec_pos, frame_offset;
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;

	pg_vec_pos = position / rb->frames_per_block;
	frame_offset = position % rb->frames_per_block;

	h.raw = rb->pg_vec[pg_vec_pos] + (frame_offset * rb->frame_size);

	if (status != __packet_get_status(po, h.raw))
		return NULL;

	return h.raw;
}

static inline void *packet_current_frame(struct packet_sock *po,
		struct packet_ring_buffer *rb,
		int status)
{
	return packet_lookup_frame(po, rb, rb->head, status);
}

static inline void *packet_previous_frame(struct packet_sock *po,
		struct packet_ring_buffer *rb,
		int status)
{
	unsigned int previous = rb->head ? rb->head - 1 : rb->frame_max;
	return packet_lookup_frame(po, rb, previous, status);
}

static inline void packet_increment_head(struct packet_ring_buffer *buff)
{
	buff->head = buff->head != buff->frame_max ? buff->head+1 : 0;
}

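/*
 * The tp_status word is the handshake between kernel and user space:
 * the kernel only touches frames it owns (TP_STATUS_KERNEL on RX,
 * TP_STATUS_SEND_REQUEST on TX) and flips ownership via
 * __packet_set_status() behind a memory barrier.  A minimal
 * user-space RX loop over the mmap()ed ring might look like the
 * sketch below (illustrative, not part of this file; "ring", "pfd"
 * and handle_frame() are assumed to be set up by the caller):
 *
 *	for (unsigned int i = 0; ; i = (i + 1) % req.tp_frame_nr) {
 *		struct tpacket_hdr *hdr =
 *			(void *)(ring + i * req.tp_frame_size);
 *		while (!(hdr->tp_status & TP_STATUS_USER))
 *			poll(&pfd, 1, -1);		// wait for the kernel
 *		handle_frame((char *)hdr + hdr->tp_mac, hdr->tp_snaplen);
 *		hdr->tp_status = TP_STATUS_KERNEL;	// hand the slot back
 *	}
 */
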
#endif

static inline struct packet_sock *pkt_sk(struct sock *sk)
{
	return (struct packet_sock *)sk;
}

static void packet_sock_destruct(struct sock *sk)
{
	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
	WARN_ON(atomic_read(&sk->sk_wmem_alloc));

	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_err("Attempt to release alive packet socket: %p\n", sk);
		return;
	}

	sk_refcnt_debug_dec(sk);
}


static const struct proto_ops packet_ops;

static const struct proto_ops packet_ops_spkt;

static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,
			   struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct sockaddr_pkt *spkt;

	/*
	 *	When we registered the protocol we saved the socket in the data
	 *	field for just this event.
	 */

	sk = pt->af_packet_priv;

	/*
	 *	Yank back the headers [hope the device set this
	 *	right or kerboom...]
	 *
	 *	Incoming packets have the ll header pulled,
	 *	push it back.
	 *
	 *	For outgoing ones skb->data == skb_mac_header(skb)
	 *	so that this procedure is a no-op.
	 */

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto out;

	if (dev_net(dev) != sock_net(sk))
		goto out;

	skb = skb_share_check(skb, GFP_ATOMIC);
	if (skb == NULL)
		goto oom;

	/* drop any routing info */
	skb_dst_drop(skb);

	/* drop conntrack reference */
	nf_reset(skb);

	spkt = &PACKET_SKB_CB(skb)->sa.pkt;

	skb_push(skb, skb->data - skb_mac_header(skb));

	/*
	 *	The SOCK_PACKET socket receives _all_ frames.
	 */

	spkt->spkt_family = dev->type;
	strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
	spkt->spkt_protocol = skb->protocol;

	/*
	 *	Charge the memory to the socket. This is done specifically
	 *	to prevent sockets using all the memory up.
	 */

	if (sock_queue_rcv_skb(sk, skb) == 0)
		return 0;

out:
	kfree_skb(skb);
oom:
	return 0;
}


/*
 *	Output a raw packet to a device layer. This bypasses all the other
 *	protocol layers and you must therefore supply it with a complete frame
 */

static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
			       struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_pkt *saddr = (struct sockaddr_pkt *)msg->msg_name;
	struct sk_buff *skb;
	struct net_device *dev;
	__be16 proto = 0;
	int err;

	/*
	 *	Get and verify the address.
	 */

	if (saddr) {
		if (msg->msg_namelen < sizeof(struct sockaddr))
			return -EINVAL;
		if (msg->msg_namelen == sizeof(struct sockaddr_pkt))
			proto = saddr->spkt_protocol;
	} else
		return -ENOTCONN;	/* SOCK_PACKET must be sent giving an address */

	/*
	 *	Find the device first to size check it
	 */

	saddr->spkt_device[13] = 0;
	dev = dev_get_by_name(sock_net(sk), saddr->spkt_device);
	err = -ENODEV;
	if (dev == NULL)
		goto out_unlock;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_unlock;

	/*
	 * You may not queue a frame bigger than the mtu. This is the lowest level
	 * raw protocol and you must do your own fragmentation at this level.
	 */

	err = -EMSGSIZE;
	if (len > dev->mtu + dev->hard_header_len)
		goto out_unlock;

	err = -ENOBUFS;
	skb = sock_wmalloc(sk, len + LL_RESERVED_SPACE(dev), 0, GFP_KERNEL);

	/*
	 * If the write buffer is full, then tough. At this level the user
	 * gets to deal with the problem - do your own algorithmic backoffs.
	 * That's far more flexible.
	 */

	if (skb == NULL)
		goto out_unlock;

	/*
	 *	Fill it in
	 */

	/* FIXME: Save some space for broken drivers that write a
	 * hard header at transmission time by themselves. PPP is the
	 * notable one here. This should really be fixed at the driver level.
	 */
	skb_reserve(skb, LL_RESERVED_SPACE(dev));
	skb_reset_network_header(skb);

	/* Try to align data part correctly */
	if (dev->header_ops) {
		skb->data -= dev->hard_header_len;
		skb->tail -= dev->hard_header_len;
		if (len < dev->hard_header_len)
			skb_reset_network_header(skb);
	}

	/* Returns -EFAULT on error */
	err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;
	if (err)
		goto out_free;

	/*
	 *	Now send it
	 */

	dev_queue_xmit(skb);
	dev_put(dev);
	return len;

out_free:
	kfree_skb(skb);
out_unlock:
	if (dev)
		dev_put(dev);
	return err;
}

static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
				      unsigned int res)
{
	struct sk_filter *filter;

	rcu_read_lock_bh();
	filter = rcu_dereference(sk->sk_filter);
	if (filter != NULL)
		res = sk_run_filter(skb, filter->insns, filter->len);
	rcu_read_unlock_bh();

	return res;
}

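/*
 * run_filter() returns the snap length the attached socket filter asks
 * for: 0 means "drop the packet", anything else caps how many bytes of
 * it are delivered.  With no filter attached, the caller's default
 * (the full skb->len) passes through unchanged.  User space attaches a
 * classic BPF filter in the usual way (illustrative sketch; assumes a
 * pre-built struct sock_fprog "prog"):
 *
 *	setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog));
 */
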
/*
 * If we've lost frames since the last time we queued one to the
 * sk_receive_queue, we need to record it here.
 * This must be called under the protection of the socket lock
 * to prevent racing with other softirqs and user space
 */
static inline void record_packet_gap(struct sk_buff *skb,
				     struct packet_sock *po)
{
	/*
	 * We overload the mark field here, since we're about
	 * to enqueue to a receive queue and nobody else will
	 * use this field at this point
	 */
	skb->mark = po->stats.tp_gap;
	po->stats.tp_gap = 0;
	return;
}

static inline __u32 check_packet_gap(struct sk_buff *skb)
{
	return skb->mark;
}

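/*
 * The recorded gap is surfaced to user space by packet_recvmsg() as a
 * PACKET_GAPDATA control message: a __u32 count of frames dropped
 * between the previously queued packet and this one.
 */
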
/*
   This function performs lazy skb cloning in the hope that most
   packets are discarded by BPF.

   Note the tricky part: we DO mangle the shared skb! skb->data,
   skb->len and skb->cb are mangled. It works because (and until)
   packets falling here are owned by the current CPU. Output packets
   are cloned by dev_queue_xmit_nit(), input packets are processed by
   net_bh sequentially, so that if we return the skb to its original
   state on exit, we will not harm anyone.
 */

static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
		      struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct sockaddr_ll *sll;
	struct packet_sock *po;
	u8 *skb_head = skb->data;
	int skb_len = skb->len;
	unsigned int snaplen, res;

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = pt->af_packet_priv;
	po = pkt_sk(sk);

	if (dev_net(dev) != sock_net(sk))
		goto drop;

	skb->dev = dev;

	if (dev->header_ops) {
		/* The device has an explicit notion of ll header,
		   exported to higher levels.

		   Otherwise, the device hides details of its frame
		   structure, so that the corresponding packet head is
		   never delivered to the user.
		 */
		if (sk->sk_type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb_mac_header(skb));
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb_network_offset(skb));
		}
	}

	snaplen = skb->len;

	res = run_filter(skb, sk, snaplen);
	if (!res)
		goto drop_n_restore;
	if (snaplen > res)
		snaplen = res;

	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
	    (unsigned)sk->sk_rcvbuf)
		goto drop_n_acct;

	if (skb_shared(skb)) {
		struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
		if (nskb == NULL)
			goto drop_n_acct;

		if (skb_head != skb->data) {
			skb->data = skb_head;
			skb->len = skb_len;
		}
		kfree_skb(skb);
		skb = nskb;
	}

	BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
		     sizeof(skb->cb));

	sll = &PACKET_SKB_CB(skb)->sa.ll;
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	if (unlikely(po->origdev))
		sll->sll_ifindex = orig_dev->ifindex;
	else
		sll->sll_ifindex = dev->ifindex;

	sll->sll_halen = dev_parse_header(skb, sll->sll_addr);

	PACKET_SKB_CB(skb)->origlen = skb->len;

	if (pskb_trim(skb, snaplen))
		goto drop_n_acct;

	skb_set_owner_r(skb, sk);
	skb->dev = NULL;
	skb_dst_drop(skb);

	/* drop conntrack reference */
	nf_reset(skb);

	spin_lock(&sk->sk_receive_queue.lock);
	po->stats.tp_packets++;
	record_packet_gap(skb, po);
	__skb_queue_tail(&sk->sk_receive_queue, skb);
	spin_unlock(&sk->sk_receive_queue.lock);
	sk->sk_data_ready(sk, skb->len);
	return 0;

drop_n_acct:
	spin_lock(&sk->sk_receive_queue.lock);
	po->stats.tp_drops++;
	po->stats.tp_gap++;
	spin_unlock(&sk->sk_receive_queue.lock);

drop_n_restore:
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb_len;
	}
drop:
	consume_skb(skb);
	return 0;
}

#ifdef CONFIG_PACKET_MMAP
static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
		       struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct packet_sock *po;
	struct sockaddr_ll *sll;
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;
	u8 *skb_head = skb->data;
	int skb_len = skb->len;
	unsigned int snaplen, res;
	unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
	unsigned short macoff, netoff, hdrlen;
	struct sk_buff *copy_skb = NULL;
	struct timeval tv;
	struct timespec ts;

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = pt->af_packet_priv;
	po = pkt_sk(sk);

	if (dev_net(dev) != sock_net(sk))
		goto drop;

	if (dev->header_ops) {
		if (sk->sk_type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb_mac_header(skb));
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb_network_offset(skb));
		}
	}

	if (skb->ip_summed == CHECKSUM_PARTIAL)
		status |= TP_STATUS_CSUMNOTREADY;

	snaplen = skb->len;

	res = run_filter(skb, sk, snaplen);
	if (!res)
		goto drop_n_restore;
	if (snaplen > res)
		snaplen = res;

	if (sk->sk_type == SOCK_DGRAM) {
		macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 +
				  po->tp_reserve;
	} else {
		unsigned maclen = skb_network_offset(skb);
		netoff = TPACKET_ALIGN(po->tp_hdrlen +
				       (maclen < 16 ? 16 : maclen)) +
			 po->tp_reserve;
		macoff = netoff - maclen;
	}

	if (macoff + snaplen > po->rx_ring.frame_size) {
		if (po->copy_thresh &&
		    atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
		    (unsigned)sk->sk_rcvbuf) {
			if (skb_shared(skb)) {
				copy_skb = skb_clone(skb, GFP_ATOMIC);
			} else {
				copy_skb = skb_get(skb);
				skb_head = skb->data;
			}
			if (copy_skb)
				skb_set_owner_r(copy_skb, sk);
		}
		snaplen = po->rx_ring.frame_size - macoff;
		if ((int)snaplen < 0)
			snaplen = 0;
	}

	spin_lock(&sk->sk_receive_queue.lock);
	h.raw = packet_current_frame(po, &po->rx_ring, TP_STATUS_KERNEL);
	if (!h.raw)
		goto ring_is_full;
	packet_increment_head(&po->rx_ring);
	po->stats.tp_packets++;
	if (copy_skb) {
		status |= TP_STATUS_COPY;
		__skb_queue_tail(&sk->sk_receive_queue, copy_skb);
	}
	if (!po->stats.tp_drops)
		status &= ~TP_STATUS_LOSING;
	spin_unlock(&sk->sk_receive_queue.lock);

	skb_copy_bits(skb, 0, h.raw + macoff, snaplen);

	switch (po->tp_version) {
	case TPACKET_V1:
		h.h1->tp_len = skb->len;
		h.h1->tp_snaplen = snaplen;
		h.h1->tp_mac = macoff;
		h.h1->tp_net = netoff;
		if (skb->tstamp.tv64)
			tv = ktime_to_timeval(skb->tstamp);
		else
			do_gettimeofday(&tv);
		h.h1->tp_sec = tv.tv_sec;
		h.h1->tp_usec = tv.tv_usec;
		hdrlen = sizeof(*h.h1);
		break;
	case TPACKET_V2:
		h.h2->tp_len = skb->len;
		h.h2->tp_snaplen = snaplen;
		h.h2->tp_mac = macoff;
		h.h2->tp_net = netoff;
		if (skb->tstamp.tv64)
			ts = ktime_to_timespec(skb->tstamp);
		else
			getnstimeofday(&ts);
		h.h2->tp_sec = ts.tv_sec;
		h.h2->tp_nsec = ts.tv_nsec;
		h.h2->tp_vlan_tci = skb->vlan_tci;
		hdrlen = sizeof(*h.h2);
		break;
	default:
		BUG();
	}

	sll = h.raw + TPACKET_ALIGN(hdrlen);
	sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	if (unlikely(po->origdev))
		sll->sll_ifindex = orig_dev->ifindex;
	else
		sll->sll_ifindex = dev->ifindex;

	__packet_set_status(po, h.raw, status);
	smp_mb();
	{
		struct page *p_start, *p_end;
		u8 *h_end = h.raw + macoff + snaplen - 1;

		p_start = virt_to_page(h.raw);
		p_end = virt_to_page(h_end);
		while (p_start <= p_end) {
			flush_dcache_page(p_start);
			p_start++;
		}
	}

	sk->sk_data_ready(sk, 0);

drop_n_restore:
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb_len;
	}
drop:
	kfree_skb(skb);
	return 0;

ring_is_full:
	po->stats.tp_drops++;
	po->stats.tp_gap++;
	spin_unlock(&sk->sk_receive_queue.lock);

	sk->sk_data_ready(sk, 0);
	kfree_skb(copy_skb);
	goto drop_n_restore;
}

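/*
 * Ring-frame layout produced above: the tpacket{,2}_hdr sits at the
 * start of the frame, the sockaddr_ll follows at TPACKET_ALIGN(hdrlen),
 * and the packet bytes land at tp_mac/tp_net (after tp_reserve and
 * alignment).  Oversized packets are truncated to the frame; if
 * copy_thresh allows it, a full copy is also queued to the regular
 * receive queue and the frame is flagged TP_STATUS_COPY.
 */
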
static void tpacket_destruct_skb(struct sk_buff *skb)
{
	struct packet_sock *po = pkt_sk(skb->sk);
	void *ph;

	BUG_ON(skb == NULL);

	if (likely(po->tx_ring.pg_vec)) {
		ph = skb_shinfo(skb)->destructor_arg;
		BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING);
		BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
		atomic_dec(&po->tx_ring.pending);
		__packet_set_status(po, ph, TP_STATUS_AVAILABLE);
	}

	sock_wfree(skb);
}

867
40d4e3df
ED
868static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
869 void *frame, struct net_device *dev, int size_max,
870 __be16 proto, unsigned char *addr)
69e3c75f
JB
871{
872 union {
873 struct tpacket_hdr *h1;
874 struct tpacket2_hdr *h2;
875 void *raw;
876 } ph;
877 int to_write, offset, len, tp_len, nr_frags, len_max;
878 struct socket *sock = po->sk.sk_socket;
879 struct page *page;
880 void *data;
881 int err;
882
883 ph.raw = frame;
884
885 skb->protocol = proto;
886 skb->dev = dev;
887 skb->priority = po->sk.sk_priority;
2d37a186 888 skb->mark = po->sk.sk_mark;
69e3c75f
JB
889 skb_shinfo(skb)->destructor_arg = ph.raw;
890
891 switch (po->tp_version) {
892 case TPACKET_V2:
893 tp_len = ph.h2->tp_len;
894 break;
895 default:
896 tp_len = ph.h1->tp_len;
897 break;
898 }
899 if (unlikely(tp_len > size_max)) {
40d4e3df 900 pr_err("packet size is too long (%d > %d)\n", tp_len, size_max);
69e3c75f
JB
901 return -EMSGSIZE;
902 }
903
904 skb_reserve(skb, LL_RESERVED_SPACE(dev));
905 skb_reset_network_header(skb);
906
907 data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll);
908 to_write = tp_len;
909
910 if (sock->type == SOCK_DGRAM) {
911 err = dev_hard_header(skb, dev, ntohs(proto), addr,
912 NULL, tp_len);
913 if (unlikely(err < 0))
914 return -EINVAL;
40d4e3df 915 } else if (dev->hard_header_len) {
69e3c75f
JB
916 /* net device doesn't like empty head */
917 if (unlikely(tp_len <= dev->hard_header_len)) {
40d4e3df
ED
918 pr_err("packet size is too short (%d < %d)\n",
919 tp_len, dev->hard_header_len);
69e3c75f
JB
920 return -EINVAL;
921 }
922
923 skb_push(skb, dev->hard_header_len);
924 err = skb_store_bits(skb, 0, data,
925 dev->hard_header_len);
926 if (unlikely(err))
927 return err;
928
929 data += dev->hard_header_len;
930 to_write -= dev->hard_header_len;
931 }
932
933 err = -EFAULT;
934 page = virt_to_page(data);
935 offset = offset_in_page(data);
936 len_max = PAGE_SIZE - offset;
937 len = ((to_write > len_max) ? len_max : to_write);
938
939 skb->data_len = to_write;
940 skb->len += to_write;
941 skb->truesize += to_write;
942 atomic_add(to_write, &po->sk.sk_wmem_alloc);
943
944 while (likely(to_write)) {
945 nr_frags = skb_shinfo(skb)->nr_frags;
946
947 if (unlikely(nr_frags >= MAX_SKB_FRAGS)) {
40d4e3df
ED
948 pr_err("Packet exceed the number of skb frags(%lu)\n",
949 MAX_SKB_FRAGS);
69e3c75f
JB
950 return -EFAULT;
951 }
952
953 flush_dcache_page(page);
954 get_page(page);
955 skb_fill_page_desc(skb,
956 nr_frags,
957 page++, offset, len);
958 to_write -= len;
959 offset = 0;
960 len_max = PAGE_SIZE;
961 len = ((to_write > len_max) ? len_max : to_write);
962 }
963
964 return tp_len;
965}

static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
{
	struct socket *sock;
	struct sk_buff *skb;
	struct net_device *dev;
	__be16 proto;
	int ifindex, err, reserve = 0;
	void *ph;
	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
	int tp_len, size_max;
	unsigned char *addr;
	int len_sum = 0;
	int status = 0;

	sock = po->sk.sk_socket;

	mutex_lock(&po->pg_vec_lock);

	err = -EBUSY;
	if (saddr == NULL) {
		ifindex	= po->ifindex;
		proto	= po->num;
		addr	= NULL;
	} else {
		err = -EINVAL;
		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
			goto out;
		if (msg->msg_namelen < (saddr->sll_halen
					+ offsetof(struct sockaddr_ll,
						sll_addr)))
			goto out;
		ifindex	= saddr->sll_ifindex;
		proto	= saddr->sll_protocol;
		addr	= saddr->sll_addr;
	}

	dev = dev_get_by_index(sock_net(&po->sk), ifindex);
	err = -ENXIO;
	if (unlikely(dev == NULL))
		goto out;

	reserve = dev->hard_header_len;

	err = -ENETDOWN;
	if (unlikely(!(dev->flags & IFF_UP)))
		goto out_put;

	size_max = po->tx_ring.frame_size
		- sizeof(struct skb_shared_info)
		- po->tp_hdrlen
		- LL_ALLOCATED_SPACE(dev)
		- sizeof(struct sockaddr_ll);

	if (size_max > dev->mtu + reserve)
		size_max = dev->mtu + reserve;

	do {
		ph = packet_current_frame(po, &po->tx_ring,
				TP_STATUS_SEND_REQUEST);

		if (unlikely(ph == NULL)) {
			schedule();
			continue;
		}

		status = TP_STATUS_SEND_REQUEST;
		skb = sock_alloc_send_skb(&po->sk,
				LL_ALLOCATED_SPACE(dev)
				+ sizeof(struct sockaddr_ll),
				0, &err);

		if (unlikely(skb == NULL))
			goto out_status;

		tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
				addr);

		if (unlikely(tp_len < 0)) {
			if (po->tp_loss) {
				__packet_set_status(po, ph,
						TP_STATUS_AVAILABLE);
				packet_increment_head(&po->tx_ring);
				kfree_skb(skb);
				continue;
			} else {
				status = TP_STATUS_WRONG_FORMAT;
				err = tp_len;
				goto out_status;
			}
		}

		skb->destructor = tpacket_destruct_skb;
		__packet_set_status(po, ph, TP_STATUS_SENDING);
		atomic_inc(&po->tx_ring.pending);

		status = TP_STATUS_SEND_REQUEST;
		err = dev_queue_xmit(skb);
		if (unlikely(err > 0 && (err = net_xmit_errno(err)) != 0))
			goto out_xmit;
		packet_increment_head(&po->tx_ring);
		len_sum += tp_len;
	} while (likely((ph != NULL) || ((!(msg->msg_flags & MSG_DONTWAIT))
					&& (atomic_read(&po->tx_ring.pending))))
		);

	err = len_sum;
	goto out_put;

out_xmit:
	skb->destructor = sock_wfree;
	atomic_dec(&po->tx_ring.pending);
out_status:
	__packet_set_status(po, ph, status);
	kfree_skb(skb);
out_put:
	dev_put(dev);
out:
	mutex_unlock(&po->pg_vec_lock);
	return err;
}
#endif
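
/*
 * TX-ring send path in brief: user space fills frames, marks them
 * TP_STATUS_SEND_REQUEST and calls send(); tpacket_snd() walks the
 * ring, maps each frame's pages straight into skb frags (zero copy)
 * and marks it TP_STATUS_SENDING; tpacket_destruct_skb() hands the
 * slot back as TP_STATUS_AVAILABLE once the device is done with it.
 * Illustrative user-space kick (sketch, not from this file; "hdr"
 * points at a mapped TX frame):
 *
 *	hdr->tp_len = frame_len;
 *	hdr->tp_status = TP_STATUS_SEND_REQUEST;
 *	send(fd, NULL, 0, 0);	// flush all queued frames
 */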

static int packet_snd(struct socket *sock,
		      struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
	struct sk_buff *skb;
	struct net_device *dev;
	__be16 proto;
	unsigned char *addr;
	int ifindex, err, reserve = 0;

	/*
	 *	Get and verify the address.
	 */

	if (saddr == NULL) {
		struct packet_sock *po = pkt_sk(sk);

		ifindex	= po->ifindex;
		proto	= po->num;
		addr	= NULL;
	} else {
		err = -EINVAL;
		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
			goto out;
		if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
			goto out;
		ifindex	= saddr->sll_ifindex;
		proto	= saddr->sll_protocol;
		addr	= saddr->sll_addr;
	}


	dev = dev_get_by_index(sock_net(sk), ifindex);
	err = -ENXIO;
	if (dev == NULL)
		goto out_unlock;
	if (sock->type == SOCK_RAW)
		reserve = dev->hard_header_len;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_unlock;

	err = -EMSGSIZE;
	if (len > dev->mtu+reserve)
		goto out_unlock;

	skb = sock_alloc_send_skb(sk, len + LL_ALLOCATED_SPACE(dev),
				  msg->msg_flags & MSG_DONTWAIT, &err);
	if (skb == NULL)
		goto out_unlock;

	skb_reserve(skb, LL_RESERVED_SPACE(dev));
	skb_reset_network_header(skb);

	err = -EINVAL;
	if (sock->type == SOCK_DGRAM &&
	    dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len) < 0)
		goto out_free;

	/* Returns -EFAULT on error */
	err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (err)
		goto out_free;

	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	/*
	 *	Now send it
	 */

	err = dev_queue_xmit(skb);
	if (err > 0 && (err = net_xmit_errno(err)) != 0)
		goto out_unlock;

	dev_put(dev);

	return len;

out_free:
	kfree_skb(skb);
out_unlock:
	if (dev)
		dev_put(dev);
out:
	return err;
}

static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
			  struct msghdr *msg, size_t len)
{
#ifdef CONFIG_PACKET_MMAP
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	if (po->tx_ring.pg_vec)
		return tpacket_snd(po, msg);
	else
#endif
		return packet_snd(sock, msg, len);
}

/*
 *	Close a PACKET socket. This is fairly simple. We immediately go
 *	to 'closed' state and remove our protocol entry in the device list.
 */

static int packet_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po;
	struct net *net;
#ifdef CONFIG_PACKET_MMAP
	struct tpacket_req req;
#endif

	if (!sk)
		return 0;

	net = sock_net(sk);
	po = pkt_sk(sk);

	write_lock_bh(&net->packet.sklist_lock);
	sk_del_node_init(sk);
	sock_prot_inuse_add(net, sk->sk_prot, -1);
	write_unlock_bh(&net->packet.sklist_lock);

	/*
	 *	Unhook packet receive handler.
	 */

	if (po->running) {
		/*
		 *	Remove the protocol hook
		 */
		dev_remove_pack(&po->prot_hook);
		po->running = 0;
		po->num = 0;
		__sock_put(sk);
	}

	packet_flush_mclist(sk);

#ifdef CONFIG_PACKET_MMAP
	memset(&req, 0, sizeof(req));

	if (po->rx_ring.pg_vec)
		packet_set_ring(sk, &req, 1, 0);

	if (po->tx_ring.pg_vec)
		packet_set_ring(sk, &req, 1, 1);
#endif

	/*
	 *	Now the socket is dead. No more input will appear.
	 */

	sock_orphan(sk);
	sock->sk = NULL;

	/* Purge queues */

	skb_queue_purge(&sk->sk_receive_queue);
	sk_refcnt_debug_release(sk);

	sock_put(sk);
	return 0;
}

/*
 *	Attach a packet hook.
 */

static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
{
	struct packet_sock *po = pkt_sk(sk);
	/*
	 *	Detach an existing hook if present.
	 */

	lock_sock(sk);

	spin_lock(&po->bind_lock);
	if (po->running) {
		__sock_put(sk);
		po->running = 0;
		po->num = 0;
		spin_unlock(&po->bind_lock);
		dev_remove_pack(&po->prot_hook);
		spin_lock(&po->bind_lock);
	}

	po->num = protocol;
	po->prot_hook.type = protocol;
	po->prot_hook.dev = dev;

	po->ifindex = dev ? dev->ifindex : 0;

	if (protocol == 0)
		goto out_unlock;

	if (!dev || (dev->flags & IFF_UP)) {
		dev_add_pack(&po->prot_hook);
		sock_hold(sk);
		po->running = 1;
	} else {
		sk->sk_err = ENETDOWN;
		if (!sock_flag(sk, SOCK_DEAD))
			sk->sk_error_report(sk);
	}

out_unlock:
	spin_unlock(&po->bind_lock);
	release_sock(sk);
	return 0;
}

/*
 *	Bind a packet socket to a device
 */

static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
			    int addr_len)
{
	struct sock *sk = sock->sk;
	char name[15];
	struct net_device *dev;
	int err = -ENODEV;

	/*
	 *	Check legality
	 */

	if (addr_len != sizeof(struct sockaddr))
		return -EINVAL;
	strlcpy(name, uaddr->sa_data, sizeof(name));

	dev = dev_get_by_name(sock_net(sk), name);
	if (dev) {
		err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
		dev_put(dev);
	}
	return err;
}

static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
	struct sock *sk = sock->sk;
	struct net_device *dev = NULL;
	int err;


	/*
	 *	Check legality
	 */

	if (addr_len < sizeof(struct sockaddr_ll))
		return -EINVAL;
	if (sll->sll_family != AF_PACKET)
		return -EINVAL;

	if (sll->sll_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex);
		if (dev == NULL)
			goto out;
	}
	err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
	if (dev)
		dev_put(dev);

out:
	return err;
}

static struct proto packet_proto = {
	.name	  = "PACKET",
	.owner	  = THIS_MODULE,
	.obj_size = sizeof(struct packet_sock),
};
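
/*
 * Illustrative user-space counterpart (sketch, not from this file):
 * open a raw packet socket for all protocols and bind it to one
 * interface by index:
 *
 *	int fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
 *	struct sockaddr_ll sll = {
 *		.sll_family   = AF_PACKET,
 *		.sll_protocol = htons(ETH_P_ALL),
 *		.sll_ifindex  = if_nametoindex("eth0"),
 *	};
 *	bind(fd, (struct sockaddr *)&sll, sizeof(sll));
 */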

/*
 *	Create a packet socket.
 */

static int packet_create(struct net *net, struct socket *sock, int protocol)
{
	struct sock *sk;
	struct packet_sock *po;
	__be16 proto = (__force __be16)protocol; /* weird, but documented */
	int err;

	if (!capable(CAP_NET_RAW))
		return -EPERM;
	if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
	    sock->type != SOCK_PACKET)
		return -ESOCKTNOSUPPORT;

	sock->state = SS_UNCONNECTED;

	err = -ENOBUFS;
	sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
	if (sk == NULL)
		goto out;

	sock->ops = &packet_ops;
	if (sock->type == SOCK_PACKET)
		sock->ops = &packet_ops_spkt;

	sock_init_data(sock, sk);

	po = pkt_sk(sk);
	sk->sk_family = PF_PACKET;
	po->num = proto;

	sk->sk_destruct = packet_sock_destruct;
	sk_refcnt_debug_inc(sk);

	/*
	 *	Attach a protocol block
	 */

	spin_lock_init(&po->bind_lock);
	mutex_init(&po->pg_vec_lock);
	po->prot_hook.func = packet_rcv;

	if (sock->type == SOCK_PACKET)
		po->prot_hook.func = packet_rcv_spkt;

	po->prot_hook.af_packet_priv = sk;

	if (proto) {
		po->prot_hook.type = proto;
		dev_add_pack(&po->prot_hook);
		sock_hold(sk);
		po->running = 1;
	}

	write_lock_bh(&net->packet.sklist_lock);
	sk_add_node(sk, &net->packet.sklist);
	sock_prot_inuse_add(net, &packet_proto, 1);
	write_unlock_bh(&net->packet.sklist_lock);
	return 0;
out:
	return err;
}

/*
 *	Pull a packet from our receive queue and hand it to the user.
 *	If necessary we block.
 */

static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
			  struct msghdr *msg, size_t len, int flags)
{
	struct sock *sk = sock->sk;
	struct sk_buff *skb;
	int copied, err;
	struct sockaddr_ll *sll;
	__u32 gap;

	err = -EINVAL;
	if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
		goto out;

#if 0
	/* What error should we return now? EUNATTACH? */
	if (pkt_sk(sk)->ifindex < 0)
		return -ENODEV;
#endif

	/*
	 *	Call the generic datagram receiver. This handles all sorts
	 *	of horrible races and re-entrancy so we can forget about it
	 *	in the protocol layers.
	 *
	 *	Now it will return ENETDOWN if the device has just gone down,
	 *	but then it will block.
	 */

	skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);

	/*
	 *	An error occurred, so return it. Because skb_recv_datagram()
	 *	handles the blocking, we don't have to see or worry about
	 *	blocking retries.
	 */

	if (skb == NULL)
		goto out;

	/*
	 *	If the address length field is there to be filled in, we fill
	 *	it in now.
	 */

	sll = &PACKET_SKB_CB(skb)->sa.ll;
	if (sock->type == SOCK_PACKET)
		msg->msg_namelen = sizeof(struct sockaddr_pkt);
	else
		msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);

	/*
	 *	You lose any data beyond the buffer you gave. If it worries a
	 *	user program they can ask the device for its MTU anyway.
	 */

	copied = skb->len;
	if (copied > len) {
		copied = len;
		msg->msg_flags |= MSG_TRUNC;
	}

	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
	if (err)
		goto out_free;

	sock_recv_timestamp(msg, sk, skb);

	if (msg->msg_name)
		memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
		       msg->msg_namelen);

	if (pkt_sk(sk)->auxdata) {
		struct tpacket_auxdata aux;

		aux.tp_status = TP_STATUS_USER;
		if (skb->ip_summed == CHECKSUM_PARTIAL)
			aux.tp_status |= TP_STATUS_CSUMNOTREADY;
		aux.tp_len = PACKET_SKB_CB(skb)->origlen;
		aux.tp_snaplen = skb->len;
		aux.tp_mac = 0;
		aux.tp_net = skb_network_offset(skb);
		aux.tp_vlan_tci = skb->vlan_tci;

		put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
	}

	gap = check_packet_gap(skb);
	if (gap)
		put_cmsg(msg, SOL_PACKET, PACKET_GAPDATA, sizeof(__u32), &gap);

	/*
	 *	Free or return the buffer as appropriate. Again this
	 *	hides all the races and re-entrancy issues from us.
	 */
	err = (flags&MSG_TRUNC) ? skb->len : copied;

out_free:
	skb_free_datagram(sk, skb);
out:
	return err;
}
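
/*
 * Reading the PACKET_AUXDATA/PACKET_GAPDATA control messages from user
 * space (illustrative sketch; assumes a filled-in struct msghdr "msg"
 * already passed to recvmsg(); use_aux() and note_drops() are
 * hypothetical helpers):
 *
 *	struct cmsghdr *cmsg;
 *	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg;
 *	     cmsg = CMSG_NXTHDR(&msg, cmsg)) {
 *		if (cmsg->cmsg_level != SOL_PACKET)
 *			continue;
 *		if (cmsg->cmsg_type == PACKET_AUXDATA)
 *			use_aux((struct tpacket_auxdata *)CMSG_DATA(cmsg));
 *		else if (cmsg->cmsg_type == PACKET_GAPDATA)
 *			note_drops(*(__u32 *)CMSG_DATA(cmsg));
 *	}
 */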

static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
			       int *uaddr_len, int peer)
{
	struct net_device *dev;
	struct sock *sk = sock->sk;

	if (peer)
		return -EOPNOTSUPP;

	uaddr->sa_family = AF_PACKET;
	dev = dev_get_by_index(sock_net(sk), pkt_sk(sk)->ifindex);
	if (dev) {
		strlcpy(uaddr->sa_data, dev->name, 15);
		dev_put(dev);
	} else
		memset(uaddr->sa_data, 0, 14);
	*uaddr_len = sizeof(*uaddr);

	return 0;
}

static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
			  int *uaddr_len, int peer)
{
	struct net_device *dev;
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;

	if (peer)
		return -EOPNOTSUPP;

	sll->sll_family = AF_PACKET;
	sll->sll_ifindex = po->ifindex;
	sll->sll_protocol = po->num;
	dev = dev_get_by_index(sock_net(sk), po->ifindex);
	if (dev) {
		sll->sll_hatype = dev->type;
		sll->sll_halen = dev->addr_len;
		memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
		dev_put(dev);
	} else {
		sll->sll_hatype = 0;	/* Bad: we have no ARPHRD_UNSPEC */
		sll->sll_halen = 0;
	}
	*uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;

	return 0;
}

static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
			 int what)
{
	switch (i->type) {
	case PACKET_MR_MULTICAST:
		if (what > 0)
			return dev_mc_add(dev, i->addr, i->alen, 0);
		else
			return dev_mc_delete(dev, i->addr, i->alen, 0);
		break;
	case PACKET_MR_PROMISC:
		return dev_set_promiscuity(dev, what);
		break;
	case PACKET_MR_ALLMULTI:
		return dev_set_allmulti(dev, what);
		break;
	case PACKET_MR_UNICAST:
		if (what > 0)
			return dev_unicast_add(dev, i->addr);
		else
			return dev_unicast_delete(dev, i->addr);
		break;
	default:
		break;
	}
	return 0;
}

static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
{
	for ( ; i; i = i->next) {
		if (i->ifindex == dev->ifindex)
			packet_dev_mc(dev, i, what);
	}
}

static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
{
	struct packet_sock *po = pkt_sk(sk);
	struct packet_mclist *ml, *i;
	struct net_device *dev;
	int err;

	rtnl_lock();

	err = -ENODEV;
	dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex);
	if (!dev)
		goto done;

	err = -EINVAL;
	if (mreq->mr_alen > dev->addr_len)
		goto done;

	err = -ENOBUFS;
	i = kmalloc(sizeof(*i), GFP_KERNEL);
	if (i == NULL)
		goto done;

	err = 0;
	for (ml = po->mclist; ml; ml = ml->next) {
		if (ml->ifindex == mreq->mr_ifindex &&
		    ml->type == mreq->mr_type &&
		    ml->alen == mreq->mr_alen &&
		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
			ml->count++;
			/* Free the new element ... */
			kfree(i);
			goto done;
		}
	}

	i->type = mreq->mr_type;
	i->ifindex = mreq->mr_ifindex;
	i->alen = mreq->mr_alen;
	memcpy(i->addr, mreq->mr_address, i->alen);
	i->count = 1;
	i->next = po->mclist;
	po->mclist = i;
	err = packet_dev_mc(dev, i, 1);
	if (err) {
		po->mclist = i->next;
		kfree(i);
	}

done:
	rtnl_unlock();
	return err;
}

static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
{
	struct packet_mclist *ml, **mlp;

	rtnl_lock();

	for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
		if (ml->ifindex == mreq->mr_ifindex &&
		    ml->type == mreq->mr_type &&
		    ml->alen == mreq->mr_alen &&
		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
			if (--ml->count == 0) {
				struct net_device *dev;
				*mlp = ml->next;
				dev = dev_get_by_index(sock_net(sk), ml->ifindex);
				if (dev) {
					packet_dev_mc(dev, ml, -1);
					dev_put(dev);
				}
				kfree(ml);
			}
			rtnl_unlock();
			return 0;
		}
	}
	rtnl_unlock();
	return -EADDRNOTAVAIL;
}

static void packet_flush_mclist(struct sock *sk)
{
	struct packet_sock *po = pkt_sk(sk);
	struct packet_mclist *ml;

	if (!po->mclist)
		return;

	rtnl_lock();
	while ((ml = po->mclist) != NULL) {
		struct net_device *dev;

		po->mclist = ml->next;
		dev = dev_get_by_index(sock_net(sk), ml->ifindex);
		if (dev != NULL) {
			packet_dev_mc(dev, ml, -1);
			dev_put(dev);
		}
		kfree(ml);
	}
	rtnl_unlock();
}
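
/*
 * Enabling promiscuous mode through the membership API from user
 * space (illustrative sketch, not from this file):
 *
 *	struct packet_mreq mreq = {
 *		.mr_ifindex = if_nametoindex("eth0"),
 *		.mr_type    = PACKET_MR_PROMISC,
 *	};
 *	setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP,
 *		   &mreq, sizeof(mreq));
 *
 * Reference counting in packet_mc_add()/packet_mc_drop() means the
 * interface leaves promiscuous mode only when the last such
 * membership is dropped (or the socket is closed).
 */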

static int
packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	int ret;

	if (level != SOL_PACKET)
		return -ENOPROTOOPT;

	switch (optname) {
	case PACKET_ADD_MEMBERSHIP:
	case PACKET_DROP_MEMBERSHIP:
	{
		struct packet_mreq_max mreq;
		int len = optlen;
		memset(&mreq, 0, sizeof(mreq));
		if (len < sizeof(struct packet_mreq))
			return -EINVAL;
		if (len > sizeof(mreq))
			len = sizeof(mreq);
		if (copy_from_user(&mreq, optval, len))
			return -EFAULT;
		if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
			return -EINVAL;
		if (optname == PACKET_ADD_MEMBERSHIP)
			ret = packet_mc_add(sk, &mreq);
		else
			ret = packet_mc_drop(sk, &mreq);
		return ret;
	}

#ifdef CONFIG_PACKET_MMAP
	case PACKET_RX_RING:
	case PACKET_TX_RING:
	{
		struct tpacket_req req;

		if (optlen < sizeof(req))
			return -EINVAL;
		if (copy_from_user(&req, optval, sizeof(req)))
			return -EFAULT;
		return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING);
	}
	case PACKET_COPY_THRESH:
	{
		int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		pkt_sk(sk)->copy_thresh = val;
		return 0;
	}
	case PACKET_VERSION:
	{
		int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;
		switch (val) {
		case TPACKET_V1:
		case TPACKET_V2:
			po->tp_version = val;
			return 0;
		default:
			return -EINVAL;
		}
	}
	case PACKET_RESERVE:
	{
		unsigned int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;
		po->tp_reserve = val;
		return 0;
	}
	case PACKET_LOSS:
	{
		unsigned int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;
		po->tp_loss = !!val;
		return 0;
	}
#endif
	case PACKET_AUXDATA:
	{
		int val;

		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->auxdata = !!val;
		return 0;
	}
	case PACKET_ORIGDEV:
	{
		int val;

		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->origdev = !!val;
		return 0;
	}
	default:
		return -ENOPROTOOPT;
	}
}
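
/*
 * Typical ring setup order from user space (illustrative sketch; the
 * sizes below are arbitrary example values): pick the header version
 * and reserve before creating the ring, since PACKET_VERSION and
 * PACKET_RESERVE return -EBUSY once a ring exists:
 *
 *	int ver = TPACKET_V2;
 *	setsockopt(fd, SOL_PACKET, PACKET_VERSION, &ver, sizeof(ver));
 *	struct tpacket_req req = {
 *		.tp_block_size = 4096,
 *		.tp_block_nr   = 64,
 *		.tp_frame_size = 2048,
 *		.tp_frame_nr   = 128,	// block_size/frame_size * block_nr
 *	};
 *	setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));
 *	void *ring = mmap(NULL, req.tp_block_size * req.tp_block_nr,
 *			  PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 */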

static int packet_getsockopt(struct socket *sock, int level, int optname,
			     char __user *optval, int __user *optlen)
{
	int len;
	int val;
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	void *data;
	struct tpacket_stats st;

	if (level != SOL_PACKET)
		return -ENOPROTOOPT;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < 0)
		return -EINVAL;

	switch (optname) {
	case PACKET_STATISTICS:
		if (len > sizeof(struct tpacket_stats))
			len = sizeof(struct tpacket_stats);
		spin_lock_bh(&sk->sk_receive_queue.lock);
		st = po->stats;
		memset(&po->stats, 0, sizeof(st));
		spin_unlock_bh(&sk->sk_receive_queue.lock);
		st.tp_packets += st.tp_drops;

		data = &st;
		break;
	case PACKET_AUXDATA:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->auxdata;

		data = &val;
		break;
	case PACKET_ORIGDEV:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->origdev;

		data = &val;
		break;
#ifdef CONFIG_PACKET_MMAP
	case PACKET_VERSION:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->tp_version;
		data = &val;
		break;
	case PACKET_HDRLEN:
		if (len > sizeof(int))
			len = sizeof(int);
		if (copy_from_user(&val, optval, len))
			return -EFAULT;
		switch (val) {
		case TPACKET_V1:
			val = sizeof(struct tpacket_hdr);
			break;
		case TPACKET_V2:
			val = sizeof(struct tpacket2_hdr);
			break;
		default:
			return -EINVAL;
		}
		data = &val;
		break;
	case PACKET_RESERVE:
		if (len > sizeof(unsigned int))
			len = sizeof(unsigned int);
		val = po->tp_reserve;
		data = &val;
		break;
	case PACKET_LOSS:
		if (len > sizeof(unsigned int))
			len = sizeof(unsigned int);
		val = po->tp_loss;
		data = &val;
		break;
#endif
	default:
		return -ENOPROTOOPT;
	}

	if (put_user(len, optlen))
		return -EFAULT;
	if (copy_to_user(optval, data, len))
		return -EFAULT;
	return 0;
}


static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
{
	struct sock *sk;
	struct hlist_node *node;
	struct net_device *dev = data;
	struct net *net = dev_net(dev);

	read_lock(&net->packet.sklist_lock);
	sk_for_each(sk, node, &net->packet.sklist) {
		struct packet_sock *po = pkt_sk(sk);

		switch (msg) {
		case NETDEV_UNREGISTER:
			if (po->mclist)
				packet_dev_mclist(dev, po->mclist, -1);
			/* fallthrough */

		case NETDEV_DOWN:
			if (dev->ifindex == po->ifindex) {
				spin_lock(&po->bind_lock);
				if (po->running) {
					__dev_remove_pack(&po->prot_hook);
					__sock_put(sk);
					po->running = 0;
					sk->sk_err = ENETDOWN;
					if (!sock_flag(sk, SOCK_DEAD))
						sk->sk_error_report(sk);
				}
				if (msg == NETDEV_UNREGISTER) {
					po->ifindex = -1;
					po->prot_hook.dev = NULL;
				}
				spin_unlock(&po->bind_lock);
			}
			break;
		case NETDEV_UP:
			spin_lock(&po->bind_lock);
			if (dev->ifindex == po->ifindex && po->num &&
			    !po->running) {
				dev_add_pack(&po->prot_hook);
				sock_hold(sk);
				po->running = 1;
			}
			spin_unlock(&po->bind_lock);
			break;
		}
	}
	read_unlock(&net->packet.sklist_lock);
	return NOTIFY_DONE;
}


static int packet_ioctl(struct socket *sock, unsigned int cmd,
			unsigned long arg)
{
	struct sock *sk = sock->sk;

	switch (cmd) {
	case SIOCOUTQ:
	{
		int amount = sk_wmem_alloc_get(sk);

		return put_user(amount, (int __user *)arg);
	}
	case SIOCINQ:
	{
		struct sk_buff *skb;
		int amount = 0;

		spin_lock_bh(&sk->sk_receive_queue.lock);
		skb = skb_peek(&sk->sk_receive_queue);
		if (skb)
			amount = skb->len;
		spin_unlock_bh(&sk->sk_receive_queue.lock);
		return put_user(amount, (int __user *)arg);
	}
	case SIOCGSTAMP:
		return sock_get_timestamp(sk, (struct timeval __user *)arg);
	case SIOCGSTAMPNS:
		return sock_get_timestampns(sk, (struct timespec __user *)arg);

#ifdef CONFIG_INET
	case SIOCADDRT:
	case SIOCDELRT:
	case SIOCDARP:
	case SIOCGARP:
	case SIOCSARP:
	case SIOCGIFADDR:
	case SIOCSIFADDR:
	case SIOCGIFBRDADDR:
	case SIOCSIFBRDADDR:
	case SIOCGIFNETMASK:
	case SIOCSIFNETMASK:
	case SIOCGIFDSTADDR:
	case SIOCSIFDSTADDR:
	case SIOCSIFFLAGS:
		if (!net_eq(sock_net(sk), &init_net))
			return -ENOIOCTLCMD;
		return inet_dgram_ops.ioctl(sock, cmd, arg);
#endif

	default:
		return -ENOIOCTLCMD;
	}
	return 0;
}
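/*
 * Userspace sketch (fd is illustrative): as implemented above, SIOCINQ
 * reports the length of the packet at the head of the receive queue
 * (0 if the queue is empty), not the total queued bytes; SIOCOUTQ
 * reports the write memory currently charged to the socket.
 */
#include <sys/ioctl.h>
#include <linux/sockios.h>	/* SIOCINQ, SIOCOUTQ */

static int next_packet_len(int fd)
{
	int len = 0;

	if (ioctl(fd, SIOCINQ, &len) < 0)
		return -1;
	return len;
}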

#ifndef CONFIG_PACKET_MMAP
#define packet_mmap sock_no_mmap
#define packet_poll datagram_poll
#else

static unsigned int packet_poll(struct file *file, struct socket *sock,
				poll_table *wait)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	unsigned int mask = datagram_poll(file, sock, wait);

	spin_lock_bh(&sk->sk_receive_queue.lock);
	if (po->rx_ring.pg_vec) {
		if (!packet_previous_frame(po, &po->rx_ring, TP_STATUS_KERNEL))
			mask |= POLLIN | POLLRDNORM;
	}
	spin_unlock_bh(&sk->sk_receive_queue.lock);
	spin_lock_bh(&sk->sk_write_queue.lock);
	if (po->tx_ring.pg_vec) {
		if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE))
			mask |= POLLOUT | POLLWRNORM;
	}
	spin_unlock_bh(&sk->sk_write_queue.lock);
	return mask;
}
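/*
 * Userspace sketch (fd is illustrative): with an RX ring configured,
 * packet_poll() above reports POLLIN once the previous ring slot is no
 * longer kernel-owned, so a reader can block in poll() instead of
 * spinning on the frame status words.
 */
#include <poll.h>

static int wait_for_frame(int fd)
{
	struct pollfd pfd = {
		.fd	= fd,
		.events	= POLLIN | POLLRDNORM,
	};

	return poll(&pfd, 1, -1);	/* block until a frame is ready */
}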


/* Dirty? Well, I still did not learn better way to account
 * for user mmaps.
 */

static void packet_mm_open(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct socket *sock = file->private_data;
	struct sock *sk = sock->sk;

	if (sk)
		atomic_inc(&pkt_sk(sk)->mapped);
}

static void packet_mm_close(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct socket *sock = file->private_data;
	struct sock *sk = sock->sk;

	if (sk)
		atomic_dec(&pkt_sk(sk)->mapped);
}

static const struct vm_operations_struct packet_mmap_ops = {
	.open	= packet_mm_open,
	.close	= packet_mm_close,
};

static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
{
	int i;

	for (i = 0; i < len; i++) {
		if (likely(pg_vec[i]))
			free_pages((unsigned long) pg_vec[i], order);
	}
	kfree(pg_vec);
}

static inline char *alloc_one_pg_vec_page(unsigned long order)
{
	gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | __GFP_NOWARN;

	return (char *) __get_free_pages(gfp_flags, order);
}

static char **alloc_pg_vec(struct tpacket_req *req, int order)
{
	unsigned int block_nr = req->tp_block_nr;
	char **pg_vec;
	int i;

	pg_vec = kzalloc(block_nr * sizeof(char *), GFP_KERNEL);
	if (unlikely(!pg_vec))
		goto out;

	for (i = 0; i < block_nr; i++) {
		pg_vec[i] = alloc_one_pg_vec_page(order);
		if (unlikely(!pg_vec[i]))
			goto out_free_pgvec;
	}

out:
	return pg_vec;

out_free_pgvec:
	free_pg_vec(pg_vec, order, block_nr);
	pg_vec = NULL;
	goto out;
}

static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
		int closing, int tx_ring)
{
	char **pg_vec = NULL;
	struct packet_sock *po = pkt_sk(sk);
	int was_running, order = 0;
	struct packet_ring_buffer *rb;
	struct sk_buff_head *rb_queue;
	__be16 num;
	int err;

	rb = tx_ring ? &po->tx_ring : &po->rx_ring;
	rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;

	err = -EBUSY;
	if (!closing) {
		if (atomic_read(&po->mapped))
			goto out;
		if (atomic_read(&rb->pending))
			goto out;
	}

	if (req->tp_block_nr) {
		/* Sanity tests and some calculations */
		err = -EBUSY;
		if (unlikely(rb->pg_vec))
			goto out;

		switch (po->tp_version) {
		case TPACKET_V1:
			po->tp_hdrlen = TPACKET_HDRLEN;
			break;
		case TPACKET_V2:
			po->tp_hdrlen = TPACKET2_HDRLEN;
			break;
		}

		err = -EINVAL;
		if (unlikely((int)req->tp_block_size <= 0))
			goto out;
		if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
			goto out;
		if (unlikely(req->tp_frame_size < po->tp_hdrlen +
					po->tp_reserve))
			goto out;
		if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
			goto out;

		rb->frames_per_block = req->tp_block_size/req->tp_frame_size;
		if (unlikely(rb->frames_per_block <= 0))
			goto out;
		if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
					req->tp_frame_nr))
			goto out;

		err = -ENOMEM;
		order = get_order(req->tp_block_size);
		pg_vec = alloc_pg_vec(req, order);
		if (unlikely(!pg_vec))
			goto out;
	}
	/* Done */
	else {
		err = -EINVAL;
		if (unlikely(req->tp_frame_nr))
			goto out;
	}

	lock_sock(sk);

	/* Detach socket from network */
	spin_lock(&po->bind_lock);
	was_running = po->running;
	num = po->num;
	if (was_running) {
		__dev_remove_pack(&po->prot_hook);
		po->num = 0;
		po->running = 0;
		__sock_put(sk);
	}
	spin_unlock(&po->bind_lock);

	synchronize_net();

	err = -EBUSY;
	mutex_lock(&po->pg_vec_lock);
	if (closing || atomic_read(&po->mapped) == 0) {
		err = 0;
#define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })
		spin_lock_bh(&rb_queue->lock);
		pg_vec = XC(rb->pg_vec, pg_vec);
		rb->frame_max = (req->tp_frame_nr - 1);
		rb->head = 0;
		rb->frame_size = req->tp_frame_size;
		spin_unlock_bh(&rb_queue->lock);

		order = XC(rb->pg_vec_order, order);
		req->tp_block_nr = XC(rb->pg_vec_len, req->tp_block_nr);

		rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
		po->prot_hook.func = (po->rx_ring.pg_vec) ?
						tpacket_rcv : packet_rcv;
		skb_queue_purge(rb_queue);
#undef XC
		if (atomic_read(&po->mapped))
			pr_err("packet_mmap: vma is busy: %d\n",
			       atomic_read(&po->mapped));
	}
	mutex_unlock(&po->pg_vec_lock);

	spin_lock(&po->bind_lock);
	if (was_running && !po->running) {
		sock_hold(sk);
		po->running = 1;
		po->num = num;
		dev_add_pack(&po->prot_hook);
	}
	spin_unlock(&po->bind_lock);

	release_sock(sk);

	if (pg_vec)
		free_pg_vec(pg_vec, order, req->tp_block_nr);
out:
	return err;
}
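/*
 * Userspace sketch (the sizes are illustrative): packet_set_ring() is
 * reached through setsockopt(PACKET_RX_RING/PACKET_TX_RING). The checks
 * above spell out the contract: tp_block_size must be a positive
 * multiple of PAGE_SIZE, tp_frame_size must be TPACKET_ALIGNMENT-aligned
 * and at least tp_hdrlen + tp_reserve, and tp_frame_nr must equal
 * frames_per_block * tp_block_nr. Passing an all-zero request tears the
 * ring down.
 */
#include <linux/if_packet.h>
#include <sys/socket.h>

static int setup_rx_ring(int fd)
{
	struct tpacket_req req = {
		.tp_block_size	= 4096,		/* one page per block */
		.tp_block_nr	= 64,
		.tp_frame_size	= 2048,		/* 2 frames per block */
		.tp_frame_nr	= 64 * 2,	/* blocks * frames/block */
	};

	return setsockopt(fd, SOL_PACKET, PACKET_RX_RING,
			  &req, sizeof(req));
}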

static int packet_mmap(struct file *file, struct socket *sock,
		struct vm_area_struct *vma)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	unsigned long size, expected_size;
	struct packet_ring_buffer *rb;
	unsigned long start;
	int err = -EINVAL;
	int i;

	if (vma->vm_pgoff)
		return -EINVAL;

	mutex_lock(&po->pg_vec_lock);

	expected_size = 0;
	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
		if (rb->pg_vec) {
			expected_size += rb->pg_vec_len
						* rb->pg_vec_pages
						* PAGE_SIZE;
		}
	}

	if (expected_size == 0)
		goto out;

	size = vma->vm_end - vma->vm_start;
	if (size != expected_size)
		goto out;

	start = vma->vm_start;
	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
		if (rb->pg_vec == NULL)
			continue;

		for (i = 0; i < rb->pg_vec_len; i++) {
			struct page *page = virt_to_page(rb->pg_vec[i]);
			int pg_num;

			for (pg_num = 0; pg_num < rb->pg_vec_pages;
					pg_num++, page++) {
				err = vm_insert_page(vma, start, page);
				if (unlikely(err))
					goto out;
				start += PAGE_SIZE;
			}
		}
	}

	atomic_inc(&po->mapped);
	vma->vm_ops = &packet_mmap_ops;
	err = 0;

out:
	mutex_unlock(&po->pg_vec_lock);
	return err;
}
#endif
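/*
 * Userspace sketch (ring geometry carried over from the illustrative
 * setup above): the mapping must start at offset 0 and span the RX and
 * TX rings back to back, exactly the expected_size that packet_mmap()
 * computes; anything else fails with -EINVAL.
 */
#include <linux/if_packet.h>
#include <sys/mman.h>

static void *map_ring(int fd, const struct tpacket_req *req)
{
	size_t size = (size_t)req->tp_block_size * req->tp_block_nr;

	return mmap(NULL, size, PROT_READ | PROT_WRITE,
		    MAP_SHARED, fd, 0);	/* vm_pgoff must be 0 */
}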


static const struct proto_ops packet_ops_spkt = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind_spkt,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname_spkt,
	.poll =		datagram_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	packet_sendmsg_spkt,
	.recvmsg =	packet_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};

static const struct proto_ops packet_ops = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname,
	.poll =		packet_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	packet_setsockopt,
	.getsockopt =	packet_getsockopt,
	.sendmsg =	packet_sendmsg,
	.recvmsg =	packet_recvmsg,
	.mmap =		packet_mmap,
	.sendpage =	sock_no_sendpage,
};
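/*
 * Userspace sketch: packet_create() (earlier in this file) installs
 * packet_ops_spkt for the legacy SOCK_PACKET type and packet_ops for
 * SOCK_RAW/SOCK_DGRAM sockets, so the ring and mmap paths above are
 * reached from an ordinary packet socket:
 */
#include <sys/socket.h>
#include <arpa/inet.h>		/* htons */
#include <linux/if_ether.h>	/* ETH_P_ALL */

static int open_packet_socket(void)
{
	/* requires CAP_NET_RAW */
	return socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
}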

static struct net_proto_family packet_family_ops = {
	.family =	PF_PACKET,
	.create =	packet_create,
	.owner	=	THIS_MODULE,
};

static struct notifier_block packet_netdev_notifier = {
	.notifier_call =	packet_notifier,
};

#ifdef CONFIG_PROC_FS
static inline struct sock *packet_seq_idx(struct net *net, loff_t off)
{
	struct sock *s;
	struct hlist_node *node;

	sk_for_each(s, node, &net->packet.sklist) {
		if (!off--)
			return s;
	}
	return NULL;
}

static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(seq_file_net(seq)->packet.sklist_lock)
{
	struct net *net = seq_file_net(seq);
	read_lock(&net->packet.sklist_lock);
	return *pos ? packet_seq_idx(net, *pos - 1) : SEQ_START_TOKEN;
}

static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct net *net = seq_file_net(seq);
	++*pos;
	return (v == SEQ_START_TOKEN)
		? sk_head(&net->packet.sklist)
		: sk_next((struct sock *)v);
}

static void packet_seq_stop(struct seq_file *seq, void *v)
	__releases(seq_file_net(seq)->packet.sklist_lock)
{
	struct net *net = seq_file_net(seq);
	read_unlock(&net->packet.sklist_lock);
}

static int packet_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "sk       RefCnt Type Proto  Iface R Rmem   User   Inode\n");
	else {
		struct sock *s = v;
		const struct packet_sock *po = pkt_sk(s);

		seq_printf(seq,
			   "%p %-6d %-4d %04x   %-5d %1d %-6u %-6u %-6lu\n",
			   s,
			   atomic_read(&s->sk_refcnt),
			   s->sk_type,
			   ntohs(po->num),
			   po->ifindex,
			   po->running,
			   atomic_read(&s->sk_rmem_alloc),
			   sock_i_uid(s),
			   sock_i_ino(s));
	}

	return 0;
}

static const struct seq_operations packet_seq_ops = {
	.start	= packet_seq_start,
	.next	= packet_seq_next,
	.stop	= packet_seq_stop,
	.show	= packet_seq_show,
};

static int packet_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &packet_seq_ops,
			    sizeof(struct seq_net_private));
}

static const struct file_operations packet_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= packet_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};

#endif

static int packet_net_init(struct net *net)
{
	rwlock_init(&net->packet.sklist_lock);
	INIT_HLIST_HEAD(&net->packet.sklist);

	if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
		return -ENOMEM;

	return 0;
}

static void packet_net_exit(struct net *net)
{
	proc_net_remove(net, "packet");
}

static struct pernet_operations packet_net_ops = {
	.init = packet_net_init,
	.exit = packet_net_exit,
};


static void __exit packet_exit(void)
{
	unregister_netdevice_notifier(&packet_netdev_notifier);
	unregister_pernet_subsys(&packet_net_ops);
	sock_unregister(PF_PACKET);
	proto_unregister(&packet_proto);
}

static int __init packet_init(void)
{
	int rc = proto_register(&packet_proto, 0);

	if (rc != 0)
		goto out;

	sock_register(&packet_family_ops);
	register_pernet_subsys(&packet_net_ops);
	register_netdevice_notifier(&packet_netdev_notifier);
out:
	return rc;
}

module_init(packet_init);
module_exit(packet_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_PACKET);