af-packet: Hold reference to bound network devices.
net/packet/af_packet.c
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		PACKET - implements raw packet sockets.
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *
 * Fixes:
 *		Alan Cox	:	verify_area() now used correctly
 *		Alan Cox	:	new skbuff lists, look ma no backlogs!
 *		Alan Cox	:	tidied skbuff lists.
 *		Alan Cox	:	Now uses generic datagram routines I
 *					added. Also fixed the peek/read crash
 *					from all old Linux datagram code.
 *		Alan Cox	:	Uses the improved datagram code.
 *		Alan Cox	:	Added NULL's for socket options.
 *		Alan Cox	:	Re-commented the code.
 *		Alan Cox	:	Use new kernel side addressing
 *		Rob Janssen	:	Correct MTU usage.
 *		Dave Platt	:	Counter leaks caused by incorrect
 *					interrupt locking and some slightly
 *					dubious gcc output. Can you read
 *					compiler: it said _VOLATILE_
 *		Richard Kooijman:	Timestamp fixes.
 *		Alan Cox	:	New buffers. Use sk->mac.raw.
 *		Alan Cox	:	sendmsg/recvmsg support.
 *		Alan Cox	:	Protocol setting support
 *		Alexey Kuznetsov:	Untied from IPv4 stack.
 *		Cyrus Durgin	:	Fixed kerneld for kmod.
 *		Michal Ostrowski:	Module initialization cleanup.
 *		Ulises Alonso	:	Frame number limit removal and
 *					packet_set_ring memory leak.
 *		Eric Biederman	:	Allow for > 8 byte hardware addresses.
 *					The convention is that longer addresses
 *					will simply extend the hardware address
 *					byte arrays at the end of sockaddr_ll
 *					and packet_mreq.
 *		Johann Baudy	:	Added TX RING.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 */

#include <linux/types.h>
#include <linux/mm.h>
#include <linux/capability.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/wireless.h>
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
#include <asm/page.h>
#include <asm/cacheflush.h>
#include <asm/io.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/poll.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/if_vlan.h>
#include <linux/virtio_net.h>
#include <linux/errqueue.h>
#include <linux/net_tstamp.h>

#ifdef CONFIG_INET
#include <net/inet_common.h>
#endif

/*
   Assumptions:
   - if device has no dev->hard_header routine, it adds and removes ll header
     inside itself. In this case ll header is invisible outside of device,
     but higher levels still should reserve dev->hard_header_len.
     Some devices are clever enough to reallocate the skb when the header
     will not fit in the reserved space (tunnels); others are silly
     (PPP).
   - packet socket receives packets with pulled ll header,
     so that SOCK_RAW should push it back.

On receive:
-----------

Incoming, dev->hard_header!=NULL
   mac_header -> ll header
   data       -> data

Outgoing, dev->hard_header!=NULL
   mac_header -> ll header
   data       -> ll header

Incoming, dev->hard_header==NULL
   mac_header -> UNKNOWN position. It is very likely that it points to the ll
		 header. PPP does this, which is wrong, because it introduces
		 asymmetry between rx and tx paths.
   data       -> data

Outgoing, dev->hard_header==NULL
   mac_header -> data. ll header is still not built!
   data       -> data

Summary
  If dev->hard_header==NULL we are unlikely to restore a sensible ll header.


On transmit:
------------

dev->hard_header != NULL
   mac_header -> ll header
   data       -> ll header

dev->hard_header == NULL (ll header is added by device, we cannot control it)
   mac_header -> data
   data       -> data

   We should set nh.raw on output to the correct position;
   the packet classifier depends on it.
 */

/* Private packet socket structures. */

struct packet_mclist {
	struct packet_mclist	*next;
	int			ifindex;
	int			count;
	unsigned short		type;
	unsigned short		alen;
	unsigned char		addr[MAX_ADDR_LEN];
};
/* identical to struct packet_mreq except it has
 * a longer address field.
 */
struct packet_mreq_max {
	int		mr_ifindex;
	unsigned short	mr_type;
	unsigned short	mr_alen;
	unsigned char	mr_address[MAX_ADDR_LEN];
};

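/*
 * Illustrative only (not part of the original file): a userspace sketch of
 * how these structures are exercised.  A process typically fills a struct
 * packet_mreq and passes it via setsockopt(); the kernel copies it into
 * the wider packet_mreq_max above so addresses longer than 8 bytes fit.
 *
 *	struct packet_mreq mreq = {
 *		.mr_ifindex = if_nametoindex("eth0"),
 *		.mr_type    = PACKET_MR_PROMISC,
 *	};
 *	setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP,
 *		   &mreq, sizeof(mreq));
 */
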
static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
		int closing, int tx_ring);

struct pgv {
	char *buffer;
};

struct packet_ring_buffer {
	struct pgv		*pg_vec;
	unsigned int		head;
	unsigned int		frames_per_block;
	unsigned int		frame_size;
	unsigned int		frame_max;

	unsigned int		pg_vec_order;
	unsigned int		pg_vec_pages;
	unsigned int		pg_vec_len;

	atomic_t		pending;
};

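/*
 * Ring geometry, illustrated (added, not in the original file): userspace
 * describes the ring with a struct tpacket_req and maps it.  For example,
 * tp_block_size = 4096, tp_frame_size = 2048, tp_block_nr = 2 gives
 * frames_per_block = 4096 / 2048 = 2 and tp_frame_nr = 4; frame i lives in
 * block (i / frames_per_block) at offset
 * (i % frames_per_block) * tp_frame_size.  A typical (hedged) setup:
 *
 *	struct tpacket_req req = {
 *		.tp_block_size = 4096,
 *		.tp_frame_size = 2048,
 *		.tp_block_nr   = 2,
 *		.tp_frame_nr   = 4,
 *	};
 *	setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));
 *	ring = mmap(NULL, req.tp_block_size * req.tp_block_nr,
 *		    PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 */
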
struct packet_sock;
static int tpacket_snd(struct packet_sock *po, struct msghdr *msg);

static void packet_flush_mclist(struct sock *sk);

struct packet_sock {
	/* struct sock has to be the first member of packet_sock */
	struct sock		sk;
	struct tpacket_stats	stats;
	struct packet_ring_buffer	rx_ring;
	struct packet_ring_buffer	tx_ring;
	int			copy_thresh;
	spinlock_t		bind_lock;
	struct mutex		pg_vec_lock;
	unsigned int		running:1,	/* prot_hook is attached */
				auxdata:1,
				origdev:1,
				has_vnet_hdr:1;
	int			ifindex;	/* bound device */
	__be16			num;
	struct packet_mclist	*mclist;
	atomic_t		mapped;
	enum tpacket_versions	tp_version;
	unsigned int		tp_hdrlen;
	unsigned int		tp_reserve;
	unsigned int		tp_loss:1;
	unsigned int		tp_tstamp;
	struct packet_type	prot_hook ____cacheline_aligned_in_smp;
};

struct packet_skb_cb {
	unsigned int origlen;
	union {
		struct sockaddr_pkt pkt;
		struct sockaddr_ll ll;
	} sa;
};

#define PACKET_SKB_CB(__skb)	((struct packet_skb_cb *)((__skb)->cb))

static inline __pure struct page *pgv_to_page(void *addr)
{
	if (is_vmalloc_addr(addr))
		return vmalloc_to_page(addr);
	return virt_to_page(addr);
}

static void __packet_set_status(struct packet_sock *po, void *frame, int status)
{
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;

	h.raw = frame;
	switch (po->tp_version) {
	case TPACKET_V1:
		h.h1->tp_status = status;
		flush_dcache_page(pgv_to_page(&h.h1->tp_status));
		break;
	case TPACKET_V2:
		h.h2->tp_status = status;
		flush_dcache_page(pgv_to_page(&h.h2->tp_status));
		break;
	default:
		pr_err("TPACKET version not supported\n");
		BUG();
	}

	smp_wmb();
}

static int __packet_get_status(struct packet_sock *po, void *frame)
{
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;

	smp_rmb();

	h.raw = frame;
	switch (po->tp_version) {
	case TPACKET_V1:
		flush_dcache_page(pgv_to_page(&h.h1->tp_status));
		return h.h1->tp_status;
	case TPACKET_V2:
		flush_dcache_page(pgv_to_page(&h.h2->tp_status));
		return h.h2->tp_status;
	default:
		pr_err("TPACKET version not supported\n");
		BUG();
		return 0;
	}
}

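/*
 * Added for illustration, not in the original file: the tp_status word is
 * the kernel/user handshake for each ring frame.  The kernel fills a frame
 * and sets TP_STATUS_USER; userspace consumes it and writes back
 * TP_STATUS_KERNEL (rx) or TP_STATUS_SEND_REQUEST (tx).  A hedged sketch
 * of the userspace side of the rx loop, using hypothetical helpers:
 *
 *	struct tpacket_hdr *hdr = frame(ring, i);	// hypothetical
 *	while (!(hdr->tp_status & TP_STATUS_USER))
 *		poll(&pfd, 1, -1);
 *	handle(frame_data(hdr));			// hypothetical
 *	hdr->tp_status = TP_STATUS_KERNEL;
 */
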
static void *packet_lookup_frame(struct packet_sock *po,
		struct packet_ring_buffer *rb,
		unsigned int position,
		int status)
{
	unsigned int pg_vec_pos, frame_offset;
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;

	pg_vec_pos = position / rb->frames_per_block;
	frame_offset = position % rb->frames_per_block;

	h.raw = rb->pg_vec[pg_vec_pos].buffer +
		(frame_offset * rb->frame_size);

	if (status != __packet_get_status(po, h.raw))
		return NULL;

	return h.raw;
}

static inline void *packet_current_frame(struct packet_sock *po,
		struct packet_ring_buffer *rb,
		int status)
{
	return packet_lookup_frame(po, rb, rb->head, status);
}

static inline void *packet_previous_frame(struct packet_sock *po,
		struct packet_ring_buffer *rb,
		int status)
{
	unsigned int previous = rb->head ? rb->head - 1 : rb->frame_max;
	return packet_lookup_frame(po, rb, previous, status);
}

static inline void packet_increment_head(struct packet_ring_buffer *buff)
{
	buff->head = buff->head != buff->frame_max ? buff->head+1 : 0;
}

static inline struct packet_sock *pkt_sk(struct sock *sk)
{
	return (struct packet_sock *)sk;
}

static void packet_sock_destruct(struct sock *sk)
{
	skb_queue_purge(&sk->sk_error_queue);

	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
	WARN_ON(atomic_read(&sk->sk_wmem_alloc));

	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_err("Attempt to release alive packet socket: %p\n", sk);
		return;
	}

	sk_refcnt_debug_dec(sk);
}


static const struct proto_ops packet_ops;

static const struct proto_ops packet_ops_spkt;

static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,
			   struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct sockaddr_pkt *spkt;

	/*
	 *	When we registered the protocol we saved the socket in the data
	 *	field for just this event.
	 */

	sk = pt->af_packet_priv;

	/*
	 *	Yank back the headers [hope the device set this
	 *	right or kerboom...]
	 *
	 *	Incoming packets have ll header pulled,
	 *	push it back.
	 *
	 *	For outgoing ones skb->data == skb_mac_header(skb)
	 *	so that this procedure is noop.
	 */

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto out;

	if (!net_eq(dev_net(dev), sock_net(sk)))
		goto out;

	skb = skb_share_check(skb, GFP_ATOMIC);
	if (skb == NULL)
		goto oom;

	/* drop any routing info */
	skb_dst_drop(skb);

	/* drop conntrack reference */
	nf_reset(skb);

	spkt = &PACKET_SKB_CB(skb)->sa.pkt;

	skb_push(skb, skb->data - skb_mac_header(skb));

	/*
	 *	The SOCK_PACKET socket receives _all_ frames.
	 */

	spkt->spkt_family = dev->type;
	strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
	spkt->spkt_protocol = skb->protocol;

	/*
	 *	Charge the memory to the socket. This is done specifically
	 *	to prevent sockets using all the memory up.
	 */

	if (sock_queue_rcv_skb(sk, skb) == 0)
		return 0;

out:
	kfree_skb(skb);
oom:
	return 0;
}


/*
 *	Output a raw packet to a device layer. This bypasses all the other
 *	protocol layers and you must therefore supply it with a complete frame
 */

static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
			       struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_pkt *saddr = (struct sockaddr_pkt *)msg->msg_name;
	struct sk_buff *skb = NULL;
	struct net_device *dev;
	__be16 proto = 0;
	int err;

	/*
	 *	Get and verify the address.
	 */

	if (saddr) {
		if (msg->msg_namelen < sizeof(struct sockaddr))
			return -EINVAL;
		if (msg->msg_namelen == sizeof(struct sockaddr_pkt))
			proto = saddr->spkt_protocol;
	} else
		return -ENOTCONN;	/* SOCK_PACKET must be sent giving an address */

	/*
	 *	Find the device first to size check it
	 */

	saddr->spkt_device[13] = 0;
retry:
	rcu_read_lock();
	dev = dev_get_by_name_rcu(sock_net(sk), saddr->spkt_device);
	err = -ENODEV;
	if (dev == NULL)
		goto out_unlock;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_unlock;

	/*
	 * You may not queue a frame bigger than the mtu. This is the lowest level
	 * raw protocol and you must do your own fragmentation at this level.
	 */

	err = -EMSGSIZE;
	if (len > dev->mtu + dev->hard_header_len + VLAN_HLEN)
		goto out_unlock;

	if (!skb) {
		size_t reserved = LL_RESERVED_SPACE(dev);
		unsigned int hhlen = dev->header_ops ? dev->hard_header_len : 0;

		rcu_read_unlock();
		skb = sock_wmalloc(sk, len + reserved, 0, GFP_KERNEL);
		if (skb == NULL)
			return -ENOBUFS;
		/* FIXME: Save some space for broken drivers that write a hard
		 * header at transmission time by themselves. PPP is the notable
		 * one here. This should really be fixed at the driver level.
		 */
		skb_reserve(skb, reserved);
		skb_reset_network_header(skb);

		/* Try to align data part correctly */
		if (hhlen) {
			skb->data -= hhlen;
			skb->tail -= hhlen;
			if (len < hhlen)
				skb_reset_network_header(skb);
		}
		err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
		if (err)
			goto out_free;
		goto retry;
	}

	if (len > (dev->mtu + dev->hard_header_len)) {
		/* Earlier code assumed this would be a VLAN pkt,
		 * double-check this now that we have the actual
		 * packet in hand.
		 */
		struct ethhdr *ehdr;
		skb_reset_mac_header(skb);
		ehdr = eth_hdr(skb);
		if (ehdr->h_proto != htons(ETH_P_8021Q)) {
			err = -EMSGSIZE;
			goto out_unlock;
		}
	}

	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;
	err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
	if (err < 0)
		goto out_unlock;

	dev_queue_xmit(skb);
	rcu_read_unlock();
	return len;

out_unlock:
	rcu_read_unlock();
out_free:
	kfree_skb(skb);
	return err;
}

static inline unsigned int run_filter(const struct sk_buff *skb,
				      const struct sock *sk,
				      unsigned int res)
{
	struct sk_filter *filter;

	rcu_read_lock();
	filter = rcu_dereference(sk->sk_filter);
	if (filter != NULL)
		res = SK_RUN_FILTER(filter, skb);
	rcu_read_unlock();

	return res;
}

/*
 * This function does lazy skb cloning in the hope that most packets
 * are discarded by BPF.
 *
 * Note the tricky part: we DO mangle the shared skb! skb->data, skb->len
 * and skb->cb are mangled. It works because (and until) packets
 * falling here are owned by the current CPU. Output packets are cloned
 * by dev_queue_xmit_nit(), input packets are processed by net_bh
 * sequentially, so that if we return the skb to its original state on exit,
 * we will not harm anyone.
 */

static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
		      struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct sockaddr_ll *sll;
	struct packet_sock *po;
	u8 *skb_head = skb->data;
	int skb_len = skb->len;
	unsigned int snaplen, res;

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = pt->af_packet_priv;
	po = pkt_sk(sk);

	if (!net_eq(dev_net(dev), sock_net(sk)))
		goto drop;

	skb->dev = dev;

	if (dev->header_ops) {
		/* The device has an explicit notion of ll header,
		 * exported to higher levels.
		 *
		 * Otherwise, the device hides details of its frame
		 * structure, so that corresponding packet head is
		 * never delivered to user.
		 */
		if (sk->sk_type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb_mac_header(skb));
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb_network_offset(skb));
		}
	}

	snaplen = skb->len;

	res = run_filter(skb, sk, snaplen);
	if (!res)
		goto drop_n_restore;
	if (snaplen > res)
		snaplen = res;

	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
	    (unsigned)sk->sk_rcvbuf)
		goto drop_n_acct;

	if (skb_shared(skb)) {
		struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
		if (nskb == NULL)
			goto drop_n_acct;

		if (skb_head != skb->data) {
			skb->data = skb_head;
			skb->len = skb_len;
		}
		kfree_skb(skb);
		skb = nskb;
	}

	BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
		     sizeof(skb->cb));

	sll = &PACKET_SKB_CB(skb)->sa.ll;
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	if (unlikely(po->origdev))
		sll->sll_ifindex = orig_dev->ifindex;
	else
		sll->sll_ifindex = dev->ifindex;

	sll->sll_halen = dev_parse_header(skb, sll->sll_addr);

	PACKET_SKB_CB(skb)->origlen = skb->len;

	if (pskb_trim(skb, snaplen))
		goto drop_n_acct;

	skb_set_owner_r(skb, sk);
	skb->dev = NULL;
	skb_dst_drop(skb);

	/* drop conntrack reference */
	nf_reset(skb);

	spin_lock(&sk->sk_receive_queue.lock);
	po->stats.tp_packets++;
	skb->dropcount = atomic_read(&sk->sk_drops);
	__skb_queue_tail(&sk->sk_receive_queue, skb);
	spin_unlock(&sk->sk_receive_queue.lock);
	sk->sk_data_ready(sk, skb->len);
	return 0;

drop_n_acct:
	po->stats.tp_drops = atomic_inc_return(&sk->sk_drops);

drop_n_restore:
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb_len;
	}
drop:
	consume_skb(skb);
	return 0;
}

static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
		       struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct packet_sock *po;
	struct sockaddr_ll *sll;
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;
	u8 *skb_head = skb->data;
	int skb_len = skb->len;
	unsigned int snaplen, res;
	unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
	unsigned short macoff, netoff, hdrlen;
	struct sk_buff *copy_skb = NULL;
	struct timeval tv;
	struct timespec ts;
	struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = pt->af_packet_priv;
	po = pkt_sk(sk);

	if (!net_eq(dev_net(dev), sock_net(sk)))
		goto drop;

	if (dev->header_ops) {
		if (sk->sk_type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb_mac_header(skb));
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb_network_offset(skb));
		}
	}

	if (skb->ip_summed == CHECKSUM_PARTIAL)
		status |= TP_STATUS_CSUMNOTREADY;

	snaplen = skb->len;

	res = run_filter(skb, sk, snaplen);
	if (!res)
		goto drop_n_restore;
	if (snaplen > res)
		snaplen = res;

	if (sk->sk_type == SOCK_DGRAM) {
		macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 +
				  po->tp_reserve;
	} else {
		unsigned maclen = skb_network_offset(skb);
		netoff = TPACKET_ALIGN(po->tp_hdrlen +
				       (maclen < 16 ? 16 : maclen)) +
			po->tp_reserve;
		macoff = netoff - maclen;
	}

	if (macoff + snaplen > po->rx_ring.frame_size) {
		if (po->copy_thresh &&
		    atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
		    (unsigned)sk->sk_rcvbuf) {
			if (skb_shared(skb)) {
				copy_skb = skb_clone(skb, GFP_ATOMIC);
			} else {
				copy_skb = skb_get(skb);
				skb_head = skb->data;
			}
			if (copy_skb)
				skb_set_owner_r(copy_skb, sk);
		}
		snaplen = po->rx_ring.frame_size - macoff;
		if ((int)snaplen < 0)
			snaplen = 0;
	}

	spin_lock(&sk->sk_receive_queue.lock);
	h.raw = packet_current_frame(po, &po->rx_ring, TP_STATUS_KERNEL);
	if (!h.raw)
		goto ring_is_full;
	packet_increment_head(&po->rx_ring);
	po->stats.tp_packets++;
	if (copy_skb) {
		status |= TP_STATUS_COPY;
		__skb_queue_tail(&sk->sk_receive_queue, copy_skb);
	}
	if (!po->stats.tp_drops)
		status &= ~TP_STATUS_LOSING;
	spin_unlock(&sk->sk_receive_queue.lock);

	skb_copy_bits(skb, 0, h.raw + macoff, snaplen);

	switch (po->tp_version) {
	case TPACKET_V1:
		h.h1->tp_len = skb->len;
		h.h1->tp_snaplen = snaplen;
		h.h1->tp_mac = macoff;
		h.h1->tp_net = netoff;
		if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
				&& shhwtstamps->syststamp.tv64)
			tv = ktime_to_timeval(shhwtstamps->syststamp);
		else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
				&& shhwtstamps->hwtstamp.tv64)
			tv = ktime_to_timeval(shhwtstamps->hwtstamp);
		else if (skb->tstamp.tv64)
			tv = ktime_to_timeval(skb->tstamp);
		else
			do_gettimeofday(&tv);
		h.h1->tp_sec = tv.tv_sec;
		h.h1->tp_usec = tv.tv_usec;
		hdrlen = sizeof(*h.h1);
		break;
	case TPACKET_V2:
		h.h2->tp_len = skb->len;
		h.h2->tp_snaplen = snaplen;
		h.h2->tp_mac = macoff;
		h.h2->tp_net = netoff;
		if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
				&& shhwtstamps->syststamp.tv64)
			ts = ktime_to_timespec(shhwtstamps->syststamp);
		else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
				&& shhwtstamps->hwtstamp.tv64)
			ts = ktime_to_timespec(shhwtstamps->hwtstamp);
		else if (skb->tstamp.tv64)
			ts = ktime_to_timespec(skb->tstamp);
		else
			getnstimeofday(&ts);
		h.h2->tp_sec = ts.tv_sec;
		h.h2->tp_nsec = ts.tv_nsec;
		if (vlan_tx_tag_present(skb)) {
			h.h2->tp_vlan_tci = vlan_tx_tag_get(skb);
			status |= TP_STATUS_VLAN_VALID;
		} else {
			h.h2->tp_vlan_tci = 0;
		}
		hdrlen = sizeof(*h.h2);
		break;
	default:
		BUG();
	}

	sll = h.raw + TPACKET_ALIGN(hdrlen);
	sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	if (unlikely(po->origdev))
		sll->sll_ifindex = orig_dev->ifindex;
	else
		sll->sll_ifindex = dev->ifindex;

	__packet_set_status(po, h.raw, status);
	smp_mb();
#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
	{
		u8 *start, *end;

		end = (u8 *)PAGE_ALIGN((unsigned long)h.raw + macoff + snaplen);
		for (start = h.raw; start < end; start += PAGE_SIZE)
			flush_dcache_page(pgv_to_page(start));
	}
#endif

	sk->sk_data_ready(sk, 0);

drop_n_restore:
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb_len;
	}
drop:
	kfree_skb(skb);
	return 0;

ring_is_full:
	po->stats.tp_drops++;
	spin_unlock(&sk->sk_receive_queue.lock);

	sk->sk_data_ready(sk, 0);
	kfree_skb(copy_skb);
	goto drop_n_restore;
}

static void tpacket_destruct_skb(struct sk_buff *skb)
{
	struct packet_sock *po = pkt_sk(skb->sk);
	void *ph;

	BUG_ON(skb == NULL);

	if (likely(po->tx_ring.pg_vec)) {
		ph = skb_shinfo(skb)->destructor_arg;
		BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING);
		BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
		atomic_dec(&po->tx_ring.pending);
		__packet_set_status(po, ph, TP_STATUS_AVAILABLE);
	}

	sock_wfree(skb);
}

static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
		void *frame, struct net_device *dev, int size_max,
		__be16 proto, unsigned char *addr)
{
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} ph;
	int to_write, offset, len, tp_len, nr_frags, len_max;
	struct socket *sock = po->sk.sk_socket;
	struct page *page;
	void *data;
	int err;

	ph.raw = frame;

	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = po->sk.sk_priority;
	skb->mark = po->sk.sk_mark;
	skb_shinfo(skb)->destructor_arg = ph.raw;

	switch (po->tp_version) {
	case TPACKET_V2:
		tp_len = ph.h2->tp_len;
		break;
	default:
		tp_len = ph.h1->tp_len;
		break;
	}
	if (unlikely(tp_len > size_max)) {
		pr_err("packet size is too long (%d > %d)\n", tp_len, size_max);
		return -EMSGSIZE;
	}

	skb_reserve(skb, LL_RESERVED_SPACE(dev));
	skb_reset_network_header(skb);

	data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll);
	to_write = tp_len;

	if (sock->type == SOCK_DGRAM) {
		err = dev_hard_header(skb, dev, ntohs(proto), addr,
				NULL, tp_len);
		if (unlikely(err < 0))
			return -EINVAL;
	} else if (dev->hard_header_len) {
		/* net device doesn't like empty head */
		if (unlikely(tp_len <= dev->hard_header_len)) {
			pr_err("packet size is too short (%d < %d)\n",
			       tp_len, dev->hard_header_len);
			return -EINVAL;
		}

		skb_push(skb, dev->hard_header_len);
		err = skb_store_bits(skb, 0, data,
				dev->hard_header_len);
		if (unlikely(err))
			return err;

		data += dev->hard_header_len;
		to_write -= dev->hard_header_len;
	}

	err = -EFAULT;
	offset = offset_in_page(data);
	len_max = PAGE_SIZE - offset;
	len = ((to_write > len_max) ? len_max : to_write);

	skb->data_len = to_write;
	skb->len += to_write;
	skb->truesize += to_write;
	atomic_add(to_write, &po->sk.sk_wmem_alloc);

	while (likely(to_write)) {
		nr_frags = skb_shinfo(skb)->nr_frags;

		if (unlikely(nr_frags >= MAX_SKB_FRAGS)) {
			pr_err("Packet exceed the number of skb frags(%lu)\n",
			       MAX_SKB_FRAGS);
			return -EFAULT;
		}

		page = pgv_to_page(data);
		data += len;
		flush_dcache_page(page);
		get_page(page);
		skb_fill_page_desc(skb, nr_frags, page, offset, len);
		to_write -= len;
		offset = 0;
		len_max = PAGE_SIZE;
		len = ((to_write > len_max) ? len_max : to_write);
	}

	return tp_len;
}

static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
{
	struct sk_buff *skb;
	struct net_device *dev;
	__be16 proto;
	int ifindex, err, reserve = 0;
	void *ph;
	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
	int tp_len, size_max;
	unsigned char *addr;
	int len_sum = 0;
	int status = 0;

	mutex_lock(&po->pg_vec_lock);

	err = -EBUSY;
	if (saddr == NULL) {
		ifindex	= po->ifindex;
		proto	= po->num;
		addr	= NULL;
	} else {
		err = -EINVAL;
		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
			goto out;
		if (msg->msg_namelen < (saddr->sll_halen
					+ offsetof(struct sockaddr_ll,
						sll_addr)))
			goto out;
		ifindex	= saddr->sll_ifindex;
		proto	= saddr->sll_protocol;
		addr	= saddr->sll_addr;
	}

	dev = dev_get_by_index(sock_net(&po->sk), ifindex);
	err = -ENXIO;
	if (unlikely(dev == NULL))
		goto out;

	reserve = dev->hard_header_len;

	err = -ENETDOWN;
	if (unlikely(!(dev->flags & IFF_UP)))
		goto out_put;

	size_max = po->tx_ring.frame_size
		- (po->tp_hdrlen - sizeof(struct sockaddr_ll));

	if (size_max > dev->mtu + reserve)
		size_max = dev->mtu + reserve;

	do {
		ph = packet_current_frame(po, &po->tx_ring,
				TP_STATUS_SEND_REQUEST);

		if (unlikely(ph == NULL)) {
			schedule();
			continue;
		}

		status = TP_STATUS_SEND_REQUEST;
		skb = sock_alloc_send_skb(&po->sk,
				LL_ALLOCATED_SPACE(dev)
				+ sizeof(struct sockaddr_ll),
				0, &err);

		if (unlikely(skb == NULL))
			goto out_status;

		tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
				addr);

		if (unlikely(tp_len < 0)) {
			if (po->tp_loss) {
				__packet_set_status(po, ph,
						TP_STATUS_AVAILABLE);
				packet_increment_head(&po->tx_ring);
				kfree_skb(skb);
				continue;
			} else {
				status = TP_STATUS_WRONG_FORMAT;
				err = tp_len;
				goto out_status;
			}
		}

		skb->destructor = tpacket_destruct_skb;
		__packet_set_status(po, ph, TP_STATUS_SENDING);
		atomic_inc(&po->tx_ring.pending);

		status = TP_STATUS_SEND_REQUEST;
		err = dev_queue_xmit(skb);
		if (unlikely(err > 0)) {
			err = net_xmit_errno(err);
			if (err && __packet_get_status(po, ph) ==
				   TP_STATUS_AVAILABLE) {
				/* skb was destructed already */
				skb = NULL;
				goto out_status;
			}
			/*
			 * skb was dropped but not destructed yet;
			 * let's treat it like congestion or err < 0
			 */
			err = 0;
		}
		packet_increment_head(&po->tx_ring);
		len_sum += tp_len;
	} while (likely((ph != NULL) ||
			((!(msg->msg_flags & MSG_DONTWAIT)) &&
			 (atomic_read(&po->tx_ring.pending))))
		);

	err = len_sum;
	goto out_put;

out_status:
	__packet_set_status(po, ph, status);
	kfree_skb(skb);
out_put:
	dev_put(dev);
out:
	mutex_unlock(&po->pg_vec_lock);
	return err;
}

static inline struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad,
					       size_t reserve, size_t len,
					       size_t linear, int noblock,
					       int *err)
{
	struct sk_buff *skb;

	/* Under a page?  Don't bother with paged skb. */
	if (prepad + len < PAGE_SIZE || !linear)
		linear = len;

	skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
				   err);
	if (!skb)
		return NULL;

	skb_reserve(skb, reserve);
	skb_put(skb, linear);
	skb->data_len = len - linear;
	skb->len += len - linear;

	return skb;
}

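/*
 * Worked example (added for illustration, not in the original): with
 * PAGE_SIZE = 4096, prepad = 32 and len = 9000, the caller's nonzero
 * `linear` hint is kept, so only `linear` bytes sit in the skb head and
 * the remaining len - linear bytes become paged data.  For len = 1500,
 * prepad + len < PAGE_SIZE rewrites linear = len, so the whole packet
 * is allocated linearly.
 */
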
static int packet_snd(struct socket *sock,
		      struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
	struct sk_buff *skb;
	struct net_device *dev;
	__be16 proto;
	unsigned char *addr;
	int ifindex, err, reserve = 0;
	struct virtio_net_hdr vnet_hdr = { 0 };
	int offset = 0;
	int vnet_hdr_len;
	struct packet_sock *po = pkt_sk(sk);
	unsigned short gso_type = 0;

	/*
	 *	Get and verify the address.
	 */

	if (saddr == NULL) {
		ifindex	= po->ifindex;
		proto	= po->num;
		addr	= NULL;
	} else {
		err = -EINVAL;
		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
			goto out;
		if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
			goto out;
		ifindex	= saddr->sll_ifindex;
		proto	= saddr->sll_protocol;
		addr	= saddr->sll_addr;
	}


	dev = dev_get_by_index(sock_net(sk), ifindex);
	err = -ENXIO;
	if (dev == NULL)
		goto out_unlock;
	if (sock->type == SOCK_RAW)
		reserve = dev->hard_header_len;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_unlock;

	if (po->has_vnet_hdr) {
		vnet_hdr_len = sizeof(vnet_hdr);

		err = -EINVAL;
		if (len < vnet_hdr_len)
			goto out_unlock;

		len -= vnet_hdr_len;

		err = memcpy_fromiovec((void *)&vnet_hdr, msg->msg_iov,
				       vnet_hdr_len);
		if (err < 0)
			goto out_unlock;

		if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
		    (vnet_hdr.csum_start + vnet_hdr.csum_offset + 2 >
		      vnet_hdr.hdr_len))
			vnet_hdr.hdr_len = vnet_hdr.csum_start +
						 vnet_hdr.csum_offset + 2;

		err = -EINVAL;
		if (vnet_hdr.hdr_len > len)
			goto out_unlock;

		if (vnet_hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
			switch (vnet_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
			case VIRTIO_NET_HDR_GSO_TCPV4:
				gso_type = SKB_GSO_TCPV4;
				break;
			case VIRTIO_NET_HDR_GSO_TCPV6:
				gso_type = SKB_GSO_TCPV6;
				break;
			case VIRTIO_NET_HDR_GSO_UDP:
				gso_type = SKB_GSO_UDP;
				break;
			default:
				goto out_unlock;
			}

			if (vnet_hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
				gso_type |= SKB_GSO_TCP_ECN;

			if (vnet_hdr.gso_size == 0)
				goto out_unlock;

		}
	}

	err = -EMSGSIZE;
	if (!gso_type && (len > dev->mtu + reserve + VLAN_HLEN))
		goto out_unlock;

	err = -ENOBUFS;
	skb = packet_alloc_skb(sk, LL_ALLOCATED_SPACE(dev),
			       LL_RESERVED_SPACE(dev), len, vnet_hdr.hdr_len,
			       msg->msg_flags & MSG_DONTWAIT, &err);
	if (skb == NULL)
		goto out_unlock;

	skb_set_network_header(skb, reserve);

	err = -EINVAL;
	if (sock->type == SOCK_DGRAM &&
	    (offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len)) < 0)
		goto out_free;

	/* Returns -EFAULT on error */
	err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len);
	if (err)
		goto out_free;
	err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
	if (err < 0)
		goto out_free;

	if (!gso_type && (len > dev->mtu + reserve)) {
		/* Earlier code assumed this would be a VLAN pkt,
		 * double-check this now that we have the actual
		 * packet in hand.
		 */
		struct ethhdr *ehdr;
		skb_reset_mac_header(skb);
		ehdr = eth_hdr(skb);
		if (ehdr->h_proto != htons(ETH_P_8021Q)) {
			err = -EMSGSIZE;
			goto out_free;
		}
	}

	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	if (po->has_vnet_hdr) {
		if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
			if (!skb_partial_csum_set(skb, vnet_hdr.csum_start,
						  vnet_hdr.csum_offset)) {
				err = -EINVAL;
				goto out_free;
			}
		}

		skb_shinfo(skb)->gso_size = vnet_hdr.gso_size;
		skb_shinfo(skb)->gso_type = gso_type;

		/* Header must be checked, and gso_segs computed. */
		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
		skb_shinfo(skb)->gso_segs = 0;

		len += vnet_hdr_len;
	}

	/*
	 *	Now send it
	 */

	err = dev_queue_xmit(skb);
	if (err > 0 && (err = net_xmit_errno(err)) != 0)
		goto out_unlock;

	dev_put(dev);

	return len;

out_free:
	kfree_skb(skb);
out_unlock:
	if (dev)
		dev_put(dev);
out:
	return err;
}

static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
		struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	if (po->tx_ring.pg_vec)
		return tpacket_snd(po, msg);
	else
		return packet_snd(sock, msg, len);
}

/*
 *	Close a PACKET socket. This is fairly simple. We immediately go
 *	to 'closed' state and remove our protocol entry in the device list.
 */

static int packet_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po;
	struct net *net;
	struct tpacket_req req;

	if (!sk)
		return 0;

	net = sock_net(sk);
	po = pkt_sk(sk);

	spin_lock_bh(&net->packet.sklist_lock);
	sk_del_node_init_rcu(sk);
	sock_prot_inuse_add(net, sk->sk_prot, -1);
	spin_unlock_bh(&net->packet.sklist_lock);

	spin_lock(&po->bind_lock);
	if (po->running) {
		/*
		 * Remove from protocol table
		 */
		po->running = 0;
		po->num = 0;
		__dev_remove_pack(&po->prot_hook);
		__sock_put(sk);
	}
	if (po->prot_hook.dev) {
		dev_put(po->prot_hook.dev);
		po->prot_hook.dev = NULL;
	}
	spin_unlock(&po->bind_lock);

	packet_flush_mclist(sk);

	memset(&req, 0, sizeof(req));

	if (po->rx_ring.pg_vec)
		packet_set_ring(sk, &req, 1, 0);

	if (po->tx_ring.pg_vec)
		packet_set_ring(sk, &req, 1, 1);

	synchronize_net();
	/*
	 *	Now the socket is dead. No more input will appear.
	 */
	sock_orphan(sk);
	sock->sk = NULL;

	/* Purge queues */

	skb_queue_purge(&sk->sk_receive_queue);
	sk_refcnt_debug_release(sk);

	sock_put(sk);
	return 0;
}

/*
 *	Attach a packet hook.
 */

static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
{
	struct packet_sock *po = pkt_sk(sk);
	/*
	 *	Detach an existing hook if present.
	 */

	lock_sock(sk);

	spin_lock(&po->bind_lock);
	if (po->running) {
		__sock_put(sk);
		po->running = 0;
		po->num = 0;
		spin_unlock(&po->bind_lock);
		dev_remove_pack(&po->prot_hook);
		spin_lock(&po->bind_lock);
	}

	po->num = protocol;
	po->prot_hook.type = protocol;
	if (po->prot_hook.dev)
		dev_put(po->prot_hook.dev);
	po->prot_hook.dev = dev;

	po->ifindex = dev ? dev->ifindex : 0;

	if (protocol == 0)
		goto out_unlock;

	if (!dev || (dev->flags & IFF_UP)) {
		dev_add_pack(&po->prot_hook);
		sock_hold(sk);
		po->running = 1;
	} else {
		sk->sk_err = ENETDOWN;
		if (!sock_flag(sk, SOCK_DEAD))
			sk->sk_error_report(sk);
	}

out_unlock:
	spin_unlock(&po->bind_lock);
	release_sock(sk);
	return 0;
}

/*
 *	Bind a packet socket to a device
 */

static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
			    int addr_len)
{
	struct sock *sk = sock->sk;
	char name[15];
	struct net_device *dev;
	int err = -ENODEV;

	/*
	 *	Check legality
	 */

	if (addr_len != sizeof(struct sockaddr))
		return -EINVAL;
	strlcpy(name, uaddr->sa_data, sizeof(name));

	dev = dev_get_by_name(sock_net(sk), name);
	if (dev)
		err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
	return err;
}

static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
	struct sock *sk = sock->sk;
	struct net_device *dev = NULL;
	int err;


	/*
	 *	Check legality
	 */

	if (addr_len < sizeof(struct sockaddr_ll))
		return -EINVAL;
	if (sll->sll_family != AF_PACKET)
		return -EINVAL;

	if (sll->sll_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex);
		if (dev == NULL)
			goto out;
	}
	err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);

out:
	return err;
}

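/*
 * Userspace view, for illustration only (not part of this file): binding
 * an AF_PACKET socket to one interface goes through packet_bind() above.
 * A hedged sketch:
 *
 *	int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
 *	struct sockaddr_ll sll = {
 *		.sll_family   = AF_PACKET,
 *		.sll_protocol = htons(ETH_P_ALL),
 *		.sll_ifindex  = if_nametoindex("eth0"),
 *	};
 *	bind(fd, (struct sockaddr *)&sll, sizeof(sll));
 *
 * After this bind the socket holds a reference on the device (see the
 * po->prot_hook.dev handling above), which is what this commit adds.
 */
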
static struct proto packet_proto = {
	.name	  = "PACKET",
	.owner	  = THIS_MODULE,
	.obj_size = sizeof(struct packet_sock),
};

/*
 *	Create a packet of type SOCK_PACKET.
 */

static int packet_create(struct net *net, struct socket *sock, int protocol,
			 int kern)
{
	struct sock *sk;
	struct packet_sock *po;
	__be16 proto = (__force __be16)protocol; /* weird, but documented */
	int err;

	if (!capable(CAP_NET_RAW))
		return -EPERM;
	if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
	    sock->type != SOCK_PACKET)
		return -ESOCKTNOSUPPORT;

	sock->state = SS_UNCONNECTED;

	err = -ENOBUFS;
	sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
	if (sk == NULL)
		goto out;

	sock->ops = &packet_ops;
	if (sock->type == SOCK_PACKET)
		sock->ops = &packet_ops_spkt;

	sock_init_data(sock, sk);

	po = pkt_sk(sk);
	sk->sk_family = PF_PACKET;
	po->num = proto;

	sk->sk_destruct = packet_sock_destruct;
	sk_refcnt_debug_inc(sk);

	/*
	 *	Attach a protocol block
	 */

	spin_lock_init(&po->bind_lock);
	mutex_init(&po->pg_vec_lock);
	po->prot_hook.func = packet_rcv;

	if (sock->type == SOCK_PACKET)
		po->prot_hook.func = packet_rcv_spkt;

	po->prot_hook.af_packet_priv = sk;

	if (proto) {
		po->prot_hook.type = proto;
		dev_add_pack(&po->prot_hook);
		sock_hold(sk);
		po->running = 1;
	}

	spin_lock_bh(&net->packet.sklist_lock);
	sk_add_node_rcu(sk, &net->packet.sklist);
	sock_prot_inuse_add(net, &packet_proto, 1);
	spin_unlock_bh(&net->packet.sklist_lock);

	return 0;
out:
	return err;
}

static int packet_recv_error(struct sock *sk, struct msghdr *msg, int len)
{
	struct sock_exterr_skb *serr;
	struct sk_buff *skb, *skb2;
	int copied, err;

	err = -EAGAIN;
	skb = skb_dequeue(&sk->sk_error_queue);
	if (skb == NULL)
		goto out;

	copied = skb->len;
	if (copied > len) {
		msg->msg_flags |= MSG_TRUNC;
		copied = len;
	}
	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
	if (err)
		goto out_free_skb;

	sock_recv_timestamp(msg, sk, skb);

	serr = SKB_EXT_ERR(skb);
	put_cmsg(msg, SOL_PACKET, PACKET_TX_TIMESTAMP,
		 sizeof(serr->ee), &serr->ee);

	msg->msg_flags |= MSG_ERRQUEUE;
	err = copied;

	/* Reset and regenerate socket error */
	spin_lock_bh(&sk->sk_error_queue.lock);
	sk->sk_err = 0;
	if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
		sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
		spin_unlock_bh(&sk->sk_error_queue.lock);
		sk->sk_error_report(sk);
	} else
		spin_unlock_bh(&sk->sk_error_queue.lock);

out_free_skb:
	kfree_skb(skb);
out:
	return err;
}

/*
 *	Pull a packet from our receive queue and hand it to the user.
 *	If necessary we block.
 */

static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
			  struct msghdr *msg, size_t len, int flags)
{
	struct sock *sk = sock->sk;
	struct sk_buff *skb;
	int copied, err;
	struct sockaddr_ll *sll;
	int vnet_hdr_len = 0;

	err = -EINVAL;
	if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT|MSG_ERRQUEUE))
		goto out;

#if 0
	/* What error should we return now? EUNATTACH? */
	if (pkt_sk(sk)->ifindex < 0)
		return -ENODEV;
#endif

	if (flags & MSG_ERRQUEUE) {
		err = packet_recv_error(sk, msg, len);
		goto out;
	}

	/*
	 *	Call the generic datagram receiver. This handles all sorts
	 *	of horrible races and re-entrancy so we can forget about it
	 *	in the protocol layers.
	 *
	 *	Now it will return ENETDOWN if the device has just gone down,
	 *	but then it will block.
	 */

	skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);

	/*
	 *	An error occurred so return it. Because skb_recv_datagram()
	 *	handles the blocking we don't need to see and worry about
	 *	blocking retries.
	 */

	if (skb == NULL)
		goto out;

	if (pkt_sk(sk)->has_vnet_hdr) {
		struct virtio_net_hdr vnet_hdr = { 0 };

		err = -EINVAL;
		vnet_hdr_len = sizeof(vnet_hdr);
		if (len < vnet_hdr_len)
			goto out_free;

		len -= vnet_hdr_len;

		if (skb_is_gso(skb)) {
			struct skb_shared_info *sinfo = skb_shinfo(skb);

			/* This is a hint as to how much should be linear. */
			vnet_hdr.hdr_len = skb_headlen(skb);
			vnet_hdr.gso_size = sinfo->gso_size;
			if (sinfo->gso_type & SKB_GSO_TCPV4)
				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
			else if (sinfo->gso_type & SKB_GSO_TCPV6)
				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
			else if (sinfo->gso_type & SKB_GSO_UDP)
				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP;
			else if (sinfo->gso_type & SKB_GSO_FCOE)
				goto out_free;
			else
				BUG();
			if (sinfo->gso_type & SKB_GSO_TCP_ECN)
				vnet_hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
		} else
			vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;

		if (skb->ip_summed == CHECKSUM_PARTIAL) {
			vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
			vnet_hdr.csum_start = skb_checksum_start_offset(skb);
			vnet_hdr.csum_offset = skb->csum_offset;
		} /* else everything is zero */

		err = memcpy_toiovec(msg->msg_iov, (void *)&vnet_hdr,
				     vnet_hdr_len);
		if (err < 0)
			goto out_free;
	}

	/*
	 *	If the address length field is there to be filled in, we fill
	 *	it in now.
	 */

	sll = &PACKET_SKB_CB(skb)->sa.ll;
	if (sock->type == SOCK_PACKET)
		msg->msg_namelen = sizeof(struct sockaddr_pkt);
	else
		msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);

	/*
	 *	You lose any data beyond the buffer you gave. If it worries a
	 *	user program they can ask the device for its MTU anyway.
	 */

	copied = skb->len;
	if (copied > len) {
		copied = len;
		msg->msg_flags |= MSG_TRUNC;
	}

	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
	if (err)
		goto out_free;

	sock_recv_ts_and_drops(msg, sk, skb);

	if (msg->msg_name)
		memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
		       msg->msg_namelen);

	if (pkt_sk(sk)->auxdata) {
		struct tpacket_auxdata aux;

		aux.tp_status = TP_STATUS_USER;
		if (skb->ip_summed == CHECKSUM_PARTIAL)
			aux.tp_status |= TP_STATUS_CSUMNOTREADY;
		aux.tp_len = PACKET_SKB_CB(skb)->origlen;
		aux.tp_snaplen = skb->len;
		aux.tp_mac = 0;
		aux.tp_net = skb_network_offset(skb);
		if (vlan_tx_tag_present(skb)) {
			aux.tp_vlan_tci = vlan_tx_tag_get(skb);
			aux.tp_status |= TP_STATUS_VLAN_VALID;
		} else {
			aux.tp_vlan_tci = 0;
		}
		put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
	}

	/*
	 *	Free or return the buffer as appropriate. Again this
	 *	hides all the races and re-entrancy issues from us.
	 */
	err = vnet_hdr_len + ((flags&MSG_TRUNC) ? skb->len : copied);

out_free:
	skb_free_datagram(sk, skb);
out:
	return err;
}

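/*
 * For illustration (not part of the original file): userspace opts into
 * the tpacket_auxdata cmsg produced above and reads it alongside each
 * packet.  A hedged sketch:
 *
 *	int one = 1;
 *	setsockopt(fd, SOL_PACKET, PACKET_AUXDATA, &one, sizeof(one));
 *	...
 *	recvmsg(fd, &msg, 0);
 *	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg))
 *		if (cmsg->cmsg_level == SOL_PACKET &&
 *		    cmsg->cmsg_type == PACKET_AUXDATA)
 *			aux = (struct tpacket_auxdata *)CMSG_DATA(cmsg);
 */
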
static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
			       int *uaddr_len, int peer)
{
	struct net_device *dev;
	struct sock *sk = sock->sk;

	if (peer)
		return -EOPNOTSUPP;

	uaddr->sa_family = AF_PACKET;
	rcu_read_lock();
	dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
	if (dev)
		strncpy(uaddr->sa_data, dev->name, 14);
	else
		memset(uaddr->sa_data, 0, 14);
	rcu_read_unlock();
	*uaddr_len = sizeof(*uaddr);

	return 0;
}

static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
			  int *uaddr_len, int peer)
{
	struct net_device *dev;
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr);

	if (peer)
		return -EOPNOTSUPP;

	sll->sll_family = AF_PACKET;
	sll->sll_ifindex = po->ifindex;
	sll->sll_protocol = po->num;
	sll->sll_pkttype = 0;
	rcu_read_lock();
	dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex);
	if (dev) {
		sll->sll_hatype = dev->type;
		sll->sll_halen = dev->addr_len;
		memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
	} else {
		sll->sll_hatype = 0;	/* Bad: we have no ARPHRD_UNSPEC */
		sll->sll_halen = 0;
	}
	rcu_read_unlock();
	*uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;

	return 0;
}

static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
			 int what)
{
	switch (i->type) {
	case PACKET_MR_MULTICAST:
		if (i->alen != dev->addr_len)
			return -EINVAL;
		if (what > 0)
			return dev_mc_add(dev, i->addr);
		else
			return dev_mc_del(dev, i->addr);
		break;
	case PACKET_MR_PROMISC:
		return dev_set_promiscuity(dev, what);
		break;
	case PACKET_MR_ALLMULTI:
		return dev_set_allmulti(dev, what);
		break;
	case PACKET_MR_UNICAST:
		if (i->alen != dev->addr_len)
			return -EINVAL;
		if (what > 0)
			return dev_uc_add(dev, i->addr);
		else
			return dev_uc_del(dev, i->addr);
		break;
	default:
		break;
	}
	return 0;
}

static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
{
	for ( ; i; i = i->next) {
		if (i->ifindex == dev->ifindex)
			packet_dev_mc(dev, i, what);
	}
}

0fb375fb 1849static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
1da177e4
LT
1850{
1851 struct packet_sock *po = pkt_sk(sk);
1852 struct packet_mclist *ml, *i;
1853 struct net_device *dev;
1854 int err;
1855
1856 rtnl_lock();
1857
1858 err = -ENODEV;
3b1e0a65 1859 dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex);
1da177e4
LT
1860 if (!dev)
1861 goto done;
1862
1863 err = -EINVAL;
1162563f 1864 if (mreq->mr_alen > dev->addr_len)
1da177e4
LT
1865 goto done;
1866
1867 err = -ENOBUFS;
8b3a7005 1868 i = kmalloc(sizeof(*i), GFP_KERNEL);
1da177e4
LT
1869 if (i == NULL)
1870 goto done;
1871
1872 err = 0;
1873 for (ml = po->mclist; ml; ml = ml->next) {
1874 if (ml->ifindex == mreq->mr_ifindex &&
1875 ml->type == mreq->mr_type &&
1876 ml->alen == mreq->mr_alen &&
1877 memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1878 ml->count++;
1879 /* Free the new element ... */
1880 kfree(i);
1881 goto done;
1882 }
1883 }
1884
1885 i->type = mreq->mr_type;
1886 i->ifindex = mreq->mr_ifindex;
1887 i->alen = mreq->mr_alen;
1888 memcpy(i->addr, mreq->mr_address, i->alen);
1889 i->count = 1;
1890 i->next = po->mclist;
1891 po->mclist = i;
2aeb0b88
WC
1892 err = packet_dev_mc(dev, i, 1);
1893 if (err) {
1894 po->mclist = i->next;
1895 kfree(i);
1896 }
1da177e4
LT
1897
1898done:
1899 rtnl_unlock();
1900 return err;
1901}

static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
{
	struct packet_mclist *ml, **mlp;

	rtnl_lock();

	for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
		if (ml->ifindex == mreq->mr_ifindex &&
		    ml->type == mreq->mr_type &&
		    ml->alen == mreq->mr_alen &&
		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
			if (--ml->count == 0) {
				struct net_device *dev;
				*mlp = ml->next;
				dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
				if (dev)
					packet_dev_mc(dev, ml, -1);
				kfree(ml);
			}
			rtnl_unlock();
			return 0;
		}
	}
	rtnl_unlock();
	return -EADDRNOTAVAIL;
}

static void packet_flush_mclist(struct sock *sk)
{
	struct packet_sock *po = pkt_sk(sk);
	struct packet_mclist *ml;

	if (!po->mclist)
		return;

	rtnl_lock();
	while ((ml = po->mclist) != NULL) {
		struct net_device *dev;

		po->mclist = ml->next;
		dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
		if (dev != NULL)
			packet_dev_mc(dev, ml, -1);
		kfree(ml);
	}
	rtnl_unlock();
}

static int
packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	int ret;

	if (level != SOL_PACKET)
		return -ENOPROTOOPT;

	switch (optname) {
	case PACKET_ADD_MEMBERSHIP:
	case PACKET_DROP_MEMBERSHIP:
	{
		struct packet_mreq_max mreq;
		int len = optlen;
		memset(&mreq, 0, sizeof(mreq));
		if (len < sizeof(struct packet_mreq))
			return -EINVAL;
		if (len > sizeof(mreq))
			len = sizeof(mreq);
		if (copy_from_user(&mreq, optval, len))
			return -EFAULT;
		if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
			return -EINVAL;
		if (optname == PACKET_ADD_MEMBERSHIP)
			ret = packet_mc_add(sk, &mreq);
		else
			ret = packet_mc_drop(sk, &mreq);
		return ret;
	}

	case PACKET_RX_RING:
	case PACKET_TX_RING:
	{
		struct tpacket_req req;

		if (optlen < sizeof(req))
			return -EINVAL;
		if (pkt_sk(sk)->has_vnet_hdr)
			return -EINVAL;
		if (copy_from_user(&req, optval, sizeof(req)))
			return -EFAULT;
		return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING);
	}
	case PACKET_COPY_THRESH:
	{
		int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		pkt_sk(sk)->copy_thresh = val;
		return 0;
	}
	case PACKET_VERSION:
	{
		int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;
		switch (val) {
		case TPACKET_V1:
		case TPACKET_V2:
			po->tp_version = val;
			return 0;
		default:
			return -EINVAL;
		}
	}
	case PACKET_RESERVE:
	{
		unsigned int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;
		po->tp_reserve = val;
		return 0;
	}
	case PACKET_LOSS:
	{
		unsigned int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;
		po->tp_loss = !!val;
		return 0;
	}
	case PACKET_AUXDATA:
	{
		int val;

		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->auxdata = !!val;
		return 0;
	}
	case PACKET_ORIGDEV:
	{
		int val;

		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->origdev = !!val;
		return 0;
	}
	case PACKET_VNET_HDR:
	{
		int val;

		if (sock->type != SOCK_RAW)
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->has_vnet_hdr = !!val;
		return 0;
	}
	case PACKET_TIMESTAMP:
	{
		int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->tp_tstamp = val;
		return 0;
	}
	default:
		return -ENOPROTOOPT;
	}
}
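
/*
 * Illustrative userspace sketch (an assumption, not kernel code): the
 * PACKET_VERSION case above refuses changes with -EBUSY once a ring
 * exists, so the version has to be selected before PACKET_RX_RING.
 * Sizes assume PAGE_SIZE == 4096; two 2048-byte frames fit per block,
 * so tp_frame_nr must be 2 * 64 = 128.
 *
 *	int ver = TPACKET_V2;
 *	struct tpacket_req req = {
 *		.tp_block_size = 4096,
 *		.tp_block_nr   = 64,
 *		.tp_frame_size = 2048,
 *		.tp_frame_nr   = 128,
 *	};
 *
 *	setsockopt(fd, SOL_PACKET, PACKET_VERSION, &ver, sizeof(ver));
 *	setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));
 */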

static int packet_getsockopt(struct socket *sock, int level, int optname,
			     char __user *optval, int __user *optlen)
{
	int len;
	int val;
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	void *data;
	struct tpacket_stats st;

	if (level != SOL_PACKET)
		return -ENOPROTOOPT;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < 0)
		return -EINVAL;

	switch (optname) {
	case PACKET_STATISTICS:
		if (len > sizeof(struct tpacket_stats))
			len = sizeof(struct tpacket_stats);
		spin_lock_bh(&sk->sk_receive_queue.lock);
		st = po->stats;
		memset(&po->stats, 0, sizeof(st));
		spin_unlock_bh(&sk->sk_receive_queue.lock);
		st.tp_packets += st.tp_drops;

		data = &st;
		break;
	case PACKET_AUXDATA:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->auxdata;

		data = &val;
		break;
	case PACKET_ORIGDEV:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->origdev;

		data = &val;
		break;
	case PACKET_VNET_HDR:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->has_vnet_hdr;

		data = &val;
		break;
	case PACKET_VERSION:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->tp_version;
		data = &val;
		break;
	case PACKET_HDRLEN:
		if (len > sizeof(int))
			len = sizeof(int);
		if (copy_from_user(&val, optval, len))
			return -EFAULT;
		switch (val) {
		case TPACKET_V1:
			val = sizeof(struct tpacket_hdr);
			break;
		case TPACKET_V2:
			val = sizeof(struct tpacket2_hdr);
			break;
		default:
			return -EINVAL;
		}
		data = &val;
		break;
	case PACKET_RESERVE:
		if (len > sizeof(unsigned int))
			len = sizeof(unsigned int);
		val = po->tp_reserve;
		data = &val;
		break;
	case PACKET_LOSS:
		if (len > sizeof(unsigned int))
			len = sizeof(unsigned int);
		val = po->tp_loss;
		data = &val;
		break;
	case PACKET_TIMESTAMP:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->tp_tstamp;
		data = &val;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (put_user(len, optlen))
		return -EFAULT;
	if (copy_to_user(optval, data, len))
		return -EFAULT;
	return 0;
}
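
/*
 * Illustrative userspace sketch (an assumption, not kernel code):
 * PACKET_HDRLEN above is value-result -- the caller passes a TPACKET
 * version in and gets that version's frame header length back.
 *
 *	int val = TPACKET_V2;
 *	socklen_t len = sizeof(val);
 *
 *	getsockopt(fd, SOL_PACKET, PACKET_HDRLEN, &val, &len);
 *	(val now holds sizeof(struct tpacket2_hdr))
 */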


/*
 * Device status change notifier: detach bound sockets when their device
 * goes down or unregisters, and reattach when it comes back up.
 */
static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
{
	struct sock *sk;
	struct hlist_node *node;
	struct net_device *dev = data;
	struct net *net = dev_net(dev);

	rcu_read_lock();
	sk_for_each_rcu(sk, node, &net->packet.sklist) {
		struct packet_sock *po = pkt_sk(sk);

		switch (msg) {
		case NETDEV_UNREGISTER:
			if (po->mclist)
				packet_dev_mclist(dev, po->mclist, -1);
			/* fallthrough */

		case NETDEV_DOWN:
			if (dev->ifindex == po->ifindex) {
				spin_lock(&po->bind_lock);
				if (po->running) {
					__dev_remove_pack(&po->prot_hook);
					__sock_put(sk);
					po->running = 0;
					sk->sk_err = ENETDOWN;
					if (!sock_flag(sk, SOCK_DEAD))
						sk->sk_error_report(sk);
				}
				if (msg == NETDEV_UNREGISTER) {
					po->ifindex = -1;
					if (po->prot_hook.dev)
						dev_put(po->prot_hook.dev);
					po->prot_hook.dev = NULL;
				}
				spin_unlock(&po->bind_lock);
			}
			break;
		case NETDEV_UP:
			if (dev->ifindex == po->ifindex) {
				spin_lock(&po->bind_lock);
				if (po->num && !po->running) {
					dev_add_pack(&po->prot_hook);
					sock_hold(sk);
					po->running = 1;
				}
				spin_unlock(&po->bind_lock);
			}
			break;
		}
	}
	rcu_read_unlock();
	return NOTIFY_DONE;
}


static int packet_ioctl(struct socket *sock, unsigned int cmd,
			unsigned long arg)
{
	struct sock *sk = sock->sk;

	switch (cmd) {
	case SIOCOUTQ:
	{
		int amount = sk_wmem_alloc_get(sk);

		return put_user(amount, (int __user *)arg);
	}
	case SIOCINQ:
	{
		struct sk_buff *skb;
		int amount = 0;

		spin_lock_bh(&sk->sk_receive_queue.lock);
		skb = skb_peek(&sk->sk_receive_queue);
		if (skb)
			amount = skb->len;
		spin_unlock_bh(&sk->sk_receive_queue.lock);
		return put_user(amount, (int __user *)arg);
	}
	case SIOCGSTAMP:
		return sock_get_timestamp(sk, (struct timeval __user *)arg);
	case SIOCGSTAMPNS:
		return sock_get_timestampns(sk, (struct timespec __user *)arg);

#ifdef CONFIG_INET
	case SIOCADDRT:
	case SIOCDELRT:
	case SIOCDARP:
	case SIOCGARP:
	case SIOCSARP:
	case SIOCGIFADDR:
	case SIOCSIFADDR:
	case SIOCGIFBRDADDR:
	case SIOCSIFBRDADDR:
	case SIOCGIFNETMASK:
	case SIOCSIFNETMASK:
	case SIOCGIFDSTADDR:
	case SIOCSIFDSTADDR:
	case SIOCSIFFLAGS:
		return inet_dgram_ops.ioctl(sock, cmd, arg);
#endif

	default:
		return -ENOIOCTLCMD;
	}
	return 0;
}
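
/*
 * Illustrative userspace sketch (an assumption, not kernel code): SIOCINQ
 * as handled above reports the length of the packet at the head of the
 * receive queue, which can size a buffer before the actual read.
 *
 *	int next_len = 0;
 *
 *	ioctl(fd, SIOCINQ, &next_len);
 *	(next_len is 0 if the receive queue is empty)
 */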

static unsigned int packet_poll(struct file *file, struct socket *sock,
				poll_table *wait)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	unsigned int mask = datagram_poll(file, sock, wait);

	spin_lock_bh(&sk->sk_receive_queue.lock);
	if (po->rx_ring.pg_vec) {
		if (!packet_previous_frame(po, &po->rx_ring, TP_STATUS_KERNEL))
			mask |= POLLIN | POLLRDNORM;
	}
	spin_unlock_bh(&sk->sk_receive_queue.lock);
	spin_lock_bh(&sk->sk_write_queue.lock);
	if (po->tx_ring.pg_vec) {
		if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE))
			mask |= POLLOUT | POLLWRNORM;
	}
	spin_unlock_bh(&sk->sk_write_queue.lock);
	return mask;
}
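
/*
 * Illustrative userspace sketch (an assumption, not kernel code): with an
 * RX ring mapped, poll() above reports POLLIN once the ring contains a
 * frame the kernel has handed to user space (its status is no longer
 * TP_STATUS_KERNEL), so a reader can block instead of spinning on the
 * status word.
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLRDNORM };
 *
 *	poll(&pfd, 1, -1);
 */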


/* Dirty? Well, I still have not found a better way to account
 * for user mmaps.
 */

static void packet_mm_open(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct socket *sock = file->private_data;
	struct sock *sk = sock->sk;

	if (sk)
		atomic_inc(&pkt_sk(sk)->mapped);
}

static void packet_mm_close(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct socket *sock = file->private_data;
	struct sock *sk = sock->sk;

	if (sk)
		atomic_dec(&pkt_sk(sk)->mapped);
}

static const struct vm_operations_struct packet_mmap_ops = {
	.open	= packet_mm_open,
	.close	= packet_mm_close,
};

static void free_pg_vec(struct pgv *pg_vec, unsigned int order,
			unsigned int len)
{
	int i;

	for (i = 0; i < len; i++) {
		if (likely(pg_vec[i].buffer)) {
			if (is_vmalloc_addr(pg_vec[i].buffer))
				vfree(pg_vec[i].buffer);
			else
				free_pages((unsigned long)pg_vec[i].buffer,
					   order);
			pg_vec[i].buffer = NULL;
		}
	}
	kfree(pg_vec);
}

static inline char *alloc_one_pg_vec_page(unsigned long order)
{
	char *buffer = NULL;
	gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP |
			  __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;

	buffer = (char *) __get_free_pages(gfp_flags, order);

	if (buffer)
		return buffer;

	/*
	 * __get_free_pages failed, fall back to vmalloc
	 */
	buffer = vzalloc((1 << order) * PAGE_SIZE);

	if (buffer)
		return buffer;

	/*
	 * vmalloc failed, let's dig into swap here
	 */
	gfp_flags &= ~__GFP_NORETRY;
	buffer = (char *)__get_free_pages(gfp_flags, order);
	if (buffer)
		return buffer;

	/*
	 * complete and utter failure
	 */
	return NULL;
}

static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order)
{
	unsigned int block_nr = req->tp_block_nr;
	struct pgv *pg_vec;
	int i;

	pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL);
	if (unlikely(!pg_vec))
		goto out;

	for (i = 0; i < block_nr; i++) {
		pg_vec[i].buffer = alloc_one_pg_vec_page(order);
		if (unlikely(!pg_vec[i].buffer))
			goto out_free_pgvec;
	}

out:
	return pg_vec;

out_free_pgvec:
	free_pg_vec(pg_vec, order, block_nr);
	pg_vec = NULL;
	goto out;
}

static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
			   int closing, int tx_ring)
{
	struct pgv *pg_vec = NULL;
	struct packet_sock *po = pkt_sk(sk);
	int was_running, order = 0;
	struct packet_ring_buffer *rb;
	struct sk_buff_head *rb_queue;
	__be16 num;
	int err;

	rb = tx_ring ? &po->tx_ring : &po->rx_ring;
	rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;

	err = -EBUSY;
	if (!closing) {
		if (atomic_read(&po->mapped))
			goto out;
		if (atomic_read(&rb->pending))
			goto out;
	}

	if (req->tp_block_nr) {
		/* Sanity tests and some calculations */
		err = -EBUSY;
		if (unlikely(rb->pg_vec))
			goto out;

		switch (po->tp_version) {
		case TPACKET_V1:
			po->tp_hdrlen = TPACKET_HDRLEN;
			break;
		case TPACKET_V2:
			po->tp_hdrlen = TPACKET2_HDRLEN;
			break;
		}

		err = -EINVAL;
		if (unlikely((int)req->tp_block_size <= 0))
			goto out;
		if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
			goto out;
		if (unlikely(req->tp_frame_size < po->tp_hdrlen +
					po->tp_reserve))
			goto out;
		if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
			goto out;

		rb->frames_per_block = req->tp_block_size/req->tp_frame_size;
		if (unlikely(rb->frames_per_block <= 0))
			goto out;
		if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
					req->tp_frame_nr))
			goto out;

		err = -ENOMEM;
		order = get_order(req->tp_block_size);
		pg_vec = alloc_pg_vec(req, order);
		if (unlikely(!pg_vec))
			goto out;
	}
	/* Done */
	else {
		err = -EINVAL;
		if (unlikely(req->tp_frame_nr))
			goto out;
	}

	lock_sock(sk);

	/* Detach socket from network */
	spin_lock(&po->bind_lock);
	was_running = po->running;
	num = po->num;
	if (was_running) {
		__dev_remove_pack(&po->prot_hook);
		po->num = 0;
		po->running = 0;
		__sock_put(sk);
	}
	spin_unlock(&po->bind_lock);

	synchronize_net();

	err = -EBUSY;
	mutex_lock(&po->pg_vec_lock);
	if (closing || atomic_read(&po->mapped) == 0) {
		err = 0;
		spin_lock_bh(&rb_queue->lock);
		swap(rb->pg_vec, pg_vec);
		rb->frame_max = (req->tp_frame_nr - 1);
		rb->head = 0;
		rb->frame_size = req->tp_frame_size;
		spin_unlock_bh(&rb_queue->lock);

		swap(rb->pg_vec_order, order);
		swap(rb->pg_vec_len, req->tp_block_nr);

		rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
		po->prot_hook.func = (po->rx_ring.pg_vec) ?
						tpacket_rcv : packet_rcv;
		skb_queue_purge(rb_queue);
		if (atomic_read(&po->mapped))
			pr_err("packet_mmap: vma is busy: %d\n",
			       atomic_read(&po->mapped));
	}
	mutex_unlock(&po->pg_vec_lock);

	spin_lock(&po->bind_lock);
	if (was_running && !po->running) {
		sock_hold(sk);
		po->running = 1;
		po->num = num;
		dev_add_pack(&po->prot_hook);
	}
	spin_unlock(&po->bind_lock);

	release_sock(sk);

	if (pg_vec)
		free_pg_vec(pg_vec, order, req->tp_block_nr);
out:
	return err;
}
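
/*
 * Illustrative sketch of a tpacket_req satisfying the sanity checks in
 * packet_set_ring() above (assumes PAGE_SIZE == 4096; not part of this
 * file):
 *
 *	tp_block_size = 8192	(multiple of PAGE_SIZE)
 *	tp_frame_size = 2048	(multiple of TPACKET_ALIGNMENT and at least
 *				 tp_hdrlen + tp_reserve)
 *	tp_block_nr   = 32
 *	tp_frame_nr   = 128	(tp_block_size / tp_frame_size = 4 frames
 *				 per block, times 32 blocks)
 *
 * Passing an all-zero request instead tears the ring down: tp_block_nr
 * of zero takes the "Done" branch, which requires tp_frame_nr to be zero
 * as well.
 */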

static int packet_mmap(struct file *file, struct socket *sock,
		       struct vm_area_struct *vma)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	unsigned long size, expected_size;
	struct packet_ring_buffer *rb;
	unsigned long start;
	int err = -EINVAL;
	int i;

	if (vma->vm_pgoff)
		return -EINVAL;

	mutex_lock(&po->pg_vec_lock);

	expected_size = 0;
	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
		if (rb->pg_vec) {
			expected_size += rb->pg_vec_len
						* rb->pg_vec_pages
						* PAGE_SIZE;
		}
	}

	if (expected_size == 0)
		goto out;

	size = vma->vm_end - vma->vm_start;
	if (size != expected_size)
		goto out;

	start = vma->vm_start;
	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
		if (rb->pg_vec == NULL)
			continue;

		for (i = 0; i < rb->pg_vec_len; i++) {
			struct page *page;
			void *kaddr = rb->pg_vec[i].buffer;
			int pg_num;

			for (pg_num = 0; pg_num < rb->pg_vec_pages; pg_num++) {
				page = pgv_to_page(kaddr);
				err = vm_insert_page(vma, start, page);
				if (unlikely(err))
					goto out;
				start += PAGE_SIZE;
				kaddr += PAGE_SIZE;
			}
		}
	}

	atomic_inc(&po->mapped);
	vma->vm_ops = &packet_mmap_ops;
	err = 0;

out:
	mutex_unlock(&po->pg_vec_lock);
	return err;
}
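
/*
 * Illustrative userspace sketch (an assumption, not kernel code): both
 * rings are mapped with a single mmap() whose length must equal the sum
 * of the RX and TX ring sizes, RX ring first; vm_pgoff must be zero.
 *
 *	size_t len = rx_req.tp_block_size * rx_req.tp_block_nr +
 *		     tx_req.tp_block_size * tx_req.tp_block_nr;
 *	void *ring = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *			  MAP_SHARED, fd, 0);
 */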

static const struct proto_ops packet_ops_spkt = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind_spkt,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname_spkt,
	.poll =		datagram_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	packet_sendmsg_spkt,
	.recvmsg =	packet_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};

static const struct proto_ops packet_ops = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname,
	.poll =		packet_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	packet_setsockopt,
	.getsockopt =	packet_getsockopt,
	.sendmsg =	packet_sendmsg,
	.recvmsg =	packet_recvmsg,
	.mmap =		packet_mmap,
	.sendpage =	sock_no_sendpage,
};

static const struct net_proto_family packet_family_ops = {
	.family =	PF_PACKET,
	.create =	packet_create,
	.owner =	THIS_MODULE,
};

static struct notifier_block packet_netdev_notifier = {
	.notifier_call =	packet_notifier,
};

#ifdef CONFIG_PROC_FS

static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	struct net *net = seq_file_net(seq);

	rcu_read_lock();
	return seq_hlist_start_head_rcu(&net->packet.sklist, *pos);
}

static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct net *net = seq_file_net(seq);
	return seq_hlist_next_rcu(v, &net->packet.sklist, pos);
}

static void packet_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}

static int packet_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "sk       RefCnt Type Proto  Iface R Rmem   User   Inode\n");
	else {
		struct sock *s = sk_entry(v);
		const struct packet_sock *po = pkt_sk(s);

		seq_printf(seq,
			   "%pK %-6d %-4d %04x   %-5d %1d %-6u %-6u %-6lu\n",
			   s,
			   atomic_read(&s->sk_refcnt),
			   s->sk_type,
			   ntohs(po->num),
			   po->ifindex,
			   po->running,
			   atomic_read(&s->sk_rmem_alloc),
			   sock_i_uid(s),
			   sock_i_ino(s));
	}

	return 0;
}
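
/*
 * Each /proc/net/packet line printed above maps to: socket address,
 * socket refcount, socket type, protocol number (host byte order),
 * bound ifindex, the running flag, receive-queue bytes, owning uid
 * and inode number.
 */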

static const struct seq_operations packet_seq_ops = {
	.start	= packet_seq_start,
	.next	= packet_seq_next,
	.stop	= packet_seq_stop,
	.show	= packet_seq_show,
};

static int packet_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &packet_seq_ops,
			    sizeof(struct seq_net_private));
}

static const struct file_operations packet_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= packet_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};

#endif

static int __net_init packet_net_init(struct net *net)
{
	spin_lock_init(&net->packet.sklist_lock);
	INIT_HLIST_HEAD(&net->packet.sklist);

	if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
		return -ENOMEM;

	return 0;
}

static void __net_exit packet_net_exit(struct net *net)
{
	proc_net_remove(net, "packet");
}

static struct pernet_operations packet_net_ops = {
	.init = packet_net_init,
	.exit = packet_net_exit,
};


static void __exit packet_exit(void)
{
	unregister_netdevice_notifier(&packet_netdev_notifier);
	unregister_pernet_subsys(&packet_net_ops);
	sock_unregister(PF_PACKET);
	proto_unregister(&packet_proto);
}

static int __init packet_init(void)
{
	int rc = proto_register(&packet_proto, 0);

	if (rc != 0)
		goto out;

	sock_register(&packet_family_ops);
	register_pernet_subsys(&packet_net_ops);
	register_netdevice_notifier(&packet_netdev_notifier);
out:
	return rc;
}

module_init(packet_init);
module_exit(packet_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_PACKET);