/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		PACKET - implements raw packet sockets.
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *
 * Fixes:
 *		Alan Cox	:	verify_area() now used correctly
 *		Alan Cox	:	new skbuff lists, look ma no backlogs!
 *		Alan Cox	:	tidied skbuff lists.
 *		Alan Cox	:	Now uses generic datagram routines I
 *					added. Also fixed the peek/read crash
 *					from all old Linux datagram code.
 *		Alan Cox	:	Uses the improved datagram code.
 *		Alan Cox	:	Added NULL's for socket options.
 *		Alan Cox	:	Re-commented the code.
 *		Alan Cox	:	Use new kernel side addressing
 *		Rob Janssen	:	Correct MTU usage.
 *		Dave Platt	:	Counter leaks caused by incorrect
 *					interrupt locking and some slightly
 *					dubious gcc output. Can you read
 *					compiler: it said _VOLATILE_
 *		Richard Kooijman:	Timestamp fixes.
 *		Alan Cox	:	New buffers. Use sk->mac.raw.
 *		Alan Cox	:	sendmsg/recvmsg support.
 *		Alan Cox	:	Protocol setting support
 *		Alexey Kuznetsov:	Untied from IPv4 stack.
 *		Cyrus Durgin	:	Fixed kerneld for kmod.
 *		Michal Ostrowski:	Module initialization cleanup.
 *		Ulises Alonso	:	Frame number limit removal and
 *					packet_set_ring memory leak.
 *		Eric Biederman	:	Allow for > 8 byte hardware addresses.
 *					The convention is that longer addresses
 *					will simply extend the hardware address
 *					byte arrays at the end of sockaddr_ll
 *					and packet_mreq.
 *		Johann Baudy	:	Added TX RING.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 */

#include <linux/types.h>
#include <linux/mm.h>
#include <linux/capability.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/wireless.h>
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
#include <asm/page.h>
#include <asm/cacheflush.h>
#include <asm/io.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/poll.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/if_vlan.h>
#include <linux/virtio_net.h>
#include <linux/errqueue.h>
#include <linux/net_tstamp.h>

#ifdef CONFIG_INET
#include <net/inet_common.h>
#endif

/*
   Assumptions:
   - if device has no dev->hard_header routine, it adds and removes ll header
     inside itself. In this case ll header is invisible outside of device,
     but higher levels still should reserve dev->hard_header_len.
     Some devices are clever enough to reallocate the skb when the header
     will not fit in the reserved space (tunnels); others are silly
     (PPP).
   - packet socket receives packets with pulled ll header,
     so that SOCK_RAW should push it back.

On receive:
-----------

Incoming, dev->hard_header != NULL
   mac_header -> ll header
   data       -> data

Outgoing, dev->hard_header != NULL
   mac_header -> ll header
   data       -> ll header

Incoming, dev->hard_header == NULL
   mac_header -> UNKNOWN position. It is very likely that it points to the ll
		 header. PPP does this, which is wrong, because it introduces
		 asymmetry between rx and tx paths.
   data       -> data

Outgoing, dev->hard_header == NULL
   mac_header -> data. ll header is still not built!
   data       -> data

Summary:
   If dev->hard_header == NULL we are unlikely to restore a sensible ll header.


On transmit:
------------

dev->hard_header != NULL
   mac_header -> ll header
   data       -> ll header

dev->hard_header == NULL (ll header is added by device, we cannot control it)
   mac_header -> data
   data       -> data

   We should set nh.raw on output to the correct position,
   the packet classifier depends on it.
 */
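
/*
 * Illustrative sketch (not part of this file): how userspace sees the
 * distinction above. A SOCK_RAW packet socket delivers and expects frames
 * with the link-layer header in place, while SOCK_DGRAM strips it on
 * receive and builds it on send from the sockaddr_ll destination. The
 * variable names are placeholders.
 *
 *	#include <sys/socket.h>
 *	#include <linux/if_packet.h>
 *	#include <linux/if_ether.h>
 *	#include <arpa/inet.h>
 *
 *	int raw_fd  = socket(AF_PACKET, SOCK_RAW,   htons(ETH_P_ALL));
 *	int dgrm_fd = socket(AF_PACKET, SOCK_DGRAM, htons(ETH_P_IP));
 *	// read(raw_fd, ...) returns frames starting at the Ethernet header;
 *	// read(dgrm_fd, ...) returns frames starting at the IP header.
 */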

/* Private packet socket structures. */

struct packet_mclist {
	struct packet_mclist	*next;
	int			ifindex;
	int			count;
	unsigned short		type;
	unsigned short		alen;
	unsigned char		addr[MAX_ADDR_LEN];
};

/* identical to struct packet_mreq except it has
 * a longer address field.
 */
struct packet_mreq_max {
	int		mr_ifindex;
	unsigned short	mr_type;
	unsigned short	mr_alen;
	unsigned char	mr_address[MAX_ADDR_LEN];
};
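
/*
 * Illustrative sketch (not part of this file): userspace manages these
 * membership entries through PACKET_ADD_MEMBERSHIP/PACKET_DROP_MEMBERSHIP.
 * The interface index below is a placeholder.
 *
 *	struct packet_mreq mreq = {
 *		.mr_ifindex = ifindex,		// e.g. from if_nametoindex()
 *		.mr_type    = PACKET_MR_PROMISC,
 *	};
 *	setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP,
 *		   &mreq, sizeof(mreq));
 */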

static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
			   int closing, int tx_ring);

struct pgv {
	char *buffer;
};

struct packet_ring_buffer {
	struct pgv	*pg_vec;
	unsigned int	head;
	unsigned int	frames_per_block;
	unsigned int	frame_size;
	unsigned int	frame_max;

	unsigned int	pg_vec_order;
	unsigned int	pg_vec_pages;
	unsigned int	pg_vec_len;

	atomic_t	pending;
};

struct packet_sock;
static int tpacket_snd(struct packet_sock *po, struct msghdr *msg);

static void packet_flush_mclist(struct sock *sk);

struct packet_sock {
	/* struct sock has to be the first member of packet_sock */
	struct sock		sk;
	struct tpacket_stats	stats;
	struct packet_ring_buffer	rx_ring;
	struct packet_ring_buffer	tx_ring;
	int			copy_thresh;
	spinlock_t		bind_lock;
	struct mutex		pg_vec_lock;
	unsigned int		running:1,	/* prot_hook is attached */
				auxdata:1,
				origdev:1,
				has_vnet_hdr:1;
	int			ifindex;	/* bound device */
	__be16			num;
	struct packet_mclist	*mclist;
	atomic_t		mapped;
	enum tpacket_versions	tp_version;
	unsigned int		tp_hdrlen;
	unsigned int		tp_reserve;
	unsigned int		tp_loss:1;
	unsigned int		tp_tstamp;
	struct packet_type	prot_hook ____cacheline_aligned_in_smp;
};

struct packet_skb_cb {
	unsigned int origlen;
	union {
		struct sockaddr_pkt pkt;
		struct sockaddr_ll ll;
	} sa;
};

#define PACKET_SKB_CB(__skb)	((struct packet_skb_cb *)((__skb)->cb))

static inline __pure struct page *pgv_to_page(void *addr)
{
	if (is_vmalloc_addr(addr))
		return vmalloc_to_page(addr);
	return virt_to_page(addr);
}

static void __packet_set_status(struct packet_sock *po, void *frame, int status)
{
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;

	h.raw = frame;
	switch (po->tp_version) {
	case TPACKET_V1:
		h.h1->tp_status = status;
		flush_dcache_page(pgv_to_page(&h.h1->tp_status));
		break;
	case TPACKET_V2:
		h.h2->tp_status = status;
		flush_dcache_page(pgv_to_page(&h.h2->tp_status));
		break;
	default:
		pr_err("TPACKET version not supported\n");
		BUG();
	}

	smp_wmb();
}

static int __packet_get_status(struct packet_sock *po, void *frame)
{
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;

	smp_rmb();

	h.raw = frame;
	switch (po->tp_version) {
	case TPACKET_V1:
		flush_dcache_page(pgv_to_page(&h.h1->tp_status));
		return h.h1->tp_status;
	case TPACKET_V2:
		flush_dcache_page(pgv_to_page(&h.h2->tp_status));
		return h.h2->tp_status;
	default:
		pr_err("TPACKET version not supported\n");
		BUG();
		return 0;
	}
}

static void *packet_lookup_frame(struct packet_sock *po,
				 struct packet_ring_buffer *rb,
				 unsigned int position,
				 int status)
{
	unsigned int pg_vec_pos, frame_offset;
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;

	pg_vec_pos = position / rb->frames_per_block;
	frame_offset = position % rb->frames_per_block;

	h.raw = rb->pg_vec[pg_vec_pos].buffer +
		(frame_offset * rb->frame_size);

	if (status != __packet_get_status(po, h.raw))
		return NULL;

	return h.raw;
}

static inline void *packet_current_frame(struct packet_sock *po,
					 struct packet_ring_buffer *rb,
					 int status)
{
	return packet_lookup_frame(po, rb, rb->head, status);
}

static inline void *packet_previous_frame(struct packet_sock *po,
					  struct packet_ring_buffer *rb,
					  int status)
{
	unsigned int previous = rb->head ? rb->head - 1 : rb->frame_max;
	return packet_lookup_frame(po, rb, previous, status);
}

static inline void packet_increment_head(struct packet_ring_buffer *buff)
{
	buff->head = buff->head != buff->frame_max ? buff->head + 1 : 0;
}
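
/*
 * Illustrative sketch (not part of this file): the userspace half of the
 * ring protocol these helpers implement. The sizing values below are
 * arbitrary placeholders; frame ownership is handed back and forth via
 * the tp_status word maintained by __packet_set_status() and
 * __packet_get_status().
 *
 *	int ver = TPACKET_V2;
 *	setsockopt(fd, SOL_PACKET, PACKET_VERSION, &ver, sizeof(ver));
 *
 *	struct tpacket_req req = {
 *		.tp_block_size = 4096,
 *		.tp_block_nr   = 64,
 *		.tp_frame_size = 2048,
 *		.tp_frame_nr   = 128,	// block_size/frame_size * block_nr
 *	};
 *	setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));
 *	void *ring = mmap(NULL, req.tp_block_size * req.tp_block_nr,
 *			  PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN };
 *	struct tpacket2_hdr *hdr = ring;	// frame 0
 *	while (!(hdr->tp_status & TP_STATUS_USER))
 *		poll(&pfd, 1, -1);		// wait for the kernel
 *	// ... consume the frame, then hand it back:
 *	hdr->tp_status = TP_STATUS_KERNEL;
 */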

static inline struct packet_sock *pkt_sk(struct sock *sk)
{
	return (struct packet_sock *)sk;
}

static void packet_sock_destruct(struct sock *sk)
{
	skb_queue_purge(&sk->sk_error_queue);

	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
	WARN_ON(atomic_read(&sk->sk_wmem_alloc));

	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_err("Attempt to release alive packet socket: %p\n", sk);
		return;
	}

	sk_refcnt_debug_dec(sk);
}

static const struct proto_ops packet_ops;

static const struct proto_ops packet_ops_spkt;

static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,
			   struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct sockaddr_pkt *spkt;

	/*
	 *	When we registered the protocol we saved the socket in the data
	 *	field for just this event.
	 */

	sk = pt->af_packet_priv;

	/*
	 *	Yank back the headers [hope the device set this
	 *	right or kerboom...]
	 *
	 *	Incoming packets have ll header pulled,
	 *	push it back.
	 *
	 *	For outgoing ones skb->data == skb_mac_header(skb)
	 *	so that this procedure is a no-op.
	 */

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto out;

	if (!net_eq(dev_net(dev), sock_net(sk)))
		goto out;

	skb = skb_share_check(skb, GFP_ATOMIC);
	if (skb == NULL)
		goto oom;

	/* drop any routing info */
	skb_dst_drop(skb);

	/* drop conntrack reference */
	nf_reset(skb);

	spkt = &PACKET_SKB_CB(skb)->sa.pkt;

	skb_push(skb, skb->data - skb_mac_header(skb));

	/*
	 *	The SOCK_PACKET socket receives _all_ frames.
	 */

	spkt->spkt_family = dev->type;
	strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
	spkt->spkt_protocol = skb->protocol;

	/*
	 *	Charge the memory to the socket. This is done specifically
	 *	to prevent sockets using all the memory up.
	 */

	if (sock_queue_rcv_skb(sk, skb) == 0)
		return 0;

out:
	kfree_skb(skb);
oom:
	return 0;
}

/*
 *	Output a raw packet to a device layer. This bypasses all the other
 *	protocol layers and you must therefore supply it with a complete frame.
 */

static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
			       struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_pkt *saddr = (struct sockaddr_pkt *)msg->msg_name;
	struct sk_buff *skb = NULL;
	struct net_device *dev;
	__be16 proto = 0;
	int err;

	/*
	 *	Get and verify the address.
	 */

	if (saddr) {
		if (msg->msg_namelen < sizeof(struct sockaddr))
			return -EINVAL;
		if (msg->msg_namelen == sizeof(struct sockaddr_pkt))
			proto = saddr->spkt_protocol;
	} else
		return -ENOTCONN;	/* SOCK_PACKET must be sent giving an address */

	/*
	 *	Find the device first to size check it
	 */

	saddr->spkt_device[13] = 0;
retry:
	rcu_read_lock();
	dev = dev_get_by_name_rcu(sock_net(sk), saddr->spkt_device);
	err = -ENODEV;
	if (dev == NULL)
		goto out_unlock;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_unlock;

	/*
	 * You may not queue a frame bigger than the mtu. This is the lowest level
	 * raw protocol and you must do your own fragmentation at this level.
	 */

	err = -EMSGSIZE;
	if (len > dev->mtu + dev->hard_header_len + VLAN_HLEN)
		goto out_unlock;

	if (!skb) {
		size_t reserved = LL_RESERVED_SPACE(dev);
		unsigned int hhlen = dev->header_ops ? dev->hard_header_len : 0;

		rcu_read_unlock();
		skb = sock_wmalloc(sk, len + reserved, 0, GFP_KERNEL);
		if (skb == NULL)
			return -ENOBUFS;
		/* FIXME: Save some space for broken drivers that write a hard
		 * header at transmission time by themselves. PPP is the notable
		 * one here. This should really be fixed at the driver level.
		 */
		skb_reserve(skb, reserved);
		skb_reset_network_header(skb);

		/* Try to align data part correctly */
		if (hhlen) {
			skb->data -= hhlen;
			skb->tail -= hhlen;
			if (len < hhlen)
				skb_reset_network_header(skb);
		}
		err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
		if (err)
			goto out_free;
		goto retry;
	}

	if (len > (dev->mtu + dev->hard_header_len)) {
		/* Earlier code assumed this would be a VLAN pkt,
		 * double-check this now that we have the actual
		 * packet in hand.
		 */
		struct ethhdr *ehdr;
		skb_reset_mac_header(skb);
		ehdr = eth_hdr(skb);
		if (ehdr->h_proto != htons(ETH_P_8021Q)) {
			err = -EMSGSIZE;
			goto out_unlock;
		}
	}

	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;
	err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
	if (err < 0)
		goto out_unlock;

	dev_queue_xmit(skb);
	rcu_read_unlock();
	return len;

out_unlock:
	rcu_read_unlock();
out_free:
	kfree_skb(skb);
	return err;
}
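
/*
 * Illustrative sketch (not part of this file): the (legacy) SOCK_PACKET
 * send path above is driven from userspace with sendto() and a
 * sockaddr_pkt naming the device. Frame buffer, length, protocol and
 * device name are placeholders.
 *
 *	struct sockaddr_pkt spkt = {
 *		.spkt_family   = AF_PACKET,
 *		.spkt_protocol = htons(ETH_P_IP),
 *	};
 *	strncpy((char *)spkt.spkt_device, "eth0", sizeof(spkt.spkt_device));
 *	sendto(fd, frame, frame_len, 0,
 *	       (struct sockaddr *)&spkt, sizeof(spkt));
 */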

static inline unsigned int run_filter(const struct sk_buff *skb,
				      const struct sock *sk,
				      unsigned int res)
{
	struct sk_filter *filter;

	rcu_read_lock();
	filter = rcu_dereference(sk->sk_filter);
	if (filter != NULL)
		res = SK_RUN_FILTER(filter, skb);
	rcu_read_unlock();

	return res;
}
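
/*
 * Illustrative sketch (not part of this file): the sk_filter consulted
 * above is installed from userspace with SO_ATTACH_FILTER. This classic
 * BPF program accepts only ARP frames (EtherType 0x0806); a return value
 * of 0 drops the packet, a large return value keeps the whole snaplen.
 *
 *	struct sock_filter code[] = {
 *		BPF_STMT(BPF_LD  | BPF_H | BPF_ABS, 12),	// EtherType
 *		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x0806, 0, 1),
 *		BPF_STMT(BPF_RET | BPF_K, 0xffffffff),		// accept
 *		BPF_STMT(BPF_RET | BPF_K, 0),			// drop
 *	};
 *	struct sock_fprog prog = { .len = 4, .filter = code };
 *	setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog));
 */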

/*
 * This function does lazy skb cloning in the hope that most packets
 * are discarded by BPF.
 *
 * Note the tricky part: we DO mangle a shared skb! skb->data, skb->len
 * and skb->cb are mangled. It works because (and until) packets
 * falling here are owned by the current CPU. Output packets are cloned
 * by dev_queue_xmit_nit(), input packets are processed by net_bh
 * sequentially, so that if we return the skb to its original state on
 * exit, we will not harm anyone.
 */

static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
		      struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct sockaddr_ll *sll;
	struct packet_sock *po;
	u8 *skb_head = skb->data;
	int skb_len = skb->len;
	unsigned int snaplen, res;

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = pt->af_packet_priv;
	po = pkt_sk(sk);

	if (!net_eq(dev_net(dev), sock_net(sk)))
		goto drop;

	skb->dev = dev;

	if (dev->header_ops) {
		/* The device has an explicit notion of ll header,
		 * exported to higher levels.
		 *
		 * Otherwise, the device hides details of its frame
		 * structure, so that corresponding packet head is
		 * never delivered to user.
		 */
		if (sk->sk_type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb_mac_header(skb));
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb_network_offset(skb));
		}
	}

	snaplen = skb->len;

	res = run_filter(skb, sk, snaplen);
	if (!res)
		goto drop_n_restore;
	if (snaplen > res)
		snaplen = res;

	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
	    (unsigned)sk->sk_rcvbuf)
		goto drop_n_acct;

	if (skb_shared(skb)) {
		struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
		if (nskb == NULL)
			goto drop_n_acct;

		if (skb_head != skb->data) {
			skb->data = skb_head;
			skb->len = skb_len;
		}
		kfree_skb(skb);
		skb = nskb;
	}

	BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
		     sizeof(skb->cb));

	sll = &PACKET_SKB_CB(skb)->sa.ll;
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	if (unlikely(po->origdev))
		sll->sll_ifindex = orig_dev->ifindex;
	else
		sll->sll_ifindex = dev->ifindex;

	sll->sll_halen = dev_parse_header(skb, sll->sll_addr);

	PACKET_SKB_CB(skb)->origlen = skb->len;

	if (pskb_trim(skb, snaplen))
		goto drop_n_acct;

	skb_set_owner_r(skb, sk);
	skb->dev = NULL;
	skb_dst_drop(skb);

	/* drop conntrack reference */
	nf_reset(skb);

	spin_lock(&sk->sk_receive_queue.lock);
	po->stats.tp_packets++;
	skb->dropcount = atomic_read(&sk->sk_drops);
	__skb_queue_tail(&sk->sk_receive_queue, skb);
	spin_unlock(&sk->sk_receive_queue.lock);
	sk->sk_data_ready(sk, skb->len);
	return 0;

drop_n_acct:
	po->stats.tp_drops = atomic_inc_return(&sk->sk_drops);

drop_n_restore:
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb_len;
	}
drop:
	consume_skb(skb);
	return 0;
}

static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
		       struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct packet_sock *po;
	struct sockaddr_ll *sll;
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;
	u8 *skb_head = skb->data;
	int skb_len = skb->len;
	unsigned int snaplen, res;
	unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
	unsigned short macoff, netoff, hdrlen;
	struct sk_buff *copy_skb = NULL;
	struct timeval tv;
	struct timespec ts;
	struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = pt->af_packet_priv;
	po = pkt_sk(sk);

	if (!net_eq(dev_net(dev), sock_net(sk)))
		goto drop;

	if (dev->header_ops) {
		if (sk->sk_type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb_mac_header(skb));
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb_network_offset(skb));
		}
	}

	if (skb->ip_summed == CHECKSUM_PARTIAL)
		status |= TP_STATUS_CSUMNOTREADY;

	snaplen = skb->len;

	res = run_filter(skb, sk, snaplen);
	if (!res)
		goto drop_n_restore;
	if (snaplen > res)
		snaplen = res;

	if (sk->sk_type == SOCK_DGRAM) {
		macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 +
				  po->tp_reserve;
	} else {
		unsigned maclen = skb_network_offset(skb);
		netoff = TPACKET_ALIGN(po->tp_hdrlen +
				       (maclen < 16 ? 16 : maclen)) +
			 po->tp_reserve;
		macoff = netoff - maclen;
	}

	if (macoff + snaplen > po->rx_ring.frame_size) {
		if (po->copy_thresh &&
		    atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
		    (unsigned)sk->sk_rcvbuf) {
			if (skb_shared(skb)) {
				copy_skb = skb_clone(skb, GFP_ATOMIC);
			} else {
				copy_skb = skb_get(skb);
				skb_head = skb->data;
			}
			if (copy_skb)
				skb_set_owner_r(copy_skb, sk);
		}
		snaplen = po->rx_ring.frame_size - macoff;
		if ((int)snaplen < 0)
			snaplen = 0;
	}

	spin_lock(&sk->sk_receive_queue.lock);
	h.raw = packet_current_frame(po, &po->rx_ring, TP_STATUS_KERNEL);
	if (!h.raw)
		goto ring_is_full;
	packet_increment_head(&po->rx_ring);
	po->stats.tp_packets++;
	if (copy_skb) {
		status |= TP_STATUS_COPY;
		__skb_queue_tail(&sk->sk_receive_queue, copy_skb);
	}
	if (!po->stats.tp_drops)
		status &= ~TP_STATUS_LOSING;
	spin_unlock(&sk->sk_receive_queue.lock);

	skb_copy_bits(skb, 0, h.raw + macoff, snaplen);

	switch (po->tp_version) {
	case TPACKET_V1:
		h.h1->tp_len = skb->len;
		h.h1->tp_snaplen = snaplen;
		h.h1->tp_mac = macoff;
		h.h1->tp_net = netoff;
		if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
				&& shhwtstamps->syststamp.tv64)
			tv = ktime_to_timeval(shhwtstamps->syststamp);
		else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
				&& shhwtstamps->hwtstamp.tv64)
			tv = ktime_to_timeval(shhwtstamps->hwtstamp);
		else if (skb->tstamp.tv64)
			tv = ktime_to_timeval(skb->tstamp);
		else
			do_gettimeofday(&tv);
		h.h1->tp_sec = tv.tv_sec;
		h.h1->tp_usec = tv.tv_usec;
		hdrlen = sizeof(*h.h1);
		break;
	case TPACKET_V2:
		h.h2->tp_len = skb->len;
		h.h2->tp_snaplen = snaplen;
		h.h2->tp_mac = macoff;
		h.h2->tp_net = netoff;
		if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
				&& shhwtstamps->syststamp.tv64)
			ts = ktime_to_timespec(shhwtstamps->syststamp);
		else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
				&& shhwtstamps->hwtstamp.tv64)
			ts = ktime_to_timespec(shhwtstamps->hwtstamp);
		else if (skb->tstamp.tv64)
			ts = ktime_to_timespec(skb->tstamp);
		else
			getnstimeofday(&ts);
		h.h2->tp_sec = ts.tv_sec;
		h.h2->tp_nsec = ts.tv_nsec;
		if (vlan_tx_tag_present(skb)) {
			h.h2->tp_vlan_tci = vlan_tx_tag_get(skb);
			status |= TP_STATUS_VLAN_VALID;
		} else {
			h.h2->tp_vlan_tci = 0;
		}
		hdrlen = sizeof(*h.h2);
		break;
	default:
		BUG();
	}

	sll = h.raw + TPACKET_ALIGN(hdrlen);
	sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	if (unlikely(po->origdev))
		sll->sll_ifindex = orig_dev->ifindex;
	else
		sll->sll_ifindex = dev->ifindex;

	__packet_set_status(po, h.raw, status);
	smp_mb();
#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
	{
		u8 *start, *end;

		end = (u8 *)PAGE_ALIGN((unsigned long)h.raw + macoff + snaplen);
		for (start = h.raw; start < end; start += PAGE_SIZE)
			flush_dcache_page(pgv_to_page(start));
	}
#endif

	sk->sk_data_ready(sk, 0);

drop_n_restore:
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb_len;
	}
drop:
	kfree_skb(skb);
	return 0;

ring_is_full:
	po->stats.tp_drops++;
	spin_unlock(&sk->sk_receive_queue.lock);

	sk->sk_data_ready(sk, 0);
	kfree_skb(copy_skb);
	goto drop_n_restore;
}

static void tpacket_destruct_skb(struct sk_buff *skb)
{
	struct packet_sock *po = pkt_sk(skb->sk);
	void *ph;

	BUG_ON(skb == NULL);

	if (likely(po->tx_ring.pg_vec)) {
		ph = skb_shinfo(skb)->destructor_arg;
		BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING);
		BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
		atomic_dec(&po->tx_ring.pending);
		__packet_set_status(po, ph, TP_STATUS_AVAILABLE);
	}

	sock_wfree(skb);
}

static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
			    void *frame, struct net_device *dev, int size_max,
			    __be16 proto, unsigned char *addr)
{
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} ph;
	int to_write, offset, len, tp_len, nr_frags, len_max;
	struct socket *sock = po->sk.sk_socket;
	struct page *page;
	void *data;
	int err;

	ph.raw = frame;

	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = po->sk.sk_priority;
	skb->mark = po->sk.sk_mark;
	skb_shinfo(skb)->destructor_arg = ph.raw;

	switch (po->tp_version) {
	case TPACKET_V2:
		tp_len = ph.h2->tp_len;
		break;
	default:
		tp_len = ph.h1->tp_len;
		break;
	}
	if (unlikely(tp_len > size_max)) {
		pr_err("packet size is too long (%d > %d)\n", tp_len, size_max);
		return -EMSGSIZE;
	}

	skb_reserve(skb, LL_RESERVED_SPACE(dev));
	skb_reset_network_header(skb);

	data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll);
	to_write = tp_len;

	if (sock->type == SOCK_DGRAM) {
		err = dev_hard_header(skb, dev, ntohs(proto), addr,
				      NULL, tp_len);
		if (unlikely(err < 0))
			return -EINVAL;
	} else if (dev->hard_header_len) {
		/* net device doesn't like empty head */
		if (unlikely(tp_len <= dev->hard_header_len)) {
			pr_err("packet size is too short (%d < %d)\n",
			       tp_len, dev->hard_header_len);
			return -EINVAL;
		}

		skb_push(skb, dev->hard_header_len);
		err = skb_store_bits(skb, 0, data,
				     dev->hard_header_len);
		if (unlikely(err))
			return err;

		data += dev->hard_header_len;
		to_write -= dev->hard_header_len;
	}

	err = -EFAULT;
	offset = offset_in_page(data);
	len_max = PAGE_SIZE - offset;
	len = ((to_write > len_max) ? len_max : to_write);

	skb->data_len = to_write;
	skb->len += to_write;
	skb->truesize += to_write;
	atomic_add(to_write, &po->sk.sk_wmem_alloc);

	while (likely(to_write)) {
		nr_frags = skb_shinfo(skb)->nr_frags;

		if (unlikely(nr_frags >= MAX_SKB_FRAGS)) {
			pr_err("Packet exceeds the number of skb frags (%lu)\n",
			       MAX_SKB_FRAGS);
			return -EFAULT;
		}

		page = pgv_to_page(data);
		data += len;
		flush_dcache_page(page);
		get_page(page);
		skb_fill_page_desc(skb, nr_frags, page, offset, len);
		to_write -= len;
		offset = 0;
		len_max = PAGE_SIZE;
		len = ((to_write > len_max) ? len_max : to_write);
	}

	return tp_len;
}

static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
{
	struct sk_buff *skb;
	struct net_device *dev;
	__be16 proto;
	int ifindex, err, reserve = 0;
	void *ph;
	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
	int tp_len, size_max;
	unsigned char *addr;
	int len_sum = 0;
	int status = 0;

	mutex_lock(&po->pg_vec_lock);

	err = -EBUSY;
	if (saddr == NULL) {
		ifindex = po->ifindex;
		proto = po->num;
		addr = NULL;
	} else {
		err = -EINVAL;
		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
			goto out;
		if (msg->msg_namelen < (saddr->sll_halen
					+ offsetof(struct sockaddr_ll,
						   sll_addr)))
			goto out;
		ifindex = saddr->sll_ifindex;
		proto = saddr->sll_protocol;
		addr = saddr->sll_addr;
	}

	dev = dev_get_by_index(sock_net(&po->sk), ifindex);
	err = -ENXIO;
	if (unlikely(dev == NULL))
		goto out;

	reserve = dev->hard_header_len;

	err = -ENETDOWN;
	if (unlikely(!(dev->flags & IFF_UP)))
		goto out_put;

	size_max = po->tx_ring.frame_size
		   - (po->tp_hdrlen - sizeof(struct sockaddr_ll));

	if (size_max > dev->mtu + reserve)
		size_max = dev->mtu + reserve;

	do {
		ph = packet_current_frame(po, &po->tx_ring,
					  TP_STATUS_SEND_REQUEST);

		if (unlikely(ph == NULL)) {
			schedule();
			continue;
		}

		status = TP_STATUS_SEND_REQUEST;
		skb = sock_alloc_send_skb(&po->sk,
					  LL_ALLOCATED_SPACE(dev)
					  + sizeof(struct sockaddr_ll),
					  0, &err);

		if (unlikely(skb == NULL))
			goto out_status;

		tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
					  addr);

		if (unlikely(tp_len < 0)) {
			if (po->tp_loss) {
				__packet_set_status(po, ph,
						    TP_STATUS_AVAILABLE);
				packet_increment_head(&po->tx_ring);
				kfree_skb(skb);
				continue;
			} else {
				status = TP_STATUS_WRONG_FORMAT;
				err = tp_len;
				goto out_status;
			}
		}

		skb->destructor = tpacket_destruct_skb;
		__packet_set_status(po, ph, TP_STATUS_SENDING);
		atomic_inc(&po->tx_ring.pending);

		status = TP_STATUS_SEND_REQUEST;
		err = dev_queue_xmit(skb);
		if (unlikely(err > 0)) {
			err = net_xmit_errno(err);
			if (err && __packet_get_status(po, ph) ==
				   TP_STATUS_AVAILABLE) {
				/* skb was destructed already */
				skb = NULL;
				goto out_status;
			}
			/*
			 * skb was dropped but not destructed yet;
			 * let's treat it like congestion or err < 0
			 */
			err = 0;
		}
		packet_increment_head(&po->tx_ring);
		len_sum += tp_len;
	} while (likely((ph != NULL) ||
			((!(msg->msg_flags & MSG_DONTWAIT)) &&
			 (atomic_read(&po->tx_ring.pending))))
		);

	err = len_sum;
	goto out_put;

out_status:
	__packet_set_status(po, ph, status);
	kfree_skb(skb);
out_put:
	dev_put(dev);
out:
	mutex_unlock(&po->pg_vec_lock);
	return err;
}
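
/*
 * Illustrative sketch (not part of this file): the userspace side of the
 * TX ring drained by tpacket_snd(). `ring`, `i`, `frame`, `frame_len`
 * and `frame_size` are placeholders from a prior PACKET_TX_RING
 * setsockopt()/mmap(); the payload offset mirrors the computation in
 * tpacket_fill_skb() above.
 *
 *	struct tpacket2_hdr *hdr = ring + i * frame_size;
 *	if (hdr->tp_status == TP_STATUS_AVAILABLE) {
 *		void *payload = (char *)hdr + TPACKET2_HDRLEN -
 *				sizeof(struct sockaddr_ll);
 *		memcpy(payload, frame, frame_len);
 *		hdr->tp_len = frame_len;
 *		hdr->tp_status = TP_STATUS_SEND_REQUEST;
 *		send(fd, NULL, 0, 0);	// kick the kernel to drain the ring
 *	}
 */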

static inline struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad,
					       size_t reserve, size_t len,
					       size_t linear, int noblock,
					       int *err)
{
	struct sk_buff *skb;

	/* Under a page? Don't bother with paged skb. */
	if (prepad + len < PAGE_SIZE || !linear)
		linear = len;

	skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
				   err);
	if (!skb)
		return NULL;

	skb_reserve(skb, reserve);
	skb_put(skb, linear);
	skb->data_len = len - linear;
	skb->len += len - linear;

	return skb;
}

static int packet_snd(struct socket *sock,
		      struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
	struct sk_buff *skb;
	struct net_device *dev;
	__be16 proto;
	unsigned char *addr;
	int ifindex, err, reserve = 0;
	struct virtio_net_hdr vnet_hdr = { 0 };
	int offset = 0;
	int vnet_hdr_len;
	struct packet_sock *po = pkt_sk(sk);
	unsigned short gso_type = 0;

	/*
	 *	Get and verify the address.
	 */

	if (saddr == NULL) {
		ifindex = po->ifindex;
		proto = po->num;
		addr = NULL;
	} else {
		err = -EINVAL;
		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
			goto out;
		if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
			goto out;
		ifindex = saddr->sll_ifindex;
		proto = saddr->sll_protocol;
		addr = saddr->sll_addr;
	}

	dev = dev_get_by_index(sock_net(sk), ifindex);
	err = -ENXIO;
	if (dev == NULL)
		goto out_unlock;
	if (sock->type == SOCK_RAW)
		reserve = dev->hard_header_len;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_unlock;

	if (po->has_vnet_hdr) {
		vnet_hdr_len = sizeof(vnet_hdr);

		err = -EINVAL;
		if (len < vnet_hdr_len)
			goto out_unlock;

		len -= vnet_hdr_len;

		err = memcpy_fromiovec((void *)&vnet_hdr, msg->msg_iov,
				       vnet_hdr_len);
		if (err < 0)
			goto out_unlock;

		if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
		    (vnet_hdr.csum_start + vnet_hdr.csum_offset + 2 >
		     vnet_hdr.hdr_len))
			vnet_hdr.hdr_len = vnet_hdr.csum_start +
						 vnet_hdr.csum_offset + 2;

		err = -EINVAL;
		if (vnet_hdr.hdr_len > len)
			goto out_unlock;

		if (vnet_hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
			switch (vnet_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
			case VIRTIO_NET_HDR_GSO_TCPV4:
				gso_type = SKB_GSO_TCPV4;
				break;
			case VIRTIO_NET_HDR_GSO_TCPV6:
				gso_type = SKB_GSO_TCPV6;
				break;
			case VIRTIO_NET_HDR_GSO_UDP:
				gso_type = SKB_GSO_UDP;
				break;
			default:
				goto out_unlock;
			}

			if (vnet_hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
				gso_type |= SKB_GSO_TCP_ECN;

			if (vnet_hdr.gso_size == 0)
				goto out_unlock;

		}
	}

	err = -EMSGSIZE;
	if (!gso_type && (len > dev->mtu + reserve + VLAN_HLEN))
		goto out_unlock;

	err = -ENOBUFS;
	skb = packet_alloc_skb(sk, LL_ALLOCATED_SPACE(dev),
			       LL_RESERVED_SPACE(dev), len, vnet_hdr.hdr_len,
			       msg->msg_flags & MSG_DONTWAIT, &err);
	if (skb == NULL)
		goto out_unlock;

	skb_set_network_header(skb, reserve);

	err = -EINVAL;
	if (sock->type == SOCK_DGRAM &&
	    (offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len)) < 0)
		goto out_free;

	/* Returns -EFAULT on error */
	err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len);
	if (err)
		goto out_free;
	err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
	if (err < 0)
		goto out_free;

	if (!gso_type && (len > dev->mtu + reserve)) {
		/* Earlier code assumed this would be a VLAN pkt,
		 * double-check this now that we have the actual
		 * packet in hand.
		 */
		struct ethhdr *ehdr;
		skb_reset_mac_header(skb);
		ehdr = eth_hdr(skb);
		if (ehdr->h_proto != htons(ETH_P_8021Q)) {
			err = -EMSGSIZE;
			goto out_free;
		}
	}

	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	if (po->has_vnet_hdr) {
		if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
			if (!skb_partial_csum_set(skb, vnet_hdr.csum_start,
						  vnet_hdr.csum_offset)) {
				err = -EINVAL;
				goto out_free;
			}
		}

		skb_shinfo(skb)->gso_size = vnet_hdr.gso_size;
		skb_shinfo(skb)->gso_type = gso_type;

		/* Header must be checked, and gso_segs computed. */
		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
		skb_shinfo(skb)->gso_segs = 0;

		len += vnet_hdr_len;
	}

	/*
	 *	Now send it
	 */

	err = dev_queue_xmit(skb);
	if (err > 0 && (err = net_xmit_errno(err)) != 0)
		goto out_unlock;

	dev_put(dev);

	return len;

out_free:
	kfree_skb(skb);
out_unlock:
	if (dev)
		dev_put(dev);
out:
	return err;
}
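
/*
 * Illustrative sketch (not part of this file): when PACKET_VNET_HDR is
 * enabled, each send is prefixed with the struct virtio_net_hdr that
 * packet_snd() parses above. A non-GSO, no-checksum send looks like
 * this; `frame` and `frame_len` are placeholders.
 *
 *	int on = 1;
 *	setsockopt(fd, SOL_PACKET, PACKET_VNET_HDR, &on, sizeof(on));
 *
 *	struct virtio_net_hdr vh = { .gso_type = VIRTIO_NET_HDR_GSO_NONE };
 *	struct iovec iov[2] = {
 *		{ .iov_base = &vh,   .iov_len = sizeof(vh) },
 *		{ .iov_base = frame, .iov_len = frame_len },
 *	};
 *	struct msghdr mh = { .msg_iov = iov, .msg_iovlen = 2 };
 *	sendmsg(fd, &mh, 0);
 */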

static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
			  struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	if (po->tx_ring.pg_vec)
		return tpacket_snd(po, msg);
	else
		return packet_snd(sock, msg, len);
}

/*
 *	Close a PACKET socket. This is fairly simple. We immediately go
 *	to 'closed' state and remove our protocol entry in the device list.
 */

static int packet_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po;
	struct net *net;
	struct tpacket_req req;

	if (!sk)
		return 0;

	net = sock_net(sk);
	po = pkt_sk(sk);

	spin_lock_bh(&net->packet.sklist_lock);
	sk_del_node_init_rcu(sk);
	sock_prot_inuse_add(net, sk->sk_prot, -1);
	spin_unlock_bh(&net->packet.sklist_lock);

	spin_lock(&po->bind_lock);
	if (po->running) {
		/*
		 *	Remove from protocol table
		 */
		po->running = 0;
		po->num = 0;
		__dev_remove_pack(&po->prot_hook);
		__sock_put(sk);
	}
	spin_unlock(&po->bind_lock);

	packet_flush_mclist(sk);

	memset(&req, 0, sizeof(req));

	if (po->rx_ring.pg_vec)
		packet_set_ring(sk, &req, 1, 0);

	if (po->tx_ring.pg_vec)
		packet_set_ring(sk, &req, 1, 1);

	synchronize_net();
	/*
	 *	Now the socket is dead. No more input will appear.
	 */
	sock_orphan(sk);
	sock->sk = NULL;

	/* Purge queues */

	skb_queue_purge(&sk->sk_receive_queue);
	sk_refcnt_debug_release(sk);

	sock_put(sk);
	return 0;
}

/*
 *	Attach a packet hook.
 */

static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
{
	struct packet_sock *po = pkt_sk(sk);
	/*
	 *	Detach an existing hook if present.
	 */

	lock_sock(sk);

	spin_lock(&po->bind_lock);
	if (po->running) {
		__sock_put(sk);
		po->running = 0;
		po->num = 0;
		spin_unlock(&po->bind_lock);
		dev_remove_pack(&po->prot_hook);
		spin_lock(&po->bind_lock);
	}

	po->num = protocol;
	po->prot_hook.type = protocol;
	po->prot_hook.dev = dev;

	po->ifindex = dev ? dev->ifindex : 0;

	if (protocol == 0)
		goto out_unlock;

	if (!dev || (dev->flags & IFF_UP)) {
		dev_add_pack(&po->prot_hook);
		sock_hold(sk);
		po->running = 1;
	} else {
		sk->sk_err = ENETDOWN;
		if (!sock_flag(sk, SOCK_DEAD))
			sk->sk_error_report(sk);
	}

out_unlock:
	spin_unlock(&po->bind_lock);
	release_sock(sk);
	return 0;
}

/*
 *	Bind a packet socket to a device
 */

static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
			    int addr_len)
{
	struct sock *sk = sock->sk;
	char name[15];
	struct net_device *dev;
	int err = -ENODEV;

	/*
	 *	Check legality
	 */

	if (addr_len != sizeof(struct sockaddr))
		return -EINVAL;
	strlcpy(name, uaddr->sa_data, sizeof(name));

	dev = dev_get_by_name(sock_net(sk), name);
	if (dev) {
		err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
		dev_put(dev);
	}
	return err;
}

static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
	struct sock *sk = sock->sk;
	struct net_device *dev = NULL;
	int err;

	/*
	 *	Check legality
	 */

	if (addr_len < sizeof(struct sockaddr_ll))
		return -EINVAL;
	if (sll->sll_family != AF_PACKET)
		return -EINVAL;

	if (sll->sll_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex);
		if (dev == NULL)
			goto out;
	}
	err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
	if (dev)
		dev_put(dev);

out:
	return err;
}
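
/*
 * Illustrative sketch (not part of this file): binding an AF_PACKET
 * socket to a single interface from userspace; only sll_family,
 * sll_protocol and sll_ifindex are consulted here, and the interface
 * name is a placeholder (if_nametoindex() is from <net/if.h>).
 *
 *	struct sockaddr_ll sll = {
 *		.sll_family   = AF_PACKET,
 *		.sll_protocol = htons(ETH_P_ALL),
 *		.sll_ifindex  = if_nametoindex("eth0"),
 *	};
 *	bind(fd, (struct sockaddr *)&sll, sizeof(sll));
 */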

static struct proto packet_proto = {
	.name	  = "PACKET",
	.owner	  = THIS_MODULE,
	.obj_size = sizeof(struct packet_sock),
};

/*
 *	Create a packet socket.
 */

static int packet_create(struct net *net, struct socket *sock, int protocol,
			 int kern)
{
	struct sock *sk;
	struct packet_sock *po;
	__be16 proto = (__force __be16)protocol; /* weird, but documented */
	int err;

	if (!capable(CAP_NET_RAW))
		return -EPERM;
	if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
	    sock->type != SOCK_PACKET)
		return -ESOCKTNOSUPPORT;

	sock->state = SS_UNCONNECTED;

	err = -ENOBUFS;
	sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
	if (sk == NULL)
		goto out;

	sock->ops = &packet_ops;
	if (sock->type == SOCK_PACKET)
		sock->ops = &packet_ops_spkt;

	sock_init_data(sock, sk);

	po = pkt_sk(sk);
	sk->sk_family = PF_PACKET;
	po->num = proto;

	sk->sk_destruct = packet_sock_destruct;
	sk_refcnt_debug_inc(sk);

	/*
	 *	Attach a protocol block
	 */

	spin_lock_init(&po->bind_lock);
	mutex_init(&po->pg_vec_lock);
	po->prot_hook.func = packet_rcv;

	if (sock->type == SOCK_PACKET)
		po->prot_hook.func = packet_rcv_spkt;

	po->prot_hook.af_packet_priv = sk;

	if (proto) {
		po->prot_hook.type = proto;
		dev_add_pack(&po->prot_hook);
		sock_hold(sk);
		po->running = 1;
	}

	spin_lock_bh(&net->packet.sklist_lock);
	sk_add_node_rcu(sk, &net->packet.sklist);
	sock_prot_inuse_add(net, &packet_proto, 1);
	spin_unlock_bh(&net->packet.sklist_lock);

	return 0;
out:
	return err;
}

static int packet_recv_error(struct sock *sk, struct msghdr *msg, int len)
{
	struct sock_exterr_skb *serr;
	struct sk_buff *skb, *skb2;
	int copied, err;

	err = -EAGAIN;
	skb = skb_dequeue(&sk->sk_error_queue);
	if (skb == NULL)
		goto out;

	copied = skb->len;
	if (copied > len) {
		msg->msg_flags |= MSG_TRUNC;
		copied = len;
	}
	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
	if (err)
		goto out_free_skb;

	sock_recv_timestamp(msg, sk, skb);

	serr = SKB_EXT_ERR(skb);
	put_cmsg(msg, SOL_PACKET, PACKET_TX_TIMESTAMP,
		 sizeof(serr->ee), &serr->ee);

	msg->msg_flags |= MSG_ERRQUEUE;
	err = copied;

	/* Reset and regenerate socket error */
	spin_lock_bh(&sk->sk_error_queue.lock);
	sk->sk_err = 0;
	if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
		sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
		spin_unlock_bh(&sk->sk_error_queue.lock);
		sk->sk_error_report(sk);
	} else
		spin_unlock_bh(&sk->sk_error_queue.lock);

out_free_skb:
	kfree_skb(skb);
out:
	return err;
}
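
/*
 * Illustrative sketch (not part of this file): draining the error queue
 * filled for TX timestamps. The buffer sizes and handle_timestamp()
 * helper are placeholders; the PACKET_TX_TIMESTAMP cmsg carries the
 * sock_extended_err put above.
 *
 *	char data[2048], cbuf[512];
 *	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
 *	struct msghdr mh = {
 *		.msg_iov = &iov, .msg_iovlen = 1,
 *		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
 *	};
 *	if (recvmsg(fd, &mh, MSG_ERRQUEUE) >= 0) {
 *		struct cmsghdr *cm;
 *		for (cm = CMSG_FIRSTHDR(&mh); cm; cm = CMSG_NXTHDR(&mh, cm))
 *			if (cm->cmsg_level == SOL_PACKET &&
 *			    cm->cmsg_type == PACKET_TX_TIMESTAMP)
 *				handle_timestamp(CMSG_DATA(cm));
 *	}
 */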

/*
 *	Pull a packet from our receive queue and hand it to the user.
 *	If necessary we block.
 */

static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
			  struct msghdr *msg, size_t len, int flags)
{
	struct sock *sk = sock->sk;
	struct sk_buff *skb;
	int copied, err;
	struct sockaddr_ll *sll;
	int vnet_hdr_len = 0;

	err = -EINVAL;
	if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT|MSG_ERRQUEUE))
		goto out;

#if 0
	/* What error should we return now? EUNATTACH? */
	if (pkt_sk(sk)->ifindex < 0)
		return -ENODEV;
#endif

	if (flags & MSG_ERRQUEUE) {
		err = packet_recv_error(sk, msg, len);
		goto out;
	}

	/*
	 *	Call the generic datagram receiver. This handles all sorts
	 *	of horrible races and re-entrancy so we can forget about it
	 *	in the protocol layers.
	 *
	 *	Now it will return ENETDOWN, if the device has just gone down,
	 *	but then it will block.
	 */

	skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);

	/*
	 *	An error occurred so return it. Because skb_recv_datagram()
	 *	handles the blocking we don't see and worry about blocking
	 *	retries.
	 */

	if (skb == NULL)
		goto out;

	if (pkt_sk(sk)->has_vnet_hdr) {
		struct virtio_net_hdr vnet_hdr = { 0 };

		err = -EINVAL;
		vnet_hdr_len = sizeof(vnet_hdr);
		if (len < vnet_hdr_len)
			goto out_free;

		len -= vnet_hdr_len;

		if (skb_is_gso(skb)) {
			struct skb_shared_info *sinfo = skb_shinfo(skb);

			/* This is a hint as to how much should be linear. */
			vnet_hdr.hdr_len = skb_headlen(skb);
			vnet_hdr.gso_size = sinfo->gso_size;
			if (sinfo->gso_type & SKB_GSO_TCPV4)
				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
			else if (sinfo->gso_type & SKB_GSO_TCPV6)
				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
			else if (sinfo->gso_type & SKB_GSO_UDP)
				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP;
			else if (sinfo->gso_type & SKB_GSO_FCOE)
				goto out_free;
			else
				BUG();
			if (sinfo->gso_type & SKB_GSO_TCP_ECN)
				vnet_hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
		} else
			vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;

		if (skb->ip_summed == CHECKSUM_PARTIAL) {
			vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
			vnet_hdr.csum_start = skb_checksum_start_offset(skb);
			vnet_hdr.csum_offset = skb->csum_offset;
		} /* else everything is zero */

		err = memcpy_toiovec(msg->msg_iov, (void *)&vnet_hdr,
				     vnet_hdr_len);
		if (err < 0)
			goto out_free;
	}

	/*
	 *	If the address length field is there to be filled in, we fill
	 *	it in now.
	 */

	sll = &PACKET_SKB_CB(skb)->sa.ll;
	if (sock->type == SOCK_PACKET)
		msg->msg_namelen = sizeof(struct sockaddr_pkt);
	else
		msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);

	/*
	 *	You lose any data beyond the buffer you gave. If it worries a
	 *	user program they can ask the device for its MTU anyway.
	 */

	copied = skb->len;
	if (copied > len) {
		copied = len;
		msg->msg_flags |= MSG_TRUNC;
	}

	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
	if (err)
		goto out_free;

	sock_recv_ts_and_drops(msg, sk, skb);

	if (msg->msg_name)
		memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
		       msg->msg_namelen);

	if (pkt_sk(sk)->auxdata) {
		struct tpacket_auxdata aux;

		aux.tp_status = TP_STATUS_USER;
		if (skb->ip_summed == CHECKSUM_PARTIAL)
			aux.tp_status |= TP_STATUS_CSUMNOTREADY;
		aux.tp_len = PACKET_SKB_CB(skb)->origlen;
		aux.tp_snaplen = skb->len;
		aux.tp_mac = 0;
		aux.tp_net = skb_network_offset(skb);
		if (vlan_tx_tag_present(skb)) {
			aux.tp_vlan_tci = vlan_tx_tag_get(skb);
			aux.tp_status |= TP_STATUS_VLAN_VALID;
		} else {
			aux.tp_vlan_tci = 0;
		}
		put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
	}

	/*
	 *	Free or return the buffer as appropriate. Again this
	 *	hides all the races and re-entrancy issues from us.
	 */
	err = vnet_hdr_len + ((flags&MSG_TRUNC) ? skb->len : copied);

out_free:
	skb_free_datagram(sk, skb);
out:
	return err;
}
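
/*
 * Illustrative sketch (not part of this file): reading the PACKET_AUXDATA
 * control message emitted above; the option must be enabled first.
 * Buffer setup is a placeholder.
 *
 *	int on = 1;
 *	setsockopt(fd, SOL_PACKET, PACKET_AUXDATA, &on, sizeof(on));
 *
 *	char cbuf[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
 *	struct msghdr mh = {
 *		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
 *		// ... plus msg_iov for the packet data
 *	};
 *	recvmsg(fd, &mh, 0);
 *	struct cmsghdr *cm = CMSG_FIRSTHDR(&mh);
 *	if (cm && cm->cmsg_level == SOL_PACKET &&
 *	    cm->cmsg_type == PACKET_AUXDATA) {
 *		struct tpacket_auxdata *aux = (void *)CMSG_DATA(cm);
 *		// aux->tp_len is the original length before truncation
 *	}
 */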

static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
			       int *uaddr_len, int peer)
{
	struct net_device *dev;
	struct sock *sk = sock->sk;

	if (peer)
		return -EOPNOTSUPP;

	uaddr->sa_family = AF_PACKET;
	rcu_read_lock();
	dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
	if (dev)
		strncpy(uaddr->sa_data, dev->name, 14);
	else
		memset(uaddr->sa_data, 0, 14);
	rcu_read_unlock();
	*uaddr_len = sizeof(*uaddr);

	return 0;
}

static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
			  int *uaddr_len, int peer)
{
	struct net_device *dev;
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr);

	if (peer)
		return -EOPNOTSUPP;

	sll->sll_family = AF_PACKET;
	sll->sll_ifindex = po->ifindex;
	sll->sll_protocol = po->num;
	sll->sll_pkttype = 0;
	rcu_read_lock();
	dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex);
	if (dev) {
		sll->sll_hatype = dev->type;
		sll->sll_halen = dev->addr_len;
		memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
	} else {
		sll->sll_hatype = 0;	/* Bad: we have no ARPHRD_UNSPEC */
		sll->sll_halen = 0;
	}
	rcu_read_unlock();
	*uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;

	return 0;
}

static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
			 int what)
{
	switch (i->type) {
	case PACKET_MR_MULTICAST:
		if (i->alen != dev->addr_len)
			return -EINVAL;
		if (what > 0)
			return dev_mc_add(dev, i->addr);
		else
			return dev_mc_del(dev, i->addr);
		break;
	case PACKET_MR_PROMISC:
		return dev_set_promiscuity(dev, what);
		break;
	case PACKET_MR_ALLMULTI:
		return dev_set_allmulti(dev, what);
		break;
	case PACKET_MR_UNICAST:
		if (i->alen != dev->addr_len)
			return -EINVAL;
		if (what > 0)
			return dev_uc_add(dev, i->addr);
		else
			return dev_uc_del(dev, i->addr);
		break;
	default:
		break;
	}
	return 0;
}

static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
{
	for ( ; i; i = i->next) {
		if (i->ifindex == dev->ifindex)
			packet_dev_mc(dev, i, what);
	}
}

static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
{
	struct packet_sock *po = pkt_sk(sk);
	struct packet_mclist *ml, *i;
	struct net_device *dev;
	int err;

	rtnl_lock();

	err = -ENODEV;
	dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex);
	if (!dev)
		goto done;

	err = -EINVAL;
	if (mreq->mr_alen > dev->addr_len)
		goto done;

	err = -ENOBUFS;
	i = kmalloc(sizeof(*i), GFP_KERNEL);
	if (i == NULL)
		goto done;

	err = 0;
	for (ml = po->mclist; ml; ml = ml->next) {
		if (ml->ifindex == mreq->mr_ifindex &&
		    ml->type == mreq->mr_type &&
		    ml->alen == mreq->mr_alen &&
		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
			ml->count++;
			/* Free the new element ... */
			kfree(i);
			goto done;
		}
	}

	i->type = mreq->mr_type;
	i->ifindex = mreq->mr_ifindex;
	i->alen = mreq->mr_alen;
	memcpy(i->addr, mreq->mr_address, i->alen);
	i->count = 1;
	i->next = po->mclist;
	po->mclist = i;
	err = packet_dev_mc(dev, i, 1);
	if (err) {
		po->mclist = i->next;
		kfree(i);
	}

done:
	rtnl_unlock();
	return err;
}

static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
{
	struct packet_mclist *ml, **mlp;

	rtnl_lock();

	for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
		if (ml->ifindex == mreq->mr_ifindex &&
		    ml->type == mreq->mr_type &&
		    ml->alen == mreq->mr_alen &&
		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
			if (--ml->count == 0) {
				struct net_device *dev;
				*mlp = ml->next;
				dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
				if (dev)
					packet_dev_mc(dev, ml, -1);
				kfree(ml);
			}
			rtnl_unlock();
			return 0;
		}
	}
	rtnl_unlock();
	return -EADDRNOTAVAIL;
}

static void packet_flush_mclist(struct sock *sk)
{
	struct packet_sock *po = pkt_sk(sk);
	struct packet_mclist *ml;

	if (!po->mclist)
		return;

	rtnl_lock();
	while ((ml = po->mclist) != NULL) {
		struct net_device *dev;

		po->mclist = ml->next;
		dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
		if (dev != NULL)
			packet_dev_mc(dev, ml, -1);
		kfree(ml);
	}
	rtnl_unlock();
}

static int
packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	int ret;

	if (level != SOL_PACKET)
		return -ENOPROTOOPT;

	switch (optname) {
	case PACKET_ADD_MEMBERSHIP:
	case PACKET_DROP_MEMBERSHIP:
	{
		struct packet_mreq_max mreq;
		int len = optlen;
		memset(&mreq, 0, sizeof(mreq));
		if (len < sizeof(struct packet_mreq))
			return -EINVAL;
		if (len > sizeof(mreq))
			len = sizeof(mreq);
		if (copy_from_user(&mreq, optval, len))
			return -EFAULT;
		if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
			return -EINVAL;
		if (optname == PACKET_ADD_MEMBERSHIP)
			ret = packet_mc_add(sk, &mreq);
		else
			ret = packet_mc_drop(sk, &mreq);
		return ret;
	}

	case PACKET_RX_RING:
	case PACKET_TX_RING:
	{
		struct tpacket_req req;

		if (optlen < sizeof(req))
			return -EINVAL;
		if (pkt_sk(sk)->has_vnet_hdr)
			return -EINVAL;
		if (copy_from_user(&req, optval, sizeof(req)))
			return -EFAULT;
		return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING);
	}
	case PACKET_COPY_THRESH:
	{
		int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		pkt_sk(sk)->copy_thresh = val;
		return 0;
	}
	case PACKET_VERSION:
	{
		int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;
		switch (val) {
		case TPACKET_V1:
		case TPACKET_V2:
			po->tp_version = val;
			return 0;
		default:
			return -EINVAL;
		}
	}
	case PACKET_RESERVE:
	{
		unsigned int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;
		po->tp_reserve = val;
		return 0;
	}
	case PACKET_LOSS:
	{
		unsigned int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;
		po->tp_loss = !!val;
		return 0;
	}
	case PACKET_AUXDATA:
	{
		int val;

		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->auxdata = !!val;
		return 0;
	}
	case PACKET_ORIGDEV:
	{
		int val;

		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->origdev = !!val;
		return 0;
	}
	case PACKET_VNET_HDR:
	{
		int val;

		if (sock->type != SOCK_RAW)
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (optlen < sizeof(val))
2084 return -EINVAL;
2085 if (copy_from_user(&val, optval, sizeof(val)))
2086 return -EFAULT;
2087
2088 po->has_vnet_hdr = !!val;
2089 return 0;
2090 }
614f60fa
SM
2091 case PACKET_TIMESTAMP:
2092 {
2093 int val;
2094
2095 if (optlen != sizeof(val))
2096 return -EINVAL;
2097 if (copy_from_user(&val, optval, sizeof(val)))
2098 return -EFAULT;
2099
2100 po->tp_tstamp = val;
2101 return 0;
2102 }
1da177e4
LT
2103 default:
2104 return -ENOPROTOOPT;
2105 }
2106}
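
/*
 * Example: a userspace sketch of configuring a TPACKET_V2 RX ring through
 * the setsockopt() path above (illustrative only; the geometry values are
 * arbitrary but satisfy the checks in packet_set_ring(): tp_block_size is
 * a multiple of PAGE_SIZE and tp_frame_nr equals frames-per-block times
 * tp_block_nr):
 *
 *	int ver = TPACKET_V2;
 *	struct tpacket_req req = {
 *		.tp_block_size = 4096,   // assumes PAGE_SIZE == 4096
 *		.tp_block_nr   = 64,
 *		.tp_frame_size = 2048,   // TPACKET_ALIGNMENT-aligned
 *		.tp_frame_nr   = 64 * (4096 / 2048),
 *	};
 *
 *	setsockopt(fd, SOL_PACKET, PACKET_VERSION, &ver, sizeof(ver));
 *	setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));
 *
 * PACKET_VERSION must be set before the ring exists; once rx_ring.pg_vec
 * or tx_ring.pg_vec is non-NULL, the version/reserve options return -EBUSY.
 */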

static int packet_getsockopt(struct socket *sock, int level, int optname,
			     char __user *optval, int __user *optlen)
{
	int len;
	int val;
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	void *data;
	struct tpacket_stats st;

	if (level != SOL_PACKET)
		return -ENOPROTOOPT;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < 0)
		return -EINVAL;

	switch (optname) {
	case PACKET_STATISTICS:
		if (len > sizeof(struct tpacket_stats))
			len = sizeof(struct tpacket_stats);
		spin_lock_bh(&sk->sk_receive_queue.lock);
		st = po->stats;
		memset(&po->stats, 0, sizeof(st));
		spin_unlock_bh(&sk->sk_receive_queue.lock);
		st.tp_packets += st.tp_drops;

		data = &st;
		break;
	case PACKET_AUXDATA:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->auxdata;

		data = &val;
		break;
	case PACKET_ORIGDEV:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->origdev;

		data = &val;
		break;
	case PACKET_VNET_HDR:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->has_vnet_hdr;

		data = &val;
		break;
	case PACKET_VERSION:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->tp_version;
		data = &val;
		break;
	case PACKET_HDRLEN:
		if (len > sizeof(int))
			len = sizeof(int);
		if (copy_from_user(&val, optval, len))
			return -EFAULT;
		switch (val) {
		case TPACKET_V1:
			val = sizeof(struct tpacket_hdr);
			break;
		case TPACKET_V2:
			val = sizeof(struct tpacket2_hdr);
			break;
		default:
			return -EINVAL;
		}
		data = &val;
		break;
	case PACKET_RESERVE:
		if (len > sizeof(unsigned int))
			len = sizeof(unsigned int);
		val = po->tp_reserve;
		data = &val;
		break;
	case PACKET_LOSS:
		if (len > sizeof(unsigned int))
			len = sizeof(unsigned int);
		val = po->tp_loss;
		data = &val;
		break;
	case PACKET_TIMESTAMP:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->tp_tstamp;
		data = &val;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (put_user(len, optlen))
		return -EFAULT;
	if (copy_to_user(optval, data, len))
		return -EFAULT;
	return 0;
}
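
/*
 * Example: reading the counters exposed by PACKET_STATISTICS above
 * (illustrative userspace sketch; note that, as implemented here, the
 * kernel zeroes its copy on every read and tp_packets includes tp_drops):
 *
 *	struct tpacket_stats st;
 *	socklen_t len = sizeof(st);
 *
 *	getsockopt(fd, SOL_PACKET, PACKET_STATISTICS, &st, &len);
 *	// st.tp_packets: frames handled since the last read (incl. drops)
 *	// st.tp_drops:   frames dropped since the last read
 */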


/*
 * Netdevice event handler: detach the protocol hook (and flag ENETDOWN)
 * when the bound device goes down or unregisters, and re-attach it when
 * the device comes back up.
 */
static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
{
	struct sock *sk;
	struct hlist_node *node;
	struct net_device *dev = data;
	struct net *net = dev_net(dev);

	rcu_read_lock();
	sk_for_each_rcu(sk, node, &net->packet.sklist) {
		struct packet_sock *po = pkt_sk(sk);

		switch (msg) {
		case NETDEV_UNREGISTER:
			if (po->mclist)
				packet_dev_mclist(dev, po->mclist, -1);
			/* fallthrough */

		case NETDEV_DOWN:
			if (dev->ifindex == po->ifindex) {
				spin_lock(&po->bind_lock);
				if (po->running) {
					__dev_remove_pack(&po->prot_hook);
					__sock_put(sk);
					po->running = 0;
					sk->sk_err = ENETDOWN;
					if (!sock_flag(sk, SOCK_DEAD))
						sk->sk_error_report(sk);
				}
				if (msg == NETDEV_UNREGISTER) {
					po->ifindex = -1;
					po->prot_hook.dev = NULL;
				}
				spin_unlock(&po->bind_lock);
			}
			break;
		case NETDEV_UP:
			if (dev->ifindex == po->ifindex) {
				spin_lock(&po->bind_lock);
				if (po->num && !po->running) {
					dev_add_pack(&po->prot_hook);
					sock_hold(sk);
					po->running = 1;
				}
				spin_unlock(&po->bind_lock);
			}
			break;
		}
	}
	rcu_read_unlock();
	return NOTIFY_DONE;
}


static int packet_ioctl(struct socket *sock, unsigned int cmd,
			unsigned long arg)
{
	struct sock *sk = sock->sk;

	switch (cmd) {
	case SIOCOUTQ:
	{
		int amount = sk_wmem_alloc_get(sk);

		return put_user(amount, (int __user *)arg);
	}
	case SIOCINQ:
	{
		struct sk_buff *skb;
		int amount = 0;

		spin_lock_bh(&sk->sk_receive_queue.lock);
		skb = skb_peek(&sk->sk_receive_queue);
		if (skb)
			amount = skb->len;
		spin_unlock_bh(&sk->sk_receive_queue.lock);
		return put_user(amount, (int __user *)arg);
	}
	case SIOCGSTAMP:
		return sock_get_timestamp(sk, (struct timeval __user *)arg);
	case SIOCGSTAMPNS:
		return sock_get_timestampns(sk, (struct timespec __user *)arg);

#ifdef CONFIG_INET
	case SIOCADDRT:
	case SIOCDELRT:
	case SIOCDARP:
	case SIOCGARP:
	case SIOCSARP:
	case SIOCGIFADDR:
	case SIOCSIFADDR:
	case SIOCGIFBRDADDR:
	case SIOCSIFBRDADDR:
	case SIOCGIFNETMASK:
	case SIOCSIFNETMASK:
	case SIOCGIFDSTADDR:
	case SIOCSIFDSTADDR:
	case SIOCSIFFLAGS:
		return inet_dgram_ops.ioctl(sock, cmd, arg);
#endif

	default:
		return -ENOIOCTLCMD;
	}
	return 0;
}
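
/*
 * Example: querying queue occupancy through the ioctls handled above
 * (illustrative userspace sketch):
 *
 *	int pending;
 *
 *	ioctl(fd, SIOCINQ, &pending);   // length of the next queued frame, 0 if none
 *	ioctl(fd, SIOCOUTQ, &pending);  // bytes still committed to the send queue
 */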

static unsigned int packet_poll(struct file *file, struct socket *sock,
				poll_table *wait)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	unsigned int mask = datagram_poll(file, sock, wait);

	spin_lock_bh(&sk->sk_receive_queue.lock);
	if (po->rx_ring.pg_vec) {
		if (!packet_previous_frame(po, &po->rx_ring, TP_STATUS_KERNEL))
			mask |= POLLIN | POLLRDNORM;
	}
	spin_unlock_bh(&sk->sk_receive_queue.lock);
	spin_lock_bh(&sk->sk_write_queue.lock);
	if (po->tx_ring.pg_vec) {
		if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE))
			mask |= POLLOUT | POLLWRNORM;
	}
	spin_unlock_bh(&sk->sk_write_queue.lock);
	return mask;
}
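
/*
 * Example: how the semantics above look from userspace when rings are
 * attached (illustrative sketch): POLLIN is raised when the most recently
 * used RX slot is no longer marked TP_STATUS_KERNEL (a frame awaits
 * userspace), POLLOUT when at least one TX slot is TP_STATUS_AVAILABLE.
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLOUT };
 *
 *	poll(&pfd, 1, -1);
 *	if (pfd.revents & POLLIN)
 *		;  // consume frames until their status returns to TP_STATUS_KERNEL
 */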

/* Dirty? Well, I still did not learn better way to account
 * for user mmaps.
 */

static void packet_mm_open(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct socket *sock = file->private_data;
	struct sock *sk = sock->sk;

	if (sk)
		atomic_inc(&pkt_sk(sk)->mapped);
}

static void packet_mm_close(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct socket *sock = file->private_data;
	struct sock *sk = sock->sk;

	if (sk)
		atomic_dec(&pkt_sk(sk)->mapped);
}

static const struct vm_operations_struct packet_mmap_ops = {
	.open	=	packet_mm_open,
	.close	=	packet_mm_close,
};

static void free_pg_vec(struct pgv *pg_vec, unsigned int order,
			unsigned int len)
{
	int i;

	for (i = 0; i < len; i++) {
		if (likely(pg_vec[i].buffer)) {
			if (is_vmalloc_addr(pg_vec[i].buffer))
				vfree(pg_vec[i].buffer);
			else
				free_pages((unsigned long)pg_vec[i].buffer,
					   order);
			pg_vec[i].buffer = NULL;
		}
	}
	kfree(pg_vec);
}

static inline char *alloc_one_pg_vec_page(unsigned long order)
{
	char *buffer = NULL;
	gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP |
			  __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;

	buffer = (char *) __get_free_pages(gfp_flags, order);

	if (buffer)
		return buffer;

	/*
	 * __get_free_pages failed, fall back to vmalloc
	 */
	buffer = vzalloc((1 << order) * PAGE_SIZE);

	if (buffer)
		return buffer;

	/*
	 * vmalloc failed, lets dig into swap here
	 */
	gfp_flags &= ~__GFP_NORETRY;
	buffer = (char *)__get_free_pages(gfp_flags, order);
	if (buffer)
		return buffer;

	/*
	 * complete and utter failure
	 */
	return NULL;
}

static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order)
{
	unsigned int block_nr = req->tp_block_nr;
	struct pgv *pg_vec;
	int i;

	pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL);
	if (unlikely(!pg_vec))
		goto out;

	for (i = 0; i < block_nr; i++) {
		pg_vec[i].buffer = alloc_one_pg_vec_page(order);
		if (unlikely(!pg_vec[i].buffer))
			goto out_free_pgvec;
	}

out:
	return pg_vec;

out_free_pgvec:
	free_pg_vec(pg_vec, order, block_nr);
	pg_vec = NULL;
	goto out;
}

/*
 * Set up (or, when req->tp_block_nr is zero, tear down) an rx/tx ring:
 * validate the requested geometry, allocate the block vector, then swap
 * it in under pg_vec_lock with the protocol hook temporarily removed.
 */
static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
			   int closing, int tx_ring)
{
	struct pgv *pg_vec = NULL;
	struct packet_sock *po = pkt_sk(sk);
	int was_running, order = 0;
	struct packet_ring_buffer *rb;
	struct sk_buff_head *rb_queue;
	__be16 num;
	int err;

	rb = tx_ring ? &po->tx_ring : &po->rx_ring;
	rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;

	err = -EBUSY;
	if (!closing) {
		if (atomic_read(&po->mapped))
			goto out;
		if (atomic_read(&rb->pending))
			goto out;
	}

	if (req->tp_block_nr) {
		/* Sanity tests and some calculations */
		err = -EBUSY;
		if (unlikely(rb->pg_vec))
			goto out;

		switch (po->tp_version) {
		case TPACKET_V1:
			po->tp_hdrlen = TPACKET_HDRLEN;
			break;
		case TPACKET_V2:
			po->tp_hdrlen = TPACKET2_HDRLEN;
			break;
		}

		err = -EINVAL;
		if (unlikely((int)req->tp_block_size <= 0))
			goto out;
		if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
			goto out;
		if (unlikely(req->tp_frame_size < po->tp_hdrlen +
					po->tp_reserve))
			goto out;
		if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
			goto out;

		rb->frames_per_block = req->tp_block_size/req->tp_frame_size;
		if (unlikely(rb->frames_per_block <= 0))
			goto out;
		if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
					req->tp_frame_nr))
			goto out;

		err = -ENOMEM;
		order = get_order(req->tp_block_size);
		pg_vec = alloc_pg_vec(req, order);
		if (unlikely(!pg_vec))
			goto out;
	}
	/* Done */
	else {
		err = -EINVAL;
		if (unlikely(req->tp_frame_nr))
			goto out;
	}

	lock_sock(sk);

	/* Detach socket from network */
	spin_lock(&po->bind_lock);
	was_running = po->running;
	num = po->num;
	if (was_running) {
		__dev_remove_pack(&po->prot_hook);
		po->num = 0;
		po->running = 0;
		__sock_put(sk);
	}
	spin_unlock(&po->bind_lock);

	synchronize_net();

	err = -EBUSY;
	mutex_lock(&po->pg_vec_lock);
	if (closing || atomic_read(&po->mapped) == 0) {
		err = 0;
		spin_lock_bh(&rb_queue->lock);
		swap(rb->pg_vec, pg_vec);
		rb->frame_max = (req->tp_frame_nr - 1);
		rb->head = 0;
		rb->frame_size = req->tp_frame_size;
		spin_unlock_bh(&rb_queue->lock);

		swap(rb->pg_vec_order, order);
		swap(rb->pg_vec_len, req->tp_block_nr);

		rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
		po->prot_hook.func = (po->rx_ring.pg_vec) ?
						tpacket_rcv : packet_rcv;
		skb_queue_purge(rb_queue);
		if (atomic_read(&po->mapped))
			pr_err("packet_mmap: vma is busy: %d\n",
			       atomic_read(&po->mapped));
	}
	mutex_unlock(&po->pg_vec_lock);

	spin_lock(&po->bind_lock);
	if (was_running && !po->running) {
		sock_hold(sk);
		po->running = 1;
		po->num = num;
		dev_add_pack(&po->prot_hook);
	}
	spin_unlock(&po->bind_lock);

	release_sock(sk);

	if (pg_vec)
		free_pg_vec(pg_vec, order, req->tp_block_nr);
out:
	return err;
}
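
/*
 * The geometry checks above reduce to simple arithmetic; a hedged worked
 * example assuming PAGE_SIZE == 4096 and a TPACKET2_HDRLEN-sized header:
 *
 *	tp_block_size = 4096    (multiple of PAGE_SIZE, so order 0)
 *	tp_frame_size = 2048    (>= tp_hdrlen + tp_reserve, 16-byte aligned)
 *	frames_per_block = 4096 / 2048 = 2
 *	tp_frame_nr = frames_per_block * tp_block_nr   (must match exactly)
 *
 * A request with tp_block_nr == 0 and tp_frame_nr == 0 tears the ring down
 * instead: the old pg_vec is swapped out above and freed before returning.
 */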

static int packet_mmap(struct file *file, struct socket *sock,
		       struct vm_area_struct *vma)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	unsigned long size, expected_size;
	struct packet_ring_buffer *rb;
	unsigned long start;
	int err = -EINVAL;
	int i;

	if (vma->vm_pgoff)
		return -EINVAL;

	mutex_lock(&po->pg_vec_lock);

	expected_size = 0;
	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
		if (rb->pg_vec) {
			expected_size += rb->pg_vec_len
						* rb->pg_vec_pages
						* PAGE_SIZE;
		}
	}

	if (expected_size == 0)
		goto out;

	size = vma->vm_end - vma->vm_start;
	if (size != expected_size)
		goto out;

	start = vma->vm_start;
	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
		if (rb->pg_vec == NULL)
			continue;

		for (i = 0; i < rb->pg_vec_len; i++) {
			struct page *page;
			void *kaddr = rb->pg_vec[i].buffer;
			int pg_num;

			for (pg_num = 0; pg_num < rb->pg_vec_pages; pg_num++) {
				page = pgv_to_page(kaddr);
				err = vm_insert_page(vma, start, page);
				if (unlikely(err))
					goto out;
				start += PAGE_SIZE;
				kaddr += PAGE_SIZE;
			}
		}
	}

	atomic_inc(&po->mapped);
	vma->vm_ops = &packet_mmap_ops;
	err = 0;

out:
	mutex_unlock(&po->pg_vec_lock);
	return err;
}
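
/*
 * Example: mapping the ring(s) set up above into userspace (illustrative
 * sketch). Both rings share one contiguous mapping, RX first, so the
 * length must equal the expected_size computed above and the offset must
 * be zero:
 *
 *	size_t rx_size = req.tp_block_size * req.tp_block_nr;
 *	void *ring = mmap(NULL, rx_size, PROT_READ | PROT_WRITE,
 *			  MAP_SHARED, fd, 0);
 *	struct tpacket2_hdr *hdr = ring;   // first frame of the first block
 *
 *	if (hdr->tp_status & TP_STATUS_USER) {
 *		// frame data begins at (char *)hdr + hdr->tp_mac
 *		hdr->tp_status = TP_STATUS_KERNEL;   // hand the slot back
 *	}
 */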

static const struct proto_ops packet_ops_spkt = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind_spkt,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname_spkt,
	.poll =		datagram_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	packet_sendmsg_spkt,
	.recvmsg =	packet_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};

static const struct proto_ops packet_ops = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname,
	.poll =		packet_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	packet_setsockopt,
	.getsockopt =	packet_getsockopt,
	.sendmsg =	packet_sendmsg,
	.recvmsg =	packet_recvmsg,
	.mmap =		packet_mmap,
	.sendpage =	sock_no_sendpage,
};

static const struct net_proto_family packet_family_ops = {
	.family =	PF_PACKET,
	.create =	packet_create,
	.owner	=	THIS_MODULE,
};

static struct notifier_block packet_netdev_notifier = {
	.notifier_call =	packet_notifier,
};

#ifdef CONFIG_PROC_FS

static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	struct net *net = seq_file_net(seq);

	rcu_read_lock();
	return seq_hlist_start_head_rcu(&net->packet.sklist, *pos);
}

static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct net *net = seq_file_net(seq);
	return seq_hlist_next_rcu(v, &net->packet.sklist, pos);
}

static void packet_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}

static int packet_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "sk       RefCnt Type Proto  Iface R Rmem   User   Inode\n");
	else {
		struct sock *s = sk_entry(v);
		const struct packet_sock *po = pkt_sk(s);

		seq_printf(seq,
			   "%pK %-6d %-4d %04x   %-5d %1d %-6u %-6u %-6lu\n",
			   s,
			   atomic_read(&s->sk_refcnt),
			   s->sk_type,
			   ntohs(po->num),
			   po->ifindex,
			   po->running,
			   atomic_read(&s->sk_rmem_alloc),
			   sock_i_uid(s),
			   sock_i_ino(s));
	}

	return 0;
}

static const struct seq_operations packet_seq_ops = {
	.start	= packet_seq_start,
	.next	= packet_seq_next,
	.stop	= packet_seq_stop,
	.show	= packet_seq_show,
};

static int packet_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &packet_seq_ops,
			    sizeof(struct seq_net_private));
}

static const struct file_operations packet_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= packet_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};

#endif

static int __net_init packet_net_init(struct net *net)
{
	spin_lock_init(&net->packet.sklist_lock);
	INIT_HLIST_HEAD(&net->packet.sklist);

	if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
		return -ENOMEM;

	return 0;
}

static void __net_exit packet_net_exit(struct net *net)
{
	proc_net_remove(net, "packet");
}

static struct pernet_operations packet_net_ops = {
	.init = packet_net_init,
	.exit = packet_net_exit,
};


static void __exit packet_exit(void)
{
	unregister_netdevice_notifier(&packet_netdev_notifier);
	unregister_pernet_subsys(&packet_net_ops);
	sock_unregister(PF_PACKET);
	proto_unregister(&packet_proto);
}

static int __init packet_init(void)
{
	int rc = proto_register(&packet_proto, 0);

	if (rc != 0)
		goto out;

	sock_register(&packet_family_ops);
	register_pernet_subsys(&packet_net_ops);
	register_netdevice_notifier(&packet_netdev_notifier);
out:
	return rc;
}

module_init(packet_init);
module_exit(packet_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_PACKET);