[IPV6]: Fix build warning.
[deliverable/linux.git] net/packet/af_packet.c
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		PACKET - implements raw packet sockets.
 *
 * Version:	$Id: af_packet.c,v 1.61 2002/02/08 03:57:19 davem Exp $
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *
 * Fixes:
 *		Alan Cox	:	verify_area() now used correctly
 *		Alan Cox	:	new skbuff lists, look ma no backlogs!
 *		Alan Cox	:	tidied skbuff lists.
 *		Alan Cox	:	Now uses generic datagram routines I
 *					added. Also fixed the peek/read crash
 *					from all old Linux datagram code.
 *		Alan Cox	:	Uses the improved datagram code.
 *		Alan Cox	:	Added NULL's for socket options.
 *		Alan Cox	:	Re-commented the code.
 *		Alan Cox	:	Use new kernel side addressing
 *		Rob Janssen	:	Correct MTU usage.
 *		Dave Platt	:	Counter leaks caused by incorrect
 *					interrupt locking and some slightly
 *					dubious gcc output. Can you read
 *					compiler: it said _VOLATILE_
 *		Richard Kooijman:	Timestamp fixes.
 *		Alan Cox	:	New buffers. Use sk->mac.raw.
 *		Alan Cox	:	sendmsg/recvmsg support.
 *		Alan Cox	:	Protocol setting support
 *		Alexey Kuznetsov:	Untied from IPv4 stack.
 *		Cyrus Durgin	:	Fixed kerneld for kmod.
 *		Michal Ostrowski:	Module initialization cleanup.
 *		Ulises Alonso	:	Frame number limit removal and
 *					packet_set_ring memory leak.
 *		Eric Biederman	:	Allow for > 8 byte hardware addresses.
 *					The convention is that longer addresses
 *					will simply extend the hardware address
 *					byte arrays at the end of sockaddr_ll
 *					and packet_mreq.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 */

#include <linux/types.h>
#include <linux/mm.h>
#include <linux/capability.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/wireless.h>
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
#include <asm/page.h>
#include <asm/cacheflush.h>
#include <asm/io.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/poll.h>
#include <linux/module.h>
#include <linux/init.h>

#ifdef CONFIG_INET
#include <net/inet_common.h>
#endif

#define CONFIG_SOCK_PACKET	1

/*
   Assumptions:
   - if device has no dev->hard_header routine, it adds and removes ll header
     inside itself. In this case ll header is invisible outside of device,
     but higher levels still should reserve dev->hard_header_len.
     Some devices are clever enough to reallocate the skb when the header
     will not fit into the reserved space (tunnel); others are silly
     (PPP).
   - packet socket receives packets with pulled ll header,
     so that SOCK_RAW should push it back.

On receive:
-----------

Incoming, dev->hard_header!=NULL
   mac_header -> ll header
   data       -> data

Outgoing, dev->hard_header!=NULL
   mac_header -> ll header
   data       -> ll header

Incoming, dev->hard_header==NULL
   mac_header -> UNKNOWN position. It is very likely that it points to the ll
		 header. PPP does this, which is wrong because it introduces
		 asymmetry between the rx and tx paths.
   data       -> data

Outgoing, dev->hard_header==NULL
   mac_header -> data. ll header is still not built!
   data       -> data

Summary
  If dev->hard_header==NULL we are unlikely to restore a sensible ll header.


On transmit:
------------

dev->hard_header != NULL
   mac_header -> ll header
   data       -> ll header

dev->hard_header == NULL (ll header is added by device, we cannot control it)
   mac_header -> data
   data       -> data

   We should set nh.raw on output to the correct position,
   the packet classifier depends on it.
 */
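
/*
 * Illustration (editor's sketch, not part of the kernel build): a minimal
 * user-space receiver showing the consequence of the rules above. With
 * SOCK_RAW the ll header is part of the data you read; with SOCK_DGRAM it
 * has been pulled and is only described by the sockaddr_ll. Error handling
 * is omitted for brevity.
 *
 *	#include <sys/socket.h>
 *	#include <linux/if_packet.h>
 *	#include <linux/if_ether.h>
 *	#include <arpa/inet.h>
 *
 *	int fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
 *	unsigned char frame[2048];
 *	struct sockaddr_ll from;
 *	socklen_t fromlen = sizeof(from);
 *	ssize_t n = recvfrom(fd, frame, sizeof(frame), 0,
 *			     (struct sockaddr *)&from, &fromlen);
 *	// For SOCK_RAW, frame[0..13] is the Ethernet header (dst/src/proto);
 *	// for SOCK_DGRAM the payload would start at frame[0] instead.
 */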

/* List of all packet sockets. */
static HLIST_HEAD(packet_sklist);
static DEFINE_RWLOCK(packet_sklist_lock);

static atomic_t packet_socks_nr;


/* Private packet socket structures. */

struct packet_mclist
{
	struct packet_mclist	*next;
	int			ifindex;
	int			count;
	unsigned short		type;
	unsigned short		alen;
	unsigned char		addr[MAX_ADDR_LEN];
};
/* identical to struct packet_mreq except it has
 * a longer address field.
 */
struct packet_mreq_max
{
	int		mr_ifindex;
	unsigned short	mr_type;
	unsigned short	mr_alen;
	unsigned char	mr_address[MAX_ADDR_LEN];
};

#ifdef CONFIG_PACKET_MMAP
static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing);
#endif

static void packet_flush_mclist(struct sock *sk);

struct packet_sock {
	/* struct sock has to be the first member of packet_sock */
	struct sock		sk;
	struct tpacket_stats	stats;
#ifdef CONFIG_PACKET_MMAP
	char			**pg_vec;
	unsigned int		head;
	unsigned int		frames_per_block;
	unsigned int		frame_size;
	unsigned int		frame_max;
	int			copy_thresh;
#endif
	struct packet_type	prot_hook;
	spinlock_t		bind_lock;
	unsigned int		running:1,	/* prot_hook is attached */
				auxdata:1,
				origdev:1;
	int			ifindex;	/* bound device */
	__be16			num;
	struct packet_mclist	*mclist;
#ifdef CONFIG_PACKET_MMAP
	atomic_t		mapped;
	unsigned int		pg_vec_order;
	unsigned int		pg_vec_pages;
	unsigned int		pg_vec_len;
#endif
};

struct packet_skb_cb {
	unsigned int origlen;
	union {
		struct sockaddr_pkt pkt;
		struct sockaddr_ll ll;
	} sa;
};

#define PACKET_SKB_CB(__skb)	((struct packet_skb_cb *)((__skb)->cb))

#ifdef CONFIG_PACKET_MMAP

static inline struct tpacket_hdr *packet_lookup_frame(struct packet_sock *po, unsigned int position)
{
	unsigned int pg_vec_pos, frame_offset;

	pg_vec_pos = position / po->frames_per_block;
	frame_offset = position % po->frames_per_block;

	return (struct tpacket_hdr *)(po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size));
}
#endif

static inline struct packet_sock *pkt_sk(struct sock *sk)
{
	return (struct packet_sock *)sk;
}

static void packet_sock_destruct(struct sock *sk)
{
	BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
	BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));

	if (!sock_flag(sk, SOCK_DEAD)) {
		printk("Attempt to release alive packet socket: %p\n", sk);
		return;
	}

	atomic_dec(&packet_socks_nr);
#ifdef PACKET_REFCNT_DEBUG
	printk(KERN_DEBUG "PACKET socket %p is free, %d are alive\n", sk, atomic_read(&packet_socks_nr));
#endif
}


static const struct proto_ops packet_ops;

#ifdef CONFIG_SOCK_PACKET
static const struct proto_ops packet_ops_spkt;

static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct sockaddr_pkt *spkt;

	/*
	 *	When we registered the protocol we saved the socket in the data
	 *	field for just this event.
	 */

	sk = pt->af_packet_priv;

	/*
	 *	Yank back the headers [hope the device set this
	 *	right or kerboom...]
	 *
	 *	Incoming packets have ll header pulled,
	 *	push it back.
	 *
	 *	For outgoing ones skb->data == skb_mac_header(skb)
	 *	so that this procedure is noop.
	 */

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto out;

	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
		goto oom;

	/* drop any routing info */
	dst_release(skb->dst);
	skb->dst = NULL;

	/* drop conntrack reference */
	nf_reset(skb);

	spkt = &PACKET_SKB_CB(skb)->sa.pkt;

	skb_push(skb, skb->data - skb_mac_header(skb));

	/*
	 *	The SOCK_PACKET socket receives _all_ frames.
	 */

	spkt->spkt_family = dev->type;
	strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
	spkt->spkt_protocol = skb->protocol;

	/*
	 *	Charge the memory to the socket. This is done specifically
	 *	to prevent sockets using all the memory up.
	 */

	if (sock_queue_rcv_skb(sk,skb) == 0)
		return 0;

out:
	kfree_skb(skb);
oom:
	return 0;
}


/*
 *	Output a raw packet to a device layer. This bypasses all the other
 *	protocol layers and you must therefore supply it with a complete frame
 */
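
/*
 * Illustration (editor's sketch, assuming the legacy SOCK_PACKET API): the
 * caller hands the kernel a complete link-level frame and names the device
 * in a sockaddr_pkt. The frame[] contents and "eth0" are placeholders.
 *
 *	struct sockaddr_pkt spkt = { .spkt_family = AF_PACKET };
 *	unsigned char frame[ETH_ZLEN] = { 0 };	// dst MAC, src MAC, type, payload
 *
 *	strncpy(spkt.spkt_device, "eth0", sizeof(spkt.spkt_device));
 *	spkt.spkt_protocol = htons(ETH_P_IP);
 *	sendto(fd, frame, sizeof(frame), 0,
 *	       (struct sockaddr *)&spkt, sizeof(spkt));
 */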

static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
			       struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_pkt *saddr=(struct sockaddr_pkt *)msg->msg_name;
	struct sk_buff *skb;
	struct net_device *dev;
	__be16 proto=0;
	int err;

	/*
	 *	Get and verify the address.
	 */

	if (saddr)
	{
		if (msg->msg_namelen < sizeof(struct sockaddr))
			return(-EINVAL);
		if (msg->msg_namelen==sizeof(struct sockaddr_pkt))
			proto=saddr->spkt_protocol;
	}
	else
		return(-ENOTCONN);	/* SOCK_PACKET must be sent giving an address */

	/*
	 *	Find the device first to size check it
	 */

	saddr->spkt_device[13] = 0;
	dev = dev_get_by_name(saddr->spkt_device);
	err = -ENODEV;
	if (dev == NULL)
		goto out_unlock;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_unlock;

	/*
	 *	You may not queue a frame bigger than the mtu. This is the lowest level
	 *	raw protocol and you must do your own fragmentation at this level.
	 */

	err = -EMSGSIZE;
	if (len > dev->mtu + dev->hard_header_len)
		goto out_unlock;

	err = -ENOBUFS;
	skb = sock_wmalloc(sk, len + LL_RESERVED_SPACE(dev), 0, GFP_KERNEL);

	/*
	 *	If the write buffer is full, then tough. At this level the user gets to
	 *	deal with the problem - do your own algorithmic backoffs. That's far
	 *	more flexible.
	 */

	if (skb == NULL)
		goto out_unlock;

	/*
	 *	Fill it in
	 */

	/* FIXME: Save some space for broken drivers that write a
	 * hard header at transmission time by themselves. PPP is the
	 * notable one here. This should really be fixed at the driver level.
	 */
	skb_reserve(skb, LL_RESERVED_SPACE(dev));
	skb_reset_network_header(skb);

	/* Try to align data part correctly */
	if (dev->hard_header) {
		skb->data -= dev->hard_header_len;
		skb->tail -= dev->hard_header_len;
		if (len < dev->hard_header_len)
			skb_reset_network_header(skb);
	}

	/* Returns -EFAULT on error */
	err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->sk_priority;
	if (err)
		goto out_free;

	/*
	 *	Now send it
	 */

	dev_queue_xmit(skb);
	dev_put(dev);
	return(len);

out_free:
	kfree_skb(skb);
out_unlock:
	if (dev)
		dev_put(dev);
	return err;
}
#endif

static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
				      unsigned int res)
{
	struct sk_filter *filter;

	rcu_read_lock_bh();
	filter = rcu_dereference(sk->sk_filter);
	if (filter != NULL)
		res = sk_run_filter(skb, filter->insns, filter->len);
	rcu_read_unlock_bh();

	return res;
}

/*
   This function makes lazy skb cloning in hope that most of packets
   are discarded by BPF.

   Note tricky part: we DO mangle shared skb! skb->data, skb->len
   and skb->cb are mangled. It works because (and until) packets
   falling here are owned by current CPU. Output packets are cloned
   by dev_queue_xmit_nit(), input packets are processed by net_bh
   sequentially, so that if we return skb to original state on exit,
   we will not harm anyone.
 */
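
/*
 * Illustration (editor's sketch): the filter consulted by run_filter() is
 * installed from user space with SO_ATTACH_FILTER. A trivial classic-BPF
 * program that accepts every packet looks like this; real programs are
 * usually generated, e.g. by tcpdump -dd.
 *
 *	struct sock_filter code[] = {
 *		{ 0x06, 0, 0, 0x0000ffff },	// ret #0xffff (accept up to 64k)
 *	};
 *	struct sock_fprog prog = {
 *		.len = sizeof(code) / sizeof(code[0]),
 *		.filter = code,
 *	};
 *	setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog));
 */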

static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct sockaddr_ll *sll;
	struct packet_sock *po;
	u8 * skb_head = skb->data;
	int skb_len = skb->len;
	unsigned int snaplen, res;

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = pt->af_packet_priv;
	po = pkt_sk(sk);

	skb->dev = dev;

	if (dev->hard_header) {
		/* The device has an explicit notion of ll header,
		   exported to higher levels.

		   Otherwise, the device hides the details of its frame
		   structure, so that the corresponding packet head is
		   never delivered to the user.
		 */
		if (sk->sk_type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb_mac_header(skb));
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb_network_offset(skb));
		}
	}

	snaplen = skb->len;

	res = run_filter(skb, sk, snaplen);
	if (!res)
		goto drop_n_restore;
	if (snaplen > res)
		snaplen = res;

	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
	    (unsigned)sk->sk_rcvbuf)
		goto drop_n_acct;

	if (skb_shared(skb)) {
		struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
		if (nskb == NULL)
			goto drop_n_acct;

		if (skb_head != skb->data) {
			skb->data = skb_head;
			skb->len = skb_len;
		}
		kfree_skb(skb);
		skb = nskb;
	}

	BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
		     sizeof(skb->cb));

	sll = &PACKET_SKB_CB(skb)->sa.ll;
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	if (unlikely(po->origdev) && skb->pkt_type == PACKET_HOST)
		sll->sll_ifindex = orig_dev->ifindex;
	else
		sll->sll_ifindex = dev->ifindex;
	sll->sll_halen = 0;

	if (dev->hard_header_parse)
		sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);

	PACKET_SKB_CB(skb)->origlen = skb->len;

	if (pskb_trim(skb, snaplen))
		goto drop_n_acct;

	skb_set_owner_r(skb, sk);
	skb->dev = NULL;
	dst_release(skb->dst);
	skb->dst = NULL;

	/* drop conntrack reference */
	nf_reset(skb);

	spin_lock(&sk->sk_receive_queue.lock);
	po->stats.tp_packets++;
	__skb_queue_tail(&sk->sk_receive_queue, skb);
	spin_unlock(&sk->sk_receive_queue.lock);
	sk->sk_data_ready(sk, skb->len);
	return 0;

drop_n_acct:
	spin_lock(&sk->sk_receive_queue.lock);
	po->stats.tp_drops++;
	spin_unlock(&sk->sk_receive_queue.lock);

drop_n_restore:
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb_len;
	}
drop:
	kfree_skb(skb);
	return 0;
}

#ifdef CONFIG_PACKET_MMAP
static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct packet_sock *po;
	struct sockaddr_ll *sll;
	struct tpacket_hdr *h;
	u8 * skb_head = skb->data;
	int skb_len = skb->len;
	unsigned int snaplen, res;
	unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
	unsigned short macoff, netoff;
	struct sk_buff *copy_skb = NULL;
	struct timeval tv;

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = pt->af_packet_priv;
	po = pkt_sk(sk);

	if (dev->hard_header) {
		if (sk->sk_type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb_mac_header(skb));
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb_network_offset(skb));
		}
	}

	if (skb->ip_summed == CHECKSUM_PARTIAL)
		status |= TP_STATUS_CSUMNOTREADY;

	snaplen = skb->len;

	res = run_filter(skb, sk, snaplen);
	if (!res)
		goto drop_n_restore;
	if (snaplen > res)
		snaplen = res;

	if (sk->sk_type == SOCK_DGRAM) {
		macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
	} else {
		unsigned maclen = skb_network_offset(skb);
		netoff = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 16 : maclen));
		macoff = netoff - maclen;
	}

	if (macoff + snaplen > po->frame_size) {
		if (po->copy_thresh &&
		    atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
		    (unsigned)sk->sk_rcvbuf) {
			if (skb_shared(skb)) {
				copy_skb = skb_clone(skb, GFP_ATOMIC);
			} else {
				copy_skb = skb_get(skb);
				skb_head = skb->data;
			}
			if (copy_skb)
				skb_set_owner_r(copy_skb, sk);
		}
		snaplen = po->frame_size - macoff;
		if ((int)snaplen < 0)
			snaplen = 0;
	}

	spin_lock(&sk->sk_receive_queue.lock);
	h = packet_lookup_frame(po, po->head);

	if (h->tp_status)
		goto ring_is_full;
	po->head = po->head != po->frame_max ? po->head+1 : 0;
	po->stats.tp_packets++;
	if (copy_skb) {
		status |= TP_STATUS_COPY;
		__skb_queue_tail(&sk->sk_receive_queue, copy_skb);
	}
	if (!po->stats.tp_drops)
		status &= ~TP_STATUS_LOSING;
	spin_unlock(&sk->sk_receive_queue.lock);

	skb_copy_bits(skb, 0, (u8*)h + macoff, snaplen);

	h->tp_len = skb->len;
	h->tp_snaplen = snaplen;
	h->tp_mac = macoff;
	h->tp_net = netoff;
	if (skb->tstamp.tv64 == 0) {
		__net_timestamp(skb);
		sock_enable_timestamp(sk);
	}
	tv = ktime_to_timeval(skb->tstamp);
	h->tp_sec = tv.tv_sec;
	h->tp_usec = tv.tv_usec;

	sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h)));
	sll->sll_halen = 0;
	if (dev->hard_header_parse)
		sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	if (unlikely(po->origdev) && skb->pkt_type == PACKET_HOST)
		sll->sll_ifindex = orig_dev->ifindex;
	else
		sll->sll_ifindex = dev->ifindex;

	h->tp_status = status;
	smp_mb();

	{
		struct page *p_start, *p_end;
		u8 *h_end = (u8 *)h + macoff + snaplen - 1;

		p_start = virt_to_page(h);
		p_end = virt_to_page(h_end);
		while (p_start <= p_end) {
			flush_dcache_page(p_start);
			p_start++;
		}
	}

	sk->sk_data_ready(sk, 0);

drop_n_restore:
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb_len;
	}
drop:
	kfree_skb(skb);
	return 0;

ring_is_full:
	po->stats.tp_drops++;
	spin_unlock(&sk->sk_receive_queue.lock);

	sk->sk_data_ready(sk, 0);
	if (copy_skb)
		kfree_skb(copy_skb);
	goto drop_n_restore;
}

#endif


static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
			  struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_ll *saddr=(struct sockaddr_ll *)msg->msg_name;
	struct sk_buff *skb;
	struct net_device *dev;
	__be16 proto;
	unsigned char *addr;
	int ifindex, err, reserve = 0;

	/*
	 *	Get and verify the address.
	 */

	if (saddr == NULL) {
		struct packet_sock *po = pkt_sk(sk);

		ifindex	= po->ifindex;
		proto	= po->num;
		addr	= NULL;
	} else {
		err = -EINVAL;
		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
			goto out;
		if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
			goto out;
		ifindex	= saddr->sll_ifindex;
		proto	= saddr->sll_protocol;
		addr	= saddr->sll_addr;
	}


	dev = dev_get_by_index(ifindex);
	err = -ENXIO;
	if (dev == NULL)
		goto out_unlock;
	if (sock->type == SOCK_RAW)
		reserve = dev->hard_header_len;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_unlock;

	err = -EMSGSIZE;
	if (len > dev->mtu+reserve)
		goto out_unlock;

	skb = sock_alloc_send_skb(sk, len + LL_RESERVED_SPACE(dev),
				  msg->msg_flags & MSG_DONTWAIT, &err);
	if (skb==NULL)
		goto out_unlock;

	skb_reserve(skb, LL_RESERVED_SPACE(dev));
	skb_reset_network_header(skb);

	if (dev->hard_header) {
		int res;
		err = -EINVAL;
		res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len);
		if (sock->type != SOCK_DGRAM) {
			skb_reset_tail_pointer(skb);
			skb->len = 0;
		} else if (res < 0)
			goto out_free;
	}

	/* Returns -EFAULT on error */
	err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
	if (err)
		goto out_free;

	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->sk_priority;

	/*
	 *	Now send it
	 */

	err = dev_queue_xmit(skb);
	if (err > 0 && (err = net_xmit_errno(err)) != 0)
		goto out_unlock;

	dev_put(dev);

	return(len);

out_free:
	kfree_skb(skb);
out_unlock:
	if (dev)
		dev_put(dev);
out:
	return err;
}

/*
 *	Close a PACKET socket. This is fairly simple. We immediately go
 *	to 'closed' state and remove our protocol entry in the device list.
 */

static int packet_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po;

	if (!sk)
		return 0;

	po = pkt_sk(sk);

	write_lock_bh(&packet_sklist_lock);
	sk_del_node_init(sk);
	write_unlock_bh(&packet_sklist_lock);

	/*
	 *	Unhook packet receive handler.
	 */

	if (po->running) {
		/*
		 *	Remove the protocol hook
		 */
		dev_remove_pack(&po->prot_hook);
		po->running = 0;
		po->num = 0;
		__sock_put(sk);
	}

	packet_flush_mclist(sk);

#ifdef CONFIG_PACKET_MMAP
	if (po->pg_vec) {
		struct tpacket_req req;
		memset(&req, 0, sizeof(req));
		packet_set_ring(sk, &req, 1);
	}
#endif

	/*
	 *	Now the socket is dead. No more input will appear.
	 */

	sock_orphan(sk);
	sock->sk = NULL;

	/* Purge queues */

	skb_queue_purge(&sk->sk_receive_queue);

	sock_put(sk);
	return 0;
}

/*
 *	Attach a packet hook.
 */

static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
{
	struct packet_sock *po = pkt_sk(sk);
	/*
	 *	Detach an existing hook if present.
	 */

	lock_sock(sk);

	spin_lock(&po->bind_lock);
	if (po->running) {
		__sock_put(sk);
		po->running = 0;
		po->num = 0;
		spin_unlock(&po->bind_lock);
		dev_remove_pack(&po->prot_hook);
		spin_lock(&po->bind_lock);
	}

	po->num = protocol;
	po->prot_hook.type = protocol;
	po->prot_hook.dev = dev;

	po->ifindex = dev ? dev->ifindex : 0;

	if (protocol == 0)
		goto out_unlock;

	if (dev) {
		if (dev->flags&IFF_UP) {
			dev_add_pack(&po->prot_hook);
			sock_hold(sk);
			po->running = 1;
		} else {
			sk->sk_err = ENETDOWN;
			if (!sock_flag(sk, SOCK_DEAD))
				sk->sk_error_report(sk);
		}
	} else {
		dev_add_pack(&po->prot_hook);
		sock_hold(sk);
		po->running = 1;
	}

out_unlock:
	spin_unlock(&po->bind_lock);
	release_sock(sk);
	return 0;
}

/*
 *	Bind a packet socket to a device
 */

#ifdef CONFIG_SOCK_PACKET

static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk=sock->sk;
	char name[15];
	struct net_device *dev;
	int err = -ENODEV;

	/*
	 *	Check legality
	 */

	if (addr_len != sizeof(struct sockaddr))
		return -EINVAL;
	strlcpy(name,uaddr->sa_data,sizeof(name));

	dev = dev_get_by_name(name);
	if (dev) {
		err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
		dev_put(dev);
	}
	return err;
}
#endif

static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
	struct sock *sk=sock->sk;
	struct net_device *dev = NULL;
	int err;


	/*
	 *	Check legality
	 */

	if (addr_len < sizeof(struct sockaddr_ll))
		return -EINVAL;
	if (sll->sll_family != AF_PACKET)
		return -EINVAL;

	if (sll->sll_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(sll->sll_ifindex);
		if (dev == NULL)
			goto out;
	}
	err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
	if (dev)
		dev_put(dev);

out:
	return err;
}
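
/*
 * Illustration (editor's sketch): binding an AF_PACKET socket to one
 * interface from user space. Only sll_family, sll_protocol and sll_ifindex
 * are consulted by packet_bind(); "eth0" is a placeholder name.
 *
 *	struct sockaddr_ll sll = {
 *		.sll_family   = AF_PACKET,
 *		.sll_protocol = htons(ETH_P_ALL),
 *		.sll_ifindex  = if_nametoindex("eth0"),
 *	};
 *	bind(fd, (struct sockaddr *)&sll, sizeof(sll));
 */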

static struct proto packet_proto = {
	.name	  = "PACKET",
	.owner	  = THIS_MODULE,
	.obj_size = sizeof(struct packet_sock),
};

/*
 *	Create a packet socket.
 */

static int packet_create(struct socket *sock, int protocol)
{
	struct sock *sk;
	struct packet_sock *po;
	__be16 proto = (__force __be16)protocol; /* weird, but documented */
	int err;

	if (!capable(CAP_NET_RAW))
		return -EPERM;
	if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW
#ifdef CONFIG_SOCK_PACKET
	    && sock->type != SOCK_PACKET
#endif
	    )
		return -ESOCKTNOSUPPORT;

	sock->state = SS_UNCONNECTED;

	err = -ENOBUFS;
	sk = sk_alloc(PF_PACKET, GFP_KERNEL, &packet_proto, 1);
	if (sk == NULL)
		goto out;

	sock->ops = &packet_ops;
#ifdef CONFIG_SOCK_PACKET
	if (sock->type == SOCK_PACKET)
		sock->ops = &packet_ops_spkt;
#endif
	sock_init_data(sock, sk);

	po = pkt_sk(sk);
	sk->sk_family = PF_PACKET;
	po->num = proto;

	sk->sk_destruct = packet_sock_destruct;
	atomic_inc(&packet_socks_nr);

	/*
	 *	Attach a protocol block
	 */

	spin_lock_init(&po->bind_lock);
	po->prot_hook.func = packet_rcv;
#ifdef CONFIG_SOCK_PACKET
	if (sock->type == SOCK_PACKET)
		po->prot_hook.func = packet_rcv_spkt;
#endif
	po->prot_hook.af_packet_priv = sk;

	if (proto) {
		po->prot_hook.type = proto;
		dev_add_pack(&po->prot_hook);
		sock_hold(sk);
		po->running = 1;
	}

	write_lock_bh(&packet_sklist_lock);
	sk_add_node(sk, &packet_sklist);
	write_unlock_bh(&packet_sklist_lock);
	return(0);
out:
	return err;
}

/*
 *	Pull a packet from our receive queue and hand it to the user.
 *	If necessary we block.
 */

static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
			  struct msghdr *msg, size_t len, int flags)
{
	struct sock *sk = sock->sk;
	struct sk_buff *skb;
	int copied, err;
	struct sockaddr_ll *sll;

	err = -EINVAL;
	if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
		goto out;

#if 0
	/* What error should we return now? EUNATTACH? */
	if (pkt_sk(sk)->ifindex < 0)
		return -ENODEV;
#endif

	/*
	 *	Call the generic datagram receiver. This handles all sorts
	 *	of horrible races and re-entrancy so we can forget about it
	 *	in the protocol layers.
	 *
	 *	It would return ENETDOWN if the device had just gone down,
	 *	but then it will block.
	 */

	skb=skb_recv_datagram(sk,flags,flags&MSG_DONTWAIT,&err);

	/*
	 *	An error occurred so return it. Because skb_recv_datagram()
	 *	handles the blocking we don't need to see and worry about
	 *	blocking retries.
	 */

	if (skb == NULL)
		goto out;

	/*
	 *	If the address length field is there to be filled in, we fill
	 *	it in now.
	 */

	sll = &PACKET_SKB_CB(skb)->sa.ll;
	if (sock->type == SOCK_PACKET)
		msg->msg_namelen = sizeof(struct sockaddr_pkt);
	else
		msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);

	/*
	 *	You lose any data beyond the buffer you gave. If it worries a
	 *	user program they can ask the device for its MTU anyway.
	 */

	copied = skb->len;
	if (copied > len)
	{
		copied=len;
		msg->msg_flags|=MSG_TRUNC;
	}

	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
	if (err)
		goto out_free;

	sock_recv_timestamp(msg, sk, skb);

	if (msg->msg_name)
		memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
		       msg->msg_namelen);

	if (pkt_sk(sk)->auxdata) {
		struct tpacket_auxdata aux;

		aux.tp_status = TP_STATUS_USER;
		if (skb->ip_summed == CHECKSUM_PARTIAL)
			aux.tp_status |= TP_STATUS_CSUMNOTREADY;
		aux.tp_len = PACKET_SKB_CB(skb)->origlen;
		aux.tp_snaplen = skb->len;
		aux.tp_mac = 0;
		aux.tp_net = skb_network_offset(skb);

		put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
	}

	/*
	 *	Free or return the buffer as appropriate. Again this
	 *	hides all the races and re-entrancy issues from us.
	 */
	err = (flags&MSG_TRUNC) ? skb->len : copied;

out_free:
	skb_free_datagram(sk, skb);
out:
	return err;
}
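
/*
 * Illustration (editor's sketch): reading the PACKET_AUXDATA control message
 * that packet_recvmsg() emits once the option is enabled. Error handling is
 * omitted; buf/cbuf sizes are arbitrary.
 *
 *	int one = 1;
 *	setsockopt(fd, SOL_PACKET, PACKET_AUXDATA, &one, sizeof(one));
 *
 *	char buf[2048], cbuf[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
 *	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
 *	struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1,
 *			      .msg_control = cbuf, .msg_controllen = sizeof(cbuf) };
 *	struct cmsghdr *cmsg;
 *
 *	recvmsg(fd, &msg, 0);
 *	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg))
 *		if (cmsg->cmsg_level == SOL_PACKET &&
 *		    cmsg->cmsg_type == PACKET_AUXDATA) {
 *			struct tpacket_auxdata *aux = (void *)CMSG_DATA(cmsg);
 *			// aux->tp_len is the original length; aux->tp_snaplen
 *			// is what was actually delivered.
 *		}
 */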

#ifdef CONFIG_SOCK_PACKET
static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
			       int *uaddr_len, int peer)
{
	struct net_device *dev;
	struct sock *sk	= sock->sk;

	if (peer)
		return -EOPNOTSUPP;

	uaddr->sa_family = AF_PACKET;
	dev = dev_get_by_index(pkt_sk(sk)->ifindex);
	if (dev) {
		strlcpy(uaddr->sa_data, dev->name, 15);
		dev_put(dev);
	} else
		memset(uaddr->sa_data, 0, 14);
	*uaddr_len = sizeof(*uaddr);

	return 0;
}
#endif

static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
			  int *uaddr_len, int peer)
{
	struct net_device *dev;
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;

	if (peer)
		return -EOPNOTSUPP;

	sll->sll_family = AF_PACKET;
	sll->sll_ifindex = po->ifindex;
	sll->sll_protocol = po->num;
	dev = dev_get_by_index(po->ifindex);
	if (dev) {
		sll->sll_hatype = dev->type;
		sll->sll_halen = dev->addr_len;
		memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
		dev_put(dev);
	} else {
		sll->sll_hatype = 0;	/* Bad: we have no ARPHRD_UNSPEC */
		sll->sll_halen = 0;
	}
	*uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;

	return 0;
}

static void packet_dev_mc(struct net_device *dev, struct packet_mclist *i, int what)
{
	switch (i->type) {
	case PACKET_MR_MULTICAST:
		if (what > 0)
			dev_mc_add(dev, i->addr, i->alen, 0);
		else
			dev_mc_delete(dev, i->addr, i->alen, 0);
		break;
	case PACKET_MR_PROMISC:
		dev_set_promiscuity(dev, what);
		break;
	case PACKET_MR_ALLMULTI:
		dev_set_allmulti(dev, what);
		break;
	default:;
	}
}

static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
{
	for ( ; i; i=i->next) {
		if (i->ifindex == dev->ifindex)
			packet_dev_mc(dev, i, what);
	}
}

static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
{
	struct packet_sock *po = pkt_sk(sk);
	struct packet_mclist *ml, *i;
	struct net_device *dev;
	int err;

	rtnl_lock();

	err = -ENODEV;
	dev = __dev_get_by_index(mreq->mr_ifindex);
	if (!dev)
		goto done;

	err = -EINVAL;
	if (mreq->mr_alen > dev->addr_len)
		goto done;

	err = -ENOBUFS;
	i = kmalloc(sizeof(*i), GFP_KERNEL);
	if (i == NULL)
		goto done;

	err = 0;
	for (ml = po->mclist; ml; ml = ml->next) {
		if (ml->ifindex == mreq->mr_ifindex &&
		    ml->type == mreq->mr_type &&
		    ml->alen == mreq->mr_alen &&
		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
			ml->count++;
			/* Free the new element ... */
			kfree(i);
			goto done;
		}
	}

	i->type = mreq->mr_type;
	i->ifindex = mreq->mr_ifindex;
	i->alen = mreq->mr_alen;
	memcpy(i->addr, mreq->mr_address, i->alen);
	i->count = 1;
	i->next = po->mclist;
	po->mclist = i;
	packet_dev_mc(dev, i, +1);

done:
	rtnl_unlock();
	return err;
}

static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
{
	struct packet_mclist *ml, **mlp;

	rtnl_lock();

	for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
		if (ml->ifindex == mreq->mr_ifindex &&
		    ml->type == mreq->mr_type &&
		    ml->alen == mreq->mr_alen &&
		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
			if (--ml->count == 0) {
				struct net_device *dev;
				*mlp = ml->next;
				dev = dev_get_by_index(ml->ifindex);
				if (dev) {
					packet_dev_mc(dev, ml, -1);
					dev_put(dev);
				}
				kfree(ml);
			}
			rtnl_unlock();
			return 0;
		}
	}
	rtnl_unlock();
	return -EADDRNOTAVAIL;
}

static void packet_flush_mclist(struct sock *sk)
{
	struct packet_sock *po = pkt_sk(sk);
	struct packet_mclist *ml;

	if (!po->mclist)
		return;

	rtnl_lock();
	while ((ml = po->mclist) != NULL) {
		struct net_device *dev;

		po->mclist = ml->next;
		if ((dev = dev_get_by_index(ml->ifindex)) != NULL) {
			packet_dev_mc(dev, ml, -1);
			dev_put(dev);
		}
		kfree(ml);
	}
	rtnl_unlock();
}

static int
packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	int ret;

	if (level != SOL_PACKET)
		return -ENOPROTOOPT;

	switch(optname)	{
	case PACKET_ADD_MEMBERSHIP:
	case PACKET_DROP_MEMBERSHIP:
	{
		struct packet_mreq_max mreq;
		int len = optlen;
		memset(&mreq, 0, sizeof(mreq));
		if (len < sizeof(struct packet_mreq))
			return -EINVAL;
		if (len > sizeof(mreq))
			len = sizeof(mreq);
		if (copy_from_user(&mreq,optval,len))
			return -EFAULT;
		if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
			return -EINVAL;
		if (optname == PACKET_ADD_MEMBERSHIP)
			ret = packet_mc_add(sk, &mreq);
		else
			ret = packet_mc_drop(sk, &mreq);
		return ret;
	}
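
	/*
	 * Illustration (editor's sketch): from user space, these two options
	 * take a struct packet_mreq. Putting the interface into promiscuous
	 * mode through this refcounted interface looks like:
	 *
	 *	struct packet_mreq mreq = {
	 *		.mr_ifindex = if_nametoindex("eth0"),	// placeholder
	 *		.mr_type    = PACKET_MR_PROMISC,
	 *	};
	 *	setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP,
	 *		   &mreq, sizeof(mreq));
	 */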

#ifdef CONFIG_PACKET_MMAP
	case PACKET_RX_RING:
	{
		struct tpacket_req req;

		if (optlen<sizeof(req))
			return -EINVAL;
		if (copy_from_user(&req,optval,sizeof(req)))
			return -EFAULT;
		return packet_set_ring(sk, &req, 0);
	}
	case PACKET_COPY_THRESH:
	{
		int val;

		if (optlen!=sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val,optval,sizeof(val)))
			return -EFAULT;

		pkt_sk(sk)->copy_thresh = val;
		return 0;
	}
#endif
	case PACKET_AUXDATA:
	{
		int val;

		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->auxdata = !!val;
		return 0;
	}
	case PACKET_ORIGDEV:
	{
		int val;

		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->origdev = !!val;
		return 0;
	}
	default:
		return -ENOPROTOOPT;
	}
}

static int packet_getsockopt(struct socket *sock, int level, int optname,
			     char __user *optval, int __user *optlen)
{
	int len;
	int val;
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	void *data;
	struct tpacket_stats st;

	if (level != SOL_PACKET)
		return -ENOPROTOOPT;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < 0)
		return -EINVAL;

	switch(optname)	{
	case PACKET_STATISTICS:
		if (len > sizeof(struct tpacket_stats))
			len = sizeof(struct tpacket_stats);
		spin_lock_bh(&sk->sk_receive_queue.lock);
		st = po->stats;
		memset(&po->stats, 0, sizeof(st));
		spin_unlock_bh(&sk->sk_receive_queue.lock);
		st.tp_packets += st.tp_drops;

		data = &st;
		break;
	case PACKET_AUXDATA:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->auxdata;

		data = &val;
		break;
	case PACKET_ORIGDEV:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->origdev;

		data = &val;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (put_user(len, optlen))
		return -EFAULT;
	if (copy_to_user(optval, data, len))
		return -EFAULT;
	return 0;
}
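
/*
 * Illustration (editor's sketch): polling the receive/drop counters. Note
 * the read is destructive (the kernel zeroes the counters above) and that
 * tp_packets is returned including tp_drops.
 *
 *	struct tpacket_stats st;
 *	socklen_t stlen = sizeof(st);
 *
 *	getsockopt(fd, SOL_PACKET, PACKET_STATISTICS, &st, &stlen);
 *	printf("seen %u, dropped %u\n", st.tp_packets, st.tp_drops);
 */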


static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
{
	struct sock *sk;
	struct hlist_node *node;
	struct net_device *dev = data;

	read_lock(&packet_sklist_lock);
	sk_for_each(sk, node, &packet_sklist) {
		struct packet_sock *po = pkt_sk(sk);

		switch (msg) {
		case NETDEV_UNREGISTER:
			if (po->mclist)
				packet_dev_mclist(dev, po->mclist, -1);
			/* fallthrough */

		case NETDEV_DOWN:
			if (dev->ifindex == po->ifindex) {
				spin_lock(&po->bind_lock);
				if (po->running) {
					__dev_remove_pack(&po->prot_hook);
					__sock_put(sk);
					po->running = 0;
					sk->sk_err = ENETDOWN;
					if (!sock_flag(sk, SOCK_DEAD))
						sk->sk_error_report(sk);
				}
				if (msg == NETDEV_UNREGISTER) {
					po->ifindex = -1;
					po->prot_hook.dev = NULL;
				}
				spin_unlock(&po->bind_lock);
			}
			break;
		case NETDEV_UP:
			spin_lock(&po->bind_lock);
			if (dev->ifindex == po->ifindex && po->num &&
			    !po->running) {
				dev_add_pack(&po->prot_hook);
				sock_hold(sk);
				po->running = 1;
			}
			spin_unlock(&po->bind_lock);
			break;
		}
	}
	read_unlock(&packet_sklist_lock);
	return NOTIFY_DONE;
}


static int packet_ioctl(struct socket *sock, unsigned int cmd,
			unsigned long arg)
{
	struct sock *sk = sock->sk;

	switch(cmd) {
	case SIOCOUTQ:
	{
		int amount = atomic_read(&sk->sk_wmem_alloc);
		return put_user(amount, (int __user *)arg);
	}
	case SIOCINQ:
	{
		struct sk_buff *skb;
		int amount = 0;

		spin_lock_bh(&sk->sk_receive_queue.lock);
		skb = skb_peek(&sk->sk_receive_queue);
		if (skb)
			amount = skb->len;
		spin_unlock_bh(&sk->sk_receive_queue.lock);
		return put_user(amount, (int __user *)arg);
	}
	case SIOCGSTAMP:
		return sock_get_timestamp(sk, (struct timeval __user *)arg);
	case SIOCGSTAMPNS:
		return sock_get_timestampns(sk, (struct timespec __user *)arg);

#ifdef CONFIG_INET
	case SIOCADDRT:
	case SIOCDELRT:
	case SIOCDARP:
	case SIOCGARP:
	case SIOCSARP:
	case SIOCGIFADDR:
	case SIOCSIFADDR:
	case SIOCGIFBRDADDR:
	case SIOCSIFBRDADDR:
	case SIOCGIFNETMASK:
	case SIOCSIFNETMASK:
	case SIOCGIFDSTADDR:
	case SIOCSIFDSTADDR:
	case SIOCSIFFLAGS:
		return inet_dgram_ops.ioctl(sock, cmd, arg);
#endif

	default:
		return -ENOIOCTLCMD;
	}
	return 0;
}

#ifndef CONFIG_PACKET_MMAP
#define packet_mmap sock_no_mmap
#define packet_poll datagram_poll
#else

static unsigned int packet_poll(struct file * file, struct socket *sock,
				poll_table *wait)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	unsigned int mask = datagram_poll(file, sock, wait);

	spin_lock_bh(&sk->sk_receive_queue.lock);
	if (po->pg_vec) {
		unsigned last = po->head ? po->head-1 : po->frame_max;
		struct tpacket_hdr *h;

		h = packet_lookup_frame(po, last);

		if (h->tp_status)
			mask |= POLLIN | POLLRDNORM;
	}
	spin_unlock_bh(&sk->sk_receive_queue.lock);
	return mask;
}


/* Dirty? Well, I still have not found a better way to account
 * for user mmaps.
 */

static void packet_mm_open(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct socket *sock = file->private_data;
	struct sock *sk = sock->sk;

	if (sk)
		atomic_inc(&pkt_sk(sk)->mapped);
}

static void packet_mm_close(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct socket *sock = file->private_data;
	struct sock *sk = sock->sk;

	if (sk)
		atomic_dec(&pkt_sk(sk)->mapped);
}

static struct vm_operations_struct packet_mmap_ops = {
	.open	= packet_mm_open,
	.close	= packet_mm_close,
};

static inline struct page *pg_vec_endpage(char *one_pg_vec, unsigned int order)
{
	return virt_to_page(one_pg_vec + (PAGE_SIZE << order) - 1);
}

static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
{
	int i;

	for (i = 0; i < len; i++) {
		if (likely(pg_vec[i]))
			free_pages((unsigned long) pg_vec[i], order);
	}
	kfree(pg_vec);
}

static inline char *alloc_one_pg_vec_page(unsigned long order)
{
	return (char *) __get_free_pages(GFP_KERNEL | __GFP_COMP | __GFP_ZERO,
					 order);
}

static char **alloc_pg_vec(struct tpacket_req *req, int order)
{
	unsigned int block_nr = req->tp_block_nr;
	char **pg_vec;
	int i;

	pg_vec = kzalloc(block_nr * sizeof(char *), GFP_KERNEL);
	if (unlikely(!pg_vec))
		goto out;

	for (i = 0; i < block_nr; i++) {
		pg_vec[i] = alloc_one_pg_vec_page(order);
		if (unlikely(!pg_vec[i]))
			goto out_free_pgvec;
	}

out:
	return pg_vec;

out_free_pgvec:
	free_pg_vec(pg_vec, order, block_nr);
	pg_vec = NULL;
	goto out;
}

static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing)
{
	char **pg_vec = NULL;
	struct packet_sock *po = pkt_sk(sk);
	int was_running, order = 0;
	__be16 num;
	int err = 0;

	if (req->tp_block_nr) {
		int i, l;

		/* Sanity tests and some calculations */

		if (unlikely(po->pg_vec))
			return -EBUSY;

		if (unlikely((int)req->tp_block_size <= 0))
			return -EINVAL;
		if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
			return -EINVAL;
		if (unlikely(req->tp_frame_size < TPACKET_HDRLEN))
			return -EINVAL;
		if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
			return -EINVAL;

		po->frames_per_block = req->tp_block_size/req->tp_frame_size;
		if (unlikely(po->frames_per_block <= 0))
			return -EINVAL;
		if (unlikely((po->frames_per_block * req->tp_block_nr) !=
			     req->tp_frame_nr))
			return -EINVAL;

		err = -ENOMEM;
		order = get_order(req->tp_block_size);
		pg_vec = alloc_pg_vec(req, order);
		if (unlikely(!pg_vec))
			goto out;

		l = 0;
		for (i = 0; i < req->tp_block_nr; i++) {
			char *ptr = pg_vec[i];
			struct tpacket_hdr *header;
			int k;

			for (k = 0; k < po->frames_per_block; k++) {
				header = (struct tpacket_hdr *) ptr;
				header->tp_status = TP_STATUS_KERNEL;
				ptr += req->tp_frame_size;
			}
		}
		/* Done */
	} else {
		if (unlikely(req->tp_frame_nr))
			return -EINVAL;
	}

	lock_sock(sk);

	/* Detach socket from network */
	spin_lock(&po->bind_lock);
	was_running = po->running;
	num = po->num;
	if (was_running) {
		__dev_remove_pack(&po->prot_hook);
		po->num = 0;
		po->running = 0;
		__sock_put(sk);
	}
	spin_unlock(&po->bind_lock);

	synchronize_net();

	err = -EBUSY;
	if (closing || atomic_read(&po->mapped) == 0) {
		err = 0;
#define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })

		spin_lock_bh(&sk->sk_receive_queue.lock);
		pg_vec = XC(po->pg_vec, pg_vec);
		po->frame_max = (req->tp_frame_nr - 1);
		po->head = 0;
		po->frame_size = req->tp_frame_size;
		spin_unlock_bh(&sk->sk_receive_queue.lock);

		order = XC(po->pg_vec_order, order);
		req->tp_block_nr = XC(po->pg_vec_len, req->tp_block_nr);

		po->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
		po->prot_hook.func = po->pg_vec ? tpacket_rcv : packet_rcv;
		skb_queue_purge(&sk->sk_receive_queue);
#undef XC
		if (atomic_read(&po->mapped))
			printk(KERN_DEBUG "packet_mmap: vma is busy: %d\n", atomic_read(&po->mapped));
	}

	spin_lock(&po->bind_lock);
	if (was_running && !po->running) {
		sock_hold(sk);
		po->running = 1;
		po->num = num;
		dev_add_pack(&po->prot_hook);
	}
	spin_unlock(&po->bind_lock);

	release_sock(sk);

	if (pg_vec)
		free_pg_vec(pg_vec, order, req->tp_block_nr);
out:
	return err;
}

static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	unsigned long size;
	unsigned long start;
	int err = -EINVAL;
	int i;

	if (vma->vm_pgoff)
		return -EINVAL;

	size = vma->vm_end - vma->vm_start;

	lock_sock(sk);
	if (po->pg_vec == NULL)
		goto out;
	if (size != po->pg_vec_len*po->pg_vec_pages*PAGE_SIZE)
		goto out;

	start = vma->vm_start;
	for (i = 0; i < po->pg_vec_len; i++) {
		struct page *page = virt_to_page(po->pg_vec[i]);
		int pg_num;

		for (pg_num = 0; pg_num < po->pg_vec_pages; pg_num++, page++) {
			err = vm_insert_page(vma, start, page);
			if (unlikely(err))
				goto out;
			start += PAGE_SIZE;
		}
	}
	atomic_inc(&po->mapped);
	vma->vm_ops = &packet_mmap_ops;
	err = 0;

out:
	release_sock(sk);
	return err;
}
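
/*
 * Illustration (editor's sketch): the user-space side of the RX ring.
 * Sizes are placeholders; tp_block_size must be a multiple of PAGE_SIZE
 * and tp_frame_nr must equal frames-per-block times tp_block_nr, as
 * enforced by packet_set_ring() above.
 *
 *	struct tpacket_req req = {
 *		.tp_block_size = 4096,
 *		.tp_block_nr   = 64,
 *		.tp_frame_size = 2048,
 *		.tp_frame_nr   = 128,	// (4096 / 2048) * 64
 *	};
 *	setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));
 *
 *	size_t len = (size_t)req.tp_block_size * req.tp_block_nr;
 *	unsigned char *ring = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *				   MAP_SHARED, fd, 0);
 *
 *	// Frame i: wait (e.g. with poll()) until the kernel flips tp_status
 *	// away from TP_STATUS_KERNEL, consume the frame, then hand it back:
 *	struct tpacket_hdr *hdr = (struct tpacket_hdr *)(ring + i * 2048);
 *	if (hdr->tp_status & TP_STATUS_USER) {
 *		// packet data is at (unsigned char *)hdr + hdr->tp_mac
 *		hdr->tp_status = TP_STATUS_KERNEL;
 *	}
 */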
#endif


#ifdef CONFIG_SOCK_PACKET
static const struct proto_ops packet_ops_spkt = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind_spkt,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname_spkt,
	.poll =		datagram_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	packet_sendmsg_spkt,
	.recvmsg =	packet_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};
#endif

static const struct proto_ops packet_ops = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname,
	.poll =		packet_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	packet_setsockopt,
	.getsockopt =	packet_getsockopt,
	.sendmsg =	packet_sendmsg,
	.recvmsg =	packet_recvmsg,
	.mmap =		packet_mmap,
	.sendpage =	sock_no_sendpage,
};

static struct net_proto_family packet_family_ops = {
	.family =	PF_PACKET,
	.create =	packet_create,
	.owner	=	THIS_MODULE,
};

static struct notifier_block packet_netdev_notifier = {
	.notifier_call = packet_notifier,
};

#ifdef CONFIG_PROC_FS
static inline struct sock *packet_seq_idx(loff_t off)
{
	struct sock *s;
	struct hlist_node *node;

	sk_for_each(s, node, &packet_sklist) {
		if (!off--)
			return s;
	}
	return NULL;
}

static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
{
	read_lock(&packet_sklist_lock);
	return *pos ? packet_seq_idx(*pos - 1) : SEQ_START_TOKEN;
}

static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return  (v == SEQ_START_TOKEN)
		? sk_head(&packet_sklist)
		: sk_next((struct sock*)v) ;
}

static void packet_seq_stop(struct seq_file *seq, void *v)
{
	read_unlock(&packet_sklist_lock);
}

static int packet_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "sk       RefCnt Type Proto  Iface R Rmem   User   Inode\n");
	else {
		struct sock *s = v;
		const struct packet_sock *po = pkt_sk(s);

		seq_printf(seq,
			   "%p %-6d %-4d %04x   %-5d %1d %-6u %-6u %-6lu\n",
			   s,
			   atomic_read(&s->sk_refcnt),
			   s->sk_type,
			   ntohs(po->num),
			   po->ifindex,
			   po->running,
			   atomic_read(&s->sk_rmem_alloc),
			   sock_i_uid(s),
			   sock_i_ino(s) );
	}

	return 0;
}

static struct seq_operations packet_seq_ops = {
	.start	= packet_seq_start,
	.next	= packet_seq_next,
	.stop	= packet_seq_stop,
	.show	= packet_seq_show,
};

static int packet_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &packet_seq_ops);
}

static const struct file_operations packet_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= packet_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

#endif

static void __exit packet_exit(void)
{
	proc_net_remove("packet");
	unregister_netdevice_notifier(&packet_netdev_notifier);
	sock_unregister(PF_PACKET);
	proto_unregister(&packet_proto);
}

static int __init packet_init(void)
{
	int rc = proto_register(&packet_proto, 0);

	if (rc != 0)
		goto out;

	sock_register(&packet_family_ops);
	register_netdevice_notifier(&packet_netdev_notifier);
	proc_net_fops_create("packet", 0, &packet_seq_fops);
out:
	return rc;
}

module_init(packet_init);
module_exit(packet_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_PACKET);