net/ipv4/ipmr.c
1/*
2 * IP multicast routing support for mrouted 3.6/3.8
3 *
4 * (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
5 * Linux Consultancy and Custom Driver Development
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 *
12 * Fixes:
13 * Michael Chastain : Incorrect size of copying.
14 * Alan Cox : Added the cache manager code
15 * Alan Cox : Fixed the clone/copy bug and device race.
16 * Mike McLagan : Routing by source
17 * Malcolm Beattie : Buffer handling fixes.
18 * Alexey Kuznetsov : Double buffer free and other fixes.
19 * SVR Anand : Fixed several multicast bugs and problems.
20 * Alexey Kuznetsov : Status, optimisations and more.
21 * Brad Parker : Better behaviour on mrouted upcall
22 * overflow.
23 * Carlos Picoto : PIMv1 Support
24 * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header
 25 *					Relax this requirement to work with older peers.
26 *
27 */
28
29#include <asm/system.h>
30#include <asm/uaccess.h>
31#include <linux/types.h>
32#include <linux/capability.h>
33#include <linux/errno.h>
34#include <linux/timer.h>
35#include <linux/mm.h>
36#include <linux/kernel.h>
37#include <linux/fcntl.h>
38#include <linux/stat.h>
39#include <linux/socket.h>
40#include <linux/in.h>
41#include <linux/inet.h>
42#include <linux/netdevice.h>
43#include <linux/inetdevice.h>
44#include <linux/igmp.h>
45#include <linux/proc_fs.h>
46#include <linux/seq_file.h>
47#include <linux/mroute.h>
48#include <linux/init.h>
49#include <linux/if_ether.h>
50#include <linux/slab.h>
51#include <net/net_namespace.h>
52#include <net/ip.h>
53#include <net/protocol.h>
54#include <linux/skbuff.h>
55#include <net/route.h>
56#include <net/sock.h>
57#include <net/icmp.h>
58#include <net/udp.h>
59#include <net/raw.h>
60#include <linux/notifier.h>
61#include <linux/if_arp.h>
62#include <linux/netfilter_ipv4.h>
63#include <net/ipip.h>
64#include <net/checksum.h>
65#include <net/netlink.h>
66#include <net/fib_rules.h>
67
68#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
69#define CONFIG_IP_PIMSM 1
70#endif
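/* Convenience symbol: CONFIG_IP_PIMSM is defined whenever either PIM-SM
 * version 1 or version 2 support is configured, so the shared register-vif
 * code below can be guarded by a single test.
 */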
71
72struct mr_table {
73 struct list_head list;
74#ifdef CONFIG_NET_NS
75 struct net *net;
76#endif
77 u32 id;
78 struct sock *mroute_sk;
79 struct timer_list ipmr_expire_timer;
80 struct list_head mfc_unres_queue;
81 struct list_head mfc_cache_array[MFC_LINES];
82 struct vif_device vif_table[MAXVIFS];
83 int maxvif;
84 atomic_t cache_resolve_queue_len;
85 int mroute_do_assert;
86 int mroute_do_pim;
87#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
88 int mroute_reg_vif_num;
89#endif
90};
91
92struct ipmr_rule {
93 struct fib_rule common;
94};
95
96struct ipmr_result {
97 struct mr_table *mrt;
98};
99
100/* Big lock, protecting vif table, mrt cache and mroute socket state.
 101   Note that the changes are serialized via rtnl_lock.
102 */
103
104static DEFINE_RWLOCK(mrt_lock);
105
106/*
107 * Multicast router control variables
108 */
109
110#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)
111
112/* Special spinlock for queue of unresolved entries */
113static DEFINE_SPINLOCK(mfc_unres_lock);
114
 115/* We return to Alan's original scheme. The hash table of resolved
 116   entries is changed only in process context and is protected
 117   by the weak lock mrt_lock. The queue of unresolved entries is protected
 118   by the strong spinlock mfc_unres_lock.
 119
 120   With this arrangement the data path is entirely free of exclusive locks.
121 */
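/* Concretely: configuration paths (vif_add/vif_delete, MFC add/delete,
 * mrtsock_destruct) run under RTNL and take mrt_lock for writing with
 * BHs disabled, while the forwarding path only ever takes it for reading.
 */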
122
123static struct kmem_cache *mrt_cachep __read_mostly;
124
125static struct mr_table *ipmr_new_table(struct net *net, u32 id);
126static int ip_mr_forward(struct net *net, struct mr_table *mrt,
127 struct sk_buff *skb, struct mfc_cache *cache,
128 int local);
129static int ipmr_cache_report(struct mr_table *mrt,
130 struct sk_buff *pkt, vifi_t vifi, int assert);
131static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
132 struct mfc_cache *c, struct rtmsg *rtm);
133static void ipmr_expire_process(unsigned long arg);
134
135#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
136#define ipmr_for_each_table(mrt, net) \
137 list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)
138
139static struct mr_table *ipmr_get_table(struct net *net, u32 id)
140{
141 struct mr_table *mrt;
142
143 ipmr_for_each_table(mrt, net) {
144 if (mrt->id == id)
145 return mrt;
146 }
147 return NULL;
148}
149
150static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
151 struct mr_table **mrt)
152{
153 struct ipmr_result res;
154 struct fib_lookup_arg arg = { .result = &res, };
155 int err;
156
157 err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg);
158 if (err < 0)
159 return err;
160 *mrt = res.mrt;
161 return 0;
162}
163
164static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
165 int flags, struct fib_lookup_arg *arg)
166{
167 struct ipmr_result *res = arg->result;
168 struct mr_table *mrt;
169
170 switch (rule->action) {
171 case FR_ACT_TO_TBL:
172 break;
173 case FR_ACT_UNREACHABLE:
174 return -ENETUNREACH;
175 case FR_ACT_PROHIBIT:
176 return -EACCES;
177 case FR_ACT_BLACKHOLE:
178 default:
179 return -EINVAL;
180 }
181
182 mrt = ipmr_get_table(rule->fr_net, rule->table);
183 if (mrt == NULL)
184 return -EAGAIN;
185 res->mrt = mrt;
186 return 0;
187}
188
189static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
190{
191 return 1;
192}
193
194static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = {
195 FRA_GENERIC_POLICY,
196};
197
198static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
199 struct fib_rule_hdr *frh, struct nlattr **tb)
200{
201 return 0;
202}
203
204static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
205 struct nlattr **tb)
206{
207 return 1;
208}
209
210static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
211 struct fib_rule_hdr *frh)
212{
213 frh->dst_len = 0;
214 frh->src_len = 0;
215 frh->tos = 0;
216 return 0;
217}
218
219static struct fib_rules_ops ipmr_rules_ops_template = {
220 .family = FIB_RULES_IPMR,
221 .rule_size = sizeof(struct ipmr_rule),
222 .addr_size = sizeof(u32),
223 .action = ipmr_rule_action,
224 .match = ipmr_rule_match,
225 .configure = ipmr_rule_configure,
226 .compare = ipmr_rule_compare,
227 .default_pref = fib_default_rule_pref,
228 .fill = ipmr_rule_fill,
229 .nlgroup = RTNLGRP_IPV4_RULE,
230 .policy = ipmr_rule_policy,
231 .owner = THIS_MODULE,
232};
233
234static int __net_init ipmr_rules_init(struct net *net)
235{
236 struct fib_rules_ops *ops;
237 struct mr_table *mrt;
238 int err;
239
240 ops = fib_rules_register(&ipmr_rules_ops_template, net);
241 if (IS_ERR(ops))
242 return PTR_ERR(ops);
243
244 INIT_LIST_HEAD(&net->ipv4.mr_tables);
245
246 mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
247 if (mrt == NULL) {
248 err = -ENOMEM;
249 goto err1;
250 }
251
252 err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0);
253 if (err < 0)
254 goto err2;
255
256 net->ipv4.mr_rules_ops = ops;
257 return 0;
258
259err2:
260 kfree(mrt);
261err1:
262 fib_rules_unregister(ops);
263 return err;
264}
265
266static void __net_exit ipmr_rules_exit(struct net *net)
267{
268 struct mr_table *mrt, *next;
269
270 list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list)
271 kfree(mrt);
272 fib_rules_unregister(net->ipv4.mr_rules_ops);
273}
274#else
275#define ipmr_for_each_table(mrt, net) \
276 for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
277
278static struct mr_table *ipmr_get_table(struct net *net, u32 id)
279{
280 return net->ipv4.mrt;
281}
282
283static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
284 struct mr_table **mrt)
285{
286 *mrt = net->ipv4.mrt;
287 return 0;
288}
289
290static int __net_init ipmr_rules_init(struct net *net)
291{
292 net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
293 return net->ipv4.mrt ? 0 : -ENOMEM;
294}
295
296static void __net_exit ipmr_rules_exit(struct net *net)
297{
298 kfree(net->ipv4.mrt);
299}
300#endif
301
302static struct mr_table *ipmr_new_table(struct net *net, u32 id)
303{
304 struct mr_table *mrt;
305 unsigned int i;
306
307 mrt = ipmr_get_table(net, id);
308 if (mrt != NULL)
309 return mrt;
310
311 mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
312 if (mrt == NULL)
313 return NULL;
314 write_pnet(&mrt->net, net);
315 mrt->id = id;
316
317 /* Forwarding cache */
318 for (i = 0; i < MFC_LINES; i++)
319 INIT_LIST_HEAD(&mrt->mfc_cache_array[i]);
320
321 INIT_LIST_HEAD(&mrt->mfc_unres_queue);
322
323 setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
324 (unsigned long)mrt);
325
326#ifdef CONFIG_IP_PIMSM
327 mrt->mroute_reg_vif_num = -1;
328#endif
329#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
330 list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
331#endif
332 return mrt;
333}
334
335/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
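/* DVMRP tunnels are plain IPIP tunnels: the two helpers below create and
 * destroy them by issuing SIOCADDTUNNEL/SIOCDELTUNNEL ioctls against the
 * "tunl0" device, naming the result "dvmrp<vifi>".
 */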
336
337static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
338{
339 struct net *net = dev_net(dev);
340
341 dev_close(dev);
342
343 dev = __dev_get_by_name(net, "tunl0");
344 if (dev) {
345 const struct net_device_ops *ops = dev->netdev_ops;
346 struct ifreq ifr;
347 struct ip_tunnel_parm p;
348
349 memset(&p, 0, sizeof(p));
350 p.iph.daddr = v->vifc_rmt_addr.s_addr;
351 p.iph.saddr = v->vifc_lcl_addr.s_addr;
352 p.iph.version = 4;
353 p.iph.ihl = 5;
354 p.iph.protocol = IPPROTO_IPIP;
355 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
356 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
357
358 if (ops->ndo_do_ioctl) {
359 mm_segment_t oldfs = get_fs();
360
361 set_fs(KERNEL_DS);
362 ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
363 set_fs(oldfs);
364 }
365 }
366}
367
368static
369struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
370{
371 struct net_device *dev;
372
373 dev = __dev_get_by_name(net, "tunl0");
374
375 if (dev) {
376 const struct net_device_ops *ops = dev->netdev_ops;
377 int err;
378 struct ifreq ifr;
379 struct ip_tunnel_parm p;
380 struct in_device *in_dev;
381
382 memset(&p, 0, sizeof(p));
383 p.iph.daddr = v->vifc_rmt_addr.s_addr;
384 p.iph.saddr = v->vifc_lcl_addr.s_addr;
385 p.iph.version = 4;
386 p.iph.ihl = 5;
387 p.iph.protocol = IPPROTO_IPIP;
388 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
389 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
390
391 if (ops->ndo_do_ioctl) {
392 mm_segment_t oldfs = get_fs();
393
394 set_fs(KERNEL_DS);
395 err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
396 set_fs(oldfs);
397 } else
398 err = -EOPNOTSUPP;
399
400 dev = NULL;
401
402 if (err == 0 &&
403 (dev = __dev_get_by_name(net, p.name)) != NULL) {
404 dev->flags |= IFF_MULTICAST;
405
406 in_dev = __in_dev_get_rtnl(dev);
407 if (in_dev == NULL)
408 goto failure;
409
410 ipv4_devconf_setall(in_dev);
411 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
412
413 if (dev_open(dev))
414 goto failure;
415 dev_hold(dev);
416 }
417 }
418 return dev;
419
420failure:
421 /* allow the register to be completed before unregistering. */
422 rtnl_unlock();
423 rtnl_lock();
424
425 unregister_netdevice(dev);
426 return NULL;
427}
428
429#ifdef CONFIG_IP_PIMSM
430
431static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
432{
433 struct net *net = dev_net(dev);
434 struct mr_table *mrt;
435 struct flowi fl = {
436 .oif = dev->ifindex,
437 .iif = skb->skb_iif,
438 .mark = skb->mark,
439 };
440 int err;
441
442 err = ipmr_fib_lookup(net, &fl, &mrt);
443 if (err < 0)
444 return err;
445
446 read_lock(&mrt_lock);
447 dev->stats.tx_bytes += skb->len;
448 dev->stats.tx_packets++;
449 ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
450 read_unlock(&mrt_lock);
451 kfree_skb(skb);
452 return NETDEV_TX_OK;
453}
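/* Note that reg_vif_xmit() does not transmit anything on the wire: a packet
 * routed out of the register vif is handed whole to the PIM daemon via an
 * IGMPMSG_WHOLEPKT upcall, and it is the daemon that wraps it in a PIM
 * Register message towards the rendezvous point.
 */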
454
455static const struct net_device_ops reg_vif_netdev_ops = {
456 .ndo_start_xmit = reg_vif_xmit,
457};
458
459static void reg_vif_setup(struct net_device *dev)
460{
461 dev->type = ARPHRD_PIMREG;
462 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
463 dev->flags = IFF_NOARP;
464 dev->netdev_ops = &reg_vif_netdev_ops,
465 dev->destructor = free_netdev;
466 dev->features |= NETIF_F_NETNS_LOCAL;
467}
468
469static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
470{
471 struct net_device *dev;
472 struct in_device *in_dev;
473 char name[IFNAMSIZ];
474
475 if (mrt->id == RT_TABLE_DEFAULT)
476 sprintf(name, "pimreg");
477 else
478 sprintf(name, "pimreg%u", mrt->id);
479
480 dev = alloc_netdev(0, name, reg_vif_setup);
481
482 if (dev == NULL)
483 return NULL;
484
485 dev_net_set(dev, net);
486
487 if (register_netdevice(dev)) {
488 free_netdev(dev);
489 return NULL;
490 }
491 dev->iflink = 0;
492
493 rcu_read_lock();
494 if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
495 rcu_read_unlock();
496 goto failure;
497 }
498
499 ipv4_devconf_setall(in_dev);
500 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
501 rcu_read_unlock();
502
503 if (dev_open(dev))
504 goto failure;
505
506 dev_hold(dev);
507
508 return dev;
509
510failure:
511 /* allow the register to be completed before unregistering. */
512 rtnl_unlock();
513 rtnl_lock();
514
515 unregister_netdevice(dev);
516 return NULL;
517}
518#endif
519
520/*
521 * Delete a VIF entry
522 * @notify: Set to 1, if the caller is a notifier_call
523 */
524
525static int vif_delete(struct mr_table *mrt, int vifi, int notify,
526 struct list_head *head)
527{
528 struct vif_device *v;
529 struct net_device *dev;
530 struct in_device *in_dev;
531
532 if (vifi < 0 || vifi >= mrt->maxvif)
533 return -EADDRNOTAVAIL;
534
535 v = &mrt->vif_table[vifi];
536
537 write_lock_bh(&mrt_lock);
538 dev = v->dev;
539 v->dev = NULL;
540
541 if (!dev) {
542 write_unlock_bh(&mrt_lock);
543 return -EADDRNOTAVAIL;
544 }
545
546#ifdef CONFIG_IP_PIMSM
547 if (vifi == mrt->mroute_reg_vif_num)
548 mrt->mroute_reg_vif_num = -1;
549#endif
550
551 if (vifi+1 == mrt->maxvif) {
552 int tmp;
553 for (tmp=vifi-1; tmp>=0; tmp--) {
554 if (VIF_EXISTS(mrt, tmp))
555 break;
556 }
557 mrt->maxvif = tmp+1;
558 }
559
560 write_unlock_bh(&mrt_lock);
561
562 dev_set_allmulti(dev, -1);
563
564 if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
565 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
566 ip_rt_multicast_event(in_dev);
567 }
568
569 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
570 unregister_netdevice_queue(dev, head);
571
572 dev_put(dev);
573 return 0;
574}
575
576static inline void ipmr_cache_free(struct mfc_cache *c)
577{
578 kmem_cache_free(mrt_cachep, c);
579}
580
581/* Destroy an unresolved cache entry, killing queued skbs
582 and reporting error to netlink readers.
583 */
584
585static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
586{
587 struct net *net = read_pnet(&mrt->net);
588 struct sk_buff *skb;
589 struct nlmsgerr *e;
590
591 atomic_dec(&mrt->cache_resolve_queue_len);
592
593 while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
594 if (ip_hdr(skb)->version == 0) {
595 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
596 nlh->nlmsg_type = NLMSG_ERROR;
597 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
598 skb_trim(skb, nlh->nlmsg_len);
599 e = NLMSG_DATA(nlh);
600 e->error = -ETIMEDOUT;
601 memset(&e->msg, 0, sizeof(e->msg));
602
603 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
604 } else
605 kfree_skb(skb);
606 }
607
608 ipmr_cache_free(c);
609}
610
611
612/* Timer process for the unresolved queue. */
613
614static void ipmr_expire_process(unsigned long arg)
615{
616 struct mr_table *mrt = (struct mr_table *)arg;
617 unsigned long now;
618 unsigned long expires;
619 struct mfc_cache *c, *next;
620
621 if (!spin_trylock(&mfc_unres_lock)) {
622 mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
623 return;
624 }
625
626 if (list_empty(&mrt->mfc_unres_queue))
627 goto out;
628
629 now = jiffies;
630 expires = 10*HZ;
631
632 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
633 if (time_after(c->mfc_un.unres.expires, now)) {
634 unsigned long interval = c->mfc_un.unres.expires - now;
635 if (interval < expires)
636 expires = interval;
637 continue;
638 }
639
640 list_del(&c->list);
641 ipmr_destroy_unres(mrt, c);
642 }
643
644 if (!list_empty(&mrt->mfc_unres_queue))
645 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
646
647out:
648 spin_unlock(&mfc_unres_lock);
649}
650
651/* Fill oifs list. It is called under write locked mrt_lock. */
652
653static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
654 unsigned char *ttls)
655{
656 int vifi;
657
658 cache->mfc_un.res.minvif = MAXVIFS;
659 cache->mfc_un.res.maxvif = 0;
660 memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
661
662 for (vifi = 0; vifi < mrt->maxvif; vifi++) {
663 if (VIF_EXISTS(mrt, vifi) &&
664 ttls[vifi] && ttls[vifi] < 255) {
665 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
666 if (cache->mfc_un.res.minvif > vifi)
667 cache->mfc_un.res.minvif = vifi;
668 if (cache->mfc_un.res.maxvif <= vifi)
669 cache->mfc_un.res.maxvif = vifi + 1;
670 }
671 }
672}
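/* A ttls[] entry of 0 or 255 means "do not forward on this vif"; any other
 * value acts as a TTL scoping threshold that incoming packets must exceed
 * (see the TTL comparison in ip_mr_forward()).
 */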
673
674static int vif_add(struct net *net, struct mr_table *mrt,
675 struct vifctl *vifc, int mrtsock)
676{
677 int vifi = vifc->vifc_vifi;
678 struct vif_device *v = &mrt->vif_table[vifi];
679 struct net_device *dev;
680 struct in_device *in_dev;
681 int err;
682
683 /* Is vif busy ? */
684 if (VIF_EXISTS(mrt, vifi))
685 return -EADDRINUSE;
686
687 switch (vifc->vifc_flags) {
688#ifdef CONFIG_IP_PIMSM
689 case VIFF_REGISTER:
690 /*
691 * Special Purpose VIF in PIM
692 * All the packets will be sent to the daemon
693 */
694 if (mrt->mroute_reg_vif_num >= 0)
695 return -EADDRINUSE;
696 dev = ipmr_reg_vif(net, mrt);
697 if (!dev)
698 return -ENOBUFS;
699 err = dev_set_allmulti(dev, 1);
700 if (err) {
701 unregister_netdevice(dev);
702 dev_put(dev);
703 return err;
704 }
705 break;
706#endif
707 case VIFF_TUNNEL:
708 dev = ipmr_new_tunnel(net, vifc);
709 if (!dev)
710 return -ENOBUFS;
711 err = dev_set_allmulti(dev, 1);
712 if (err) {
713 ipmr_del_tunnel(dev, vifc);
714 dev_put(dev);
715 return err;
716 }
717 break;
718
719 case VIFF_USE_IFINDEX:
720 case 0:
721 if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
722 dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
723 if (dev && dev->ip_ptr == NULL) {
724 dev_put(dev);
725 return -EADDRNOTAVAIL;
726 }
727 } else
728 dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
729
730 if (!dev)
731 return -EADDRNOTAVAIL;
732 err = dev_set_allmulti(dev, 1);
733 if (err) {
734 dev_put(dev);
735 return err;
736 }
737 break;
738 default:
739 return -EINVAL;
740 }
741
742 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
743 dev_put(dev);
744 return -EADDRNOTAVAIL;
745 }
746 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
747 ip_rt_multicast_event(in_dev);
748
749 /*
750 * Fill in the VIF structures
751 */
752 v->rate_limit = vifc->vifc_rate_limit;
753 v->local = vifc->vifc_lcl_addr.s_addr;
754 v->remote = vifc->vifc_rmt_addr.s_addr;
755 v->flags = vifc->vifc_flags;
756 if (!mrtsock)
757 v->flags |= VIFF_STATIC;
758 v->threshold = vifc->vifc_threshold;
759 v->bytes_in = 0;
760 v->bytes_out = 0;
761 v->pkt_in = 0;
762 v->pkt_out = 0;
763 v->link = dev->ifindex;
764 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
765 v->link = dev->iflink;
766
767 /* And finish update writing critical data */
768 write_lock_bh(&mrt_lock);
769 v->dev = dev;
770#ifdef CONFIG_IP_PIMSM
771 if (v->flags&VIFF_REGISTER)
772 mrt->mroute_reg_vif_num = vifi;
773#endif
774 if (vifi+1 > mrt->maxvif)
775 mrt->maxvif = vifi+1;
776 write_unlock_bh(&mrt_lock);
777 return 0;
778}
779
780static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
781 __be32 origin,
782 __be32 mcastgrp)
783{
784 int line = MFC_HASH(mcastgrp, origin);
785 struct mfc_cache *c;
786
787 list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
788 if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
789 return c;
790 }
791 return NULL;
792}
793
794/*
795 * Allocate a multicast cache entry
796 */
797static struct mfc_cache *ipmr_cache_alloc(void)
798{
799 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
800 if (c == NULL)
801 return NULL;
802 c->mfc_un.res.minvif = MAXVIFS;
803 return c;
804}
805
806static struct mfc_cache *ipmr_cache_alloc_unres(void)
807{
808 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
809 if (c == NULL)
810 return NULL;
811 skb_queue_head_init(&c->mfc_un.unres.unresolved);
812 c->mfc_un.unres.expires = jiffies + 10*HZ;
813 return c;
814}
815
816/*
 817 *	A cache entry has gone from the unresolved queue into a resolved state
818 */
819
820static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
821 struct mfc_cache *uc, struct mfc_cache *c)
822{
823 struct sk_buff *skb;
824 struct nlmsgerr *e;
825
826 /*
827 * Play the pending entries through our router
828 */
829
830 while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
831 if (ip_hdr(skb)->version == 0) {
832 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
833
834 if (ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
835 nlh->nlmsg_len = (skb_tail_pointer(skb) -
836 (u8 *)nlh);
837 } else {
838 nlh->nlmsg_type = NLMSG_ERROR;
839 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
840 skb_trim(skb, nlh->nlmsg_len);
841 e = NLMSG_DATA(nlh);
842 e->error = -EMSGSIZE;
843 memset(&e->msg, 0, sizeof(e->msg));
844 }
845
846 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
847 } else
848 ip_mr_forward(net, mrt, skb, c, 0);
849 }
850}
851
852/*
853 * Bounce a cache query up to mrouted. We could use netlink for this but mrouted
854 * expects the following bizarre scheme.
855 *
856 * Called under mrt_lock.
857 */
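/* The upcall is delivered as a pseudo IGMP packet on the daemon's raw socket;
 * struct igmpmsg overlays the IP header, and im_msgtype distinguishes
 * IGMPMSG_NOCACHE, IGMPMSG_WRONGVIF and IGMPMSG_WHOLEPKT reports.
 */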
858
859static int ipmr_cache_report(struct mr_table *mrt,
860 struct sk_buff *pkt, vifi_t vifi, int assert)
861{
862 struct sk_buff *skb;
863 const int ihl = ip_hdrlen(pkt);
864 struct igmphdr *igmp;
865 struct igmpmsg *msg;
866 int ret;
867
868#ifdef CONFIG_IP_PIMSM
869 if (assert == IGMPMSG_WHOLEPKT)
870 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
871 else
872#endif
873 skb = alloc_skb(128, GFP_ATOMIC);
874
875 if (!skb)
876 return -ENOBUFS;
877
878#ifdef CONFIG_IP_PIMSM
879 if (assert == IGMPMSG_WHOLEPKT) {
880 /* Ugly, but we have no choice with this interface.
881 Duplicate old header, fix ihl, length etc.
882 And all this only to mangle msg->im_msgtype and
883 to set msg->im_mbz to "mbz" :-)
884 */
885 skb_push(skb, sizeof(struct iphdr));
886 skb_reset_network_header(skb);
887 skb_reset_transport_header(skb);
888 msg = (struct igmpmsg *)skb_network_header(skb);
889 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
890 msg->im_msgtype = IGMPMSG_WHOLEPKT;
891 msg->im_mbz = 0;
892 msg->im_vif = mrt->mroute_reg_vif_num;
893 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
894 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
895 sizeof(struct iphdr));
896 } else
897#endif
898 {
899
900 /*
901 * Copy the IP header
902 */
903
904 skb->network_header = skb->tail;
905 skb_put(skb, ihl);
906 skb_copy_to_linear_data(skb, pkt->data, ihl);
907 ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */
908 msg = (struct igmpmsg *)skb_network_header(skb);
909 msg->im_vif = vifi;
910 skb_dst_set(skb, dst_clone(skb_dst(pkt)));
911
912 /*
913 * Add our header
914 */
915
916 igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
917 igmp->type =
918 msg->im_msgtype = assert;
919 igmp->code = 0;
920 ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */
921 skb->transport_header = skb->network_header;
922 }
923
924 if (mrt->mroute_sk == NULL) {
925 kfree_skb(skb);
926 return -EINVAL;
927 }
928
929 /*
930 * Deliver to mrouted
931 */
932 ret = sock_queue_rcv_skb(mrt->mroute_sk, skb);
933 if (ret < 0) {
934 if (net_ratelimit())
935 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
936 kfree_skb(skb);
937 }
938
939 return ret;
940}
941
942/*
 943 *	Queue a packet for resolution. A cache entry is found or created under mfc_unres_lock.
944 */
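/* At most 10 unresolved (S,G) entries are kept per table, each buffering only
 * a handful of skbs, and each expires after roughly 10 seconds if the daemon
 * never installs a route (see ipmr_expire_process()).
 */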
945
946static int
947ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
948{
949 bool found = false;
950 int err;
951 struct mfc_cache *c;
952 const struct iphdr *iph = ip_hdr(skb);
953
954 spin_lock_bh(&mfc_unres_lock);
955 list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
956 if (c->mfc_mcastgrp == iph->daddr &&
957 c->mfc_origin == iph->saddr) {
958 found = true;
959 break;
960 }
961 }
962
963 if (!found) {
964 /*
965 * Create a new entry if allowable
966 */
967
968 if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
969 (c = ipmr_cache_alloc_unres()) == NULL) {
970 spin_unlock_bh(&mfc_unres_lock);
971
972 kfree_skb(skb);
973 return -ENOBUFS;
974 }
975
976 /*
977 * Fill in the new cache entry
978 */
979 c->mfc_parent = -1;
980 c->mfc_origin = iph->saddr;
981 c->mfc_mcastgrp = iph->daddr;
982
983 /*
984 * Reflect first query at mrouted.
985 */
986 err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
987 if (err < 0) {
988 /* If the report failed throw the cache entry
989 out - Brad Parker
990 */
991 spin_unlock_bh(&mfc_unres_lock);
992
993 ipmr_cache_free(c);
994 kfree_skb(skb);
995 return err;
996 }
997
998 atomic_inc(&mrt->cache_resolve_queue_len);
999 list_add(&c->list, &mrt->mfc_unres_queue);
1000
1001 mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
1002 }
1003
1004 /*
1005 * See if we can append the packet
1006 */
1007 if (c->mfc_un.unres.unresolved.qlen>3) {
1008 kfree_skb(skb);
1009 err = -ENOBUFS;
1010 } else {
1011 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
1012 err = 0;
1013 }
1014
1015 spin_unlock_bh(&mfc_unres_lock);
1016 return err;
1017}
1018
1019/*
1020 * MFC cache manipulation by user space mroute daemon
1021 */
1022
1023static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc)
1024{
1025 int line;
1026 struct mfc_cache *c, *next;
1027
1028 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
1029
1030 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) {
1031 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
1032 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
1033 write_lock_bh(&mrt_lock);
1034 list_del(&c->list);
1035 write_unlock_bh(&mrt_lock);
1036
1037 ipmr_cache_free(c);
1038 return 0;
1039 }
1040 }
1041 return -ENOENT;
1042}
1043
1044static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
1045 struct mfcctl *mfc, int mrtsock)
1046{
1047 bool found = false;
1048 int line;
1049 struct mfc_cache *uc, *c;
1050
1051 if (mfc->mfcc_parent >= MAXVIFS)
1052 return -ENFILE;
1053
1054 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
1055
1056 list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
1057 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
1058 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
1059 found = true;
1060 break;
1061 }
1062 }
1063
1064 if (found) {
1065 write_lock_bh(&mrt_lock);
1066 c->mfc_parent = mfc->mfcc_parent;
1067 ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
1068 if (!mrtsock)
1069 c->mfc_flags |= MFC_STATIC;
1070 write_unlock_bh(&mrt_lock);
1071 return 0;
1072 }
1073
1074 if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
1075 return -EINVAL;
1076
1077 c = ipmr_cache_alloc();
1078 if (c == NULL)
1079 return -ENOMEM;
1080
1081 c->mfc_origin = mfc->mfcc_origin.s_addr;
1082 c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
1083 c->mfc_parent = mfc->mfcc_parent;
1084 ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
1085 if (!mrtsock)
1086 c->mfc_flags |= MFC_STATIC;
1087
1088 write_lock_bh(&mrt_lock);
1089 list_add(&c->list, &mrt->mfc_cache_array[line]);
1090 write_unlock_bh(&mrt_lock);
1091
1092 /*
1093 * Check to see if we resolved a queued list. If so we
1094 * need to send on the frames and tidy up.
1095 */
1096 found = false;
1097 spin_lock_bh(&mfc_unres_lock);
1098 list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
1099 if (uc->mfc_origin == c->mfc_origin &&
1100 uc->mfc_mcastgrp == c->mfc_mcastgrp) {
1101 list_del(&uc->list);
1102 atomic_dec(&mrt->cache_resolve_queue_len);
1103 found = true;
1104 break;
1105 }
1106 }
1107 if (list_empty(&mrt->mfc_unres_queue))
1108 del_timer(&mrt->ipmr_expire_timer);
1109 spin_unlock_bh(&mfc_unres_lock);
1110
1111 if (found) {
1112 ipmr_cache_resolve(net, mrt, uc, c);
1113 ipmr_cache_free(uc);
1114 }
1115 return 0;
1116}
1117
1118/*
1119 * Close the multicast socket, and clear the vif tables etc
1120 */
1121
1122static void mroute_clean_tables(struct mr_table *mrt)
1123{
1124 int i;
1125 LIST_HEAD(list);
1126 struct mfc_cache *c, *next;
1127
1128 /*
1129 * Shut down all active vif entries
1130 */
1131 for (i = 0; i < mrt->maxvif; i++) {
1132 if (!(mrt->vif_table[i].flags&VIFF_STATIC))
1133 vif_delete(mrt, i, 0, &list);
1134 }
1135 unregister_netdevice_many(&list);
1136
1137 /*
1138 * Wipe the cache
1139 */
1140 for (i = 0; i < MFC_LINES; i++) {
1141 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
1142 if (c->mfc_flags&MFC_STATIC)
1143 continue;
1144 write_lock_bh(&mrt_lock);
1145 list_del(&c->list);
1146 write_unlock_bh(&mrt_lock);
1147
1148 ipmr_cache_free(c);
1149 }
1150 }
1151
1152 if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1153 spin_lock_bh(&mfc_unres_lock);
1154 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
1155 list_del(&c->list);
1156 ipmr_destroy_unres(mrt, c);
1157 }
1158 spin_unlock_bh(&mfc_unres_lock);
1159 }
1160}
1161
1162static void mrtsock_destruct(struct sock *sk)
1163{
1164 struct net *net = sock_net(sk);
1165 struct mr_table *mrt;
1166
1167 rtnl_lock();
1168 ipmr_for_each_table(mrt, net) {
1169 if (sk == mrt->mroute_sk) {
1170 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
1171
1172 write_lock_bh(&mrt_lock);
1173 mrt->mroute_sk = NULL;
1174 write_unlock_bh(&mrt_lock);
1175
1176 mroute_clean_tables(mrt);
1177 }
1178 }
1179 rtnl_unlock();
1180}
1181
1182/*
1183 * Socket options and virtual interface manipulation. The whole
1184 * virtual interface system is a complete heap, but unfortunately
1185 * that's how BSD mrouted happens to think. Maybe one day with a proper
1186 * MOSPF/PIM router set up we can clean this up.
1187 */
1188
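/* Roughly, a routing daemon such as mrouted or pimd drives this interface as
 * follows (illustrative sketch only, error handling omitted):
 *
 *	struct vifctl vc = { ... };
 *	struct mfcctl mc = { ... };
 *	int one = 1;
 *	int fd = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
 *
 *	setsockopt(fd, IPPROTO_IP, MRT_INIT, &one, sizeof(one));
 *	setsockopt(fd, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
 *	setsockopt(fd, IPPROTO_IP, MRT_ADD_MFC, &mc, sizeof(mc));
 *	...
 *	setsockopt(fd, IPPROTO_IP, MRT_DONE, &one, sizeof(one));
 *
 * Upcalls (struct igmpmsg) are then read from the same raw socket.
 */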
1189int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1190{
1191 int ret;
1192 struct vifctl vif;
1193 struct mfcctl mfc;
1194 struct net *net = sock_net(sk);
1195 struct mr_table *mrt;
1196
1197 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1198 if (mrt == NULL)
1199 return -ENOENT;
1200
1201 if (optname != MRT_INIT) {
1202 if (sk != mrt->mroute_sk && !capable(CAP_NET_ADMIN))
1203 return -EACCES;
1204 }
1205
1206 switch (optname) {
1207 case MRT_INIT:
1208 if (sk->sk_type != SOCK_RAW ||
1209 inet_sk(sk)->inet_num != IPPROTO_IGMP)
1210 return -EOPNOTSUPP;
1211 if (optlen != sizeof(int))
1212 return -ENOPROTOOPT;
1213
1214 rtnl_lock();
1215 if (mrt->mroute_sk) {
1216 rtnl_unlock();
1217 return -EADDRINUSE;
1218 }
1219
1220 ret = ip_ra_control(sk, 1, mrtsock_destruct);
1221 if (ret == 0) {
1222 write_lock_bh(&mrt_lock);
1223 mrt->mroute_sk = sk;
1224 write_unlock_bh(&mrt_lock);
1225
1226 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
1227 }
1228 rtnl_unlock();
1229 return ret;
1230 case MRT_DONE:
1231 if (sk != mrt->mroute_sk)
1232 return -EACCES;
1233 return ip_ra_control(sk, 0, NULL);
1234 case MRT_ADD_VIF:
1235 case MRT_DEL_VIF:
1236 if (optlen != sizeof(vif))
1237 return -EINVAL;
1238 if (copy_from_user(&vif, optval, sizeof(vif)))
1239 return -EFAULT;
1240 if (vif.vifc_vifi >= MAXVIFS)
1241 return -ENFILE;
1242 rtnl_lock();
1243 if (optname == MRT_ADD_VIF) {
1244 ret = vif_add(net, mrt, &vif, sk == mrt->mroute_sk);
1245 } else {
1246 ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
1247 }
1248 rtnl_unlock();
1249 return ret;
1250
1251 /*
1252 * Manipulate the forwarding caches. These live
1253 * in a sort of kernel/user symbiosis.
1254 */
1255 case MRT_ADD_MFC:
1256 case MRT_DEL_MFC:
1257 if (optlen != sizeof(mfc))
1258 return -EINVAL;
1259 if (copy_from_user(&mfc, optval, sizeof(mfc)))
1260 return -EFAULT;
1261 rtnl_lock();
1262 if (optname == MRT_DEL_MFC)
1263 ret = ipmr_mfc_delete(mrt, &mfc);
1264 else
1265 ret = ipmr_mfc_add(net, mrt, &mfc, sk == mrt->mroute_sk);
1266 rtnl_unlock();
1267 return ret;
1268 /*
1269 * Control PIM assert.
1270 */
1271 case MRT_ASSERT:
1272 {
1273 int v;
1274 if (get_user(v,(int __user *)optval))
1275 return -EFAULT;
1276 mrt->mroute_do_assert = (v) ? 1 : 0;
1277 return 0;
1278 }
1279#ifdef CONFIG_IP_PIMSM
1280 case MRT_PIM:
1281 {
1282 int v;
1283
1284 if (get_user(v,(int __user *)optval))
1285 return -EFAULT;
1286 v = (v) ? 1 : 0;
1287
1288 rtnl_lock();
1289 ret = 0;
1290 if (v != mrt->mroute_do_pim) {
1291 mrt->mroute_do_pim = v;
1292 mrt->mroute_do_assert = v;
1293 }
1294 rtnl_unlock();
1295 return ret;
1296 }
1297#endif
1298#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
1299 case MRT_TABLE:
1300 {
1301 u32 v;
1302
1303 if (optlen != sizeof(u32))
1304 return -EINVAL;
1305 if (get_user(v, (u32 __user *)optval))
1306 return -EFAULT;
1307 if (sk == mrt->mroute_sk)
1308 return -EBUSY;
1309
1310 rtnl_lock();
1311 ret = 0;
1312 if (!ipmr_new_table(net, v))
1313 ret = -ENOMEM;
1314 raw_sk(sk)->ipmr_table = v;
1315 rtnl_unlock();
1316 return ret;
1317 }
1318#endif
1319 /*
1320 * Spurious command, or MRT_VERSION which you cannot
1321 * set.
1322 */
1323 default:
1324 return -ENOPROTOOPT;
1325 }
1326}
1327
1328/*
1329 * Getsock opt support for the multicast routing system.
1330 */
1331
1332int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
1333{
1334 int olr;
1335 int val;
1336 struct net *net = sock_net(sk);
1337 struct mr_table *mrt;
1338
1339 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1340 if (mrt == NULL)
1341 return -ENOENT;
1342
1343 if (optname != MRT_VERSION &&
1344#ifdef CONFIG_IP_PIMSM
1345 optname!=MRT_PIM &&
1346#endif
1347 optname!=MRT_ASSERT)
1348 return -ENOPROTOOPT;
1349
1350 if (get_user(olr, optlen))
1351 return -EFAULT;
1352
1353 olr = min_t(unsigned int, olr, sizeof(int));
1354 if (olr < 0)
1355 return -EINVAL;
1356
1357 if (put_user(olr, optlen))
1358 return -EFAULT;
1359 if (optname == MRT_VERSION)
1360 val = 0x0305;
1361#ifdef CONFIG_IP_PIMSM
1362 else if (optname == MRT_PIM)
1363 val = mrt->mroute_do_pim;
1364#endif
1365 else
1366 val = mrt->mroute_do_assert;
1367 if (copy_to_user(optval, &val, olr))
1368 return -EFAULT;
1369 return 0;
1370}
1371
1372/*
1373 * The IP multicast ioctl support routines.
1374 */
1375
1376int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1377{
1378 struct sioc_sg_req sr;
1379 struct sioc_vif_req vr;
1380 struct vif_device *vif;
1381 struct mfc_cache *c;
1382 struct net *net = sock_net(sk);
1383 struct mr_table *mrt;
1384
1385 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1386 if (mrt == NULL)
1387 return -ENOENT;
1388
1389 switch (cmd) {
1390 case SIOCGETVIFCNT:
1391 if (copy_from_user(&vr, arg, sizeof(vr)))
1392 return -EFAULT;
1393 if (vr.vifi >= mrt->maxvif)
1394 return -EINVAL;
1395 read_lock(&mrt_lock);
1396 vif = &mrt->vif_table[vr.vifi];
1397 if (VIF_EXISTS(mrt, vr.vifi)) {
1398 vr.icount = vif->pkt_in;
1399 vr.ocount = vif->pkt_out;
1400 vr.ibytes = vif->bytes_in;
1401 vr.obytes = vif->bytes_out;
1402 read_unlock(&mrt_lock);
1403
1404 if (copy_to_user(arg, &vr, sizeof(vr)))
1405 return -EFAULT;
1406 return 0;
1407 }
1408 read_unlock(&mrt_lock);
1409 return -EADDRNOTAVAIL;
1410 case SIOCGETSGCNT:
1411 if (copy_from_user(&sr, arg, sizeof(sr)))
1412 return -EFAULT;
1413
1414 read_lock(&mrt_lock);
1415 c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
1416 if (c) {
1417 sr.pktcnt = c->mfc_un.res.pkt;
1418 sr.bytecnt = c->mfc_un.res.bytes;
1419 sr.wrong_if = c->mfc_un.res.wrong_if;
1420 read_unlock(&mrt_lock);
1421
1422 if (copy_to_user(arg, &sr, sizeof(sr)))
1423 return -EFAULT;
1424 return 0;
1425 }
1426 read_unlock(&mrt_lock);
1427 return -EADDRNOTAVAIL;
1428 default:
1429 return -ENOIOCTLCMD;
1430 }
1431}
1432
1433
1434static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1435{
1436 struct net_device *dev = ptr;
1437 struct net *net = dev_net(dev);
1438 struct mr_table *mrt;
1439 struct vif_device *v;
1440 int ct;
1441 LIST_HEAD(list);
1442
1443 if (event != NETDEV_UNREGISTER)
1444 return NOTIFY_DONE;
1445
1446 ipmr_for_each_table(mrt, net) {
1447 v = &mrt->vif_table[0];
1448 for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1449 if (v->dev == dev)
1450 vif_delete(mrt, ct, 1, &list);
1451 }
1452 }
1453 unregister_netdevice_many(&list);
1454 return NOTIFY_DONE;
1455}
1456
1457
1458static struct notifier_block ip_mr_notifier = {
1459 .notifier_call = ipmr_device_event,
1460};
1461
1462/*
1463 * Encapsulate a packet by attaching a valid IPIP header to it.
1464 * This avoids tunnel drivers and other mess and gives us the speed so
1465 * important for multicast video.
1466 */
1467
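/* The result is a minimal IPIP encapsulation: a fresh 20-byte outer IPv4
 * header (protocol IPPROTO_IPIP) is prepended, and the original multicast
 * datagram, header and all, becomes the payload.
 */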
1468static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1469{
1470 struct iphdr *iph;
1471 struct iphdr *old_iph = ip_hdr(skb);
1472
1473 skb_push(skb, sizeof(struct iphdr));
1474 skb->transport_header = skb->network_header;
1475 skb_reset_network_header(skb);
1476 iph = ip_hdr(skb);
1477
1478 iph->version = 4;
1479 iph->tos = old_iph->tos;
1480 iph->ttl = old_iph->ttl;
1481 iph->frag_off = 0;
1482 iph->daddr = daddr;
1483 iph->saddr = saddr;
1484 iph->protocol = IPPROTO_IPIP;
1485 iph->ihl = 5;
1486 iph->tot_len = htons(skb->len);
1487 ip_select_ident(iph, skb_dst(skb), NULL);
1488 ip_send_check(iph);
1489
1490 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1491 nf_reset(skb);
1492}
1493
1494static inline int ipmr_forward_finish(struct sk_buff *skb)
1495{
1496 struct ip_options * opt = &(IPCB(skb)->opt);
1497
1498 IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
1499
1500 if (unlikely(opt->optlen))
1501 ip_forward_options(skb);
1502
1503 return dst_output(skb);
1504}
1505
1506/*
1507 * Processing handlers for ipmr_forward
1508 */
1509
1510static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1511 struct sk_buff *skb, struct mfc_cache *c, int vifi)
1512{
1513 const struct iphdr *iph = ip_hdr(skb);
1514 struct vif_device *vif = &mrt->vif_table[vifi];
1515 struct net_device *dev;
1516 struct rtable *rt;
1517 int encap = 0;
1518
1519 if (vif->dev == NULL)
1520 goto out_free;
1521
1522#ifdef CONFIG_IP_PIMSM
1523 if (vif->flags & VIFF_REGISTER) {
1524 vif->pkt_out++;
1525 vif->bytes_out += skb->len;
1526 vif->dev->stats.tx_bytes += skb->len;
1527 vif->dev->stats.tx_packets++;
1528 ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
1529 goto out_free;
1530 }
1531#endif
1532
1533 if (vif->flags&VIFF_TUNNEL) {
1534 struct flowi fl = { .oif = vif->link,
1535 .nl_u = { .ip4_u =
1536 { .daddr = vif->remote,
1537 .saddr = vif->local,
1538 .tos = RT_TOS(iph->tos) } },
1539 .proto = IPPROTO_IPIP };
1540 if (ip_route_output_key(net, &rt, &fl))
1541 goto out_free;
1542 encap = sizeof(struct iphdr);
1543 } else {
1544 struct flowi fl = { .oif = vif->link,
1545 .nl_u = { .ip4_u =
1546 { .daddr = iph->daddr,
1547 .tos = RT_TOS(iph->tos) } },
1548 .proto = IPPROTO_IPIP };
1549 if (ip_route_output_key(net, &rt, &fl))
1550 goto out_free;
1551 }
1552
1553 dev = rt->u.dst.dev;
1554
1555 if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
 1556	/* Do not fragment multicasts. Alas, IPv4 does not
 1557	   allow us to send ICMP here, so oversized packets
 1558	   simply disappear into a black hole.
1559 */
1560
1561 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
1562 ip_rt_put(rt);
1563 goto out_free;
1564 }
1565
1566 encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
1567
1568 if (skb_cow(skb, encap)) {
1569 ip_rt_put(rt);
1570 goto out_free;
1571 }
1572
1573 vif->pkt_out++;
1574 vif->bytes_out += skb->len;
1575
1576 skb_dst_drop(skb);
1577 skb_dst_set(skb, &rt->u.dst);
1578 ip_decrease_ttl(ip_hdr(skb));
1579
1580 /* FIXME: forward and output firewalls used to be called here.
1581 * What do we do with netfilter? -- RR */
1582 if (vif->flags & VIFF_TUNNEL) {
1583 ip_encap(skb, vif->local, vif->remote);
1584 /* FIXME: extra output firewall step used to be here. --RR */
1585 vif->dev->stats.tx_packets++;
1586 vif->dev->stats.tx_bytes += skb->len;
1587 }
1588
1589 IPCB(skb)->flags |= IPSKB_FORWARDED;
1590
1591 /*
 1592	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets locally
 1593	 * not only before forwarding, but also after forwarding on all output
 1594	 * interfaces. Clearly, if the mrouter runs a multicast
 1595	 * program, that program should receive packets regardless of which
 1596	 * interface it is joined on.
 1597	 * If we did not do this, the program would have to join on all
 1598	 * interfaces. On the other hand, a multihomed host (or router, but
 1599	 * not mrouter) cannot join on more than one interface - it would
 1600	 * result in receiving multiple copies of each packet.
1601 */
1602 NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
1603 ipmr_forward_finish);
1604 return;
1605
1606out_free:
1607 kfree_skb(skb);
1608 return;
1609}
1610
1611static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
1612{
1613 int ct;
1614
1615 for (ct = mrt->maxvif-1; ct >= 0; ct--) {
1616 if (mrt->vif_table[ct].dev == dev)
1617 break;
1618 }
1619 return ct;
1620}
1621
1622/* "local" means that we should preserve one skb (for local delivery) */
1623
1624static int ip_mr_forward(struct net *net, struct mr_table *mrt,
1625 struct sk_buff *skb, struct mfc_cache *cache,
1626 int local)
1627{
1628 int psend = -1;
1629 int vif, ct;
1630
1631 vif = cache->mfc_parent;
1632 cache->mfc_un.res.pkt++;
1633 cache->mfc_un.res.bytes += skb->len;
1634
1635 /*
1636 * Wrong interface: drop packet and (maybe) send PIM assert.
1637 */
1638 if (mrt->vif_table[vif].dev != skb->dev) {
1639 int true_vifi;
1640
1641 if (skb_rtable(skb)->fl.iif == 0) {
1642 /* It is our own packet, looped back.
1643 Very complicated situation...
1644
 1645		   The best workaround until the routing daemons are
 1646		   fixed is not to redistribute a packet if it was
 1647		   sent through the wrong interface. This means that
 1648		   multicast applications WILL NOT work for
 1649		   (S,G) entries whose default multicast route points
 1650		   to the wrong oif. In any case, it is not a good
 1651		   idea to run multicast applications on a router.
1652 */
1653 goto dont_forward;
1654 }
1655
1656 cache->mfc_un.res.wrong_if++;
1657 true_vifi = ipmr_find_vif(mrt, skb->dev);
1658
1659 if (true_vifi >= 0 && mrt->mroute_do_assert &&
 1660		    /* PIM-SM uses asserts when switching from the RPT to the SPT,
 1661		       so we cannot check that the packet arrived on an oif.
 1662		       That is unfortunate, but otherwise we would need to move a pretty
 1663		       large chunk of pimd into the kernel. Ough... --ANK
1664 */
1665 (mrt->mroute_do_pim ||
1666 cache->mfc_un.res.ttls[true_vifi] < 255) &&
1667 time_after(jiffies,
1668 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1669 cache->mfc_un.res.last_assert = jiffies;
1670 ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
1671 }
1672 goto dont_forward;
1673 }
1674
1675 mrt->vif_table[vif].pkt_in++;
1676 mrt->vif_table[vif].bytes_in += skb->len;
1677
1678 /*
1679 * Forward the frame
1680 */
1681 for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
1682 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
1683 if (psend != -1) {
1684 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1685 if (skb2)
1686 ipmr_queue_xmit(net, mrt, skb2, cache,
1687 psend);
1688 }
1689 psend = ct;
1690 }
1691 }
1692 if (psend != -1) {
1693 if (local) {
1694 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1695 if (skb2)
1696 ipmr_queue_xmit(net, mrt, skb2, cache, psend);
1697 } else {
1698 ipmr_queue_xmit(net, mrt, skb, cache, psend);
1699 return 0;
1700 }
1701 }
1702
1703dont_forward:
1704 if (!local)
1705 kfree_skb(skb);
1706 return 0;
1707}
1708
1709
1710/*
1711 * Multicast packets for forwarding arrive here
1712 */
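/* Entry point from the IPv4 receive path for packets that routing has matched
 * to a multicast route: IGMP is handed to the daemon, packets with a resolved
 * (S,G) entry are forwarded, and packets without one are queued together with
 * an IGMPMSG_NOCACHE upcall. Local delivery, when requested, happens as well.
 */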
1713
1714int ip_mr_input(struct sk_buff *skb)
1715{
1716 struct mfc_cache *cache;
1717 struct net *net = dev_net(skb->dev);
1718 int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
1719 struct mr_table *mrt;
1720 int err;
1721
 1722	/* Packet is looped back after forwarding; it should not be
 1723	   forwarded a second time, but it can still be delivered locally.
1724 */
1725 if (IPCB(skb)->flags&IPSKB_FORWARDED)
1726 goto dont_forward;
1727
1728 err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt);
1729 if (err < 0)
1730 return err;
1731
1732 if (!local) {
1733 if (IPCB(skb)->opt.router_alert) {
1734 if (ip_call_ra_chain(skb))
1735 return 0;
1736 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
 1737			/* IGMPv1 (and broken IGMPv2 implementations such as
 1738			   Cisco IOS <= 11.2(8)) do not put the router alert
 1739			   option into IGMP packets destined to routable
 1740			   groups. This is very bad, because it means
 1741			   that we can forward NO IGMP messages.
1742 */
1743 read_lock(&mrt_lock);
1744 if (mrt->mroute_sk) {
1745 nf_reset(skb);
1746 raw_rcv(mrt->mroute_sk, skb);
1747 read_unlock(&mrt_lock);
1748 return 0;
1749 }
1750 read_unlock(&mrt_lock);
1751 }
1752 }
1753
1754 read_lock(&mrt_lock);
1755 cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
1756
1757 /*
1758 * No usable cache entry
1759 */
1760 if (cache == NULL) {
1761 int vif;
1762
1763 if (local) {
1764 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1765 ip_local_deliver(skb);
1766 if (skb2 == NULL) {
1767 read_unlock(&mrt_lock);
1768 return -ENOBUFS;
1769 }
1770 skb = skb2;
1771 }
1772
1773 vif = ipmr_find_vif(mrt, skb->dev);
1774 if (vif >= 0) {
1775 int err = ipmr_cache_unresolved(mrt, vif, skb);
1776 read_unlock(&mrt_lock);
1777
1778 return err;
1779 }
1780 read_unlock(&mrt_lock);
1781 kfree_skb(skb);
1782 return -ENODEV;
1783 }
1784
1785 ip_mr_forward(net, mrt, skb, cache, local);
1786
1787 read_unlock(&mrt_lock);
1788
1789 if (local)
1790 return ip_local_deliver(skb);
1791
1792 return 0;
1793
1794dont_forward:
1795 if (local)
1796 return ip_local_deliver(skb);
1797 kfree_skb(skb);
1798 return 0;
1799}
1800
1801#ifdef CONFIG_IP_PIMSM
1802static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
1803 unsigned int pimlen)
1804{
1805 struct net_device *reg_dev = NULL;
1806 struct iphdr *encap;
1807
1808 encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
1809 /*
1810 Check that:
 1811	   a. packet is really destined to a multicast group
1812 b. packet is not a NULL-REGISTER
1813 c. packet is not truncated
1814 */
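/* If the checks pass, the outer PIM Register header is stripped and the inner
 * multicast datagram is re-injected through the register vif, so it is routed
 * as if it had arrived on that interface.
 */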
1815 if (!ipv4_is_multicast(encap->daddr) ||
1816 encap->tot_len == 0 ||
1817 ntohs(encap->tot_len) + pimlen > skb->len)
1818 return 1;
1819
1820 read_lock(&mrt_lock);
1821 if (mrt->mroute_reg_vif_num >= 0)
1822 reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
1823 if (reg_dev)
1824 dev_hold(reg_dev);
1825 read_unlock(&mrt_lock);
1826
1827 if (reg_dev == NULL)
1828 return 1;
1829
1830 skb->mac_header = skb->network_header;
1831 skb_pull(skb, (u8*)encap - skb->data);
1832 skb_reset_network_header(skb);
1833 skb->dev = reg_dev;
1834 skb->protocol = htons(ETH_P_IP);
1835 skb->ip_summed = 0;
1836 skb->pkt_type = PACKET_HOST;
1837 skb_dst_drop(skb);
1838 reg_dev->stats.rx_bytes += skb->len;
1839 reg_dev->stats.rx_packets++;
1840 nf_reset(skb);
1841 netif_rx(skb);
1842 dev_put(reg_dev);
1843
1844 return 0;
1845}
1846#endif
1847
1848#ifdef CONFIG_IP_PIMSM_V1
1849/*
1850 * Handle IGMP messages of PIMv1
1851 */
1852
1853int pim_rcv_v1(struct sk_buff * skb)
1854{
1855 struct igmphdr *pim;
1856 struct net *net = dev_net(skb->dev);
1857 struct mr_table *mrt;
1858
1859 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1860 goto drop;
1861
1862 pim = igmp_hdr(skb);
1863
1864 if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
1865 goto drop;
1866
1867 if (!mrt->mroute_do_pim ||
1868 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1869 goto drop;
1870
1871 if (__pim_rcv(mrt, skb, sizeof(*pim))) {
1872drop:
1873 kfree_skb(skb);
1874 }
1875 return 0;
1876}
1877#endif
1878
1879#ifdef CONFIG_IP_PIMSM_V2
1880static int pim_rcv(struct sk_buff * skb)
1881{
1882 struct pimreghdr *pim;
1883 struct net *net = dev_net(skb->dev);
1884 struct mr_table *mrt;
1885
1886 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1887 goto drop;
1888
1889 pim = (struct pimreghdr *)skb_transport_header(skb);
1890 if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
1891 (pim->flags&PIM_NULL_REGISTER) ||
1892 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
1893 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1894 goto drop;
1895
1896 if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
1897 goto drop;
1898
1899 if (__pim_rcv(mrt, skb, sizeof(*pim))) {
1900drop:
1901 kfree_skb(skb);
1902 }
1903 return 0;
1904}
1905#endif
1906
1907static int
1908ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, struct mfc_cache *c,
1909 struct rtmsg *rtm)
1910{
1911 int ct;
1912 struct rtnexthop *nhp;
1913 u8 *b = skb_tail_pointer(skb);
1914 struct rtattr *mp_head;
1915
1916 /* If cache is unresolved, don't try to parse IIF and OIF */
1917 if (c->mfc_parent > MAXVIFS)
1918 return -ENOENT;
1919
1920 if (VIF_EXISTS(mrt, c->mfc_parent))
1921 RTA_PUT(skb, RTA_IIF, 4, &mrt->vif_table[c->mfc_parent].dev->ifindex);
1922
1923 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1924
1925 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1926 if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
1927 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1928 goto rtattr_failure;
1929 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1930 nhp->rtnh_flags = 0;
1931 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1932 nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
1933 nhp->rtnh_len = sizeof(*nhp);
1934 }
1935 }
1936 mp_head->rta_type = RTA_MULTIPATH;
1937 mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1938 rtm->rtm_type = RTN_MULTICAST;
1939 return 1;
1940
1941rtattr_failure:
1942 nlmsg_trim(skb, b);
1943 return -EMSGSIZE;
1944}
1945
1946int ipmr_get_route(struct net *net,
1947 struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1948{
1949 int err;
1950 struct mr_table *mrt;
1951 struct mfc_cache *cache;
1952 struct rtable *rt = skb_rtable(skb);
1953
1954 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
1955 if (mrt == NULL)
1956 return -ENOENT;
1957
1958 read_lock(&mrt_lock);
1959 cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst);
1960
1961 if (cache == NULL) {
1962 struct sk_buff *skb2;
1963 struct iphdr *iph;
1964 struct net_device *dev;
1965 int vif;
1966
1967 if (nowait) {
1968 read_unlock(&mrt_lock);
1969 return -EAGAIN;
1970 }
1971
1972 dev = skb->dev;
1973 if (dev == NULL || (vif = ipmr_find_vif(mrt, dev)) < 0) {
1974 read_unlock(&mrt_lock);
1975 return -ENODEV;
1976 }
1977 skb2 = skb_clone(skb, GFP_ATOMIC);
1978 if (!skb2) {
1979 read_unlock(&mrt_lock);
1980 return -ENOMEM;
1981 }
1982
1983 skb_push(skb2, sizeof(struct iphdr));
1984 skb_reset_network_header(skb2);
1985 iph = ip_hdr(skb2);
1986 iph->ihl = sizeof(struct iphdr) >> 2;
1987 iph->saddr = rt->rt_src;
1988 iph->daddr = rt->rt_dst;
1989 iph->version = 0;
1990 err = ipmr_cache_unresolved(mrt, vif, skb2);
1991 read_unlock(&mrt_lock);
1992 return err;
1993 }
1994
1995 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1996 cache->mfc_flags |= MFC_NOTIFY;
1997 err = ipmr_fill_mroute(mrt, skb, cache, rtm);
1998 read_unlock(&mrt_lock);
1999 return err;
2000}
2001
2002#ifdef CONFIG_PROC_FS
2003/*
 2004 *	The /proc interfaces to multicast routing: /proc/net/ip_mr_cache and /proc/net/ip_mr_vif
2005 */
2006struct ipmr_vif_iter {
2007 struct seq_net_private p;
2008 struct mr_table *mrt;
2009 int ct;
2010};
2011
2012static struct vif_device *ipmr_vif_seq_idx(struct net *net,
2013 struct ipmr_vif_iter *iter,
2014 loff_t pos)
2015{
2016 struct mr_table *mrt = iter->mrt;
2017
2018 for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
2019 if (!VIF_EXISTS(mrt, iter->ct))
2020 continue;
2021 if (pos-- == 0)
2022 return &mrt->vif_table[iter->ct];
2023 }
2024 return NULL;
2025}
2026
2027static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
2028 __acquires(mrt_lock)
2029{
2030 struct ipmr_vif_iter *iter = seq->private;
2031 struct net *net = seq_file_net(seq);
2032 struct mr_table *mrt;
2033
2034 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
2035 if (mrt == NULL)
2036 return ERR_PTR(-ENOENT);
2037
2038 iter->mrt = mrt;
2039
2040 read_lock(&mrt_lock);
2041 return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
2042 : SEQ_START_TOKEN;
2043}
2044
2045static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2046{
2047 struct ipmr_vif_iter *iter = seq->private;
2048 struct net *net = seq_file_net(seq);
2049 struct mr_table *mrt = iter->mrt;
2050
2051 ++*pos;
2052 if (v == SEQ_START_TOKEN)
2053 return ipmr_vif_seq_idx(net, iter, 0);
2054
2055 while (++iter->ct < mrt->maxvif) {
2056 if (!VIF_EXISTS(mrt, iter->ct))
2057 continue;
2058 return &mrt->vif_table[iter->ct];
2059 }
2060 return NULL;
2061}
2062
2063static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
2064 __releases(mrt_lock)
2065{
2066 read_unlock(&mrt_lock);
2067}
2068
2069static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
2070{
2071 struct ipmr_vif_iter *iter = seq->private;
2072 struct mr_table *mrt = iter->mrt;
2073
2074 if (v == SEQ_START_TOKEN) {
2075 seq_puts(seq,
2076 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
2077 } else {
2078 const struct vif_device *vif = v;
2079 const char *name = vif->dev ? vif->dev->name : "none";
2080
2081 seq_printf(seq,
2082 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
2083 vif - mrt->vif_table,
2084 name, vif->bytes_in, vif->pkt_in,
2085 vif->bytes_out, vif->pkt_out,
2086 vif->flags, vif->local, vif->remote);
2087 }
2088 return 0;
2089}
2090
2091static const struct seq_operations ipmr_vif_seq_ops = {
2092 .start = ipmr_vif_seq_start,
2093 .next = ipmr_vif_seq_next,
2094 .stop = ipmr_vif_seq_stop,
2095 .show = ipmr_vif_seq_show,
2096};
2097
2098static int ipmr_vif_open(struct inode *inode, struct file *file)
2099{
2100 return seq_open_net(inode, file, &ipmr_vif_seq_ops,
2101 sizeof(struct ipmr_vif_iter));
2102}
2103
2104static const struct file_operations ipmr_vif_fops = {
2105 .owner = THIS_MODULE,
2106 .open = ipmr_vif_open,
2107 .read = seq_read,
2108 .llseek = seq_lseek,
2109 .release = seq_release_net,
2110};
2111
2112struct ipmr_mfc_iter {
2113 struct seq_net_private p;
2114 struct mr_table *mrt;
2115 struct list_head *cache;
2116 int ct;
2117};
2118
2119
2120static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
2121 struct ipmr_mfc_iter *it, loff_t pos)
2122{
2123 struct mr_table *mrt = it->mrt;
2124 struct mfc_cache *mfc;
2125
2126 read_lock(&mrt_lock);
2127 for (it->ct = 0; it->ct < MFC_LINES; it->ct++) {
2128 it->cache = &mrt->mfc_cache_array[it->ct];
2129 list_for_each_entry(mfc, it->cache, list)
2130 if (pos-- == 0)
2131 return mfc;
2132 }
2133 read_unlock(&mrt_lock);
2134
2135 spin_lock_bh(&mfc_unres_lock);
2136 it->cache = &mrt->mfc_unres_queue;
2137 list_for_each_entry(mfc, it->cache, list)
2138 if (pos-- == 0)
2139 return mfc;
2140 spin_unlock_bh(&mfc_unres_lock);
2141
2142 it->cache = NULL;
2143 return NULL;
2144}
2145
2146
2147static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
2148{
2149 struct ipmr_mfc_iter *it = seq->private;
2150 struct net *net = seq_file_net(seq);
2151 struct mr_table *mrt;
2152
2153 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
2154 if (mrt == NULL)
2155 return ERR_PTR(-ENOENT);
2156
2157 it->mrt = mrt;
2158 it->cache = NULL;
2159 it->ct = 0;
2160 return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
2161 : SEQ_START_TOKEN;
2162}
2163
2164static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2165{
2166 struct mfc_cache *mfc = v;
2167 struct ipmr_mfc_iter *it = seq->private;
2168 struct net *net = seq_file_net(seq);
2169 struct mr_table *mrt = it->mrt;
2170
2171 ++*pos;
2172
2173 if (v == SEQ_START_TOKEN)
2174 return ipmr_mfc_seq_idx(net, seq->private, 0);
2175
2176 if (mfc->list.next != it->cache)
2177 return list_entry(mfc->list.next, struct mfc_cache, list);
2178
2179 if (it->cache == &mrt->mfc_unres_queue)
2180 goto end_of_list;
2181
2182 BUG_ON(it->cache != &mrt->mfc_cache_array[it->ct]);
2183
2184 while (++it->ct < MFC_LINES) {
2185 it->cache = &mrt->mfc_cache_array[it->ct];
2186 if (list_empty(it->cache))
2187 continue;
2188 return list_first_entry(it->cache, struct mfc_cache, list);
2189 }
2190
2191 /* exhausted cache_array, show unresolved */
2192 read_unlock(&mrt_lock);
2193 it->cache = &mrt->mfc_unres_queue;
2194 it->ct = 0;
2195
2196 spin_lock_bh(&mfc_unres_lock);
2197 if (!list_empty(it->cache))
2198 return list_first_entry(it->cache, struct mfc_cache, list);
2199
2200 end_of_list:
2201 spin_unlock_bh(&mfc_unres_lock);
2202 it->cache = NULL;
2203
2204 return NULL;
2205}
2206
2207static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
2208{
2209 struct ipmr_mfc_iter *it = seq->private;
2210 struct mr_table *mrt = it->mrt;
2211
2212 if (it->cache == &mrt->mfc_unres_queue)
2213 spin_unlock_bh(&mfc_unres_lock);
2214 else if (it->cache == &mrt->mfc_cache_array[it->ct])
2215 read_unlock(&mrt_lock);
2216}
2217
2218static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
2219{
2220 int n;
2221
2222 if (v == SEQ_START_TOKEN) {
2223 seq_puts(seq,
2224 "Group Origin Iif Pkts Bytes Wrong Oifs\n");
2225 } else {
2226 const struct mfc_cache *mfc = v;
2227 const struct ipmr_mfc_iter *it = seq->private;
2228 const struct mr_table *mrt = it->mrt;
2229
2230 seq_printf(seq, "%08lX %08lX %-3hd",
2231 (unsigned long) mfc->mfc_mcastgrp,
2232 (unsigned long) mfc->mfc_origin,
2233 mfc->mfc_parent);
2234
2235 if (it->cache != &mrt->mfc_unres_queue) {
2236 seq_printf(seq, " %8lu %8lu %8lu",
2237 mfc->mfc_un.res.pkt,
2238 mfc->mfc_un.res.bytes,
2239 mfc->mfc_un.res.wrong_if);
2240 for (n = mfc->mfc_un.res.minvif;
2241 n < mfc->mfc_un.res.maxvif; n++ ) {
2242 if (VIF_EXISTS(mrt, n) &&
2243 mfc->mfc_un.res.ttls[n] < 255)
2244 seq_printf(seq,
2245 " %2d:%-3d",
2246 n, mfc->mfc_un.res.ttls[n]);
2247 }
2248 } else {
2249 /* unresolved mfc_caches don't contain
2250 * pkt, bytes and wrong_if values
2251 */
2252 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
2253 }
2254 seq_putc(seq, '\n');
2255 }
2256 return 0;
2257}
2258
2259static const struct seq_operations ipmr_mfc_seq_ops = {
2260 .start = ipmr_mfc_seq_start,
2261 .next = ipmr_mfc_seq_next,
2262 .stop = ipmr_mfc_seq_stop,
2263 .show = ipmr_mfc_seq_show,
2264};
2265
2266static int ipmr_mfc_open(struct inode *inode, struct file *file)
2267{
2268 return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
2269 sizeof(struct ipmr_mfc_iter));
2270}
2271
2272static const struct file_operations ipmr_mfc_fops = {
2273 .owner = THIS_MODULE,
2274 .open = ipmr_mfc_open,
2275 .read = seq_read,
2276 .llseek = seq_lseek,
2277 .release = seq_release_net,
2278};
2279#endif
2280
2281#ifdef CONFIG_IP_PIMSM_V2
2282static const struct net_protocol pim_protocol = {
2283 .handler = pim_rcv,
2284 .netns_ok = 1,
2285};
2286#endif
2287
2288
2289/*
2290 * Setup for IP multicast routing
2291 */
2292static int __net_init ipmr_net_init(struct net *net)
2293{
2294 int err;
2295
2296 err = ipmr_rules_init(net);
2297 if (err < 0)
2298 goto fail;
2299
2300#ifdef CONFIG_PROC_FS
2301 err = -ENOMEM;
2302 if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
2303 goto proc_vif_fail;
2304 if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
2305 goto proc_cache_fail;
2306#endif
2307 return 0;
2308
2309#ifdef CONFIG_PROC_FS
2310proc_cache_fail:
2311 proc_net_remove(net, "ip_mr_vif");
2312proc_vif_fail:
2313 ipmr_rules_exit(net);
2314#endif
2315fail:
2316 return err;
2317}
2318
2319static void __net_exit ipmr_net_exit(struct net *net)
2320{
2321#ifdef CONFIG_PROC_FS
2322 proc_net_remove(net, "ip_mr_cache");
2323 proc_net_remove(net, "ip_mr_vif");
2324#endif
2325 ipmr_rules_exit(net);
2326}
2327
2328static struct pernet_operations ipmr_net_ops = {
2329 .init = ipmr_net_init,
2330 .exit = ipmr_net_exit,
2331};
2332
2333int __init ip_mr_init(void)
2334{
2335 int err;
2336
2337 mrt_cachep = kmem_cache_create("ip_mrt_cache",
2338 sizeof(struct mfc_cache),
2339 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2340 NULL);
2341 if (!mrt_cachep)
2342 return -ENOMEM;
2343
2344 err = register_pernet_subsys(&ipmr_net_ops);
2345 if (err)
2346 goto reg_pernet_fail;
2347
2348 err = register_netdevice_notifier(&ip_mr_notifier);
2349 if (err)
2350 goto reg_notif_fail;
2351#ifdef CONFIG_IP_PIMSM_V2
2352 if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
2353 printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
2354 err = -EAGAIN;
2355 goto add_proto_fail;
2356 }
2357#endif
2358 return 0;
2359
2360#ifdef CONFIG_IP_PIMSM_V2
2361add_proto_fail:
2362 unregister_netdevice_notifier(&ip_mr_notifier);
2363#endif
2364reg_notif_fail:
2365 unregister_pernet_subsys(&ipmr_net_ops);
2366reg_pernet_fail:
2367 kmem_cache_destroy(mrt_cachep);
2368 return err;
2369}