net/ipv4/ipmr.c
1 /*
2 * IP multicast routing support for mrouted 3.6/3.8
3 *
4 * (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
5 * Linux Consultancy and Custom Driver Development
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 *
12 * Fixes:
13 * Michael Chastain : Incorrect size of copying.
14 * Alan Cox : Added the cache manager code
15 * Alan Cox : Fixed the clone/copy bug and device race.
16 * Mike McLagan : Routing by source
17 * Malcolm Beattie : Buffer handling fixes.
18 * Alexey Kuznetsov : Double buffer free and other fixes.
19 * SVR Anand : Fixed several multicast bugs and problems.
20 * Alexey Kuznetsov : Status, optimisations and more.
21 * Brad Parker : Better behaviour on mrouted upcall
22 * overflow.
23 * Carlos Picoto : PIMv1 Support
24 * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header
25 * Relax this requirement to work with older peers.
26 *
27 */
28
29 #include <asm/system.h>
30 #include <asm/uaccess.h>
31 #include <linux/types.h>
32 #include <linux/capability.h>
33 #include <linux/errno.h>
34 #include <linux/timer.h>
35 #include <linux/mm.h>
36 #include <linux/kernel.h>
37 #include <linux/fcntl.h>
38 #include <linux/stat.h>
39 #include <linux/socket.h>
40 #include <linux/in.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/inetdevice.h>
44 #include <linux/igmp.h>
45 #include <linux/proc_fs.h>
46 #include <linux/seq_file.h>
47 #include <linux/mroute.h>
48 #include <linux/init.h>
49 #include <linux/if_ether.h>
50 #include <linux/slab.h>
51 #include <net/net_namespace.h>
52 #include <net/ip.h>
53 #include <net/protocol.h>
54 #include <linux/skbuff.h>
55 #include <net/route.h>
56 #include <net/sock.h>
57 #include <net/icmp.h>
58 #include <net/udp.h>
59 #include <net/raw.h>
60 #include <linux/notifier.h>
61 #include <linux/if_arp.h>
62 #include <linux/netfilter_ipv4.h>
63 #include <net/ipip.h>
64 #include <net/checksum.h>
65 #include <net/netlink.h>
66 #include <net/fib_rules.h>
67
68 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
69 #define CONFIG_IP_PIMSM 1
70 #endif
71
72 struct mr_table {
73 struct list_head list;
74 #ifdef CONFIG_NET_NS
75 struct net *net;
76 #endif
77 u32 id;
78 struct sock *mroute_sk;
79 struct timer_list ipmr_expire_timer;
80 struct list_head mfc_unres_queue;
81 struct list_head mfc_cache_array[MFC_LINES];
82 struct vif_device vif_table[MAXVIFS];
83 int maxvif;
84 atomic_t cache_resolve_queue_len;
85 int mroute_do_assert;
86 int mroute_do_pim;
87 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
88 int mroute_reg_vif_num;
89 #endif
90 };
91
92 struct ipmr_rule {
93 struct fib_rule common;
94 };
95
96 struct ipmr_result {
97 struct mr_table *mrt;
98 };
99
100 /* Big lock, protecting vif table, mrt cache and mroute socket state.
101 Note that the changes are serialized via rtnl_lock.
102 */
103
104 static DEFINE_RWLOCK(mrt_lock);
105
106 /*
107 * Multicast router control variables
108 */
109
110 #define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)
111
112 /* Special spinlock for queue of unresolved entries */
113 static DEFINE_SPINLOCK(mfc_unres_lock);
114
115 /* We return to Alan's original scheme. The hash table of resolved
116 entries is changed only in process context and protected
117 with the weak lock mrt_lock. The queue of unresolved entries is
118 protected with the strong spinlock mfc_unres_lock.
119 
120 This way the data path is entirely free of exclusive locks.
121 */
122
123 static struct kmem_cache *mrt_cachep __read_mostly;
124
125 static struct mr_table *ipmr_new_table(struct net *net, u32 id);
126 static int ip_mr_forward(struct net *net, struct mr_table *mrt,
127 struct sk_buff *skb, struct mfc_cache *cache,
128 int local);
129 static int ipmr_cache_report(struct mr_table *mrt,
130 struct sk_buff *pkt, vifi_t vifi, int assert);
131 static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
132 struct mfc_cache *c, struct rtmsg *rtm);
133 static void ipmr_expire_process(unsigned long arg);
134
135 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
136 #define ipmr_for_each_table(mrt, net) \
137 list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)
138
139 static struct mr_table *ipmr_get_table(struct net *net, u32 id)
140 {
141 struct mr_table *mrt;
142
143 ipmr_for_each_table(mrt, net) {
144 if (mrt->id == id)
145 return mrt;
146 }
147 return NULL;
148 }
149
150 static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
151 struct mr_table **mrt)
152 {
153 struct ipmr_result res;
154 struct fib_lookup_arg arg = { .result = &res, };
155 int err;
156
157 err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg);
158 if (err < 0)
159 return err;
160 *mrt = res.mrt;
161 return 0;
162 }
163
164 static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
165 int flags, struct fib_lookup_arg *arg)
166 {
167 struct ipmr_result *res = arg->result;
168 struct mr_table *mrt;
169
170 switch (rule->action) {
171 case FR_ACT_TO_TBL:
172 break;
173 case FR_ACT_UNREACHABLE:
174 return -ENETUNREACH;
175 case FR_ACT_PROHIBIT:
176 return -EACCES;
177 case FR_ACT_BLACKHOLE:
178 default:
179 return -EINVAL;
180 }
181
182 mrt = ipmr_get_table(rule->fr_net, rule->table);
183 if (mrt == NULL)
184 return -EAGAIN;
185 res->mrt = mrt;
186 return 0;
187 }
188
189 static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
190 {
191 return 1;
192 }
193
194 static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = {
195 FRA_GENERIC_POLICY,
196 };
197
198 static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
199 struct fib_rule_hdr *frh, struct nlattr **tb)
200 {
201 return 0;
202 }
203
204 static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
205 struct nlattr **tb)
206 {
207 return 1;
208 }
209
210 static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
211 struct fib_rule_hdr *frh)
212 {
213 frh->dst_len = 0;
214 frh->src_len = 0;
215 frh->tos = 0;
216 return 0;
217 }
218
219 static const struct fib_rules_ops __net_initdata ipmr_rules_ops_template = {
220 .family = RTNL_FAMILY_IPMR,
221 .rule_size = sizeof(struct ipmr_rule),
222 .addr_size = sizeof(u32),
223 .action = ipmr_rule_action,
224 .match = ipmr_rule_match,
225 .configure = ipmr_rule_configure,
226 .compare = ipmr_rule_compare,
227 .default_pref = fib_default_rule_pref,
228 .fill = ipmr_rule_fill,
229 .nlgroup = RTNLGRP_IPV4_RULE,
230 .policy = ipmr_rule_policy,
231 .owner = THIS_MODULE,
232 };
233
234 static int __net_init ipmr_rules_init(struct net *net)
235 {
236 struct fib_rules_ops *ops;
237 struct mr_table *mrt;
238 int err;
239
240 ops = fib_rules_register(&ipmr_rules_ops_template, net);
241 if (IS_ERR(ops))
242 return PTR_ERR(ops);
243
244 INIT_LIST_HEAD(&net->ipv4.mr_tables);
245
246 mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
247 if (mrt == NULL) {
248 err = -ENOMEM;
249 goto err1;
250 }
251
252 err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0);
253 if (err < 0)
254 goto err2;
255
256 net->ipv4.mr_rules_ops = ops;
257 return 0;
258
259 err2:
260 kfree(mrt);
261 err1:
262 fib_rules_unregister(ops);
263 return err;
264 }
265
266 static void __net_exit ipmr_rules_exit(struct net *net)
267 {
268 struct mr_table *mrt, *next;
269
270 list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) {
271 list_del(&mrt->list);
272 kfree(mrt);
273 }
274 fib_rules_unregister(net->ipv4.mr_rules_ops);
275 }
276 #else
277 #define ipmr_for_each_table(mrt, net) \
278 for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
279
280 static struct mr_table *ipmr_get_table(struct net *net, u32 id)
281 {
282 return net->ipv4.mrt;
283 }
284
285 static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
286 struct mr_table **mrt)
287 {
288 *mrt = net->ipv4.mrt;
289 return 0;
290 }
291
292 static int __net_init ipmr_rules_init(struct net *net)
293 {
294 net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
295 return net->ipv4.mrt ? 0 : -ENOMEM;
296 }
297
298 static void __net_exit ipmr_rules_exit(struct net *net)
299 {
300 kfree(net->ipv4.mrt);
301 }
302 #endif
303
304 static struct mr_table *ipmr_new_table(struct net *net, u32 id)
305 {
306 struct mr_table *mrt;
307 unsigned int i;
308
309 mrt = ipmr_get_table(net, id);
310 if (mrt != NULL)
311 return mrt;
312
313 mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
314 if (mrt == NULL)
315 return NULL;
316 write_pnet(&mrt->net, net);
317 mrt->id = id;
318
319 /* Forwarding cache */
320 for (i = 0; i < MFC_LINES; i++)
321 INIT_LIST_HEAD(&mrt->mfc_cache_array[i]);
322
323 INIT_LIST_HEAD(&mrt->mfc_unres_queue);
324
325 setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
326 (unsigned long)mrt);
327
328 #ifdef CONFIG_IP_PIMSM
329 mrt->mroute_reg_vif_num = -1;
330 #endif
331 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
332 list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
333 #endif
334 return mrt;
335 }
336
337 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
338
339 static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
340 {
341 struct net *net = dev_net(dev);
342
343 dev_close(dev);
344
345 dev = __dev_get_by_name(net, "tunl0");
346 if (dev) {
347 const struct net_device_ops *ops = dev->netdev_ops;
348 struct ifreq ifr;
349 struct ip_tunnel_parm p;
350
351 memset(&p, 0, sizeof(p));
352 p.iph.daddr = v->vifc_rmt_addr.s_addr;
353 p.iph.saddr = v->vifc_lcl_addr.s_addr;
354 p.iph.version = 4;
355 p.iph.ihl = 5;
356 p.iph.protocol = IPPROTO_IPIP;
357 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
358 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
359
360 if (ops->ndo_do_ioctl) {
361 mm_segment_t oldfs = get_fs();
362
363 set_fs(KERNEL_DS);
364 ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
365 set_fs(oldfs);
366 }
367 }
368 }
369
370 static
371 struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
372 {
373 struct net_device *dev;
374
375 dev = __dev_get_by_name(net, "tunl0");
376
377 if (dev) {
378 const struct net_device_ops *ops = dev->netdev_ops;
379 int err;
380 struct ifreq ifr;
381 struct ip_tunnel_parm p;
382 struct in_device *in_dev;
383
384 memset(&p, 0, sizeof(p));
385 p.iph.daddr = v->vifc_rmt_addr.s_addr;
386 p.iph.saddr = v->vifc_lcl_addr.s_addr;
387 p.iph.version = 4;
388 p.iph.ihl = 5;
389 p.iph.protocol = IPPROTO_IPIP;
390 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
391 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
392
393 if (ops->ndo_do_ioctl) {
394 mm_segment_t oldfs = get_fs();
395
396 set_fs(KERNEL_DS);
397 err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
398 set_fs(oldfs);
399 } else
400 err = -EOPNOTSUPP;
401
402 dev = NULL;
403
404 if (err == 0 &&
405 (dev = __dev_get_by_name(net, p.name)) != NULL) {
406 dev->flags |= IFF_MULTICAST;
407
408 in_dev = __in_dev_get_rtnl(dev);
409 if (in_dev == NULL)
410 goto failure;
411
412 ipv4_devconf_setall(in_dev);
413 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
414
415 if (dev_open(dev))
416 goto failure;
417 dev_hold(dev);
418 }
419 }
420 return dev;
421
422 failure:
423 /* allow the register to be completed before unregistering. */
424 rtnl_unlock();
425 rtnl_lock();
426
427 unregister_netdevice(dev);
428 return NULL;
429 }
430
431 #ifdef CONFIG_IP_PIMSM
432
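/*
 * Packets transmitted on the PIM register vif never hit the wire: they
 * are reported to the userspace daemon as IGMPMSG_WHOLEPKT upcalls and
 * the skb is then freed.
 */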
433 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
434 {
435 struct net *net = dev_net(dev);
436 struct mr_table *mrt;
437 struct flowi fl = {
438 .oif = dev->ifindex,
439 .iif = skb->skb_iif,
440 .mark = skb->mark,
441 };
442 int err;
443
444 err = ipmr_fib_lookup(net, &fl, &mrt);
445 if (err < 0)
446 return err;
447
448 read_lock(&mrt_lock);
449 dev->stats.tx_bytes += skb->len;
450 dev->stats.tx_packets++;
451 ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
452 read_unlock(&mrt_lock);
453 kfree_skb(skb);
454 return NETDEV_TX_OK;
455 }
456
457 static const struct net_device_ops reg_vif_netdev_ops = {
458 .ndo_start_xmit = reg_vif_xmit,
459 };
460
461 static void reg_vif_setup(struct net_device *dev)
462 {
463 dev->type = ARPHRD_PIMREG;
464 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
465 dev->flags = IFF_NOARP;
466 dev->netdev_ops = &reg_vif_netdev_ops,
467 dev->destructor = free_netdev;
468 dev->features |= NETIF_F_NETNS_LOCAL;
469 }
470
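/*
 * Create the PIM register device ("pimreg" for the default table,
 * "pimreg<id>" otherwise) through which register traffic for this
 * table is bounced to the daemon.
 */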
471 static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
472 {
473 struct net_device *dev;
474 struct in_device *in_dev;
475 char name[IFNAMSIZ];
476
477 if (mrt->id == RT_TABLE_DEFAULT)
478 sprintf(name, "pimreg");
479 else
480 sprintf(name, "pimreg%u", mrt->id);
481
482 dev = alloc_netdev(0, name, reg_vif_setup);
483
484 if (dev == NULL)
485 return NULL;
486
487 dev_net_set(dev, net);
488
489 if (register_netdevice(dev)) {
490 free_netdev(dev);
491 return NULL;
492 }
493 dev->iflink = 0;
494
495 rcu_read_lock();
496 if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
497 rcu_read_unlock();
498 goto failure;
499 }
500
501 ipv4_devconf_setall(in_dev);
502 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
503 rcu_read_unlock();
504
505 if (dev_open(dev))
506 goto failure;
507
508 dev_hold(dev);
509
510 return dev;
511
512 failure:
513 /* allow the register to be completed before unregistering. */
514 rtnl_unlock();
515 rtnl_lock();
516
517 unregister_netdevice(dev);
518 return NULL;
519 }
520 #endif
521
522 /*
523 * Delete a VIF entry
524 * @notify: Set to 1, if the caller is a notifier_call
525 */
526
527 static int vif_delete(struct mr_table *mrt, int vifi, int notify,
528 struct list_head *head)
529 {
530 struct vif_device *v;
531 struct net_device *dev;
532 struct in_device *in_dev;
533
534 if (vifi < 0 || vifi >= mrt->maxvif)
535 return -EADDRNOTAVAIL;
536
537 v = &mrt->vif_table[vifi];
538
539 write_lock_bh(&mrt_lock);
540 dev = v->dev;
541 v->dev = NULL;
542
543 if (!dev) {
544 write_unlock_bh(&mrt_lock);
545 return -EADDRNOTAVAIL;
546 }
547
548 #ifdef CONFIG_IP_PIMSM
549 if (vifi == mrt->mroute_reg_vif_num)
550 mrt->mroute_reg_vif_num = -1;
551 #endif
552
553 if (vifi+1 == mrt->maxvif) {
554 int tmp;
555 for (tmp=vifi-1; tmp>=0; tmp--) {
556 if (VIF_EXISTS(mrt, tmp))
557 break;
558 }
559 mrt->maxvif = tmp+1;
560 }
561
562 write_unlock_bh(&mrt_lock);
563
564 dev_set_allmulti(dev, -1);
565
566 if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
567 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
568 ip_rt_multicast_event(in_dev);
569 }
570
571 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
572 unregister_netdevice_queue(dev, head);
573
574 dev_put(dev);
575 return 0;
576 }
577
578 static inline void ipmr_cache_free(struct mfc_cache *c)
579 {
580 kmem_cache_free(mrt_cachep, c);
581 }
582
583 /* Destroy an unresolved cache entry, killing queued skbs
584 and reporting error to netlink readers.
585 */
586
587 static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
588 {
589 struct net *net = read_pnet(&mrt->net);
590 struct sk_buff *skb;
591 struct nlmsgerr *e;
592
593 atomic_dec(&mrt->cache_resolve_queue_len);
594
595 while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
596 if (ip_hdr(skb)->version == 0) {
597 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
598 nlh->nlmsg_type = NLMSG_ERROR;
599 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
600 skb_trim(skb, nlh->nlmsg_len);
601 e = NLMSG_DATA(nlh);
602 e->error = -ETIMEDOUT;
603 memset(&e->msg, 0, sizeof(e->msg));
604
605 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
606 } else
607 kfree_skb(skb);
608 }
609
610 ipmr_cache_free(c);
611 }
612
613
614 /* Timer process for the unresolved queue. */
615
616 static void ipmr_expire_process(unsigned long arg)
617 {
618 struct mr_table *mrt = (struct mr_table *)arg;
619 unsigned long now;
620 unsigned long expires;
621 struct mfc_cache *c, *next;
622
623 if (!spin_trylock(&mfc_unres_lock)) {
624 mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
625 return;
626 }
627
628 if (list_empty(&mrt->mfc_unres_queue))
629 goto out;
630
631 now = jiffies;
632 expires = 10*HZ;
633
634 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
635 if (time_after(c->mfc_un.unres.expires, now)) {
636 unsigned long interval = c->mfc_un.unres.expires - now;
637 if (interval < expires)
638 expires = interval;
639 continue;
640 }
641
642 list_del(&c->list);
643 ipmr_destroy_unres(mrt, c);
644 }
645
646 if (!list_empty(&mrt->mfc_unres_queue))
647 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
648
649 out:
650 spin_unlock(&mfc_unres_lock);
651 }
652
653 /* Fill oifs list. It is called under write locked mrt_lock. */
654
655 static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
656 unsigned char *ttls)
657 {
658 int vifi;
659
660 cache->mfc_un.res.minvif = MAXVIFS;
661 cache->mfc_un.res.maxvif = 0;
662 memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
663
664 for (vifi = 0; vifi < mrt->maxvif; vifi++) {
665 if (VIF_EXISTS(mrt, vifi) &&
666 ttls[vifi] && ttls[vifi] < 255) {
667 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
668 if (cache->mfc_un.res.minvif > vifi)
669 cache->mfc_un.res.minvif = vifi;
670 if (cache->mfc_un.res.maxvif <= vifi)
671 cache->mfc_un.res.maxvif = vifi + 1;
672 }
673 }
674 }
675
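/*
 * Add a virtual interface: resolve or create the underlying net_device,
 * switch it to allmulti, bump MC_FORWARDING on it and publish the entry
 * in vif_table under mrt_lock.
 */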
676 static int vif_add(struct net *net, struct mr_table *mrt,
677 struct vifctl *vifc, int mrtsock)
678 {
679 int vifi = vifc->vifc_vifi;
680 struct vif_device *v = &mrt->vif_table[vifi];
681 struct net_device *dev;
682 struct in_device *in_dev;
683 int err;
684
685 /* Is vif busy ? */
686 if (VIF_EXISTS(mrt, vifi))
687 return -EADDRINUSE;
688
689 switch (vifc->vifc_flags) {
690 #ifdef CONFIG_IP_PIMSM
691 case VIFF_REGISTER:
692 /*
693 * Special Purpose VIF in PIM
694 * All the packets will be sent to the daemon
695 */
696 if (mrt->mroute_reg_vif_num >= 0)
697 return -EADDRINUSE;
698 dev = ipmr_reg_vif(net, mrt);
699 if (!dev)
700 return -ENOBUFS;
701 err = dev_set_allmulti(dev, 1);
702 if (err) {
703 unregister_netdevice(dev);
704 dev_put(dev);
705 return err;
706 }
707 break;
708 #endif
709 case VIFF_TUNNEL:
710 dev = ipmr_new_tunnel(net, vifc);
711 if (!dev)
712 return -ENOBUFS;
713 err = dev_set_allmulti(dev, 1);
714 if (err) {
715 ipmr_del_tunnel(dev, vifc);
716 dev_put(dev);
717 return err;
718 }
719 break;
720
721 case VIFF_USE_IFINDEX:
722 case 0:
723 if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
724 dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
725 if (dev && dev->ip_ptr == NULL) {
726 dev_put(dev);
727 return -EADDRNOTAVAIL;
728 }
729 } else
730 dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
731
732 if (!dev)
733 return -EADDRNOTAVAIL;
734 err = dev_set_allmulti(dev, 1);
735 if (err) {
736 dev_put(dev);
737 return err;
738 }
739 break;
740 default:
741 return -EINVAL;
742 }
743
744 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
745 dev_put(dev);
746 return -EADDRNOTAVAIL;
747 }
748 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
749 ip_rt_multicast_event(in_dev);
750
751 /*
752 * Fill in the VIF structures
753 */
754 v->rate_limit = vifc->vifc_rate_limit;
755 v->local = vifc->vifc_lcl_addr.s_addr;
756 v->remote = vifc->vifc_rmt_addr.s_addr;
757 v->flags = vifc->vifc_flags;
758 if (!mrtsock)
759 v->flags |= VIFF_STATIC;
760 v->threshold = vifc->vifc_threshold;
761 v->bytes_in = 0;
762 v->bytes_out = 0;
763 v->pkt_in = 0;
764 v->pkt_out = 0;
765 v->link = dev->ifindex;
766 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
767 v->link = dev->iflink;
768
769 /* And finish update writing critical data */
770 write_lock_bh(&mrt_lock);
771 v->dev = dev;
772 #ifdef CONFIG_IP_PIMSM
773 if (v->flags&VIFF_REGISTER)
774 mrt->mroute_reg_vif_num = vifi;
775 #endif
776 if (vifi+1 > mrt->maxvif)
777 mrt->maxvif = vifi+1;
778 write_unlock_bh(&mrt_lock);
779 return 0;
780 }
781
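/* Look up a resolved (S,G) entry; the caller must hold mrt_lock. */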
782 static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
783 __be32 origin,
784 __be32 mcastgrp)
785 {
786 int line = MFC_HASH(mcastgrp, origin);
787 struct mfc_cache *c;
788
789 list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
790 if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
791 return c;
792 }
793 return NULL;
794 }
795
796 /*
797 * Allocate a multicast cache entry
798 */
799 static struct mfc_cache *ipmr_cache_alloc(void)
800 {
801 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
802 if (c == NULL)
803 return NULL;
804 c->mfc_un.res.minvif = MAXVIFS;
805 return c;
806 }
807
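/* Allocate an unresolved entry; GFP_ATOMIC because this runs on the receive path. */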
808 static struct mfc_cache *ipmr_cache_alloc_unres(void)
809 {
810 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
811 if (c == NULL)
812 return NULL;
813 skb_queue_head_init(&c->mfc_un.unres.unresolved);
814 c->mfc_un.unres.expires = jiffies + 10*HZ;
815 return c;
816 }
817
818 /*
819 * A cache entry has gone into a resolved state from queued
820 */
821
822 static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
823 struct mfc_cache *uc, struct mfc_cache *c)
824 {
825 struct sk_buff *skb;
826 struct nlmsgerr *e;
827
828 /*
829 * Play the pending entries through our router
830 */
831
832 while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
833 if (ip_hdr(skb)->version == 0) {
834 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
835
836 if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
837 nlh->nlmsg_len = (skb_tail_pointer(skb) -
838 (u8 *)nlh);
839 } else {
840 nlh->nlmsg_type = NLMSG_ERROR;
841 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
842 skb_trim(skb, nlh->nlmsg_len);
843 e = NLMSG_DATA(nlh);
844 e->error = -EMSGSIZE;
845 memset(&e->msg, 0, sizeof(e->msg));
846 }
847
848 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
849 } else
850 ip_mr_forward(net, mrt, skb, c, 0);
851 }
852 }
853
854 /*
855 * Bounce a cache query up to mrouted. We could use netlink for this but mrouted
856 * expects the following bizarre scheme.
857 *
858 * Called under mrt_lock.
859 */
860
861 static int ipmr_cache_report(struct mr_table *mrt,
862 struct sk_buff *pkt, vifi_t vifi, int assert)
863 {
864 struct sk_buff *skb;
865 const int ihl = ip_hdrlen(pkt);
866 struct igmphdr *igmp;
867 struct igmpmsg *msg;
868 int ret;
869
870 #ifdef CONFIG_IP_PIMSM
871 if (assert == IGMPMSG_WHOLEPKT)
872 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
873 else
874 #endif
875 skb = alloc_skb(128, GFP_ATOMIC);
876
877 if (!skb)
878 return -ENOBUFS;
879
880 #ifdef CONFIG_IP_PIMSM
881 if (assert == IGMPMSG_WHOLEPKT) {
882 /* Ugly, but we have no choice with this interface.
883 Duplicate old header, fix ihl, length etc.
884 And all this only to mangle msg->im_msgtype and
885 to set msg->im_mbz to "mbz" :-)
886 */
887 skb_push(skb, sizeof(struct iphdr));
888 skb_reset_network_header(skb);
889 skb_reset_transport_header(skb);
890 msg = (struct igmpmsg *)skb_network_header(skb);
891 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
892 msg->im_msgtype = IGMPMSG_WHOLEPKT;
893 msg->im_mbz = 0;
894 msg->im_vif = mrt->mroute_reg_vif_num;
895 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
896 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
897 sizeof(struct iphdr));
898 } else
899 #endif
900 {
901
902 /*
903 * Copy the IP header
904 */
905
906 skb->network_header = skb->tail;
907 skb_put(skb, ihl);
908 skb_copy_to_linear_data(skb, pkt->data, ihl);
909 ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */
910 msg = (struct igmpmsg *)skb_network_header(skb);
911 msg->im_vif = vifi;
912 skb_dst_set(skb, dst_clone(skb_dst(pkt)));
913
914 /*
915 * Add our header
916 */
917
918 igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
919 igmp->type =
920 msg->im_msgtype = assert;
921 igmp->code = 0;
922 ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */
923 skb->transport_header = skb->network_header;
924 }
925
926 if (mrt->mroute_sk == NULL) {
927 kfree_skb(skb);
928 return -EINVAL;
929 }
930
931 /*
932 * Deliver to mrouted
933 */
934 ret = sock_queue_rcv_skb(mrt->mroute_sk, skb);
935 if (ret < 0) {
936 if (net_ratelimit())
937 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
938 kfree_skb(skb);
939 }
940
941 return ret;
942 }
943
944 /*
945 * Queue a packet for route resolution, appending it to an unresolved
945 * cache entry (created here if needed) under mfc_unres_lock.
946 */
947
948 static int
949 ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
950 {
951 bool found = false;
952 int err;
953 struct mfc_cache *c;
954 const struct iphdr *iph = ip_hdr(skb);
955
956 spin_lock_bh(&mfc_unres_lock);
957 list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
958 if (c->mfc_mcastgrp == iph->daddr &&
959 c->mfc_origin == iph->saddr) {
960 found = true;
961 break;
962 }
963 }
964
965 if (!found) {
966 /*
967 * Create a new entry if allowable
968 */
969
970 if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
971 (c = ipmr_cache_alloc_unres()) == NULL) {
972 spin_unlock_bh(&mfc_unres_lock);
973
974 kfree_skb(skb);
975 return -ENOBUFS;
976 }
977
978 /*
979 * Fill in the new cache entry
980 */
981 c->mfc_parent = -1;
982 c->mfc_origin = iph->saddr;
983 c->mfc_mcastgrp = iph->daddr;
984
985 /*
986 * Reflect first query at mrouted.
987 */
988 err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
989 if (err < 0) {
990 /* If the report failed throw the cache entry
991 out - Brad Parker
992 */
993 spin_unlock_bh(&mfc_unres_lock);
994
995 ipmr_cache_free(c);
996 kfree_skb(skb);
997 return err;
998 }
999
1000 atomic_inc(&mrt->cache_resolve_queue_len);
1001 list_add(&c->list, &mrt->mfc_unres_queue);
1002
1003 if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
1004 mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
1005 }
1006
1007 /*
1008 * See if we can append the packet
1009 */
1010 if (c->mfc_un.unres.unresolved.qlen>3) {
1011 kfree_skb(skb);
1012 err = -ENOBUFS;
1013 } else {
1014 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
1015 err = 0;
1016 }
1017
1018 spin_unlock_bh(&mfc_unres_lock);
1019 return err;
1020 }
1021
1022 /*
1023 * MFC cache manipulation by user space mroute daemon
1024 */
1025
1026 static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc)
1027 {
1028 int line;
1029 struct mfc_cache *c, *next;
1030
1031 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
1032
1033 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) {
1034 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
1035 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
1036 write_lock_bh(&mrt_lock);
1037 list_del(&c->list);
1038 write_unlock_bh(&mrt_lock);
1039
1040 ipmr_cache_free(c);
1041 return 0;
1042 }
1043 }
1044 return -ENOENT;
1045 }
1046
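/*
 * Add or update an (S,G) entry on behalf of the daemon. If an unresolved
 * entry for the same (S,G) was queued, replay its pending packets through
 * the freshly resolved route.
 */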
1047 static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
1048 struct mfcctl *mfc, int mrtsock)
1049 {
1050 bool found = false;
1051 int line;
1052 struct mfc_cache *uc, *c;
1053
1054 if (mfc->mfcc_parent >= MAXVIFS)
1055 return -ENFILE;
1056
1057 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
1058
1059 list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
1060 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
1061 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
1062 found = true;
1063 break;
1064 }
1065 }
1066
1067 if (found) {
1068 write_lock_bh(&mrt_lock);
1069 c->mfc_parent = mfc->mfcc_parent;
1070 ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
1071 if (!mrtsock)
1072 c->mfc_flags |= MFC_STATIC;
1073 write_unlock_bh(&mrt_lock);
1074 return 0;
1075 }
1076
1077 if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
1078 return -EINVAL;
1079
1080 c = ipmr_cache_alloc();
1081 if (c == NULL)
1082 return -ENOMEM;
1083
1084 c->mfc_origin = mfc->mfcc_origin.s_addr;
1085 c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
1086 c->mfc_parent = mfc->mfcc_parent;
1087 ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
1088 if (!mrtsock)
1089 c->mfc_flags |= MFC_STATIC;
1090
1091 write_lock_bh(&mrt_lock);
1092 list_add(&c->list, &mrt->mfc_cache_array[line]);
1093 write_unlock_bh(&mrt_lock);
1094
1095 /*
1096 * Check to see if we resolved a queued list. If so we
1097 * need to send on the frames and tidy up.
1098 */
1099 found = false;
1100 spin_lock_bh(&mfc_unres_lock);
1101 list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
1102 if (uc->mfc_origin == c->mfc_origin &&
1103 uc->mfc_mcastgrp == c->mfc_mcastgrp) {
1104 list_del(&uc->list);
1105 atomic_dec(&mrt->cache_resolve_queue_len);
1106 found = true;
1107 break;
1108 }
1109 }
1110 if (list_empty(&mrt->mfc_unres_queue))
1111 del_timer(&mrt->ipmr_expire_timer);
1112 spin_unlock_bh(&mfc_unres_lock);
1113
1114 if (found) {
1115 ipmr_cache_resolve(net, mrt, uc, c);
1116 ipmr_cache_free(uc);
1117 }
1118 return 0;
1119 }
1120
1121 /*
1122 * Close the multicast socket, and clear the vif tables etc
1123 */
1124
1125 static void mroute_clean_tables(struct mr_table *mrt)
1126 {
1127 int i;
1128 LIST_HEAD(list);
1129 struct mfc_cache *c, *next;
1130
1131 /*
1132 * Shut down all active vif entries
1133 */
1134 for (i = 0; i < mrt->maxvif; i++) {
1135 if (!(mrt->vif_table[i].flags&VIFF_STATIC))
1136 vif_delete(mrt, i, 0, &list);
1137 }
1138 unregister_netdevice_many(&list);
1139
1140 /*
1141 * Wipe the cache
1142 */
1143 for (i = 0; i < MFC_LINES; i++) {
1144 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
1145 if (c->mfc_flags&MFC_STATIC)
1146 continue;
1147 write_lock_bh(&mrt_lock);
1148 list_del(&c->list);
1149 write_unlock_bh(&mrt_lock);
1150
1151 ipmr_cache_free(c);
1152 }
1153 }
1154
1155 if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1156 spin_lock_bh(&mfc_unres_lock);
1157 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
1158 list_del(&c->list);
1159 ipmr_destroy_unres(mrt, c);
1160 }
1161 spin_unlock_bh(&mfc_unres_lock);
1162 }
1163 }
1164
1165 static void mrtsock_destruct(struct sock *sk)
1166 {
1167 struct net *net = sock_net(sk);
1168 struct mr_table *mrt;
1169
1170 rtnl_lock();
1171 ipmr_for_each_table(mrt, net) {
1172 if (sk == mrt->mroute_sk) {
1173 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
1174
1175 write_lock_bh(&mrt_lock);
1176 mrt->mroute_sk = NULL;
1177 write_unlock_bh(&mrt_lock);
1178
1179 mroute_clean_tables(mrt);
1180 }
1181 }
1182 rtnl_unlock();
1183 }
1184
1185 /*
1186 * Socket options and virtual interface manipulation. The whole
1187 * virtual interface system is a complete heap, but unfortunately
1188 * that's how BSD mrouted happens to think. Maybe one day with a proper
1189 * MOSPF/PIM router set up we can clean this up.
1190 */
1191
1192 int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1193 {
1194 int ret;
1195 struct vifctl vif;
1196 struct mfcctl mfc;
1197 struct net *net = sock_net(sk);
1198 struct mr_table *mrt;
1199
1200 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1201 if (mrt == NULL)
1202 return -ENOENT;
1203
1204 if (optname != MRT_INIT) {
1205 if (sk != mrt->mroute_sk && !capable(CAP_NET_ADMIN))
1206 return -EACCES;
1207 }
1208
1209 switch (optname) {
1210 case MRT_INIT:
1211 if (sk->sk_type != SOCK_RAW ||
1212 inet_sk(sk)->inet_num != IPPROTO_IGMP)
1213 return -EOPNOTSUPP;
1214 if (optlen != sizeof(int))
1215 return -ENOPROTOOPT;
1216
1217 rtnl_lock();
1218 if (mrt->mroute_sk) {
1219 rtnl_unlock();
1220 return -EADDRINUSE;
1221 }
1222
1223 ret = ip_ra_control(sk, 1, mrtsock_destruct);
1224 if (ret == 0) {
1225 write_lock_bh(&mrt_lock);
1226 mrt->mroute_sk = sk;
1227 write_unlock_bh(&mrt_lock);
1228
1229 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
1230 }
1231 rtnl_unlock();
1232 return ret;
1233 case MRT_DONE:
1234 if (sk != mrt->mroute_sk)
1235 return -EACCES;
1236 return ip_ra_control(sk, 0, NULL);
1237 case MRT_ADD_VIF:
1238 case MRT_DEL_VIF:
1239 if (optlen != sizeof(vif))
1240 return -EINVAL;
1241 if (copy_from_user(&vif, optval, sizeof(vif)))
1242 return -EFAULT;
1243 if (vif.vifc_vifi >= MAXVIFS)
1244 return -ENFILE;
1245 rtnl_lock();
1246 if (optname == MRT_ADD_VIF) {
1247 ret = vif_add(net, mrt, &vif, sk == mrt->mroute_sk);
1248 } else {
1249 ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
1250 }
1251 rtnl_unlock();
1252 return ret;
1253
1254 /*
1255 * Manipulate the forwarding caches. These live
1256 * in a sort of kernel/user symbiosis.
1257 */
1258 case MRT_ADD_MFC:
1259 case MRT_DEL_MFC:
1260 if (optlen != sizeof(mfc))
1261 return -EINVAL;
1262 if (copy_from_user(&mfc, optval, sizeof(mfc)))
1263 return -EFAULT;
1264 rtnl_lock();
1265 if (optname == MRT_DEL_MFC)
1266 ret = ipmr_mfc_delete(mrt, &mfc);
1267 else
1268 ret = ipmr_mfc_add(net, mrt, &mfc, sk == mrt->mroute_sk);
1269 rtnl_unlock();
1270 return ret;
1271 /*
1272 * Control PIM assert.
1273 */
1274 case MRT_ASSERT:
1275 {
1276 int v;
1277 if (get_user(v,(int __user *)optval))
1278 return -EFAULT;
1279 mrt->mroute_do_assert = (v) ? 1 : 0;
1280 return 0;
1281 }
1282 #ifdef CONFIG_IP_PIMSM
1283 case MRT_PIM:
1284 {
1285 int v;
1286
1287 if (get_user(v,(int __user *)optval))
1288 return -EFAULT;
1289 v = (v) ? 1 : 0;
1290
1291 rtnl_lock();
1292 ret = 0;
1293 if (v != mrt->mroute_do_pim) {
1294 mrt->mroute_do_pim = v;
1295 mrt->mroute_do_assert = v;
1296 }
1297 rtnl_unlock();
1298 return ret;
1299 }
1300 #endif
1301 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
1302 case MRT_TABLE:
1303 {
1304 u32 v;
1305
1306 if (optlen != sizeof(u32))
1307 return -EINVAL;
1308 if (get_user(v, (u32 __user *)optval))
1309 return -EFAULT;
1310 if (sk == mrt->mroute_sk)
1311 return -EBUSY;
1312
1313 rtnl_lock();
1314 ret = 0;
1315 if (!ipmr_new_table(net, v))
1316 ret = -ENOMEM;
1317 raw_sk(sk)->ipmr_table = v;
1318 rtnl_unlock();
1319 return ret;
1320 }
1321 #endif
1322 /*
1323 * Spurious command, or MRT_VERSION which you cannot
1324 * set.
1325 */
1326 default:
1327 return -ENOPROTOOPT;
1328 }
1329 }
1330
1331 /*
1332 * Getsockopt support for the multicast routing system.
1333 */
1334
1335 int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
1336 {
1337 int olr;
1338 int val;
1339 struct net *net = sock_net(sk);
1340 struct mr_table *mrt;
1341
1342 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1343 if (mrt == NULL)
1344 return -ENOENT;
1345
1346 if (optname != MRT_VERSION &&
1347 #ifdef CONFIG_IP_PIMSM
1348 optname!=MRT_PIM &&
1349 #endif
1350 optname!=MRT_ASSERT)
1351 return -ENOPROTOOPT;
1352
1353 if (get_user(olr, optlen))
1354 return -EFAULT;
1355
1356 olr = min_t(unsigned int, olr, sizeof(int));
1357 if (olr < 0)
1358 return -EINVAL;
1359
1360 if (put_user(olr, optlen))
1361 return -EFAULT;
1362 if (optname == MRT_VERSION)
1363 val = 0x0305;
1364 #ifdef CONFIG_IP_PIMSM
1365 else if (optname == MRT_PIM)
1366 val = mrt->mroute_do_pim;
1367 #endif
1368 else
1369 val = mrt->mroute_do_assert;
1370 if (copy_to_user(optval, &val, olr))
1371 return -EFAULT;
1372 return 0;
1373 }
1374
1375 /*
1376 * The IP multicast ioctl support routines.
1377 */
1378
1379 int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1380 {
1381 struct sioc_sg_req sr;
1382 struct sioc_vif_req vr;
1383 struct vif_device *vif;
1384 struct mfc_cache *c;
1385 struct net *net = sock_net(sk);
1386 struct mr_table *mrt;
1387
1388 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1389 if (mrt == NULL)
1390 return -ENOENT;
1391
1392 switch (cmd) {
1393 case SIOCGETVIFCNT:
1394 if (copy_from_user(&vr, arg, sizeof(vr)))
1395 return -EFAULT;
1396 if (vr.vifi >= mrt->maxvif)
1397 return -EINVAL;
1398 read_lock(&mrt_lock);
1399 vif = &mrt->vif_table[vr.vifi];
1400 if (VIF_EXISTS(mrt, vr.vifi)) {
1401 vr.icount = vif->pkt_in;
1402 vr.ocount = vif->pkt_out;
1403 vr.ibytes = vif->bytes_in;
1404 vr.obytes = vif->bytes_out;
1405 read_unlock(&mrt_lock);
1406
1407 if (copy_to_user(arg, &vr, sizeof(vr)))
1408 return -EFAULT;
1409 return 0;
1410 }
1411 read_unlock(&mrt_lock);
1412 return -EADDRNOTAVAIL;
1413 case SIOCGETSGCNT:
1414 if (copy_from_user(&sr, arg, sizeof(sr)))
1415 return -EFAULT;
1416
1417 read_lock(&mrt_lock);
1418 c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
1419 if (c) {
1420 sr.pktcnt = c->mfc_un.res.pkt;
1421 sr.bytecnt = c->mfc_un.res.bytes;
1422 sr.wrong_if = c->mfc_un.res.wrong_if;
1423 read_unlock(&mrt_lock);
1424
1425 if (copy_to_user(arg, &sr, sizeof(sr)))
1426 return -EFAULT;
1427 return 0;
1428 }
1429 read_unlock(&mrt_lock);
1430 return -EADDRNOTAVAIL;
1431 default:
1432 return -ENOIOCTLCMD;
1433 }
1434 }
1435
1436
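/*
 * Netdevice notifier: when a device is unregistered, delete every VIF
 * bound to it in any multicast routing table.
 */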
1437 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1438 {
1439 struct net_device *dev = ptr;
1440 struct net *net = dev_net(dev);
1441 struct mr_table *mrt;
1442 struct vif_device *v;
1443 int ct;
1444 LIST_HEAD(list);
1445
1446 if (event != NETDEV_UNREGISTER)
1447 return NOTIFY_DONE;
1448
1449 ipmr_for_each_table(mrt, net) {
1450 v = &mrt->vif_table[0];
1451 for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1452 if (v->dev == dev)
1453 vif_delete(mrt, ct, 1, &list);
1454 }
1455 }
1456 unregister_netdevice_many(&list);
1457 return NOTIFY_DONE;
1458 }
1459
1460
1461 static struct notifier_block ip_mr_notifier = {
1462 .notifier_call = ipmr_device_event,
1463 };
1464
1465 /*
1466 * Encapsulate a packet by attaching a valid IPIP header to it.
1467 * This avoids tunnel drivers and other mess and gives us the speed so
1468 * important for multicast video.
1469 */
1470
1471 static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1472 {
1473 struct iphdr *iph;
1474 struct iphdr *old_iph = ip_hdr(skb);
1475
1476 skb_push(skb, sizeof(struct iphdr));
1477 skb->transport_header = skb->network_header;
1478 skb_reset_network_header(skb);
1479 iph = ip_hdr(skb);
1480
1481 iph->version = 4;
1482 iph->tos = old_iph->tos;
1483 iph->ttl = old_iph->ttl;
1484 iph->frag_off = 0;
1485 iph->daddr = daddr;
1486 iph->saddr = saddr;
1487 iph->protocol = IPPROTO_IPIP;
1488 iph->ihl = 5;
1489 iph->tot_len = htons(skb->len);
1490 ip_select_ident(iph, skb_dst(skb), NULL);
1491 ip_send_check(iph);
1492
1493 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1494 nf_reset(skb);
1495 }
1496
1497 static inline int ipmr_forward_finish(struct sk_buff *skb)
1498 {
1499 struct ip_options * opt = &(IPCB(skb)->opt);
1500
1501 IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
1502
1503 if (unlikely(opt->optlen))
1504 ip_forward_options(skb);
1505
1506 return dst_output(skb);
1507 }
1508
1509 /*
1510 * Processing handlers for ipmr_forward
1511 */
1512
1513 static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1514 struct sk_buff *skb, struct mfc_cache *c, int vifi)
1515 {
1516 const struct iphdr *iph = ip_hdr(skb);
1517 struct vif_device *vif = &mrt->vif_table[vifi];
1518 struct net_device *dev;
1519 struct rtable *rt;
1520 int encap = 0;
1521
1522 if (vif->dev == NULL)
1523 goto out_free;
1524
1525 #ifdef CONFIG_IP_PIMSM
1526 if (vif->flags & VIFF_REGISTER) {
1527 vif->pkt_out++;
1528 vif->bytes_out += skb->len;
1529 vif->dev->stats.tx_bytes += skb->len;
1530 vif->dev->stats.tx_packets++;
1531 ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
1532 goto out_free;
1533 }
1534 #endif
1535
1536 if (vif->flags&VIFF_TUNNEL) {
1537 struct flowi fl = { .oif = vif->link,
1538 .nl_u = { .ip4_u =
1539 { .daddr = vif->remote,
1540 .saddr = vif->local,
1541 .tos = RT_TOS(iph->tos) } },
1542 .proto = IPPROTO_IPIP };
1543 if (ip_route_output_key(net, &rt, &fl))
1544 goto out_free;
1545 encap = sizeof(struct iphdr);
1546 } else {
1547 struct flowi fl = { .oif = vif->link,
1548 .nl_u = { .ip4_u =
1549 { .daddr = iph->daddr,
1550 .tos = RT_TOS(iph->tos) } },
1551 .proto = IPPROTO_IPIP };
1552 if (ip_route_output_key(net, &rt, &fl))
1553 goto out_free;
1554 }
1555
1556 dev = rt->u.dst.dev;
1557
1558 if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
1559 /* Do not fragment multicasts. Alas, IPv4 does not
1560 allow us to send ICMP here, so oversized packets
1561 simply disappear into a blackhole.
1562 */
1563
1564 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
1565 ip_rt_put(rt);
1566 goto out_free;
1567 }
1568
1569 encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
1570
1571 if (skb_cow(skb, encap)) {
1572 ip_rt_put(rt);
1573 goto out_free;
1574 }
1575
1576 vif->pkt_out++;
1577 vif->bytes_out += skb->len;
1578
1579 skb_dst_drop(skb);
1580 skb_dst_set(skb, &rt->u.dst);
1581 ip_decrease_ttl(ip_hdr(skb));
1582
1583 /* FIXME: forward and output firewalls used to be called here.
1584 * What do we do with netfilter? -- RR */
1585 if (vif->flags & VIFF_TUNNEL) {
1586 ip_encap(skb, vif->local, vif->remote);
1587 /* FIXME: extra output firewall step used to be here. --RR */
1588 vif->dev->stats.tx_packets++;
1589 vif->dev->stats.tx_bytes += skb->len;
1590 }
1591
1592 IPCB(skb)->flags |= IPSKB_FORWARDED;
1593
1594 /*
1595 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets locally
1596 * not only before forwarding, but also after forwarding on all output
1597 * interfaces. Clearly, if the mrouter runs a multicasting
1598 * program, that program should receive packets regardless of the
1599 * interface on which it joined.
1600 * If we do not do this, the program would have to join on all
1601 * interfaces. On the other hand, a multihomed host (or router, but
1602 * not an mrouter) cannot join on more than one interface - it would
1603 * receive multiple copies of each packet.
1604 */
1605 NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, dev,
1606 ipmr_forward_finish);
1607 return;
1608
1609 out_free:
1610 kfree_skb(skb);
1611 }
1612
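/* Map a net_device back to its VIF index, or -1 if it is not a VIF. */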
1613 static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
1614 {
1615 int ct;
1616
1617 for (ct = mrt->maxvif-1; ct >= 0; ct--) {
1618 if (mrt->vif_table[ct].dev == dev)
1619 break;
1620 }
1621 return ct;
1622 }
1623
1624 /* "local" means that we should preserve one skb (for local delivery) */
1625
1626 static int ip_mr_forward(struct net *net, struct mr_table *mrt,
1627 struct sk_buff *skb, struct mfc_cache *cache,
1628 int local)
1629 {
1630 int psend = -1;
1631 int vif, ct;
1632
1633 vif = cache->mfc_parent;
1634 cache->mfc_un.res.pkt++;
1635 cache->mfc_un.res.bytes += skb->len;
1636
1637 /*
1638 * Wrong interface: drop packet and (maybe) send PIM assert.
1639 */
1640 if (mrt->vif_table[vif].dev != skb->dev) {
1641 int true_vifi;
1642
1643 if (skb_rtable(skb)->fl.iif == 0) {
1644 /* It is our own packet, looped back.
1645 Very complicated situation...
1646
1647 The best workaround until the routing daemons are
1648 fixed is not to redistribute a packet if it was
1649 sent through the wrong interface. This means that
1650 multicast applications WILL NOT work for
1651 (S,G) entries whose default multicast route points
1652 to the wrong oif. In any case, it is not a good
1653 idea to run multicasting applications on a router.
1654 */
1655 goto dont_forward;
1656 }
1657
1658 cache->mfc_un.res.wrong_if++;
1659 true_vifi = ipmr_find_vif(mrt, skb->dev);
1660
1661 if (true_vifi >= 0 && mrt->mroute_do_assert &&
1662 /* pimsm uses asserts, when switching from RPT to SPT,
1663 so that we cannot check that packet arrived on an oif.
1664 It is bad, but otherwise we would need to move pretty
1665 large chunk of pimd to kernel. Ough... --ANK
1666 */
1667 (mrt->mroute_do_pim ||
1668 cache->mfc_un.res.ttls[true_vifi] < 255) &&
1669 time_after(jiffies,
1670 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1671 cache->mfc_un.res.last_assert = jiffies;
1672 ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
1673 }
1674 goto dont_forward;
1675 }
1676
1677 mrt->vif_table[vif].pkt_in++;
1678 mrt->vif_table[vif].bytes_in += skb->len;
1679
1680 /*
1681 * Forward the frame
1682 */
1683 for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
1684 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
1685 if (psend != -1) {
1686 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1687 if (skb2)
1688 ipmr_queue_xmit(net, mrt, skb2, cache,
1689 psend);
1690 }
1691 psend = ct;
1692 }
1693 }
1694 if (psend != -1) {
1695 if (local) {
1696 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1697 if (skb2)
1698 ipmr_queue_xmit(net, mrt, skb2, cache, psend);
1699 } else {
1700 ipmr_queue_xmit(net, mrt, skb, cache, psend);
1701 return 0;
1702 }
1703 }
1704
1705 dont_forward:
1706 if (!local)
1707 kfree_skb(skb);
1708 return 0;
1709 }
1710
1711
1712 /*
1713 * Multicast packets for forwarding arrive here
1714 */
1715
1716 int ip_mr_input(struct sk_buff *skb)
1717 {
1718 struct mfc_cache *cache;
1719 struct net *net = dev_net(skb->dev);
1720 int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
1721 struct mr_table *mrt;
1722 int err;
1723
1724 /* Packet is looped back after forward, it should not be
1725 forwarded second time, but still can be delivered locally.
1726 */
1727 if (IPCB(skb)->flags&IPSKB_FORWARDED)
1728 goto dont_forward;
1729
1730 err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt);
1731 if (err < 0)
1732 return err;
1733
1734 if (!local) {
1735 if (IPCB(skb)->opt.router_alert) {
1736 if (ip_call_ra_chain(skb))
1737 return 0;
1738 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
1739 /* IGMPv1 (and broken IGMPv2 implementations such as
1740 Cisco IOS <= 11.2(8)) do not put the router alert
1741 option in IGMP packets destined to routable
1742 groups. This is very bad, because it means
1743 that we can forward NO IGMP messages.
1744 */
1745 read_lock(&mrt_lock);
1746 if (mrt->mroute_sk) {
1747 nf_reset(skb);
1748 raw_rcv(mrt->mroute_sk, skb);
1749 read_unlock(&mrt_lock);
1750 return 0;
1751 }
1752 read_unlock(&mrt_lock);
1753 }
1754 }
1755
1756 read_lock(&mrt_lock);
1757 cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
1758
1759 /*
1760 * No usable cache entry
1761 */
1762 if (cache == NULL) {
1763 int vif;
1764
1765 if (local) {
1766 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1767 ip_local_deliver(skb);
1768 if (skb2 == NULL) {
1769 read_unlock(&mrt_lock);
1770 return -ENOBUFS;
1771 }
1772 skb = skb2;
1773 }
1774
1775 vif = ipmr_find_vif(mrt, skb->dev);
1776 if (vif >= 0) {
1777 int err2 = ipmr_cache_unresolved(mrt, vif, skb);
1778 read_unlock(&mrt_lock);
1779
1780 return err2;
1781 }
1782 read_unlock(&mrt_lock);
1783 kfree_skb(skb);
1784 return -ENODEV;
1785 }
1786
1787 ip_mr_forward(net, mrt, skb, cache, local);
1788
1789 read_unlock(&mrt_lock);
1790
1791 if (local)
1792 return ip_local_deliver(skb);
1793
1794 return 0;
1795
1796 dont_forward:
1797 if (local)
1798 return ip_local_deliver(skb);
1799 kfree_skb(skb);
1800 return 0;
1801 }
1802
1803 #ifdef CONFIG_IP_PIMSM
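/*
 * Common PIMv1/PIMv2 register handling: validate the encapsulated packet,
 * strip the outer headers and re-inject it as if it had arrived on the
 * register vif.
 */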
1804 static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
1805 unsigned int pimlen)
1806 {
1807 struct net_device *reg_dev = NULL;
1808 struct iphdr *encap;
1809
1810 encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
1811 /*
1812 Check that:
1813 a. packet is really destined to a multicast group
1814 b. packet is not a NULL-REGISTER
1815 c. packet is not truncated
1816 */
1817 if (!ipv4_is_multicast(encap->daddr) ||
1818 encap->tot_len == 0 ||
1819 ntohs(encap->tot_len) + pimlen > skb->len)
1820 return 1;
1821
1822 read_lock(&mrt_lock);
1823 if (mrt->mroute_reg_vif_num >= 0)
1824 reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
1825 if (reg_dev)
1826 dev_hold(reg_dev);
1827 read_unlock(&mrt_lock);
1828
1829 if (reg_dev == NULL)
1830 return 1;
1831
1832 skb->mac_header = skb->network_header;
1833 skb_pull(skb, (u8*)encap - skb->data);
1834 skb_reset_network_header(skb);
1835 skb->protocol = htons(ETH_P_IP);
1836 skb->ip_summed = 0;
1837 skb->pkt_type = PACKET_HOST;
1838
1839 skb_tunnel_rx(skb, reg_dev);
1840
1841 netif_rx(skb);
1842 dev_put(reg_dev);
1843
1844 return 0;
1845 }
1846 #endif
1847
1848 #ifdef CONFIG_IP_PIMSM_V1
1849 /*
1850 * Handle IGMP messages of PIMv1
1851 */
1852
1853 int pim_rcv_v1(struct sk_buff * skb)
1854 {
1855 struct igmphdr *pim;
1856 struct net *net = dev_net(skb->dev);
1857 struct mr_table *mrt;
1858
1859 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1860 goto drop;
1861
1862 pim = igmp_hdr(skb);
1863
1864 if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
1865 goto drop;
1866
1867 if (!mrt->mroute_do_pim ||
1868 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1869 goto drop;
1870
1871 if (__pim_rcv(mrt, skb, sizeof(*pim))) {
1872 drop:
1873 kfree_skb(skb);
1874 }
1875 return 0;
1876 }
1877 #endif
1878
1879 #ifdef CONFIG_IP_PIMSM_V2
1880 static int pim_rcv(struct sk_buff * skb)
1881 {
1882 struct pimreghdr *pim;
1883 struct net *net = dev_net(skb->dev);
1884 struct mr_table *mrt;
1885
1886 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1887 goto drop;
1888
1889 pim = (struct pimreghdr *)skb_transport_header(skb);
1890 if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
1891 (pim->flags&PIM_NULL_REGISTER) ||
1892 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
1893 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1894 goto drop;
1895
1896 if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
1897 goto drop;
1898
1899 if (__pim_rcv(mrt, skb, sizeof(*pim))) {
1900 drop:
1901 kfree_skb(skb);
1902 }
1903 return 0;
1904 }
1905 #endif
1906
1907 static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
1908 struct mfc_cache *c, struct rtmsg *rtm)
1909 {
1910 int ct;
1911 struct rtnexthop *nhp;
1912 u8 *b = skb_tail_pointer(skb);
1913 struct rtattr *mp_head;
1914
1915 /* If cache is unresolved, don't try to parse IIF and OIF */
1916 if (c->mfc_parent >= MAXVIFS)
1917 return -ENOENT;
1918
1919 if (VIF_EXISTS(mrt, c->mfc_parent))
1920 RTA_PUT(skb, RTA_IIF, 4, &mrt->vif_table[c->mfc_parent].dev->ifindex);
1921
1922 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1923
1924 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1925 if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
1926 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1927 goto rtattr_failure;
1928 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1929 nhp->rtnh_flags = 0;
1930 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1931 nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
1932 nhp->rtnh_len = sizeof(*nhp);
1933 }
1934 }
1935 mp_head->rta_type = RTA_MULTIPATH;
1936 mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1937 rtm->rtm_type = RTN_MULTICAST;
1938 return 1;
1939
1940 rtattr_failure:
1941 nlmsg_trim(skb, b);
1942 return -EMSGSIZE;
1943 }
1944
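/*
 * Fill multicast forwarding state into an rtnetlink route reply. If no
 * MFC entry exists yet, queue the request as an unresolved entry so the
 * daemon is asked to resolve it (unless the caller passed nowait).
 */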
1945 int ipmr_get_route(struct net *net,
1946 struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1947 {
1948 int err;
1949 struct mr_table *mrt;
1950 struct mfc_cache *cache;
1951 struct rtable *rt = skb_rtable(skb);
1952
1953 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
1954 if (mrt == NULL)
1955 return -ENOENT;
1956
1957 read_lock(&mrt_lock);
1958 cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst);
1959
1960 if (cache == NULL) {
1961 struct sk_buff *skb2;
1962 struct iphdr *iph;
1963 struct net_device *dev;
1964 int vif;
1965
1966 if (nowait) {
1967 read_unlock(&mrt_lock);
1968 return -EAGAIN;
1969 }
1970
1971 dev = skb->dev;
1972 if (dev == NULL || (vif = ipmr_find_vif(mrt, dev)) < 0) {
1973 read_unlock(&mrt_lock);
1974 return -ENODEV;
1975 }
1976 skb2 = skb_clone(skb, GFP_ATOMIC);
1977 if (!skb2) {
1978 read_unlock(&mrt_lock);
1979 return -ENOMEM;
1980 }
1981
1982 skb_push(skb2, sizeof(struct iphdr));
1983 skb_reset_network_header(skb2);
1984 iph = ip_hdr(skb2);
1985 iph->ihl = sizeof(struct iphdr) >> 2;
1986 iph->saddr = rt->rt_src;
1987 iph->daddr = rt->rt_dst;
1988 iph->version = 0;
1989 err = ipmr_cache_unresolved(mrt, vif, skb2);
1990 read_unlock(&mrt_lock);
1991 return err;
1992 }
1993
1994 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1995 cache->mfc_flags |= MFC_NOTIFY;
1996 err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
1997 read_unlock(&mrt_lock);
1998 return err;
1999 }
2000
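/* Build one RTM_NEWROUTE message describing an MFC entry for a dump. */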
2001 static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2002 u32 pid, u32 seq, struct mfc_cache *c)
2003 {
2004 struct nlmsghdr *nlh;
2005 struct rtmsg *rtm;
2006
2007 nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
2008 if (nlh == NULL)
2009 return -EMSGSIZE;
2010
2011 rtm = nlmsg_data(nlh);
2012 rtm->rtm_family = RTNL_FAMILY_IPMR;
2013 rtm->rtm_dst_len = 32;
2014 rtm->rtm_src_len = 32;
2015 rtm->rtm_tos = 0;
2016 rtm->rtm_table = mrt->id;
2017 NLA_PUT_U32(skb, RTA_TABLE, mrt->id);
2018 rtm->rtm_type = RTN_MULTICAST;
2019 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2020 rtm->rtm_protocol = RTPROT_UNSPEC;
2021 rtm->rtm_flags = 0;
2022
2023 NLA_PUT_BE32(skb, RTA_SRC, c->mfc_origin);
2024 NLA_PUT_BE32(skb, RTA_DST, c->mfc_mcastgrp);
2025
2026 if (__ipmr_fill_mroute(mrt, skb, c, rtm) < 0)
2027 goto nla_put_failure;
2028
2029 return nlmsg_end(skb, nlh);
2030
2031 nla_put_failure:
2032 nlmsg_cancel(skb, nlh);
2033 return -EMSGSIZE;
2034 }
2035
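/*
 * Dump the MFC entries of every table. cb->args[] carries the
 * (table, hash line, entry) position so the dump can resume.
 */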
2036 static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2037 {
2038 struct net *net = sock_net(skb->sk);
2039 struct mr_table *mrt;
2040 struct mfc_cache *mfc;
2041 unsigned int t = 0, s_t;
2042 unsigned int h = 0, s_h;
2043 unsigned int e = 0, s_e;
2044
2045 s_t = cb->args[0];
2046 s_h = cb->args[1];
2047 s_e = cb->args[2];
2048
2049 read_lock(&mrt_lock);
2050 ipmr_for_each_table(mrt, net) {
2051 if (t < s_t)
2052 goto next_table;
2053 if (t > s_t)
2054 s_h = 0;
2055 for (h = s_h; h < MFC_LINES; h++) {
2056 list_for_each_entry(mfc, &mrt->mfc_cache_array[h], list) {
2057 if (e < s_e)
2058 goto next_entry;
2059 if (ipmr_fill_mroute(mrt, skb,
2060 NETLINK_CB(cb->skb).pid,
2061 cb->nlh->nlmsg_seq,
2062 mfc) < 0)
2063 goto done;
2064 next_entry:
2065 e++;
2066 }
2067 e = s_e = 0;
2068 }
2069 s_h = 0;
2070 next_table:
2071 t++;
2072 }
2073 done:
2074 read_unlock(&mrt_lock);
2075
2076 cb->args[2] = e;
2077 cb->args[1] = h;
2078 cb->args[0] = t;
2079
2080 return skb->len;
2081 }
2082
2083 #ifdef CONFIG_PROC_FS
2084 /*
2085 * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
2086 */
2087 struct ipmr_vif_iter {
2088 struct seq_net_private p;
2089 struct mr_table *mrt;
2090 int ct;
2091 };
2092
2093 static struct vif_device *ipmr_vif_seq_idx(struct net *net,
2094 struct ipmr_vif_iter *iter,
2095 loff_t pos)
2096 {
2097 struct mr_table *mrt = iter->mrt;
2098
2099 for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
2100 if (!VIF_EXISTS(mrt, iter->ct))
2101 continue;
2102 if (pos-- == 0)
2103 return &mrt->vif_table[iter->ct];
2104 }
2105 return NULL;
2106 }
2107
2108 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
2109 __acquires(mrt_lock)
2110 {
2111 struct ipmr_vif_iter *iter = seq->private;
2112 struct net *net = seq_file_net(seq);
2113 struct mr_table *mrt;
2114
2115 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
2116 if (mrt == NULL)
2117 return ERR_PTR(-ENOENT);
2118
2119 iter->mrt = mrt;
2120
2121 read_lock(&mrt_lock);
2122 return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
2123 : SEQ_START_TOKEN;
2124 }
2125
2126 static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2127 {
2128 struct ipmr_vif_iter *iter = seq->private;
2129 struct net *net = seq_file_net(seq);
2130 struct mr_table *mrt = iter->mrt;
2131
2132 ++*pos;
2133 if (v == SEQ_START_TOKEN)
2134 return ipmr_vif_seq_idx(net, iter, 0);
2135
2136 while (++iter->ct < mrt->maxvif) {
2137 if (!VIF_EXISTS(mrt, iter->ct))
2138 continue;
2139 return &mrt->vif_table[iter->ct];
2140 }
2141 return NULL;
2142 }
2143
2144 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
2145 __releases(mrt_lock)
2146 {
2147 read_unlock(&mrt_lock);
2148 }
2149
2150 static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
2151 {
2152 struct ipmr_vif_iter *iter = seq->private;
2153 struct mr_table *mrt = iter->mrt;
2154
2155 if (v == SEQ_START_TOKEN) {
2156 seq_puts(seq,
2157 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
2158 } else {
2159 const struct vif_device *vif = v;
2160 const char *name = vif->dev ? vif->dev->name : "none";
2161
2162 seq_printf(seq,
2163 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
2164 vif - mrt->vif_table,
2165 name, vif->bytes_in, vif->pkt_in,
2166 vif->bytes_out, vif->pkt_out,
2167 vif->flags, vif->local, vif->remote);
2168 }
2169 return 0;
2170 }
2171
2172 static const struct seq_operations ipmr_vif_seq_ops = {
2173 .start = ipmr_vif_seq_start,
2174 .next = ipmr_vif_seq_next,
2175 .stop = ipmr_vif_seq_stop,
2176 .show = ipmr_vif_seq_show,
2177 };
2178
2179 static int ipmr_vif_open(struct inode *inode, struct file *file)
2180 {
2181 return seq_open_net(inode, file, &ipmr_vif_seq_ops,
2182 sizeof(struct ipmr_vif_iter));
2183 }
2184
2185 static const struct file_operations ipmr_vif_fops = {
2186 .owner = THIS_MODULE,
2187 .open = ipmr_vif_open,
2188 .read = seq_read,
2189 .llseek = seq_lseek,
2190 .release = seq_release_net,
2191 };
2192
2193 struct ipmr_mfc_iter {
2194 struct seq_net_private p;
2195 struct mr_table *mrt;
2196 struct list_head *cache;
2197 int ct;
2198 };
2199
2200
2201 static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
2202 struct ipmr_mfc_iter *it, loff_t pos)
2203 {
2204 struct mr_table *mrt = it->mrt;
2205 struct mfc_cache *mfc;
2206
2207 read_lock(&mrt_lock);
2208 for (it->ct = 0; it->ct < MFC_LINES; it->ct++) {
2209 it->cache = &mrt->mfc_cache_array[it->ct];
2210 list_for_each_entry(mfc, it->cache, list)
2211 if (pos-- == 0)
2212 return mfc;
2213 }
2214 read_unlock(&mrt_lock);
2215
2216 spin_lock_bh(&mfc_unres_lock);
2217 it->cache = &mrt->mfc_unres_queue;
2218 list_for_each_entry(mfc, it->cache, list)
2219 if (pos-- == 0)
2220 return mfc;
2221 spin_unlock_bh(&mfc_unres_lock);
2222
2223 it->cache = NULL;
2224 return NULL;
2225 }
2226
2227
2228 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
2229 {
2230 struct ipmr_mfc_iter *it = seq->private;
2231 struct net *net = seq_file_net(seq);
2232 struct mr_table *mrt;
2233
2234 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
2235 if (mrt == NULL)
2236 return ERR_PTR(-ENOENT);
2237
2238 it->mrt = mrt;
2239 it->cache = NULL;
2240 it->ct = 0;
2241 return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
2242 : SEQ_START_TOKEN;
2243 }
2244
2245 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2246 {
2247 struct mfc_cache *mfc = v;
2248 struct ipmr_mfc_iter *it = seq->private;
2249 struct net *net = seq_file_net(seq);
2250 struct mr_table *mrt = it->mrt;
2251
2252 ++*pos;
2253
2254 if (v == SEQ_START_TOKEN)
2255 return ipmr_mfc_seq_idx(net, seq->private, 0);
2256
2257 if (mfc->list.next != it->cache)
2258 return list_entry(mfc->list.next, struct mfc_cache, list);
2259
2260 if (it->cache == &mrt->mfc_unres_queue)
2261 goto end_of_list;
2262
2263 BUG_ON(it->cache != &mrt->mfc_cache_array[it->ct]);
2264
2265 while (++it->ct < MFC_LINES) {
2266 it->cache = &mrt->mfc_cache_array[it->ct];
2267 if (list_empty(it->cache))
2268 continue;
2269 return list_first_entry(it->cache, struct mfc_cache, list);
2270 }
2271
2272 /* exhausted cache_array, show unresolved */
2273 read_unlock(&mrt_lock);
2274 it->cache = &mrt->mfc_unres_queue;
2275 it->ct = 0;
2276
2277 spin_lock_bh(&mfc_unres_lock);
2278 if (!list_empty(it->cache))
2279 return list_first_entry(it->cache, struct mfc_cache, list);
2280
2281 end_of_list:
2282 spin_unlock_bh(&mfc_unres_lock);
2283 it->cache = NULL;
2284
2285 return NULL;
2286 }
2287
2288 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
2289 {
2290 struct ipmr_mfc_iter *it = seq->private;
2291 struct mr_table *mrt = it->mrt;
2292
2293 if (it->cache == &mrt->mfc_unres_queue)
2294 spin_unlock_bh(&mfc_unres_lock);
2295 else if (it->cache == &mrt->mfc_cache_array[it->ct])
2296 read_unlock(&mrt_lock);
2297 }
2298
2299 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
2300 {
2301 int n;
2302
2303 if (v == SEQ_START_TOKEN) {
2304 seq_puts(seq,
2305 "Group Origin Iif Pkts Bytes Wrong Oifs\n");
2306 } else {
2307 const struct mfc_cache *mfc = v;
2308 const struct ipmr_mfc_iter *it = seq->private;
2309 const struct mr_table *mrt = it->mrt;
2310
2311 seq_printf(seq, "%08X %08X %-3hd",
2312 (__force u32) mfc->mfc_mcastgrp,
2313 (__force u32) mfc->mfc_origin,
2314 mfc->mfc_parent);
2315
2316 if (it->cache != &mrt->mfc_unres_queue) {
2317 seq_printf(seq, " %8lu %8lu %8lu",
2318 mfc->mfc_un.res.pkt,
2319 mfc->mfc_un.res.bytes,
2320 mfc->mfc_un.res.wrong_if);
2321 for (n = mfc->mfc_un.res.minvif;
2322 n < mfc->mfc_un.res.maxvif; n++ ) {
2323 if (VIF_EXISTS(mrt, n) &&
2324 mfc->mfc_un.res.ttls[n] < 255)
2325 seq_printf(seq,
2326 " %2d:%-3d",
2327 n, mfc->mfc_un.res.ttls[n]);
2328 }
2329 } else {
2330 /* unresolved mfc_caches don't contain
2331 * pkt, bytes and wrong_if values
2332 */
2333 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
2334 }
2335 seq_putc(seq, '\n');
2336 }
2337 return 0;
2338 }
2339
2340 static const struct seq_operations ipmr_mfc_seq_ops = {
2341 .start = ipmr_mfc_seq_start,
2342 .next = ipmr_mfc_seq_next,
2343 .stop = ipmr_mfc_seq_stop,
2344 .show = ipmr_mfc_seq_show,
2345 };
2346
2347 static int ipmr_mfc_open(struct inode *inode, struct file *file)
2348 {
2349 return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
2350 sizeof(struct ipmr_mfc_iter));
2351 }
2352
2353 static const struct file_operations ipmr_mfc_fops = {
2354 .owner = THIS_MODULE,
2355 .open = ipmr_mfc_open,
2356 .read = seq_read,
2357 .llseek = seq_lseek,
2358 .release = seq_release_net,
2359 };
2360 #endif
2361
2362 #ifdef CONFIG_IP_PIMSM_V2
2363 static const struct net_protocol pim_protocol = {
2364 .handler = pim_rcv,
2365 .netns_ok = 1,
2366 };
2367 #endif
2368
2369
2370 /*
2371 * Setup for IP multicast routing
2372 */
2373 static int __net_init ipmr_net_init(struct net *net)
2374 {
2375 int err;
2376
2377 err = ipmr_rules_init(net);
2378 if (err < 0)
2379 goto fail;
2380
2381 #ifdef CONFIG_PROC_FS
2382 err = -ENOMEM;
2383 if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
2384 goto proc_vif_fail;
2385 if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
2386 goto proc_cache_fail;
2387 #endif
2388 return 0;
2389
2390 #ifdef CONFIG_PROC_FS
2391 proc_cache_fail:
2392 proc_net_remove(net, "ip_mr_vif");
2393 proc_vif_fail:
2394 ipmr_rules_exit(net);
2395 #endif
2396 fail:
2397 return err;
2398 }
2399
2400 static void __net_exit ipmr_net_exit(struct net *net)
2401 {
2402 #ifdef CONFIG_PROC_FS
2403 proc_net_remove(net, "ip_mr_cache");
2404 proc_net_remove(net, "ip_mr_vif");
2405 #endif
2406 ipmr_rules_exit(net);
2407 }
2408
2409 static struct pernet_operations ipmr_net_ops = {
2410 .init = ipmr_net_init,
2411 .exit = ipmr_net_exit,
2412 };
2413
2414 int __init ip_mr_init(void)
2415 {
2416 int err;
2417
2418 mrt_cachep = kmem_cache_create("ip_mrt_cache",
2419 sizeof(struct mfc_cache),
2420 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2421 NULL);
2422 if (!mrt_cachep)
2423 return -ENOMEM;
2424
2425 err = register_pernet_subsys(&ipmr_net_ops);
2426 if (err)
2427 goto reg_pernet_fail;
2428
2429 err = register_netdevice_notifier(&ip_mr_notifier);
2430 if (err)
2431 goto reg_notif_fail;
2432 #ifdef CONFIG_IP_PIMSM_V2
2433 if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
2434 printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
2435 err = -EAGAIN;
2436 goto add_proto_fail;
2437 }
2438 #endif
2439 rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, NULL, ipmr_rtm_dumproute);
2440 return 0;
2441
2442 #ifdef CONFIG_IP_PIMSM_V2
2443 add_proto_fail:
2444 unregister_netdevice_notifier(&ip_mr_notifier);
2445 #endif
2446 reg_notif_fail:
2447 unregister_pernet_subsys(&ipmr_net_ops);
2448 reg_pernet_fail:
2449 kmem_cache_destroy(mrt_cachep);
2450 return err;
2451 }