/*
 *	Linux IPv6 multicast routing support for BSD pim6sd
 *	Based on net/ipv4/ipmr.c.
 *
 *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
 *		LSIIT Laboratory, Strasbourg, France
 *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
 *	Copyright (C)2007,2008 USAGI/WIDE Project
 *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/compat.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <net/checksum.h>
#include <net/netlink.h>
#include <net/fib_rules.h>
#include <net/ip6_route.h>
#include <linux/mroute6.h>
#include <linux/pim.h>
#include <net/addrconf.h>
#include <linux/netfilter_ipv6.h>
#include <linux/export.h>
#include <net/ip6_checksum.h>
#include <linux/netconf.h>
struct mr6_table {
	struct list_head	list;
#ifdef CONFIG_NET_NS
	struct net		*net;
#endif
	u32			id;
	struct sock		*mroute6_sk;
	struct timer_list	ipmr_expire_timer;
	struct list_head	mfc6_unres_queue;
	struct list_head	mfc6_cache_array[MFC6_LINES];
	struct mif_device	vif6_table[MAXMIFS];
	int			maxvif;
	atomic_t		cache_resolve_queue_len;
	bool			mroute_do_assert;
	bool			mroute_do_pim;
#ifdef CONFIG_IPV6_PIMSM_V2
	int			mroute_reg_vif_num;
#endif
};

struct ip6mr_rule {
	struct fib_rule		common;
};

struct ip6mr_result {
	struct mr6_table	*mrt;
};
/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

#define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock. Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   In this case data path is free of exclusive locks at all.
 */
static struct kmem_cache *mrt_cachep __read_mostly;

static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
static void ip6mr_free_table(struct mr6_table *mrt);

static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
			  struct sk_buff *skb, struct mfc6_cache *cache);
static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert);
static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
			       struct mfc6_cache *c, struct rtmsg *rtm);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
			       struct netlink_callback *cb);
static void mroute_clean_tables(struct mr6_table *mrt);
static void ipmr_expire_process(unsigned long arg);
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
#define ip6mr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)

static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
{
	struct mr6_table *mrt;

	ip6mr_for_each_table(mrt, net) {
		if (mrt->id == id)
			return mrt;
	}
	return NULL;
}
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr6_table **mrt)
{
	struct ip6mr_result res;
	struct fib_lookup_arg arg = { .result = &res, };
	int err;

	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
			       flowi6_to_flowi(flp6), 0, &arg);
	if (err < 0)
		return err;
	*mrt = res.mrt;
	return 0;
}
static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
			     int flags, struct fib_lookup_arg *arg)
{
	struct ip6mr_result *res = arg->result;
	struct mr6_table *mrt;

	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;
	case FR_ACT_UNREACHABLE:
		return -ENETUNREACH;
	case FR_ACT_PROHIBIT:
		return -EACCES;
	case FR_ACT_BLACKHOLE:
	default:
		return -EINVAL;
	}

	mrt = ip6mr_get_table(rule->fr_net, rule->table);
	if (mrt == NULL)
		return -EAGAIN;
	res->mrt = mrt;
	return 0;
}
static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
{
	return 1;
}

static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
	FRA_GENERIC_POLICY,
};

static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
				struct fib_rule_hdr *frh, struct nlattr **tb)
{
	return 0;
}

static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			      struct nlattr **tb)
{
	return 1;
}

static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
			   struct fib_rule_hdr *frh)
{
	frh->dst_len = 0;
	frh->src_len = 0;
	frh->tos     = 0;
	return 0;
}
static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.rule_size	= sizeof(struct ip6mr_rule),
	.addr_size	= sizeof(struct in6_addr),
	.action		= ip6mr_rule_action,
	.match		= ip6mr_rule_match,
	.configure	= ip6mr_rule_configure,
	.compare	= ip6mr_rule_compare,
	.default_pref	= fib_default_rule_pref,
	.fill		= ip6mr_rule_fill,
	.nlgroup	= RTNLGRP_IPV6_RULE,
	.policy		= ip6mr_rule_policy,
	.owner		= THIS_MODULE,
};
static int __net_init ip6mr_rules_init(struct net *net)
{
	struct fib_rules_ops *ops;
	struct mr6_table *mrt;
	int err;

	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	INIT_LIST_HEAD(&net->ipv6.mr6_tables);

	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
	if (mrt == NULL) {
		err = -ENOMEM;
		goto err1;
	}

	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
	if (err < 0)
		goto err2;

	net->ipv6.mr6_rules_ops = ops;
	return 0;

err2:
	kfree(mrt);
err1:
	fib_rules_unregister(ops);
	return err;
}
static void __net_exit ip6mr_rules_exit(struct net *net)
{
	struct mr6_table *mrt, *next;

	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
		list_del(&mrt->list);
		ip6mr_free_table(mrt);
	}
	fib_rules_unregister(net->ipv6.mr6_rules_ops);
}
#else
#define ip6mr_for_each_table(mrt, net) \
	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)

static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
{
	return net->ipv6.mrt6;
}

static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr6_table **mrt)
{
	*mrt = net->ipv6.mrt6;
	return 0;
}
static int __net_init ip6mr_rules_init(struct net *net)
{
	net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
	return net->ipv6.mrt6 ? 0 : -ENOMEM;
}

static void __net_exit ip6mr_rules_exit(struct net *net)
{
	ip6mr_free_table(net->ipv6.mrt6);
	net->ipv6.mrt6 = NULL;
}
#endif
static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
{
	struct mr6_table *mrt;
	unsigned int i;

	mrt = ip6mr_get_table(net, id);
	if (mrt != NULL)
		return mrt;

	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
	if (mrt == NULL)
		return NULL;
	mrt->id = id;
	write_pnet(&mrt->net, net);

	/* Forwarding cache */
	for (i = 0; i < MFC6_LINES; i++)
		INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);

	INIT_LIST_HEAD(&mrt->mfc6_unres_queue);

	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
		    (unsigned long)mrt);

#ifdef CONFIG_IPV6_PIMSM_V2
	mrt->mroute_reg_vif_num = -1;
#endif
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
#endif
	return mrt;
}

static void ip6mr_free_table(struct mr6_table *mrt)
{
	del_timer(&mrt->ipmr_expire_timer);
	mroute_clean_tables(mrt);
	kfree(mrt);
}
#ifdef CONFIG_PROC_FS

struct ipmr_mfc_iter {
	struct seq_net_private p;
	struct mr6_table *mrt;
	struct list_head *cache;
	int ct;
};

static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
					    struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mr6_table *mrt = it->mrt;
	struct mfc6_cache *mfc;

	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
		it->cache = &mrt->mfc6_cache_array[it->ct];
		list_for_each_entry(mfc, it->cache, list)
			if (pos-- == 0)
				return mfc;
	}
	read_unlock(&mrt_lock);

	spin_lock_bh(&mfc_unres_lock);
	it->cache = &mrt->mfc6_unres_queue;
	list_for_each_entry(mfc, it->cache, list)
		if (pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}
/*
 *	The /proc interfaces to multicast routing
 *	/proc/ip6_mr_cache /proc/ip6_mr_vif
 */
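
/*
 * ip6_mr_vif lists one multicast interface per row (index, device name,
 * byte/packet counters, flags); ip6_mr_cache lists one (S,G) entry per
 * row together with its input interface and per-oif TTL thresholds.
 */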
struct ipmr_vif_iter {
	struct seq_net_private p;
	struct mr6_table *mrt;
	int ct;
};

static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
					     struct ipmr_vif_iter *iter,
					     loff_t pos)
{
	struct mr6_table *mrt = iter->mrt;

	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
		if (!MIF_EXISTS(mrt, iter->ct))
			continue;
		if (pos-- == 0)
			return &mrt->vif6_table[iter->ct];
	}
	return NULL;
}
static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (mrt == NULL)
		return ERR_PTR(-ENOENT);

	iter->mrt = mrt;

	read_lock(&mrt_lock);
	return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
		    : SEQ_START_TOKEN;
}
static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt = iter->mrt;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ip6mr_vif_seq_idx(net, iter, 0);

	while (++iter->ct < mrt->maxvif) {
		if (!MIF_EXISTS(mrt, iter->ct))
			continue;
		return &mrt->vif6_table[iter->ct];
	}
	return NULL;
}

static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
{
	read_unlock(&mrt_lock);
}
static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct mr6_table *mrt = iter->mrt;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
	} else {
		const struct mif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
			   vif - mrt->vif6_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags);
	}
	return 0;
}
static const struct seq_operations ip6mr_vif_seq_ops = {
	.start = ip6mr_vif_seq_start,
	.next  = ip6mr_vif_seq_next,
	.stop  = ip6mr_vif_seq_stop,
	.show  = ip6mr_vif_seq_show,
};

static int ip6mr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
			    sizeof(struct ipmr_vif_iter));
}

static const struct file_operations ip6mr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open    = ip6mr_vif_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (mrt == NULL)
		return ERR_PTR(-ENOENT);

	it->mrt = mrt;
	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}
static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc6_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt = it->mrt;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(net, seq->private, 0);

	if (mfc->list.next != it->cache)
		return list_entry(mfc->list.next, struct mfc6_cache, list);

	if (it->cache == &mrt->mfc6_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);

	while (++it->ct < MFC6_LINES) {
		it->cache = &mrt->mfc6_cache_array[it->ct];
		if (list_empty(it->cache))
			continue;
		return list_first_entry(it->cache, struct mfc6_cache, list);
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mrt->mfc6_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	if (!list_empty(it->cache))
		return list_first_entry(it->cache, struct mfc6_cache, list);

end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}
static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct mr6_table *mrt = it->mrt;

	if (it->cache == &mrt->mfc6_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == mrt->mfc6_cache_array)
		read_unlock(&mrt_lock);
}
static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group                            "
			 "Origin                           "
			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
	} else {
		const struct mfc6_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;
		struct mr6_table *mrt = it->mrt;

		seq_printf(seq, "%pI6 %pI6 %-3hd",
			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
			   mfc->mf6c_parent);

		if (it->cache != &mrt->mfc6_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (MIF_EXISTS(mrt, n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d", n,
						   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}
static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
			    sizeof(struct ipmr_mfc_iter));
}

static const struct file_operations ip6mr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_mfc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
#endif
#ifdef CONFIG_IPV6_PIMSM_V2
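
/*
 * PIM-SM support: pim6_rcv() validates incoming PIM REGISTER messages
 * (type, checksum and the encapsulated IPv6 multicast packet), strips
 * the outer headers and re-injects the inner packet on the pim6reg
 * register device, so that it is forwarded like a natively received
 * multicast packet.
 */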
static int pim6_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct ipv6hdr   *encap;
	struct net_device *reg_dev = NULL;
	struct net *net = dev_net(skb->dev);
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int reg_vif_num;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
			     sizeof(*pim), IPPROTO_PIM,
			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* check if the inner packet is destined to mcast group */
	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
				   sizeof(*pim));

	if (!ipv6_addr_is_multicast(&encap->daddr) ||
	    encap->payload_len == 0 ||
	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
		goto drop;

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		goto drop;
	reg_vif_num = mrt->mroute_reg_vif_num;

	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = mrt->vif6_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		goto drop;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->protocol = htons(ETH_P_IPV6);
	skb->ip_summed = CHECKSUM_NONE;
	skb->pkt_type = PACKET_HOST;

	skb_tunnel_rx(skb, reg_dev);

	netif_rx(skb);

	dev_put(reg_dev);
	return 0;
 drop:
	kfree_skb(skb);
	return 0;
}
static const struct inet6_protocol pim6_protocol = {
	.handler	=	pim6_rcv,
};

/* Service routines creating virtual interfaces: PIMREG */
static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
				struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_oif	= dev->ifindex,
		.flowi6_iif	= skb->skb_iif,
		.flowi6_mark	= skb->mark,
	};
	int err;

	err = ip6mr_fib_lookup(net, &fl6, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
};
static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->destructor		= free_netdev;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}
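
/*
 * The MTU chosen above assumes the encapsulation pim6sd adds when it
 * turns these packets into REGISTER messages: an outer IPv6 header plus
 * the 8-byte PIM register header.
 */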
static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
{
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (mrt->id == RT6_TABLE_DFLT)
		sprintf(name, "pim6reg");
	else
		sprintf(name, "pim6reg%u", mrt->id);

	dev = alloc_netdev(0, name, reg_vif_setup);
	if (dev == NULL)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#endif
static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
{
	struct mif_device *v;
	struct net_device *dev;
	struct inet6_dev *in6_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif6_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vifi == mrt->mroute_reg_vif_num)
		mrt->mroute_reg_vif_num = -1;
#endif

	if (vifi + 1 == mrt->maxvif) {
		int tmp;
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (MIF_EXISTS(mrt, tmp))
				break;
		}
		mrt->maxvif = tmp + 1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		in6_dev->cnf.mc_forwarding--;
		inet6_netconf_notify_devconf(dev_net(dev),
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	if (v->flags & MIFF_REGISTER)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}
static inline void ip6mr_cache_free(struct mfc6_cache *c)
{
	kmem_cache_free(mrt_cachep, c);
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;

	atomic_dec(&mrt->cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else
			kfree_skb(skb);
	}

	ip6mr_cache_free(c);
}
/* Timer process for all the unresolved queue. */

static void ipmr_do_expire_process(struct mr6_table *mrt)
{
	unsigned long now = jiffies;
	unsigned long expires = 10 * HZ;
	struct mfc6_cache *c, *next;

	list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			/* not yet... */
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			continue;
		}

		list_del(&c->list);
		ip6mr_destroy_unres(mrt, c);
	}

	if (!list_empty(&mrt->mfc6_unres_queue))
		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
}

static void ipmr_expire_process(unsigned long arg)
{
	struct mr6_table *mrt = (struct mr6_table *)arg;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
		return;
	}

	if (!list_empty(&mrt->mfc6_unres_queue))
		ipmr_do_expire_process(mrt);

	spin_unlock(&mfc_unres_lock);
}
/* Fill oifs list. It is called under write locked mrt_lock. */
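/*
 * A ttl of 0 or 255 means "do not forward out of this mif"; any other
 * value is the minimum hop limit required for that outgoing interface.
 * minvif/maxvif simply bound the range that ip6_mr_forward() has to scan.
 */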
static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
				    unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXMIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);

	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
		if (MIF_EXISTS(mrt, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}
static int mif6_add(struct net *net, struct mr6_table *mrt,
		    struct mif6ctl *vifc, int mrtsock)
{
	int vifi = vifc->mif6c_mifi;
	struct mif_device *v = &mrt->vif6_table[vifi];
	struct net_device *dev;
	struct inet6_dev *in6_dev;
	int err;

	/* Is vif busy ? */
	if (MIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->mif6c_flags) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ip6mr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case 0:
		dev = dev_get_by_index(net, vifc->mif6c_pifi);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		in6_dev->cnf.mc_forwarding++;
		inet6_netconf_notify_devconf(dev_net(dev),
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->flags = vifc->mif6c_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->link = dev->ifindex;
	if (v->flags & MIFF_REGISTER)
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (v->flags & MIFF_REGISTER)
		mrt->mroute_reg_vif_num = vifi;
#endif
	if (vifi + 1 > mrt->maxvif)
		mrt->maxvif = vifi + 1;
	write_unlock_bh(&mrt_lock);
	return 0;
}
static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
					   const struct in6_addr *origin,
					   const struct in6_addr *mcastgrp)
{
	int line = MFC6_HASH(mcastgrp, origin);
	struct mfc6_cache *c;

	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
			return c;
	}
	return NULL;
}
/*
 *	Allocate a multicast cache entry
 */
static struct mfc6_cache *ip6mr_cache_alloc(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (c == NULL)
		return NULL;
	c->mfc_un.res.minvif = MAXMIFS;
	return c;
}

static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (c == NULL)
		return NULL;
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10 * HZ;
	return c;
}
/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
				struct mfc6_cache *uc, struct mfc6_cache *c)
{
	struct sk_buff *skb;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));

			if (__ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
			}
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else
			ip6_mr_forward(net, mrt, skb, c);
	}
}
/*
 *	Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */
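
/*
 * The report is an skb queued on mroute6_sk that starts with a struct
 * mrt6msg: im6_msgtype is MRT6MSG_NOCACHE, MRT6MSG_WRONGMIF or
 * MRT6MSG_WHOLEPKT, im6_mif is the arrival mif, and im6_src/im6_dst are
 * copied from the offending packet.  pim6sd reads these upcalls from its
 * raw ICMPv6 socket.
 */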
static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert)
{
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+ sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		skb_push(skb, -skb_network_offset(pkt));

		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
		msg->im6_mif = mrt->mroute_reg_vif_num;
		msg->im6_pad = 0;
		msg->im6_src = ipv6_hdr(pkt)->saddr;
		msg->im6_dst = ipv6_hdr(pkt)->daddr;

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
	/*
	 *	Copy the IP header
	 */

	skb_put(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

	/*
	 *	Add our header
	 */
	skb_put(skb, sizeof(*msg));
	skb_reset_transport_header(skb);
	msg = (struct mrt6msg *)skb_transport_header(skb);

	msg->im6_mbz = 0;
	msg->im6_msgtype = assert;
	msg->im6_mif = mifi;
	msg->im6_pad = 0;
	msg->im6_src = ipv6_hdr(pkt)->saddr;
	msg->im6_dst = ipv6_hdr(pkt)->daddr;

	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
	skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	if (mrt->mroute6_sk == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to user space multicast routing algorithms
	 */
	ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
	if (ret < 0) {
		net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
		kfree_skb(skb);
	}

	return ret;
}
/*
 *	Queue a packet for resolution. It gets locked cache entry!
 */

static int
ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
{
	bool found = false;
	int err;
	struct mfc6_cache *c;

	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
			found = true;
			break;
		}
	}

	if (!found) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
		    (c = ip6mr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mf6c_parent = -1;
		c->mf6c_origin = ipv6_hdr(skb)->saddr;
		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;

		/*
		 *	Reflect first query at pim6sd
		 */
		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ip6mr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->list, &mrt->mfc6_unres_queue);

		ipmr_do_expire_process(mrt);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}
/*
 *	MFC6 cache manipulation by user space
 */

static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc)
{
	int line;
	struct mfc6_cache *c, *next;

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			ip6mr_cache_free(c);
			return 0;
		}
	}
	return -ENOENT;
}
static int ip6mr_device_event(struct notifier_block *this,
			      unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct net *net = dev_net(dev);
	struct mr6_table *mrt;
	struct mif_device *v;
	int ct;
	LIST_HEAD(list);

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	ip6mr_for_each_table(mrt, net) {
		v = &mrt->vif6_table[0];
		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
			if (v->dev == dev)
				mif6_delete(mrt, ct, &list);
		}
	}
	unregister_netdevice_many(&list);

	return NOTIFY_DONE;
}

static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};
/*
 *	Setup for IP multicast routing
 */
static int __net_init ip6mr_net_init(struct net *net)
{
	int err;

	err = ip6mr_rules_init(net);
	if (err < 0)
		goto fail;

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_net_fops_create(net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
		goto proc_vif_fail;
	if (!proc_net_fops_create(net, "ip6_mr_cache", 0, &ip6mr_mfc_fops))
		goto proc_cache_fail;
#endif

	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	proc_net_remove(net, "ip6_mr_vif");
proc_vif_fail:
	ip6mr_rules_exit(net);
#endif
fail:
	return err;
}

static void __net_exit ip6mr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ip6_mr_cache");
	proc_net_remove(net, "ip6_mr_vif");
#endif
	ip6mr_rules_exit(net);
}

static struct pernet_operations ip6mr_net_ops = {
	.init = ip6mr_net_init,
	.exit = ip6mr_net_exit,
};
int __init ip6_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
				       sizeof(struct mfc6_cache),
				       0, SLAB_HWCACHE_ALIGN,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ip6mr_net_ops);
	if (err)
		goto reg_pernet_fail;

	err = register_netdevice_notifier(&ip6_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
		pr_err("%s: can't add PIM protocol\n", __func__);
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL,
		      ip6mr_rtm_dumproute, NULL);
	return 0;
#ifdef CONFIG_IPV6_PIMSM_V2
add_proto_fail:
	unregister_netdevice_notifier(&ip6_mr_notifier);
#endif
reg_notif_fail:
	unregister_pernet_subsys(&ip6mr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}

void ip6_mr_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_mr_notifier);
	unregister_pernet_subsys(&ip6mr_net_ops);
	kmem_cache_destroy(mrt_cachep);
}
static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
			 struct mf6cctl *mfc, int mrtsock)
{
	bool found = false;
	int line;
	struct mfc6_cache *uc, *c;
	unsigned char ttls[MAXMIFS];
	int i;

	if (mfc->mf6cc_parent >= MAXMIFS)
		return -ENFILE;

	memset(ttls, 255, MAXMIFS);
	for (i = 0; i < MAXMIFS; i++) {
		if (IF_ISSET(i, &mfc->mf6cc_ifset))
			ttls[i] = 1;
	}

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
			found = true;
			break;
		}
	}

	if (found) {
		write_lock_bh(&mrt_lock);
		c->mf6c_parent = mfc->mf6cc_parent;
		ip6mr_update_thresholds(mrt, c, ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
		return -EINVAL;

	c = ip6mr_cache_alloc();
	if (c == NULL)
		return -ENOMEM;

	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
	c->mf6c_parent = mfc->mf6cc_parent;
	ip6mr_update_thresholds(mrt, c, ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	list_add(&c->list, &mrt->mfc6_cache_array[line]);
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
			list_del(&uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	if (list_empty(&mrt->mfc6_unres_queue))
		del_timer(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (found) {
		ip6mr_cache_resolve(net, mrt, uc, c);
		ip6mr_cache_free(uc);
	}
	return 0;
}
/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct mr6_table *mrt)
{
	int i;
	LIST_HEAD(list);
	struct mfc6_cache *c, *next;

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < mrt->maxvif; i++) {
		if (!(mrt->vif6_table[i].flags & VIFF_STATIC))
			mif6_delete(mrt, i, &list);
	}
	unregister_netdevice_many(&list);

	/*
	 *	Wipe the cache
	 */
	for (i = 0; i < MFC6_LINES; i++) {
		list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
			if (c->mfc_flags & MFC_STATIC)
				continue;
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			ip6mr_cache_free(c);
		}
	}

	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
			list_del(&c->list);
			ip6mr_destroy_unres(mrt, c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}
static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
{
	int err = 0;
	struct net *net = sock_net(sk);

	rtnl_lock();
	write_lock_bh(&mrt_lock);
	if (likely(mrt->mroute6_sk == NULL)) {
		mrt->mroute6_sk = sk;
		net->ipv6.devconf_all->mc_forwarding++;
		inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
					     NETCONFA_IFINDEX_ALL,
					     net->ipv6.devconf_all);
	} else
		err = -EADDRINUSE;
	write_unlock_bh(&mrt_lock);

	rtnl_unlock();

	return err;
}
int ip6mr_sk_done(struct sock *sk)
{
	int err = -EACCES;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	rtnl_lock();
	ip6mr_for_each_table(mrt, net) {
		if (sk == mrt->mroute6_sk) {
			write_lock_bh(&mrt_lock);
			mrt->mroute6_sk = NULL;
			net->ipv6.devconf_all->mc_forwarding--;
			inet6_netconf_notify_devconf(net,
						     NETCONFA_MC_FORWARDING,
						     NETCONFA_IFINDEX_ALL,
						     net->ipv6.devconf_all);
			write_unlock_bh(&mrt_lock);

			mroute_clean_tables(mrt);
			err = 0;
			break;
		}
	}
	rtnl_unlock();

	return err;
}
struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
{
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->skb_iif,
		.flowi6_oif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		return NULL;

	return mrt->mroute6_sk;
}
/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */
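
/*
 * The handlers below cover MRT6_INIT/MRT6_DONE, MRT6_ADD_MIF/MRT6_DEL_MIF,
 * MRT6_ADD_MFC/MRT6_DEL_MFC, MRT6_ASSERT and, when configured, MRT6_PIM
 * and MRT6_TABLE.  Roughly, a routing daemon such as pim6sd is expected
 * to do something like this (illustrative sketch only):
 *
 *	int s = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
 *	int one = 1;
 *	setsockopt(s, IPPROTO_IPV6, MRT6_INIT, &one, sizeof(one));
 *	struct mif6ctl mc = { .mif6c_mifi = 0,
 *			      .mif6c_pifi = if_nametoindex("eth0") };
 *	setsockopt(s, IPPROTO_IPV6, MRT6_ADD_MIF, &mc, sizeof(mc));
 *	... add MFC entries, read MRT6MSG_* upcalls from s ...
 *	setsockopt(s, IPPROTO_IPV6, MRT6_DONE, NULL, 0);
 */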
int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
	int ret;
	struct mif6ctl vif;
	struct mf6cctl mfc;
	mifi_t mifi;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (mrt == NULL)
		return -ENOENT;

	if (optname != MRT6_INIT) {
		if (sk != mrt->mroute6_sk && !ns_capable(net->user_ns, CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT6_INIT:
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
			return -EOPNOTSUPP;
		if (optlen < sizeof(int))
			return -EINVAL;

		return ip6mr_sk_init(mrt, sk);

	case MRT6_DONE:
		return ip6mr_sk_done(sk);

	case MRT6_ADD_MIF:
		if (optlen < sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.mif6c_mifi >= MAXMIFS)
			return -ENFILE;
		rtnl_lock();
		ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
		rtnl_unlock();
		return ret;

	case MRT6_DEL_MIF:
		if (optlen < sizeof(mifi_t))
			return -EINVAL;
		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
			return -EFAULT;
		rtnl_lock();
		ret = mif6_delete(mrt, mifi, NULL);
		rtnl_unlock();
		return ret;

	/*
	 *	Manipulate the forwarding caches. These live
	 *	in a sort of kernel/user symbiosis.
	 */
	case MRT6_ADD_MFC:
	case MRT6_DEL_MFC:
		if (optlen < sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname == MRT6_DEL_MFC)
			ret = ip6mr_mfc_delete(mrt, &mfc);
		else
			ret = ip6mr_mfc_add(net, mrt, &mfc, sk == mrt->mroute6_sk);
		rtnl_unlock();
		return ret;

	/*
	 *	Control PIM assert (to activate pim will activate assert)
	 */
	case MRT6_ASSERT:
	{
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		mrt->mroute_do_assert = v;
		return 0;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
	{
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = !!v;
		if (v != mrt->mroute_do_pim) {
			mrt->mroute_do_pim = v;
			mrt->mroute_do_assert = v;
		}
		return 0;
	}
#endif

#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	case MRT6_TABLE:
	{
		u32 v;

		if (optlen != sizeof(u32))
			return -EINVAL;
		if (get_user(v, (u32 __user *)optval))
			return -EFAULT;
		if (sk == mrt->mroute6_sk)
			return -EBUSY;

		if (!ip6mr_new_table(net, v))
			return -ENOMEM;
		raw6_sk(sk)->ip6mr_table = v;
		return 0;
	}
#endif
	/*
	 *	Spurious command, or MRT6_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}
/*
 *	Getsock opt support for the multicast routing system.
 */

int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
			  int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (mrt == NULL)
		return -ENOENT;

	switch (optname) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
		val = mrt->mroute_do_pim;
		break;
#endif
	case MRT6_ASSERT:
		val = mrt->mroute_do_assert;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}
/*
 *	The IP multicast ioctl support routines.
 */
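
/*
 * Only two commands are handled here: SIOCGETMIFCNT_IN6 returns the
 * packet/byte counters of one mif, and SIOCGETSGCNT_IN6 returns the
 * counters of one (source, group) cache entry.
 */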
int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req6 sr;
	struct sioc_mif_req6 vr;
	struct mif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (mrt == NULL)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &mrt->vif6_table[vr.mifi];
		if (MIF_EXISTS(mrt, vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
#ifdef CONFIG_COMPAT
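/*
 * 32-bit compat variants of the ioctl structures: same layout as
 * sioc_sg_req6/sioc_mif_req6 but with compat_ulong_t counters, so a
 * 32-bit pim6sd keeps working on a 64-bit kernel.
 */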
struct compat_sioc_sg_req6 {
	struct sockaddr_in6 src;
	struct sockaddr_in6 grp;
	compat_ulong_t pktcnt;
	compat_ulong_t bytecnt;
	compat_ulong_t wrong_if;
};

struct compat_sioc_mif_req6 {
	mifi_t	mifi;
	compat_ulong_t icount;
	compat_ulong_t ocount;
	compat_ulong_t ibytes;
	compat_ulong_t obytes;
};
int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
	struct compat_sioc_sg_req6 sr;
	struct compat_sioc_mif_req6 vr;
	struct mif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (mrt == NULL)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &mrt->vif6_table[vr.mifi];
		if (MIF_EXISTS(mrt, vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
#endif
static inline int ip6mr_forward2_finish(struct sk_buff *skb)
{
	IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
			 IPSTATS_MIB_OUTFORWDATAGRAMS);
	IP6_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
			 IPSTATS_MIB_OUTOCTETS, skb->len);
	return dst_output(skb);
}
/*
 *	Processing handlers for ip6mr_forward
 */
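
/*
 * ip6mr_forward2() transmits one copy of the packet out of a single mif:
 * register mifs bounce the packet to pim6sd as MRT6MSG_WHOLEPKT, normal
 * mifs get a fresh route lookup, a decremented hop limit and then go
 * through the NF_INET_FORWARD hook.
 */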
static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
	struct ipv6hdr *ipv6h;
	struct mif_device *vif = &mrt->vif6_table[vifi];
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi6 fl6;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vif->flags & MIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
		goto out_free;
	}
#endif

	ipv6h = ipv6_hdr(skb);

	fl6 = (struct flowi6) {
		.flowi6_oif = vif->link,
		.daddr = ipv6h->daddr,
	};

	dst = ip6_route_output(net, NULL, &fl6);
	if (dst->error) {
		dst_release(dst);
		goto out_free;
	}

	skb_dst_drop(skb);
	skb_dst_set(skb, dst);

	/*
	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets not depending to what interface
	 * program is joined.
	 * If we will not make it, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	dev = vif->dev;
	skb->dev = dev;
	vif->pkt_out++;
	vif->bytes_out += skb->len;

	/* We are about to write */
	/* XXX: extension headers? */
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
		goto out_free;

	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;

	IP6CB(skb)->flags |= IP6SKB_FORWARDED;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dev,
		       ip6mr_forward2_finish);

out_free:
	kfree_skb(skb);
	return 0;
}
static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
{
	int ct;

	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
		if (mrt->vif6_table[ct].dev == dev)
			break;
	}
	return ct;
}
static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
			  struct sk_buff *skb, struct mfc6_cache *cache)
{
	int psend = -1;
	int vif, ct;

	vif = cache->mf6c_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (mrt->vif6_table[vif].dev != skb->dev) {
		int true_vifi;

		cache->mfc_un.res.wrong_if++;
		true_vifi = ip6mr_find_vif(mrt, skb->dev);

		if (true_vifi >= 0 && mrt->mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (mrt->mroute_do_pim ||
		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
		}
		goto dont_forward;
	}

	mrt->vif6_table[vif].pkt_in++;
	mrt->vif6_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ip6mr_forward2(net, mrt, skb2, cache, psend);
			}
			psend = ct;
		}
	}
	if (psend != -1) {
		ip6mr_forward2(net, mrt, skb, cache, psend);
		return 0;
	}

dont_forward:
	kfree_skb(skb);
	return 0;
}
/*
 *	Multicast packets for forwarding arrive here
 */

int ip6_mr_input(struct sk_buff *skb)
{
	struct mfc6_cache *cache;
	struct net *net = dev_net(skb->dev);
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int err;

	err = ip6mr_fib_lookup(net, &fl6, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(mrt,
				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache == NULL) {
		int vif;

		vif = ip6mr_find_vif(mrt, skb->dev);
		if (vif >= 0) {
			int err = ip6mr_cache_unresolved(mrt, vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip6_mr_forward(net, mrt, skb, cache);

	read_unlock(&mrt_lock);

	return 0;
}
static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
			       struct mfc6_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	struct nlattr *mp_attr;
	struct rta_mfc_stats mfcs;

	/* If cache is unresolved, don't try to parse IIF and OIF */
	if (c->mf6c_parent >= MAXMIFS)
		return -ENOENT;

	if (MIF_EXISTS(mrt, c->mf6c_parent) &&
	    nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0)
		return -EMSGSIZE;
	mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
	if (mp_attr == NULL)
		return -EMSGSIZE;

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
			nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
			if (nhp == NULL) {
				nla_nest_cancel(skb, mp_attr);
				return -EMSGSIZE;
			}

			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}

	nla_nest_end(skb, mp_attr);

	mfcs.mfcs_packets = c->mfc_un.res.pkt;
	mfcs.mfcs_bytes = c->mfc_un.res.bytes;
	mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
	if (nla_put(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs) < 0)
		return -EMSGSIZE;

	rtm->rtm_type = RTN_MULTICAST;
	return 1;
}
int ip6mr_get_route(struct net *net,
		    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mr6_table *mrt;
	struct mfc6_cache *cache;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (mrt == NULL)
		return -ENOENT;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);

	if (!cache) {
		struct sk_buff *skb2;
		struct ipv6hdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}

		/* really correct? */
		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_reset_transport_header(skb2);

		skb_put(skb2, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb2);

		iph = ipv6_hdr(skb2);
		iph->version = 0;
		iph->priority = 0;
		iph->flow_lbl[0] = 0;
		iph->flow_lbl[1] = 0;
		iph->flow_lbl[2] = 0;
		iph->payload_len = 0;
		iph->nexthdr = IPPROTO_NONE;
		iph->hop_limit = 0;
		iph->saddr = rt->rt6i_src.addr;
		iph->daddr = rt->rt6i_dst.addr;

		err = ip6mr_cache_unresolved(mrt, vif, skb2);
		read_unlock(&mrt_lock);

		return err;
	}

	if (!nowait && (rtm->rtm_flags & RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;

	err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}
static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
			     u32 portid, u32 seq, struct mfc6_cache *c)
{
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;

	nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
	if (nlh == NULL)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family   = RTNL_FAMILY_IPMR;
	rtm->rtm_dst_len  = 128;
	rtm->rtm_src_len  = 128;
	rtm->rtm_table    = mrt->id;
	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
		goto nla_put_failure;
	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = RTPROT_UNSPEC;

	if (nla_put(skb, RTA_SRC, 16, &c->mf6c_origin) ||
	    nla_put(skb, RTA_DST, 16, &c->mf6c_mcastgrp))
		goto nla_put_failure;
	if (__ip6mr_fill_mroute(mrt, skb, c, rtm) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct mr6_table *mrt;
	struct mfc6_cache *mfc;
	unsigned int t = 0, s_t;
	unsigned int h = 0, s_h;
	unsigned int e = 0, s_e;

	s_t = cb->args[0];
	s_h = cb->args[1];
	s_e = cb->args[2];

	read_lock(&mrt_lock);
	ip6mr_for_each_table(mrt, net) {
		if (t < s_t)
			goto next_table;
		if (t > s_t)
			s_h = 0;
		for (h = s_h; h < MFC6_LINES; h++) {
			list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
				if (e < s_e)
					goto next_entry;
				if (ip6mr_fill_mroute(mrt, skb,
						      NETLINK_CB(cb->skb).portid,
						      cb->nlh->nlmsg_seq,
						      mfc) < 0)
					goto done;
next_entry:
				e++;
			}
			e = s_e = 0;
		}
		s_h = 0;
next_table:
		t++;
	}
done:
	read_unlock(&mrt_lock);

	cb->args[2] = e;
	cb->args[1] = h;
	cb->args[0] = t;

	return skb->len;
}