2 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
24 * Fixed routing subtrees.
27 #define pr_fmt(fmt) "IPv6: " fmt
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
55 #include <linux/rtnetlink.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
61 #include <asm/uaccess.h>
64 #include <linux/sysctl.h>
67 static struct rt6_info
*ip6_rt_copy(struct rt6_info
*ort
,
68 const struct in6_addr
*dest
);
69 static struct dst_entry
*ip6_dst_check(struct dst_entry
*dst
, u32 cookie
);
70 static unsigned int ip6_default_advmss(const struct dst_entry
*dst
);
71 static unsigned int ip6_mtu(const struct dst_entry
*dst
);
72 static struct dst_entry
*ip6_negative_advice(struct dst_entry
*);
73 static void ip6_dst_destroy(struct dst_entry
*);
74 static void ip6_dst_ifdown(struct dst_entry
*,
75 struct net_device
*dev
, int how
);
76 static int ip6_dst_gc(struct dst_ops
*ops
);
78 static int ip6_pkt_discard(struct sk_buff
*skb
);
79 static int ip6_pkt_discard_out(struct sk_buff
*skb
);
80 static void ip6_link_failure(struct sk_buff
*skb
);
81 static void ip6_rt_update_pmtu(struct dst_entry
*dst
, u32 mtu
);
83 #ifdef CONFIG_IPV6_ROUTE_INFO
84 static struct rt6_info
*rt6_add_route_info(struct net
*net
,
85 const struct in6_addr
*prefix
, int prefixlen
,
86 const struct in6_addr
*gwaddr
, int ifindex
,
88 static struct rt6_info
*rt6_get_route_info(struct net
*net
,
89 const struct in6_addr
*prefix
, int prefixlen
,
90 const struct in6_addr
*gwaddr
, int ifindex
);
93 static u32
*ipv6_cow_metrics(struct dst_entry
*dst
, unsigned long old
)
95 struct rt6_info
*rt
= (struct rt6_info
*) dst
;
96 struct inet_peer
*peer
;
99 if (!(rt
->dst
.flags
& DST_HOST
))
103 rt6_bind_peer(rt
, 1);
105 peer
= rt
->rt6i_peer
;
107 u32
*old_p
= __DST_METRICS_PTR(old
);
108 unsigned long prev
, new;
111 if (inet_metrics_new(peer
))
112 memcpy(p
, old_p
, sizeof(u32
) * RTAX_MAX
);
114 new = (unsigned long) p
;
115 prev
= cmpxchg(&dst
->_metrics
, old
, new);
118 p
= __DST_METRICS_PTR(prev
);
119 if (prev
& DST_METRICS_READ_ONLY
)
126 static inline const void *choose_neigh_daddr(struct rt6_info
*rt
, const void *daddr
)
128 struct in6_addr
*p
= &rt
->rt6i_gateway
;
130 if (!ipv6_addr_any(p
))
131 return (const void *) p
;
135 static struct neighbour
*ip6_neigh_lookup(const struct dst_entry
*dst
, const void *daddr
)
137 struct rt6_info
*rt
= (struct rt6_info
*) dst
;
140 daddr
= choose_neigh_daddr(rt
, daddr
);
141 n
= __ipv6_neigh_lookup(&nd_tbl
, dst
->dev
, daddr
);
144 return neigh_create(&nd_tbl
, daddr
, dst
->dev
);
147 static int rt6_bind_neighbour(struct rt6_info
*rt
, struct net_device
*dev
)
149 struct neighbour
*n
= __ipv6_neigh_lookup(&nd_tbl
, dev
, &rt
->rt6i_gateway
);
151 n
= neigh_create(&nd_tbl
, &rt
->rt6i_gateway
, dev
);
155 dst_set_neighbour(&rt
->dst
, n
);
160 static struct dst_ops ip6_dst_ops_template
= {
162 .protocol
= cpu_to_be16(ETH_P_IPV6
),
165 .check
= ip6_dst_check
,
166 .default_advmss
= ip6_default_advmss
,
168 .cow_metrics
= ipv6_cow_metrics
,
169 .destroy
= ip6_dst_destroy
,
170 .ifdown
= ip6_dst_ifdown
,
171 .negative_advice
= ip6_negative_advice
,
172 .link_failure
= ip6_link_failure
,
173 .update_pmtu
= ip6_rt_update_pmtu
,
174 .local_out
= __ip6_local_out
,
175 .neigh_lookup
= ip6_neigh_lookup
,
178 static unsigned int ip6_blackhole_mtu(const struct dst_entry
*dst
)
180 unsigned int mtu
= dst_metric_raw(dst
, RTAX_MTU
);
182 return mtu
? : dst
->dev
->mtu
;
185 static void ip6_rt_blackhole_update_pmtu(struct dst_entry
*dst
, u32 mtu
)
189 static u32
*ip6_rt_blackhole_cow_metrics(struct dst_entry
*dst
,
195 static struct dst_ops ip6_dst_blackhole_ops
= {
197 .protocol
= cpu_to_be16(ETH_P_IPV6
),
198 .destroy
= ip6_dst_destroy
,
199 .check
= ip6_dst_check
,
200 .mtu
= ip6_blackhole_mtu
,
201 .default_advmss
= ip6_default_advmss
,
202 .update_pmtu
= ip6_rt_blackhole_update_pmtu
,
203 .cow_metrics
= ip6_rt_blackhole_cow_metrics
,
204 .neigh_lookup
= ip6_neigh_lookup
,
207 static const u32 ip6_template_metrics
[RTAX_MAX
] = {
208 [RTAX_HOPLIMIT
- 1] = 255,
211 static struct rt6_info ip6_null_entry_template
= {
213 .__refcnt
= ATOMIC_INIT(1),
216 .error
= -ENETUNREACH
,
217 .input
= ip6_pkt_discard
,
218 .output
= ip6_pkt_discard_out
,
220 .rt6i_flags
= (RTF_REJECT
| RTF_NONEXTHOP
),
221 .rt6i_protocol
= RTPROT_KERNEL
,
222 .rt6i_metric
= ~(u32
) 0,
223 .rt6i_ref
= ATOMIC_INIT(1),
226 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
228 static int ip6_pkt_prohibit(struct sk_buff
*skb
);
229 static int ip6_pkt_prohibit_out(struct sk_buff
*skb
);
231 static struct rt6_info ip6_prohibit_entry_template
= {
233 .__refcnt
= ATOMIC_INIT(1),
237 .input
= ip6_pkt_prohibit
,
238 .output
= ip6_pkt_prohibit_out
,
240 .rt6i_flags
= (RTF_REJECT
| RTF_NONEXTHOP
),
241 .rt6i_protocol
= RTPROT_KERNEL
,
242 .rt6i_metric
= ~(u32
) 0,
243 .rt6i_ref
= ATOMIC_INIT(1),
246 static struct rt6_info ip6_blk_hole_entry_template
= {
248 .__refcnt
= ATOMIC_INIT(1),
252 .input
= dst_discard
,
253 .output
= dst_discard
,
255 .rt6i_flags
= (RTF_REJECT
| RTF_NONEXTHOP
),
256 .rt6i_protocol
= RTPROT_KERNEL
,
257 .rt6i_metric
= ~(u32
) 0,
258 .rt6i_ref
= ATOMIC_INIT(1),
263 /* allocate dst with ip6_dst_ops */
264 static inline struct rt6_info
*ip6_dst_alloc(struct dst_ops
*ops
,
265 struct net_device
*dev
,
268 struct rt6_info
*rt
= dst_alloc(ops
, dev
, 0, 0, flags
);
271 memset(&rt
->rt6i_table
, 0,
272 sizeof(*rt
) - sizeof(struct dst_entry
));
277 static void ip6_dst_destroy(struct dst_entry
*dst
)
279 struct rt6_info
*rt
= (struct rt6_info
*)dst
;
280 struct inet6_dev
*idev
= rt
->rt6i_idev
;
281 struct inet_peer
*peer
= rt
->rt6i_peer
;
283 if (!(rt
->dst
.flags
& DST_HOST
))
284 dst_destroy_metrics_generic(dst
);
287 rt
->rt6i_idev
= NULL
;
291 if (!(rt
->rt6i_flags
& RTF_EXPIRES
) && dst
->from
)
292 dst_release(dst
->from
);
295 rt
->rt6i_peer
= NULL
;
300 static atomic_t __rt6_peer_genid
= ATOMIC_INIT(0);
302 static u32
rt6_peer_genid(void)
304 return atomic_read(&__rt6_peer_genid
);
307 void rt6_bind_peer(struct rt6_info
*rt
, int create
)
309 struct inet_peer
*peer
;
311 peer
= inet_getpeer_v6(&rt
->rt6i_dst
.addr
, create
);
312 if (peer
&& cmpxchg(&rt
->rt6i_peer
, NULL
, peer
) != NULL
)
315 rt
->rt6i_peer_genid
= rt6_peer_genid();
318 static void ip6_dst_ifdown(struct dst_entry
*dst
, struct net_device
*dev
,
321 struct rt6_info
*rt
= (struct rt6_info
*)dst
;
322 struct inet6_dev
*idev
= rt
->rt6i_idev
;
323 struct net_device
*loopback_dev
=
324 dev_net(dev
)->loopback_dev
;
326 if (dev
!= loopback_dev
&& idev
&& idev
->dev
== dev
) {
327 struct inet6_dev
*loopback_idev
=
328 in6_dev_get(loopback_dev
);
330 rt
->rt6i_idev
= loopback_idev
;
336 static bool rt6_check_expired(const struct rt6_info
*rt
)
338 struct rt6_info
*ort
= NULL
;
340 if (rt
->rt6i_flags
& RTF_EXPIRES
) {
341 if (time_after(jiffies
, rt
->dst
.expires
))
343 } else if (rt
->dst
.from
) {
344 ort
= (struct rt6_info
*) rt
->dst
.from
;
345 return (ort
->rt6i_flags
& RTF_EXPIRES
) &&
346 time_after(jiffies
, ort
->dst
.expires
);
351 static bool rt6_need_strict(const struct in6_addr
*daddr
)
353 return ipv6_addr_type(daddr
) &
354 (IPV6_ADDR_MULTICAST
| IPV6_ADDR_LINKLOCAL
| IPV6_ADDR_LOOPBACK
);
358 * Route lookup. Any table->tb6_lock is implied.
361 static inline struct rt6_info
*rt6_device_match(struct net
*net
,
363 const struct in6_addr
*saddr
,
367 struct rt6_info
*local
= NULL
;
368 struct rt6_info
*sprt
;
370 if (!oif
&& ipv6_addr_any(saddr
))
373 for (sprt
= rt
; sprt
; sprt
= sprt
->dst
.rt6_next
) {
374 struct net_device
*dev
= sprt
->dst
.dev
;
377 if (dev
->ifindex
== oif
)
379 if (dev
->flags
& IFF_LOOPBACK
) {
380 if (!sprt
->rt6i_idev
||
381 sprt
->rt6i_idev
->dev
->ifindex
!= oif
) {
382 if (flags
& RT6_LOOKUP_F_IFACE
&& oif
)
384 if (local
&& (!oif
||
385 local
->rt6i_idev
->dev
->ifindex
== oif
))
391 if (ipv6_chk_addr(net
, saddr
, dev
,
392 flags
& RT6_LOOKUP_F_IFACE
))
401 if (flags
& RT6_LOOKUP_F_IFACE
)
402 return net
->ipv6
.ip6_null_entry
;
408 #ifdef CONFIG_IPV6_ROUTER_PREF
409 static void rt6_probe(struct rt6_info
*rt
)
411 struct neighbour
*neigh
;
413 * Okay, this does not seem to be appropriate
414 * for now, however, we need to check if it
415 * is really so; aka Router Reachability Probing.
417 * Router Reachability Probe MUST be rate-limited
418 * to no more than one per minute.
421 neigh
= rt
? dst_get_neighbour_noref(&rt
->dst
) : NULL
;
422 if (!neigh
|| (neigh
->nud_state
& NUD_VALID
))
424 read_lock_bh(&neigh
->lock
);
425 if (!(neigh
->nud_state
& NUD_VALID
) &&
426 time_after(jiffies
, neigh
->updated
+ rt
->rt6i_idev
->cnf
.rtr_probe_interval
)) {
427 struct in6_addr mcaddr
;
428 struct in6_addr
*target
;
430 neigh
->updated
= jiffies
;
431 read_unlock_bh(&neigh
->lock
);
433 target
= (struct in6_addr
*)&neigh
->primary_key
;
434 addrconf_addr_solict_mult(target
, &mcaddr
);
435 ndisc_send_ns(rt
->dst
.dev
, NULL
, target
, &mcaddr
, NULL
);
437 read_unlock_bh(&neigh
->lock
);
443 static inline void rt6_probe(struct rt6_info
*rt
)
449 * Default Router Selection (RFC 2461 6.3.6)
451 static inline int rt6_check_dev(struct rt6_info
*rt
, int oif
)
453 struct net_device
*dev
= rt
->dst
.dev
;
454 if (!oif
|| dev
->ifindex
== oif
)
456 if ((dev
->flags
& IFF_LOOPBACK
) &&
457 rt
->rt6i_idev
&& rt
->rt6i_idev
->dev
->ifindex
== oif
)
462 static inline int rt6_check_neigh(struct rt6_info
*rt
)
464 struct neighbour
*neigh
;
468 neigh
= dst_get_neighbour_noref(&rt
->dst
);
469 if (rt
->rt6i_flags
& RTF_NONEXTHOP
||
470 !(rt
->rt6i_flags
& RTF_GATEWAY
))
473 read_lock_bh(&neigh
->lock
);
474 if (neigh
->nud_state
& NUD_VALID
)
476 #ifdef CONFIG_IPV6_ROUTER_PREF
477 else if (neigh
->nud_state
& NUD_FAILED
)
482 read_unlock_bh(&neigh
->lock
);
489 static int rt6_score_route(struct rt6_info
*rt
, int oif
,
494 m
= rt6_check_dev(rt
, oif
);
495 if (!m
&& (strict
& RT6_LOOKUP_F_IFACE
))
497 #ifdef CONFIG_IPV6_ROUTER_PREF
498 m
|= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt
->rt6i_flags
)) << 2;
500 n
= rt6_check_neigh(rt
);
501 if (!n
&& (strict
& RT6_LOOKUP_F_REACHABLE
))
506 static struct rt6_info
*find_match(struct rt6_info
*rt
, int oif
, int strict
,
507 int *mpri
, struct rt6_info
*match
)
511 if (rt6_check_expired(rt
))
514 m
= rt6_score_route(rt
, oif
, strict
);
519 if (strict
& RT6_LOOKUP_F_REACHABLE
)
523 } else if (strict
& RT6_LOOKUP_F_REACHABLE
) {
531 static struct rt6_info
*find_rr_leaf(struct fib6_node
*fn
,
532 struct rt6_info
*rr_head
,
533 u32 metric
, int oif
, int strict
)
535 struct rt6_info
*rt
, *match
;
539 for (rt
= rr_head
; rt
&& rt
->rt6i_metric
== metric
;
540 rt
= rt
->dst
.rt6_next
)
541 match
= find_match(rt
, oif
, strict
, &mpri
, match
);
542 for (rt
= fn
->leaf
; rt
&& rt
!= rr_head
&& rt
->rt6i_metric
== metric
;
543 rt
= rt
->dst
.rt6_next
)
544 match
= find_match(rt
, oif
, strict
, &mpri
, match
);
549 static struct rt6_info
*rt6_select(struct fib6_node
*fn
, int oif
, int strict
)
551 struct rt6_info
*match
, *rt0
;
556 fn
->rr_ptr
= rt0
= fn
->leaf
;
558 match
= find_rr_leaf(fn
, rt0
, rt0
->rt6i_metric
, oif
, strict
);
561 (strict
& RT6_LOOKUP_F_REACHABLE
)) {
562 struct rt6_info
*next
= rt0
->dst
.rt6_next
;
564 /* no entries matched; do round-robin */
565 if (!next
|| next
->rt6i_metric
!= rt0
->rt6i_metric
)
572 net
= dev_net(rt0
->dst
.dev
);
573 return match
? match
: net
->ipv6
.ip6_null_entry
;
576 #ifdef CONFIG_IPV6_ROUTE_INFO
577 int rt6_route_rcv(struct net_device
*dev
, u8
*opt
, int len
,
578 const struct in6_addr
*gwaddr
)
580 struct net
*net
= dev_net(dev
);
581 struct route_info
*rinfo
= (struct route_info
*) opt
;
582 struct in6_addr prefix_buf
, *prefix
;
584 unsigned long lifetime
;
587 if (len
< sizeof(struct route_info
)) {
591 /* Sanity check for prefix_len and length */
592 if (rinfo
->length
> 3) {
594 } else if (rinfo
->prefix_len
> 128) {
596 } else if (rinfo
->prefix_len
> 64) {
597 if (rinfo
->length
< 2) {
600 } else if (rinfo
->prefix_len
> 0) {
601 if (rinfo
->length
< 1) {
606 pref
= rinfo
->route_pref
;
607 if (pref
== ICMPV6_ROUTER_PREF_INVALID
)
610 lifetime
= addrconf_timeout_fixup(ntohl(rinfo
->lifetime
), HZ
);
612 if (rinfo
->length
== 3)
613 prefix
= (struct in6_addr
*)rinfo
->prefix
;
615 /* this function is safe */
616 ipv6_addr_prefix(&prefix_buf
,
617 (struct in6_addr
*)rinfo
->prefix
,
619 prefix
= &prefix_buf
;
622 rt
= rt6_get_route_info(net
, prefix
, rinfo
->prefix_len
, gwaddr
,
625 if (rt
&& !lifetime
) {
631 rt
= rt6_add_route_info(net
, prefix
, rinfo
->prefix_len
, gwaddr
, dev
->ifindex
,
634 rt
->rt6i_flags
= RTF_ROUTEINFO
|
635 (rt
->rt6i_flags
& ~RTF_PREF_MASK
) | RTF_PREF(pref
);
638 if (!addrconf_finite_timeout(lifetime
))
639 rt6_clean_expires(rt
);
641 rt6_set_expires(rt
, jiffies
+ HZ
* lifetime
);
643 dst_release(&rt
->dst
);
649 #define BACKTRACK(__net, saddr) \
651 if (rt == __net->ipv6.ip6_null_entry) { \
652 struct fib6_node *pn; \
654 if (fn->fn_flags & RTN_TL_ROOT) \
657 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
658 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
661 if (fn->fn_flags & RTN_RTINFO) \
667 static struct rt6_info
*ip6_pol_route_lookup(struct net
*net
,
668 struct fib6_table
*table
,
669 struct flowi6
*fl6
, int flags
)
671 struct fib6_node
*fn
;
674 read_lock_bh(&table
->tb6_lock
);
675 fn
= fib6_lookup(&table
->tb6_root
, &fl6
->daddr
, &fl6
->saddr
);
678 rt
= rt6_device_match(net
, rt
, &fl6
->saddr
, fl6
->flowi6_oif
, flags
);
679 BACKTRACK(net
, &fl6
->saddr
);
681 dst_use(&rt
->dst
, jiffies
);
682 read_unlock_bh(&table
->tb6_lock
);
687 struct dst_entry
* ip6_route_lookup(struct net
*net
, struct flowi6
*fl6
,
690 return fib6_rule_lookup(net
, fl6
, flags
, ip6_pol_route_lookup
);
692 EXPORT_SYMBOL_GPL(ip6_route_lookup
);
694 struct rt6_info
*rt6_lookup(struct net
*net
, const struct in6_addr
*daddr
,
695 const struct in6_addr
*saddr
, int oif
, int strict
)
697 struct flowi6 fl6
= {
701 struct dst_entry
*dst
;
702 int flags
= strict
? RT6_LOOKUP_F_IFACE
: 0;
705 memcpy(&fl6
.saddr
, saddr
, sizeof(*saddr
));
706 flags
|= RT6_LOOKUP_F_HAS_SADDR
;
709 dst
= fib6_rule_lookup(net
, &fl6
, flags
, ip6_pol_route_lookup
);
711 return (struct rt6_info
*) dst
;
718 EXPORT_SYMBOL(rt6_lookup
);
720 /* ip6_ins_rt is called with FREE table->tb6_lock.
721 It takes a new route entry; if the addition fails for any reason the
722 route is freed. In any case, if caller does not hold it, it may
726 static int __ip6_ins_rt(struct rt6_info
*rt
, struct nl_info
*info
)
729 struct fib6_table
*table
;
731 table
= rt
->rt6i_table
;
732 write_lock_bh(&table
->tb6_lock
);
733 err
= fib6_add(&table
->tb6_root
, rt
, info
);
734 write_unlock_bh(&table
->tb6_lock
);
739 int ip6_ins_rt(struct rt6_info
*rt
)
741 struct nl_info info
= {
742 .nl_net
= dev_net(rt
->dst
.dev
),
744 return __ip6_ins_rt(rt
, &info
);
747 static struct rt6_info
*rt6_alloc_cow(struct rt6_info
*ort
,
748 const struct in6_addr
*daddr
,
749 const struct in6_addr
*saddr
)
757 rt
= ip6_rt_copy(ort
, daddr
);
760 int attempts
= !in_softirq();
762 if (!(rt
->rt6i_flags
& RTF_GATEWAY
)) {
763 if (ort
->rt6i_dst
.plen
!= 128 &&
764 ipv6_addr_equal(&ort
->rt6i_dst
.addr
, daddr
))
765 rt
->rt6i_flags
|= RTF_ANYCAST
;
766 rt
->rt6i_gateway
= *daddr
;
769 rt
->rt6i_flags
|= RTF_CACHE
;
771 #ifdef CONFIG_IPV6_SUBTREES
772 if (rt
->rt6i_src
.plen
&& saddr
) {
773 rt
->rt6i_src
.addr
= *saddr
;
774 rt
->rt6i_src
.plen
= 128;
779 if (rt6_bind_neighbour(rt
, rt
->dst
.dev
)) {
780 struct net
*net
= dev_net(rt
->dst
.dev
);
781 int saved_rt_min_interval
=
782 net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
;
783 int saved_rt_elasticity
=
784 net
->ipv6
.sysctl
.ip6_rt_gc_elasticity
;
786 if (attempts
-- > 0) {
787 net
->ipv6
.sysctl
.ip6_rt_gc_elasticity
= 1;
788 net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
= 0;
790 ip6_dst_gc(&net
->ipv6
.ip6_dst_ops
);
792 net
->ipv6
.sysctl
.ip6_rt_gc_elasticity
=
794 net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
=
795 saved_rt_min_interval
;
799 net_warn_ratelimited("Neighbour table overflow\n");
808 static struct rt6_info
*rt6_alloc_clone(struct rt6_info
*ort
,
809 const struct in6_addr
*daddr
)
811 struct rt6_info
*rt
= ip6_rt_copy(ort
, daddr
);
814 rt
->rt6i_flags
|= RTF_CACHE
;
815 dst_set_neighbour(&rt
->dst
, neigh_clone(dst_get_neighbour_noref_raw(&ort
->dst
)));
820 static struct rt6_info
*ip6_pol_route(struct net
*net
, struct fib6_table
*table
, int oif
,
821 struct flowi6
*fl6
, int flags
)
823 struct fib6_node
*fn
;
824 struct rt6_info
*rt
, *nrt
;
828 int reachable
= net
->ipv6
.devconf_all
->forwarding
? 0 : RT6_LOOKUP_F_REACHABLE
;
830 strict
|= flags
& RT6_LOOKUP_F_IFACE
;
833 read_lock_bh(&table
->tb6_lock
);
836 fn
= fib6_lookup(&table
->tb6_root
, &fl6
->daddr
, &fl6
->saddr
);
839 rt
= rt6_select(fn
, oif
, strict
| reachable
);
841 BACKTRACK(net
, &fl6
->saddr
);
842 if (rt
== net
->ipv6
.ip6_null_entry
||
843 rt
->rt6i_flags
& RTF_CACHE
)
847 read_unlock_bh(&table
->tb6_lock
);
849 if (!dst_get_neighbour_noref_raw(&rt
->dst
) && !(rt
->rt6i_flags
& RTF_NONEXTHOP
))
850 nrt
= rt6_alloc_cow(rt
, &fl6
->daddr
, &fl6
->saddr
);
851 else if (!(rt
->dst
.flags
& DST_HOST
))
852 nrt
= rt6_alloc_clone(rt
, &fl6
->daddr
);
856 dst_release(&rt
->dst
);
857 rt
= nrt
? : net
->ipv6
.ip6_null_entry
;
861 err
= ip6_ins_rt(nrt
);
870 * Race condition! In the gap, when table->tb6_lock was
871 * released someone could insert this route. Relookup.
873 dst_release(&rt
->dst
);
882 read_unlock_bh(&table
->tb6_lock
);
884 rt
->dst
.lastuse
= jiffies
;
890 static struct rt6_info
*ip6_pol_route_input(struct net
*net
, struct fib6_table
*table
,
891 struct flowi6
*fl6
, int flags
)
893 return ip6_pol_route(net
, table
, fl6
->flowi6_iif
, fl6
, flags
);
896 static struct dst_entry
*ip6_route_input_lookup(struct net
*net
,
897 struct net_device
*dev
,
898 struct flowi6
*fl6
, int flags
)
900 if (rt6_need_strict(&fl6
->daddr
) && dev
->type
!= ARPHRD_PIMREG
)
901 flags
|= RT6_LOOKUP_F_IFACE
;
903 return fib6_rule_lookup(net
, fl6
, flags
, ip6_pol_route_input
);
906 void ip6_route_input(struct sk_buff
*skb
)
908 const struct ipv6hdr
*iph
= ipv6_hdr(skb
);
909 struct net
*net
= dev_net(skb
->dev
);
910 int flags
= RT6_LOOKUP_F_HAS_SADDR
;
911 struct flowi6 fl6
= {
912 .flowi6_iif
= skb
->dev
->ifindex
,
915 .flowlabel
= (* (__be32
*) iph
) & IPV6_FLOWINFO_MASK
,
916 .flowi6_mark
= skb
->mark
,
917 .flowi6_proto
= iph
->nexthdr
,
920 skb_dst_set(skb
, ip6_route_input_lookup(net
, skb
->dev
, &fl6
, flags
));
923 static struct rt6_info
*ip6_pol_route_output(struct net
*net
, struct fib6_table
*table
,
924 struct flowi6
*fl6
, int flags
)
926 return ip6_pol_route(net
, table
, fl6
->flowi6_oif
, fl6
, flags
);
929 struct dst_entry
* ip6_route_output(struct net
*net
, const struct sock
*sk
,
934 if ((sk
&& sk
->sk_bound_dev_if
) || rt6_need_strict(&fl6
->daddr
))
935 flags
|= RT6_LOOKUP_F_IFACE
;
937 if (!ipv6_addr_any(&fl6
->saddr
))
938 flags
|= RT6_LOOKUP_F_HAS_SADDR
;
940 flags
|= rt6_srcprefs2flags(inet6_sk(sk
)->srcprefs
);
942 return fib6_rule_lookup(net
, fl6
, flags
, ip6_pol_route_output
);
945 EXPORT_SYMBOL(ip6_route_output
);
947 struct dst_entry
*ip6_blackhole_route(struct net
*net
, struct dst_entry
*dst_orig
)
949 struct rt6_info
*rt
, *ort
= (struct rt6_info
*) dst_orig
;
950 struct dst_entry
*new = NULL
;
952 rt
= dst_alloc(&ip6_dst_blackhole_ops
, ort
->dst
.dev
, 1, 0, 0);
954 memset(&rt
->rt6i_table
, 0, sizeof(*rt
) - sizeof(struct dst_entry
));
959 new->input
= dst_discard
;
960 new->output
= dst_discard
;
962 if (dst_metrics_read_only(&ort
->dst
))
963 new->_metrics
= ort
->dst
._metrics
;
965 dst_copy_metrics(new, &ort
->dst
);
966 rt
->rt6i_idev
= ort
->rt6i_idev
;
968 in6_dev_hold(rt
->rt6i_idev
);
970 rt
->rt6i_gateway
= ort
->rt6i_gateway
;
971 rt
->rt6i_flags
= ort
->rt6i_flags
;
972 rt6_clean_expires(rt
);
975 memcpy(&rt
->rt6i_dst
, &ort
->rt6i_dst
, sizeof(struct rt6key
));
976 #ifdef CONFIG_IPV6_SUBTREES
977 memcpy(&rt
->rt6i_src
, &ort
->rt6i_src
, sizeof(struct rt6key
));
983 dst_release(dst_orig
);
984 return new ? new : ERR_PTR(-ENOMEM
);
988 * Destination cache support functions
991 static struct dst_entry
*ip6_dst_check(struct dst_entry
*dst
, u32 cookie
)
995 rt
= (struct rt6_info
*) dst
;
997 if (rt
->rt6i_node
&& (rt
->rt6i_node
->fn_sernum
== cookie
)) {
998 if (rt
->rt6i_peer_genid
!= rt6_peer_genid()) {
1000 rt6_bind_peer(rt
, 0);
1001 rt
->rt6i_peer_genid
= rt6_peer_genid();
1008 static struct dst_entry
*ip6_negative_advice(struct dst_entry
*dst
)
1010 struct rt6_info
*rt
= (struct rt6_info
*) dst
;
1013 if (rt
->rt6i_flags
& RTF_CACHE
) {
1014 if (rt6_check_expired(rt
)) {
1026 static void ip6_link_failure(struct sk_buff
*skb
)
1028 struct rt6_info
*rt
;
1030 icmpv6_send(skb
, ICMPV6_DEST_UNREACH
, ICMPV6_ADDR_UNREACH
, 0);
1032 rt
= (struct rt6_info
*) skb_dst(skb
);
1034 if (rt
->rt6i_flags
& RTF_CACHE
)
1035 rt6_update_expires(rt
, 0);
1036 else if (rt
->rt6i_node
&& (rt
->rt6i_flags
& RTF_DEFAULT
))
1037 rt
->rt6i_node
->fn_sernum
= -1;
1041 static void ip6_rt_update_pmtu(struct dst_entry
*dst
, u32 mtu
)
1043 struct rt6_info
*rt6
= (struct rt6_info
*)dst
;
1045 if (mtu
< dst_mtu(dst
) && rt6
->rt6i_dst
.plen
== 128) {
1046 rt6
->rt6i_flags
|= RTF_MODIFIED
;
1047 if (mtu
< IPV6_MIN_MTU
) {
1048 u32 features
= dst_metric(dst
, RTAX_FEATURES
);
1050 features
|= RTAX_FEATURE_ALLFRAG
;
1051 dst_metric_set(dst
, RTAX_FEATURES
, features
);
1053 dst_metric_set(dst
, RTAX_MTU
, mtu
);
1057 static unsigned int ip6_default_advmss(const struct dst_entry
*dst
)
1059 struct net_device
*dev
= dst
->dev
;
1060 unsigned int mtu
= dst_mtu(dst
);
1061 struct net
*net
= dev_net(dev
);
1063 mtu
-= sizeof(struct ipv6hdr
) + sizeof(struct tcphdr
);
1065 if (mtu
< net
->ipv6
.sysctl
.ip6_rt_min_advmss
)
1066 mtu
= net
->ipv6
.sysctl
.ip6_rt_min_advmss
;
1069 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1070 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1071 * IPV6_MAXPLEN is also valid and means: "any MSS,
1072 * rely only on pmtu discovery"
1074 if (mtu
> IPV6_MAXPLEN
- sizeof(struct tcphdr
))
1079 static unsigned int ip6_mtu(const struct dst_entry
*dst
)
1081 struct inet6_dev
*idev
;
1082 unsigned int mtu
= dst_metric_raw(dst
, RTAX_MTU
);
1090 idev
= __in6_dev_get(dst
->dev
);
1092 mtu
= idev
->cnf
.mtu6
;
1098 static struct dst_entry
*icmp6_dst_gc_list
;
1099 static DEFINE_SPINLOCK(icmp6_dst_lock
);
1101 struct dst_entry
*icmp6_dst_alloc(struct net_device
*dev
,
1102 struct neighbour
*neigh
,
1105 struct dst_entry
*dst
;
1106 struct rt6_info
*rt
;
1107 struct inet6_dev
*idev
= in6_dev_get(dev
);
1108 struct net
*net
= dev_net(dev
);
1110 if (unlikely(!idev
))
1111 return ERR_PTR(-ENODEV
);
1113 rt
= ip6_dst_alloc(&net
->ipv6
.ip6_dst_ops
, dev
, 0);
1114 if (unlikely(!rt
)) {
1116 dst
= ERR_PTR(-ENOMEM
);
1123 neigh
= ip6_neigh_lookup(&rt
->dst
, &fl6
->daddr
);
1124 if (IS_ERR(neigh
)) {
1127 return ERR_CAST(neigh
);
1131 rt
->dst
.flags
|= DST_HOST
;
1132 rt
->dst
.output
= ip6_output
;
1133 dst_set_neighbour(&rt
->dst
, neigh
);
1134 atomic_set(&rt
->dst
.__refcnt
, 1);
1135 rt
->rt6i_dst
.addr
= fl6
->daddr
;
1136 rt
->rt6i_dst
.plen
= 128;
1137 rt
->rt6i_idev
= idev
;
1138 dst_metric_set(&rt
->dst
, RTAX_HOPLIMIT
, 255);
1140 spin_lock_bh(&icmp6_dst_lock
);
1141 rt
->dst
.next
= icmp6_dst_gc_list
;
1142 icmp6_dst_gc_list
= &rt
->dst
;
1143 spin_unlock_bh(&icmp6_dst_lock
);
1145 fib6_force_start_gc(net
);
1147 dst
= xfrm_lookup(net
, &rt
->dst
, flowi6_to_flowi(fl6
), NULL
, 0);
1153 int icmp6_dst_gc(void)
1155 struct dst_entry
*dst
, **pprev
;
1158 spin_lock_bh(&icmp6_dst_lock
);
1159 pprev
= &icmp6_dst_gc_list
;
1161 while ((dst
= *pprev
) != NULL
) {
1162 if (!atomic_read(&dst
->__refcnt
)) {
1171 spin_unlock_bh(&icmp6_dst_lock
);
1176 static void icmp6_clean_all(int (*func
)(struct rt6_info
*rt
, void *arg
),
1179 struct dst_entry
*dst
, **pprev
;
1181 spin_lock_bh(&icmp6_dst_lock
);
1182 pprev
= &icmp6_dst_gc_list
;
1183 while ((dst
= *pprev
) != NULL
) {
1184 struct rt6_info
*rt
= (struct rt6_info
*) dst
;
1185 if (func(rt
, arg
)) {
1192 spin_unlock_bh(&icmp6_dst_lock
);
1195 static int ip6_dst_gc(struct dst_ops
*ops
)
1197 unsigned long now
= jiffies
;
1198 struct net
*net
= container_of(ops
, struct net
, ipv6
.ip6_dst_ops
);
1199 int rt_min_interval
= net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
;
1200 int rt_max_size
= net
->ipv6
.sysctl
.ip6_rt_max_size
;
1201 int rt_elasticity
= net
->ipv6
.sysctl
.ip6_rt_gc_elasticity
;
1202 int rt_gc_timeout
= net
->ipv6
.sysctl
.ip6_rt_gc_timeout
;
1203 unsigned long rt_last_gc
= net
->ipv6
.ip6_rt_last_gc
;
1206 entries
= dst_entries_get_fast(ops
);
1207 if (time_after(rt_last_gc
+ rt_min_interval
, now
) &&
1208 entries
<= rt_max_size
)
1211 net
->ipv6
.ip6_rt_gc_expire
++;
1212 fib6_run_gc(net
->ipv6
.ip6_rt_gc_expire
, net
);
1213 net
->ipv6
.ip6_rt_last_gc
= now
;
1214 entries
= dst_entries_get_slow(ops
);
1215 if (entries
< ops
->gc_thresh
)
1216 net
->ipv6
.ip6_rt_gc_expire
= rt_gc_timeout
>>1;
1218 net
->ipv6
.ip6_rt_gc_expire
-= net
->ipv6
.ip6_rt_gc_expire
>>rt_elasticity
;
1219 return entries
> rt_max_size
;
1222 /* Clean host part of a prefix. Not necessary in radix tree,
1223 but results in cleaner routing tables.
1225 Remove it only when all the things will work!
1228 int ip6_dst_hoplimit(struct dst_entry
*dst
)
1230 int hoplimit
= dst_metric_raw(dst
, RTAX_HOPLIMIT
);
1231 if (hoplimit
== 0) {
1232 struct net_device
*dev
= dst
->dev
;
1233 struct inet6_dev
*idev
;
1236 idev
= __in6_dev_get(dev
);
1238 hoplimit
= idev
->cnf
.hop_limit
;
1240 hoplimit
= dev_net(dev
)->ipv6
.devconf_all
->hop_limit
;
1245 EXPORT_SYMBOL(ip6_dst_hoplimit
);
1251 int ip6_route_add(struct fib6_config
*cfg
)
1254 struct net
*net
= cfg
->fc_nlinfo
.nl_net
;
1255 struct rt6_info
*rt
= NULL
;
1256 struct net_device
*dev
= NULL
;
1257 struct inet6_dev
*idev
= NULL
;
1258 struct fib6_table
*table
;
1261 if (cfg
->fc_dst_len
> 128 || cfg
->fc_src_len
> 128)
1263 #ifndef CONFIG_IPV6_SUBTREES
1264 if (cfg
->fc_src_len
)
1267 if (cfg
->fc_ifindex
) {
1269 dev
= dev_get_by_index(net
, cfg
->fc_ifindex
);
1272 idev
= in6_dev_get(dev
);
1277 if (cfg
->fc_metric
== 0)
1278 cfg
->fc_metric
= IP6_RT_PRIO_USER
;
1281 if (cfg
->fc_nlinfo
.nlh
&&
1282 !(cfg
->fc_nlinfo
.nlh
->nlmsg_flags
& NLM_F_CREATE
)) {
1283 table
= fib6_get_table(net
, cfg
->fc_table
);
1285 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1286 table
= fib6_new_table(net
, cfg
->fc_table
);
1289 table
= fib6_new_table(net
, cfg
->fc_table
);
1295 rt
= ip6_dst_alloc(&net
->ipv6
.ip6_dst_ops
, NULL
, DST_NOCOUNT
);
1302 rt
->dst
.obsolete
= -1;
1304 if (cfg
->fc_flags
& RTF_EXPIRES
)
1305 rt6_set_expires(rt
, jiffies
+
1306 clock_t_to_jiffies(cfg
->fc_expires
));
1308 rt6_clean_expires(rt
);
1310 if (cfg
->fc_protocol
== RTPROT_UNSPEC
)
1311 cfg
->fc_protocol
= RTPROT_BOOT
;
1312 rt
->rt6i_protocol
= cfg
->fc_protocol
;
1314 addr_type
= ipv6_addr_type(&cfg
->fc_dst
);
1316 if (addr_type
& IPV6_ADDR_MULTICAST
)
1317 rt
->dst
.input
= ip6_mc_input
;
1318 else if (cfg
->fc_flags
& RTF_LOCAL
)
1319 rt
->dst
.input
= ip6_input
;
1321 rt
->dst
.input
= ip6_forward
;
1323 rt
->dst
.output
= ip6_output
;
1325 ipv6_addr_prefix(&rt
->rt6i_dst
.addr
, &cfg
->fc_dst
, cfg
->fc_dst_len
);
1326 rt
->rt6i_dst
.plen
= cfg
->fc_dst_len
;
1327 if (rt
->rt6i_dst
.plen
== 128)
1328 rt
->dst
.flags
|= DST_HOST
;
1330 if (!(rt
->dst
.flags
& DST_HOST
) && cfg
->fc_mx
) {
1331 u32
*metrics
= kzalloc(sizeof(u32
) * RTAX_MAX
, GFP_KERNEL
);
1336 dst_init_metrics(&rt
->dst
, metrics
, 0);
1338 #ifdef CONFIG_IPV6_SUBTREES
1339 ipv6_addr_prefix(&rt
->rt6i_src
.addr
, &cfg
->fc_src
, cfg
->fc_src_len
);
1340 rt
->rt6i_src
.plen
= cfg
->fc_src_len
;
1343 rt
->rt6i_metric
= cfg
->fc_metric
;
1345 /* We cannot add true routes via loopback here,
1346 they would result in kernel looping; promote them to reject routes
1348 if ((cfg
->fc_flags
& RTF_REJECT
) ||
1349 (dev
&& (dev
->flags
& IFF_LOOPBACK
) &&
1350 !(addr_type
& IPV6_ADDR_LOOPBACK
) &&
1351 !(cfg
->fc_flags
& RTF_LOCAL
))) {
1352 /* hold loopback dev/idev if we haven't done so. */
1353 if (dev
!= net
->loopback_dev
) {
1358 dev
= net
->loopback_dev
;
1360 idev
= in6_dev_get(dev
);
1366 rt
->dst
.output
= ip6_pkt_discard_out
;
1367 rt
->dst
.input
= ip6_pkt_discard
;
1368 rt
->dst
.error
= -ENETUNREACH
;
1369 rt
->rt6i_flags
= RTF_REJECT
|RTF_NONEXTHOP
;
1373 if (cfg
->fc_flags
& RTF_GATEWAY
) {
1374 const struct in6_addr
*gw_addr
;
1377 gw_addr
= &cfg
->fc_gateway
;
1378 rt
->rt6i_gateway
= *gw_addr
;
1379 gwa_type
= ipv6_addr_type(gw_addr
);
1381 if (gwa_type
!= (IPV6_ADDR_LINKLOCAL
|IPV6_ADDR_UNICAST
)) {
1382 struct rt6_info
*grt
;
1384 /* IPv6 strictly inhibits using non-link-local
1385 addresses as nexthop addresses.
1386 Otherwise, the router will not be able to send redirects.
1387 It is very good, but in some (rare!) circumstances
1388 (SIT, PtP, NBMA NOARP links) it is handy to allow
1389 some exceptions. --ANK
1392 if (!(gwa_type
& IPV6_ADDR_UNICAST
))
1395 grt
= rt6_lookup(net
, gw_addr
, NULL
, cfg
->fc_ifindex
, 1);
1397 err
= -EHOSTUNREACH
;
1401 if (dev
!= grt
->dst
.dev
) {
1402 dst_release(&grt
->dst
);
1407 idev
= grt
->rt6i_idev
;
1409 in6_dev_hold(grt
->rt6i_idev
);
1411 if (!(grt
->rt6i_flags
& RTF_GATEWAY
))
1413 dst_release(&grt
->dst
);
1419 if (!dev
|| (dev
->flags
& IFF_LOOPBACK
))
1427 if (!ipv6_addr_any(&cfg
->fc_prefsrc
)) {
1428 if (!ipv6_chk_addr(net
, &cfg
->fc_prefsrc
, dev
, 0)) {
1432 rt
->rt6i_prefsrc
.addr
= cfg
->fc_prefsrc
;
1433 rt
->rt6i_prefsrc
.plen
= 128;
1435 rt
->rt6i_prefsrc
.plen
= 0;
1437 if (cfg
->fc_flags
& (RTF_GATEWAY
| RTF_NONEXTHOP
)) {
1438 err
= rt6_bind_neighbour(rt
, dev
);
1443 rt
->rt6i_flags
= cfg
->fc_flags
;
1450 nla_for_each_attr(nla
, cfg
->fc_mx
, cfg
->fc_mx_len
, remaining
) {
1451 int type
= nla_type(nla
);
1454 if (type
> RTAX_MAX
) {
1459 dst_metric_set(&rt
->dst
, type
, nla_get_u32(nla
));
1465 rt
->rt6i_idev
= idev
;
1466 rt
->rt6i_table
= table
;
1468 cfg
->fc_nlinfo
.nl_net
= dev_net(dev
);
1470 return __ip6_ins_rt(rt
, &cfg
->fc_nlinfo
);
1482 static int __ip6_del_rt(struct rt6_info
*rt
, struct nl_info
*info
)
1485 struct fib6_table
*table
;
1486 struct net
*net
= dev_net(rt
->dst
.dev
);
1488 if (rt
== net
->ipv6
.ip6_null_entry
)
1491 table
= rt
->rt6i_table
;
1492 write_lock_bh(&table
->tb6_lock
);
1494 err
= fib6_del(rt
, info
);
1495 dst_release(&rt
->dst
);
1497 write_unlock_bh(&table
->tb6_lock
);
1502 int ip6_del_rt(struct rt6_info
*rt
)
1504 struct nl_info info
= {
1505 .nl_net
= dev_net(rt
->dst
.dev
),
1507 return __ip6_del_rt(rt
, &info
);
/*
 * Delete the route described by @cfg.
 * The table is found with fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
 * under the table read lock the node for the (dst, src) prefixes is located
 * with fib6_locate() and its leaf list is walked.  A candidate is filtered by
 * interface index (when cfg->fc_ifindex is set), by gateway address (when
 * RTF_GATEWAY is set in cfg->fc_flags) and by metric (when cfg->fc_metric is
 * non-zero).  For the selected route the read lock is dropped and
 * __ip6_del_rt() performs the removal with cfg->fc_nlinfo.
 * NOTE(review): the action taken when a filter rejects a candidate and the
 * no-match return value are not visible in this listing.
 */
1510 static int ip6_route_del(struct fib6_config
*cfg
)
1512 struct fib6_table
*table
;
1513 struct fib6_node
*fn
;
1514 struct rt6_info
*rt
;
1517 table
= fib6_get_table(cfg
->fc_nlinfo
.nl_net
, cfg
->fc_table
);
1521 read_lock_bh(&table
->tb6_lock
);
1523 fn
= fib6_locate(&table
->tb6_root
,
1524 &cfg
->fc_dst
, cfg
->fc_dst_len
,
1525 &cfg
->fc_src
, cfg
->fc_src_len
);
1528 for (rt
= fn
->leaf
; rt
; rt
= rt
->dst
.rt6_next
) {
1529 if (cfg
->fc_ifindex
&&
1531 rt
->dst
.dev
->ifindex
!= cfg
->fc_ifindex
))
1533 if (cfg
->fc_flags
& RTF_GATEWAY
&&
1534 !ipv6_addr_equal(&cfg
->fc_gateway
, &rt
->rt6i_gateway
))
1536 if (cfg
->fc_metric
&& cfg
->fc_metric
!= rt
->rt6i_metric
)
1539 read_unlock_bh(&table
->tb6_lock
);
1541 return __ip6_del_rt(rt
, &cfg
->fc_nlinfo
);
1544 read_unlock_bh(&table
->tb6_lock
);
/*
 * Flow key used for redirect lookups.  It carries the address of the
 * redirecting gateway; ip6_route_redirect() passes &rdfl.fl6 to the lookup
 * and __ip6_route_redirect() casts the flowi6 pointer back to this type.
 */
1552 struct ip6rd_flowi
{
1554 struct in6_addr gateway
;
/*
 * Per-table lookup callback used for redirect validation (it is the callback
 * ip6_route_redirect() hands to fib6_rule_lookup()).
 * With the table read lock held, fib6_lookup() finds the node for
 * fl6->daddr / fl6->saddr and the leaf list is scanned.  Each route is tested
 * for expiry (rt6_check_expired), for RTF_GATEWAY, for a device ifindex equal
 * to fl6->flowi6_oif, and for a gateway equal to rdfl->gateway.
 * net->ipv6.ip6_null_entry together with BACKTRACK() is the fallback,
 * presumably when no route passes the tests - confirm against full source.
 */
1557 static struct rt6_info
*__ip6_route_redirect(struct net
*net
,
1558 struct fib6_table
*table
,
1562 struct ip6rd_flowi
*rdfl
= (struct ip6rd_flowi
*)fl6
;
1563 struct rt6_info
*rt
;
1564 struct fib6_node
*fn
;
1567 * Get the "current" route for this destination and
1568 * check if the redirect has come from approriate router.
1570 * RFC 2461 specifies that redirects should only be
1571 * accepted if they come from the nexthop to the target.
1572 * Due to the way the routes are chosen, this notion
1573 * is a bit fuzzy and one might need to check all possible
1577 read_lock_bh(&table
->tb6_lock
);
1578 fn
= fib6_lookup(&table
->tb6_root
, &fl6
->daddr
, &fl6
->saddr
);
1580 for (rt
= fn
->leaf
; rt
; rt
= rt
->dst
.rt6_next
) {
1582 * Current route is on-link; redirect is always invalid.
1584 * Seems, previous statement is not true. It could
1585 * be node, which looks for us as on-link (f.e. proxy ndisc)
1586 * But then router serving it might decide, that we should
1587 * know truth 8)8) --ANK (980726).
1589 if (rt6_check_expired(rt
))
1591 if (!(rt
->rt6i_flags
& RTF_GATEWAY
))
1593 if (fl6
->flowi6_oif
!= rt
->dst
.dev
->ifindex
)
1595 if (!ipv6_addr_equal(&rdfl
->gateway
, &rt
->rt6i_gateway
))
1601 rt
= net
->ipv6
.ip6_null_entry
;
1602 BACKTRACK(net
, &fl6
->saddr
);
1606 read_unlock_bh(&table
->tb6_lock
);
/*
 * Find the route that a redirect for @dest (sent by @gateway, seen on @dev)
 * applies to.  Builds an ip6rd_flowi with flowi6_oif = dev->ifindex and the
 * gateway address, then runs __ip6_route_redirect() through
 * fib6_rule_lookup().  Lookup flags start as RT6_LOOKUP_F_HAS_SADDR and gain
 * RT6_LOOKUP_F_IFACE when rt6_need_strict(dest) is true.
 */
1611 static struct rt6_info
*ip6_route_redirect(const struct in6_addr
*dest
,
1612 const struct in6_addr
*src
,
1613 const struct in6_addr
*gateway
,
1614 struct net_device
*dev
)
1616 int flags
= RT6_LOOKUP_F_HAS_SADDR
;
1617 struct net
*net
= dev_net(dev
);
1618 struct ip6rd_flowi rdfl
= {
1620 .flowi6_oif
= dev
->ifindex
,
1626 rdfl
.gateway
= *gateway
;
1628 if (rt6_need_strict(dest
))
1629 flags
|= RT6_LOOKUP_F_IFACE
;
1631 return (struct rt6_info
*)fib6_rule_lookup(net
, &rdfl
.fl6
,
1632 flags
, __ip6_route_redirect
);
/*
 * Apply a received redirect for @dest whose new next hop is @neigh.
 * Visible steps, in order:
 *  - look up the current route with ip6_route_redirect(); the null entry
 *    produces a rate-limited debug message;
 *  - update @neigh with @lladdr (NUD_STALE, override flags; router flags are
 *    added only when !on_link);
 *  - confirm the old dst, and treat the case where @neigh is already the
 *    route's neighbour as a duplicate;
 *  - clone the route with ip6_rt_copy(), set its flags to
 *    RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE (RTF_GATEWAY is cleared on one
 *    path - presumably the on-link case, the condition is not visible here),
 *    point gateway/neighbour at @neigh and insert it with ip6_ins_rt();
 *  - raise NETEVENT_REDIRECT with the old and new dst;
 *  - drop the reference on the old route's dst.
 * The handling of an old route that has RTF_CACHE set is not visible here.
 */
1635 void rt6_redirect(const struct in6_addr
*dest
, const struct in6_addr
*src
,
1636 const struct in6_addr
*saddr
,
1637 struct neighbour
*neigh
, u8
*lladdr
, int on_link
)
1639 struct rt6_info
*rt
, *nrt
= NULL
;
1640 struct netevent_redirect netevent
;
1641 struct net
*net
= dev_net(neigh
->dev
);
1643 rt
= ip6_route_redirect(dest
, src
, saddr
, neigh
->dev
);
1645 if (rt
== net
->ipv6
.ip6_null_entry
) {
1646 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1651 * We have finally decided to accept it.
1654 neigh_update(neigh
, lladdr
, NUD_STALE
,
1655 NEIGH_UPDATE_F_WEAK_OVERRIDE
|
1656 NEIGH_UPDATE_F_OVERRIDE
|
1657 (on_link
? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER
|
1658 NEIGH_UPDATE_F_ISROUTER
))
1662 * Redirect received -> path was valid.
1663 * Look, redirects are sent only in response to data packets,
1664 * so that this nexthop apparently is reachable. --ANK
1666 dst_confirm(&rt
->dst
);
1668 /* Duplicate redirect: silently ignore. */
1669 if (neigh
== dst_get_neighbour_noref_raw(&rt
->dst
))
1672 nrt
= ip6_rt_copy(rt
, dest
);
1676 nrt
->rt6i_flags
= RTF_GATEWAY
|RTF_UP
|RTF_DYNAMIC
|RTF_CACHE
;
1678 nrt
->rt6i_flags
&= ~RTF_GATEWAY
;
1680 nrt
->rt6i_gateway
= *(struct in6_addr
*)neigh
->primary_key
;
1681 dst_set_neighbour(&nrt
->dst
, neigh_clone(neigh
));
1683 if (ip6_ins_rt(nrt
))
1686 netevent
.old
= &rt
->dst
;
1687 netevent
.new = &nrt
->dst
;
1688 call_netevent_notifiers(NETEVENT_REDIRECT
, &netevent
);
1690 if (rt
->rt6i_flags
& RTF_CACHE
) {
1696 dst_release(&rt
->dst
);
1700 * Handle ICMP "packet too big" messages
1701 * i.e. Path MTU discovery
/*
 * Apply a reported path MTU (@pmtu) to the route for @daddr/@saddr looked up
 * with rt6_lookup() on @ifindex (0 = no interface restriction is what the
 * caller passes for the first call).
 * Visible behaviour:
 *  - expired routes and pmtu >= current dst_mtu() are tested for first;
 *  - pmtu below IPV6_MIN_MTU is raised to IPV6_MIN_MTU;
 *  - a RTF_CACHE route is updated in place: RTAX_MTU is set, the expiry is
 *    refreshed with sysctl ip6_rt_mtu_expires and RTF_MODIFIED is set;
 *  - otherwise a new entry is made with rt6_alloc_cow() (no neighbour and not
 *    RTF_NONEXTHOP) or rt6_alloc_clone(), given the new MTU, the same expiry
 *    and RTF_DYNAMIC;
 *  - RTAX_FEATURE_ALLFRAG is OR-ed into RTAX_FEATURES on a path whose
 *    condition is not visible here (presumably the below-minimum case);
 *  - the looked-up route's dst reference is released at the end.
 */
1704 static void rt6_do_pmtu_disc(const struct in6_addr
*daddr
, const struct in6_addr
*saddr
,
1705 struct net
*net
, u32 pmtu
, int ifindex
)
1707 struct rt6_info
*rt
, *nrt
;
1710 rt
= rt6_lookup(net
, daddr
, saddr
, ifindex
, 0);
1714 if (rt6_check_expired(rt
)) {
1719 if (pmtu
>= dst_mtu(&rt
->dst
))
1722 if (pmtu
< IPV6_MIN_MTU
) {
1724 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1725 * MTU (1280) and a fragment header should always be included
1726 * after a node receiving Too Big message reporting PMTU is
1727 * less than the IPv6 Minimum Link MTU.
1729 pmtu
= IPV6_MIN_MTU
;
1733 /* New mtu received -> path was valid.
1734 They are sent only in response to data packets,
1735 so that this nexthop apparently is reachable. --ANK
1737 dst_confirm(&rt
->dst
);
1739 /* Host route. If it is static, it would be better
1740 not to override it, but add new one, so that
1741 when cache entry will expire old pmtu
1742 would return automatically.
1744 if (rt
->rt6i_flags
& RTF_CACHE
) {
1745 dst_metric_set(&rt
->dst
, RTAX_MTU
, pmtu
);
1747 u32 features
= dst_metric(&rt
->dst
, RTAX_FEATURES
);
1748 features
|= RTAX_FEATURE_ALLFRAG
;
1749 dst_metric_set(&rt
->dst
, RTAX_FEATURES
, features
);
1751 rt6_update_expires(rt
, net
->ipv6
.sysctl
.ip6_rt_mtu_expires
);
1752 rt
->rt6i_flags
|= RTF_MODIFIED
;
1757 Two cases are possible:
1758 1. It is connected route. Action: COW
1759 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1761 if (!dst_get_neighbour_noref_raw(&rt
->dst
) && !(rt
->rt6i_flags
& RTF_NONEXTHOP
))
1762 nrt
= rt6_alloc_cow(rt
, daddr
, saddr
);
1764 nrt
= rt6_alloc_clone(rt
, daddr
);
1767 dst_metric_set(&nrt
->dst
, RTAX_MTU
, pmtu
);
1769 u32 features
= dst_metric(&nrt
->dst
, RTAX_FEATURES
);
1770 features
|= RTAX_FEATURE_ALLFRAG
;
1771 dst_metric_set(&nrt
->dst
, RTAX_FEATURES
, features
);
1774 /* According to RFC 1981, detecting PMTU increase shouldn't be
1775 * happened within 5 mins, the recommended timer is 10 mins.
1776 * Here this route expiration time is set to ip6_rt_mtu_expires
1777 * which is 10 mins. After 10 mins the decreased pmtu is expired
1778 * and detecting PMTU increase will be automatically happened.
1780 rt6_update_expires(nrt
, net
->ipv6
.sysctl
.ip6_rt_mtu_expires
);
1781 nrt
->rt6i_flags
|= RTF_DYNAMIC
;
1785 dst_release(&rt
->dst
);
/*
 * Entry point for a Packet Too Big report received on @dev.
 * Runs rt6_do_pmtu_disc() twice in the namespace of @dev: once with ifindex 0
 * and once with dev->ifindex, so both the route a plain lookup selects and
 * the route bound to the receiving interface get the new @pmtu.
 */
1788 void rt6_pmtu_discovery(const struct in6_addr
*daddr
, const struct in6_addr
*saddr
,
1789 struct net_device
*dev
, u32 pmtu
)
1791 struct net
*net
= dev_net(dev
);
1794 * RFC 1981 states that a node "MUST reduce the size of the packets it
1795 * is sending along the path" that caused the Packet Too Big message.
1796 * Since it's not possible in the general case to determine which
1797 * interface was used to send the original packet, we update the MTU
1798 * on the interface that will be used to send future packets. We also
1799 * update the MTU on the interface that received the Packet Too Big in
1800 * case the original packet was forced out that interface with
1801 * SO_BINDTODEVICE or similar. This is the next best thing to the
1802 * correct behaviour, which would be to update the MTU on all
1805 rt6_do_pmtu_disc(daddr
, saddr
, net
, pmtu
, 0);
1806 rt6_do_pmtu_disc(daddr
, saddr
, net
, pmtu
, dev
->ifindex
);
1810 * Misc support functions
/*
 * Allocate a new host route (/128) for @dest modelled on @ort.
 * The copy takes input/output handlers, metrics, dst.error, idev (with a
 * reference taken via in6_dev_hold), gateway, flags, prefsrc and table from
 * @ort; DST_HOST is set, lastuse is set to jiffies and the metric is 0.
 * When @ort has both RTF_DEFAULT and RTF_ADDRCONF, rt6_set_from() links the
 * copy to @ort; rt6_clean_expires() is also called (presumably on the other
 * branch - the branch structure is not visible here).
 * With CONFIG_IPV6_SUBTREES the source key is copied as well.
 * NOTE(review): the allocation-failure check and return statement are not
 * visible in this listing.
 */
1813 static struct rt6_info
*ip6_rt_copy(struct rt6_info
*ort
,
1814 const struct in6_addr
*dest
)
1816 struct net
*net
= dev_net(ort
->dst
.dev
);
1817 struct rt6_info
*rt
= ip6_dst_alloc(&net
->ipv6
.ip6_dst_ops
,
1821 rt
->dst
.input
= ort
->dst
.input
;
1822 rt
->dst
.output
= ort
->dst
.output
;
1823 rt
->dst
.flags
|= DST_HOST
;
1825 rt
->rt6i_dst
.addr
= *dest
;
1826 rt
->rt6i_dst
.plen
= 128;
1827 dst_copy_metrics(&rt
->dst
, &ort
->dst
);
1828 rt
->dst
.error
= ort
->dst
.error
;
1829 rt
->rt6i_idev
= ort
->rt6i_idev
;
1831 in6_dev_hold(rt
->rt6i_idev
);
1832 rt
->dst
.lastuse
= jiffies
;
1834 rt
->rt6i_gateway
= ort
->rt6i_gateway
;
1835 rt
->rt6i_flags
= ort
->rt6i_flags
;
1836 if ((ort
->rt6i_flags
& (RTF_DEFAULT
| RTF_ADDRCONF
)) ==
1837 (RTF_DEFAULT
| RTF_ADDRCONF
))
1838 rt6_set_from(rt
, ort
);
1840 rt6_clean_expires(rt
);
1841 rt
->rt6i_metric
= 0;
1843 #ifdef CONFIG_IPV6_SUBTREES
1844 memcpy(&rt
->rt6i_src
, &ort
->rt6i_src
, sizeof(struct rt6key
));
1846 memcpy(&rt
->rt6i_prefsrc
, &ort
->rt6i_prefsrc
, sizeof(struct rt6key
));
1847 rt
->rt6i_table
= ort
->rt6i_table
;
1852 #ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Look up a Route Information route in table RT6_TABLE_INFO.
 * Under the table write lock, fib6_locate() finds the node for
 * @prefix/@prefixlen and the leaf list is scanned for an entry whose device
 * ifindex equals @ifindex, whose flags contain both RTF_ROUTEINFO and
 * RTF_GATEWAY, and whose gateway equals @gwaddr.
 * NOTE(review): what is done with a match (e.g. reference taking) and the
 * return statement are not visible in this listing.
 */
1853 static struct rt6_info
*rt6_get_route_info(struct net
*net
,
1854 const struct in6_addr
*prefix
, int prefixlen
,
1855 const struct in6_addr
*gwaddr
, int ifindex
)
1857 struct fib6_node
*fn
;
1858 struct rt6_info
*rt
= NULL
;
1859 struct fib6_table
*table
;
1861 table
= fib6_get_table(net
, RT6_TABLE_INFO
);
1865 write_lock_bh(&table
->tb6_lock
);
1866 fn
= fib6_locate(&table
->tb6_root
, prefix
,prefixlen
, NULL
, 0);
1870 for (rt
= fn
->leaf
; rt
; rt
= rt
->dst
.rt6_next
) {
1871 if (rt
->dst
.dev
->ifindex
!= ifindex
)
1873 if ((rt
->rt6i_flags
& (RTF_ROUTEINFO
|RTF_GATEWAY
)) != (RTF_ROUTEINFO
|RTF_GATEWAY
))
1875 if (!ipv6_addr_equal(&rt
->rt6i_gateway
, gwaddr
))
1881 write_unlock_bh(&table
->tb6_lock
);
/*
 * Install a Route Information route for @prefix/@prefixlen via @gwaddr on
 * @ifindex in table RT6_TABLE_INFO (metric IP6_RT_PRIO_USER, flags
 * RTF_GATEWAY|RTF_ADDRCONF|RTF_ROUTEINFO|RTF_UP plus the RTF_PREF() bits).
 * RTF_DEFAULT is added on a path guarded by a condition not visible here
 * (per the comment, when the prefix length is 0).
 * The result of ip6_route_add() is not used; the function returns whatever
 * rt6_get_route_info() finds afterwards.
 */
1885 static struct rt6_info
*rt6_add_route_info(struct net
*net
,
1886 const struct in6_addr
*prefix
, int prefixlen
,
1887 const struct in6_addr
*gwaddr
, int ifindex
,
1890 struct fib6_config cfg
= {
1891 .fc_table
= RT6_TABLE_INFO
,
1892 .fc_metric
= IP6_RT_PRIO_USER
,
1893 .fc_ifindex
= ifindex
,
1894 .fc_dst_len
= prefixlen
,
1895 .fc_flags
= RTF_GATEWAY
| RTF_ADDRCONF
| RTF_ROUTEINFO
|
1896 RTF_UP
| RTF_PREF(pref
),
1898 .fc_nlinfo
.nlh
= NULL
,
1899 .fc_nlinfo
.nl_net
= net
,
1902 cfg
.fc_dst
= *prefix
;
1903 cfg
.fc_gateway
= *gwaddr
;
1905 /* We should treat it as a default route if prefix length is 0. */
1907 cfg
.fc_flags
|= RTF_DEFAULT
;
1909 ip6_route_add(&cfg
);
1911 return rt6_get_route_info(net
, prefix
, prefixlen
, gwaddr
, ifindex
);
/*
 * Find the autoconfigured default route through router @addr on @dev.
 * Scans the root leaf list of table RT6_TABLE_DFLT (in the namespace of @dev)
 * under the table write lock for an entry on @dev whose flags contain both
 * RTF_ADDRCONF and RTF_DEFAULT and whose gateway equals @addr.
 * NOTE(review): the action on a match and the return statement are not
 * visible in this listing.
 */
1915 struct rt6_info
*rt6_get_dflt_router(const struct in6_addr
*addr
, struct net_device
*dev
)
1917 struct rt6_info
*rt
;
1918 struct fib6_table
*table
;
1920 table
= fib6_get_table(dev_net(dev
), RT6_TABLE_DFLT
);
1924 write_lock_bh(&table
->tb6_lock
);
1925 for (rt
= table
->tb6_root
.leaf
; rt
; rt
=rt
->dst
.rt6_next
) {
1926 if (dev
== rt
->dst
.dev
&&
1927 ((rt
->rt6i_flags
& (RTF_ADDRCONF
| RTF_DEFAULT
)) == (RTF_ADDRCONF
| RTF_DEFAULT
)) &&
1928 ipv6_addr_equal(&rt
->rt6i_gateway
, addr
))
1933 write_unlock_bh(&table
->tb6_lock
);
/*
 * Install an autoconfigured default route via @gwaddr on @dev in table
 * RT6_TABLE_DFLT (metric IP6_RT_PRIO_USER, flags
 * RTF_GATEWAY|RTF_ADDRCONF|RTF_DEFAULT|RTF_UP|RTF_EXPIRES plus RTF_PREF()).
 * The result of ip6_route_add() is not used; the function returns whatever
 * rt6_get_dflt_router() finds afterwards.
 */
1937 struct rt6_info
*rt6_add_dflt_router(const struct in6_addr
*gwaddr
,
1938 struct net_device
*dev
,
1941 struct fib6_config cfg
= {
1942 .fc_table
= RT6_TABLE_DFLT
,
1943 .fc_metric
= IP6_RT_PRIO_USER
,
1944 .fc_ifindex
= dev
->ifindex
,
1945 .fc_flags
= RTF_GATEWAY
| RTF_ADDRCONF
| RTF_DEFAULT
|
1946 RTF_UP
| RTF_EXPIRES
| RTF_PREF(pref
),
1948 .fc_nlinfo
.nlh
= NULL
,
1949 .fc_nlinfo
.nl_net
= dev_net(dev
),
1952 cfg
.fc_gateway
= *gwaddr
;
1954 ip6_route_add(&cfg
);
1956 return rt6_get_dflt_router(gwaddr
, dev
);
/*
 * Walk the root leaf list of table RT6_TABLE_DFLT in @net under the table
 * read lock and act on routes carrying RTF_DEFAULT or RTF_ADDRCONF.  The
 * read lock is dropped inside the matching branch before the (not visible)
 * per-route action, and again after the loop.
 * NOTE(review): the flag test here is "any of" the two bits, whereas
 * rt6_get_dflt_router() requires both - confirm this is intended given the
 * "keep consistent" note below.
 */
1959 void rt6_purge_dflt_routers(struct net
*net
)
1961 struct rt6_info
*rt
;
1962 struct fib6_table
*table
;
1964 /* NOTE: Keep consistent with rt6_get_dflt_router */
1965 table
= fib6_get_table(net
, RT6_TABLE_DFLT
);
1970 read_lock_bh(&table
->tb6_lock
);
1971 for (rt
= table
->tb6_root
.leaf
; rt
; rt
= rt
->dst
.rt6_next
) {
1972 if (rt
->rt6i_flags
& (RTF_DEFAULT
| RTF_ADDRCONF
)) {
1974 read_unlock_bh(&table
->tb6_lock
);
1979 read_unlock_bh(&table
->tb6_lock
);
/*
 * Translate a legacy ioctl route message (@rtmsg) into a fib6_config.
 * @cfg is zeroed first; the table is always RT6_TABLE_MAIN and the namespace
 * is @net.  ifindex, metric, expires (from rtmsg_info), prefix lengths,
 * flags and the dst/src/gateway addresses are copied field by field.
 */
1982 static void rtmsg_to_fib6_config(struct net
*net
,
1983 struct in6_rtmsg
*rtmsg
,
1984 struct fib6_config
*cfg
)
1986 memset(cfg
, 0, sizeof(*cfg
));
1988 cfg
->fc_table
= RT6_TABLE_MAIN
;
1989 cfg
->fc_ifindex
= rtmsg
->rtmsg_ifindex
;
1990 cfg
->fc_metric
= rtmsg
->rtmsg_metric
;
1991 cfg
->fc_expires
= rtmsg
->rtmsg_info
;
1992 cfg
->fc_dst_len
= rtmsg
->rtmsg_dst_len
;
1993 cfg
->fc_src_len
= rtmsg
->rtmsg_src_len
;
1994 cfg
->fc_flags
= rtmsg
->rtmsg_flags
;
1996 cfg
->fc_nlinfo
.nl_net
= net
;
1998 cfg
->fc_dst
= rtmsg
->rtmsg_dst
;
1999 cfg
->fc_src
= rtmsg
->rtmsg_src
;
2000 cfg
->fc_gateway
= rtmsg
->rtmsg_gateway
;
/*
 * ioctl handler for SIOCADDRT / SIOCDELRT.
 * Requires CAP_NET_ADMIN, copies a struct in6_rtmsg from user space (@arg),
 * converts it with rtmsg_to_fib6_config() and calls ip6_route_add() or
 * ip6_route_del().
 * NOTE(review): the error returns, any locking around the add/delete and the
 * handling of other @cmd values are not visible in this listing.
 */
2003 int ipv6_route_ioctl(struct net
*net
, unsigned int cmd
, void __user
*arg
)
2005 struct fib6_config cfg
;
2006 struct in6_rtmsg rtmsg
;
2010 case SIOCADDRT
: /* Add a route */
2011 case SIOCDELRT
: /* Delete a route */
2012 if (!capable(CAP_NET_ADMIN
))
2014 err
= copy_from_user(&rtmsg
, arg
,
2015 sizeof(struct in6_rtmsg
));
2019 rtmsg_to_fib6_config(net
, &rtmsg
, &cfg
);
2024 err
= ip6_route_add(&cfg
);
2027 err
= ip6_route_del(&cfg
);
2041 * Drop the packet on the floor
/*
 * Common drop path for packets that hit a reject-style route.
 * Accounts the drop against the dst's device/idev - for
 * IPSTATS_MIB_INNOROUTES an unspecified (IPV6_ADDR_ANY) destination is
 * counted as IPSTATS_MIB_INADDRERRORS instead - and sends an ICMPv6
 * Destination Unreachable with the given @code.
 * NOTE(review): fall-through/break structure of the switch, freeing of @skb
 * and the return value are not visible in this listing.
 */
2044 static int ip6_pkt_drop(struct sk_buff
*skb
, u8 code
, int ipstats_mib_noroutes
)
2047 struct dst_entry
*dst
= skb_dst(skb
);
2048 switch (ipstats_mib_noroutes
) {
2049 case IPSTATS_MIB_INNOROUTES
:
2050 type
= ipv6_addr_type(&ipv6_hdr(skb
)->daddr
);
2051 if (type
== IPV6_ADDR_ANY
) {
2052 IP6_INC_STATS(dev_net(dst
->dev
), ip6_dst_idev(dst
),
2053 IPSTATS_MIB_INADDRERRORS
);
2057 case IPSTATS_MIB_OUTNOROUTES
:
2058 IP6_INC_STATS(dev_net(dst
->dev
), ip6_dst_idev(dst
),
2059 ipstats_mib_noroutes
);
2062 icmpv6_send(skb
, ICMPV6_DEST_UNREACH
, code
, 0);
/* Input-side handler: drop @skb as "no route" (ICMPV6_NOROUTE, counted as IPSTATS_MIB_INNOROUTES). */
2067 static int ip6_pkt_discard(struct sk_buff
*skb
)
2069 return ip6_pkt_drop(skb
, ICMPV6_NOROUTE
, IPSTATS_MIB_INNOROUTES
);
/*
 * Output-side handler: point skb->dev at the dst's device, then drop @skb as
 * "no route" (ICMPV6_NOROUTE, counted as IPSTATS_MIB_OUTNOROUTES).
 */
2072 static int ip6_pkt_discard_out(struct sk_buff
*skb
)
2074 skb
->dev
= skb_dst(skb
)->dev
;
2075 return ip6_pkt_drop(skb
, ICMPV6_NOROUTE
, IPSTATS_MIB_OUTNOROUTES
);
2078 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
/* Input-side handler: drop @skb as administratively prohibited (ICMPV6_ADM_PROHIBITED, counted as IPSTATS_MIB_INNOROUTES). */
2080 static int ip6_pkt_prohibit(struct sk_buff
*skb
)
2082 return ip6_pkt_drop(skb
, ICMPV6_ADM_PROHIBITED
, IPSTATS_MIB_INNOROUTES
);
/*
 * Output-side handler: point skb->dev at the dst's device, then drop @skb as
 * administratively prohibited (ICMPV6_ADM_PROHIBITED, counted as
 * IPSTATS_MIB_OUTNOROUTES).
 */
2085 static int ip6_pkt_prohibit_out(struct sk_buff
*skb
)
2087 skb
->dev
= skb_dst(skb
)->dev
;
2088 return ip6_pkt_drop(skb
, ICMPV6_ADM_PROHIBITED
, IPSTATS_MIB_OUTNOROUTES
);
2094 * Allocate a dst for local (unicast / anycast) address.
/*
 * Build the /128 route for a local address @addr owned by @idev.
 * The dst is allocated on the namespace's loopback device; allocation failure
 * logs a rate-limited warning and returns ERR_PTR(-ENOMEM).  The route gets
 * DST_HOST, ip6_input/ip6_output handlers, obsolete = -1, flags
 * RTF_UP|RTF_NONEXTHOP plus either RTF_ANYCAST or RTF_LOCAL (the selecting
 * condition is not visible here), a bound neighbour (failure returns
 * ERR_PTR(err)), table RT6_TABLE_LOCAL and an initial refcount of 1.
 */
2097 struct rt6_info
*addrconf_dst_alloc(struct inet6_dev
*idev
,
2098 const struct in6_addr
*addr
,
2101 struct net
*net
= dev_net(idev
->dev
);
2102 struct rt6_info
*rt
= ip6_dst_alloc(&net
->ipv6
.ip6_dst_ops
,
2103 net
->loopback_dev
, 0);
2107 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2108 return ERR_PTR(-ENOMEM
);
2113 rt
->dst
.flags
|= DST_HOST
;
2114 rt
->dst
.input
= ip6_input
;
2115 rt
->dst
.output
= ip6_output
;
2116 rt
->rt6i_idev
= idev
;
2117 rt
->dst
.obsolete
= -1;
2119 rt
->rt6i_flags
= RTF_UP
| RTF_NONEXTHOP
;
2121 rt
->rt6i_flags
|= RTF_ANYCAST
;
2123 rt
->rt6i_flags
|= RTF_LOCAL
;
2124 err
= rt6_bind_neighbour(rt
, rt
->dst
.dev
);
2127 return ERR_PTR(err
);
2130 rt
->rt6i_dst
.addr
= *addr
;
2131 rt
->rt6i_dst
.plen
= 128;
2132 rt
->rt6i_table
= fib6_get_table(net
, RT6_TABLE_LOCAL
);
2134 atomic_set(&rt
->dst
.__refcnt
, 1);
/*
 * Choose a source address for traffic to @daddr over @rt and store it in
 * @saddr.  A route with a preferred source (rt6i_prefsrc.plen != 0) supplies
 * that address directly; the other visible path asks ipv6_dev_get_saddr(),
 * passing the route's idev device (or NULL when there is no idev).
 */
2139 int ip6_route_get_saddr(struct net
*net
,
2140 struct rt6_info
*rt
,
2141 const struct in6_addr
*daddr
,
2143 struct in6_addr
*saddr
)
2145 struct inet6_dev
*idev
= ip6_dst_idev((struct dst_entry
*)rt
);
2147 if (rt
->rt6i_prefsrc
.plen
)
2148 *saddr
= rt
->rt6i_prefsrc
.addr
;
2150 err
= ipv6_dev_get_saddr(net
, idev
? idev
->dev
: NULL
,
2151 daddr
, prefs
, saddr
);
2155 /* remove deleted ip from prefsrc entries */
/*
 * Argument bundle for fib6_remove_prefsrc(): the device to restrict to
 * (NULL matches any device) and the address being removed.  The callback
 * also reads a ->net member, whose declaration is not visible in this listing.
 */
2156 struct arg_dev_net_ip
{
2157 struct net_device
*dev
;
2159 struct in6_addr
*addr
;
/*
 * fib6_clean_all() callback: clear the preferred source of @rt when it
 * refers to the address being removed.  @arg is a struct arg_dev_net_ip.
 * A route matches when it is on the given device (or no device was given),
 * is not the namespace's null entry, and its prefsrc address equals the
 * address; a match gets rt6i_prefsrc.plen = 0.
 */
2162 static int fib6_remove_prefsrc(struct rt6_info
*rt
, void *arg
)
2164 struct net_device
*dev
= ((struct arg_dev_net_ip
*)arg
)->dev
;
2165 struct net
*net
= ((struct arg_dev_net_ip
*)arg
)->net
;
2166 struct in6_addr
*addr
= ((struct arg_dev_net_ip
*)arg
)->addr
;
2168 if (((void *)rt
->dst
.dev
== dev
|| !dev
) &&
2169 rt
!= net
->ipv6
.ip6_null_entry
&&
2170 ipv6_addr_equal(addr
, &rt
->rt6i_prefsrc
.addr
)) {
2171 /* remove prefsrc entry */
2172 rt
->rt6i_prefsrc
.plen
= 0;
/*
 * Called for an address (@ifp) that is going away: runs
 * fib6_remove_prefsrc() over every route in the address's namespace via
 * fib6_clean_all(), restricted to the address's device.
 */
2177 void rt6_remove_prefsrc(struct inet6_ifaddr
*ifp
)
2179 struct net
*net
= dev_net(ifp
->idev
->dev
);
2180 struct arg_dev_net_ip adni
= {
2181 .dev
= ifp
->idev
->dev
,
2185 fib6_clean_all(net
, fib6_remove_prefsrc
, 0, &adni
);
/*
 * Argument bundle for fib6_ifdown(): the device going down (NULL matches any
 * device).  The callback also reads a ->net member, whose declaration is not
 * visible in this listing.
 */
2188 struct arg_dev_net
{
2189 struct net_device
*dev
;
/*
 * Cleaner callback used by rt6_ifdown().  @arg is a struct arg_dev_net.
 * Selects routes on the given device (or all routes when no device is given)
 * other than the namespace's null entry.
 * NOTE(review): the values returned for selected / unselected routes are not
 * visible in this listing.
 */
2193 static int fib6_ifdown(struct rt6_info
*rt
, void *arg
)
2195 const struct arg_dev_net
*adn
= arg
;
2196 const struct net_device
*dev
= adn
->dev
;
2198 if ((rt
->dst
.dev
== dev
|| !dev
) &&
2199 rt
!= adn
->net
->ipv6
.ip6_null_entry
)
/*
 * Device-down hook: applies fib6_ifdown() to the FIB of @net via
 * fib6_clean_all() and to the entries handled by icmp6_clean_all().
 */
2205 void rt6_ifdown(struct net
*net
, struct net_device
*dev
)
2207 struct arg_dev_net adn
= {
2212 fib6_clean_all(net
, fib6_ifdown
, 0, &adn
);
2213 icmp6_clean_all(fib6_ifdown
, &adn
);
/*
 * Argument bundle for rt6_mtu_change_route(): the device whose MTU changed.
 * The callback also reads a ->mtu member, whose declaration is not visible
 * in this listing.
 */
2216 struct rt6_mtu_change_arg
{
2217 struct net_device
*dev
;
/*
 * fib6_clean_all() callback applied after a device MTU change.  @p_arg is a
 * struct rt6_mtu_change_arg.
 * A route on the changed device whose RTAX_MTU metric is not locked gets
 * RTAX_MTU set to the new MTU when either its current MTU is >= the new one
 * (decrease), or its current MTU is below the new one and equal to the
 * device's idev->cnf.mtu6 (increase from the old link MTU).
 * NOTE(review): the idev NULL check and return value are not visible in this
 * listing.
 */
2221 static int rt6_mtu_change_route(struct rt6_info
*rt
, void *p_arg
)
2223 struct rt6_mtu_change_arg
*arg
= (struct rt6_mtu_change_arg
*) p_arg
;
2224 struct inet6_dev
*idev
;
2226 /* In IPv6 pmtu discovery is not optional,
2227 so that RTAX_MTU lock cannot disable it.
2228 We still use this lock to block changes
2229 caused by addrconf/ndisc.
2232 idev
= __in6_dev_get(arg
->dev
);
2236 /* For administrative MTU increase, there is no way to discover
2237 IPv6 PMTU increase, so PMTU increase should be updated here.
2238 Since RFC 1981 doesn't include administrative MTU increase
2239 update PMTU increase is a MUST. (i.e. jumbo frame)
2242 If new MTU is less than route PMTU, this new MTU will be the
2243 lowest MTU in the path, update the route PMTU to reflect PMTU
2244 decreases; if new MTU is greater than route PMTU, and the
2245 old MTU is the lowest MTU in the path, update the route PMTU
2246 to reflect the increase. In this case if the other nodes' MTU
2247 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2250 if (rt
->dst
.dev
== arg
->dev
&&
2251 !dst_metric_locked(&rt
->dst
, RTAX_MTU
) &&
2252 (dst_mtu(&rt
->dst
) >= arg
->mtu
||
2253 (dst_mtu(&rt
->dst
) < arg
->mtu
&&
2254 dst_mtu(&rt
->dst
) == idev
->cnf
.mtu6
))) {
2255 dst_metric_set(&rt
->dst
, RTAX_MTU
, arg
->mtu
);
/*
 * Propagate a new device MTU to the routing table: runs
 * rt6_mtu_change_route() over all routes in the namespace of @dev.
 */
2260 void rt6_mtu_change(struct net_device
*dev
, unsigned int mtu
)
2262 struct rt6_mtu_change_arg arg
= {
2267 fib6_clean_all(dev_net(dev
), rt6_mtu_change_route
, 0, &arg
);
/*
 * Netlink attribute policy used when parsing IPv6 route messages:
 * RTA_GATEWAY carries a length of sizeof(struct in6_addr); RTA_OIF, RTA_IIF
 * and RTA_PRIORITY are u32; RTA_METRICS is a nested attribute.
 */
2270 static const struct nla_policy rtm_ipv6_policy
[RTA_MAX
+1] = {
2271 [RTA_GATEWAY
] = { .len
= sizeof(struct in6_addr
) },
2272 [RTA_OIF
] = { .type
= NLA_U32
},
2273 [RTA_IIF
] = { .type
= NLA_U32
},
2274 [RTA_PRIORITY
] = { .type
= NLA_U32
},
2275 [RTA_METRICS
] = { .type
= NLA_NESTED
},
/*
 * Parse a netlink route request (@nlh, received in @skb) into @cfg.
 * Attributes are parsed with rtm_ipv6_policy; @cfg is zeroed and filled from
 * the rtmsg header (table, prefix lengths, protocol) with flags starting as
 * RTF_UP.  RTN_UNREACHABLE adds RTF_REJECT and RTN_LOCAL adds RTF_LOCAL.
 * nlinfo records the sender pid, the header and the socket's namespace.
 * Optional attributes: RTA_GATEWAY (also sets RTF_GATEWAY), RTA_DST/RTA_SRC
 * (copied for ceil(prefix_len / 8) bytes after a length check), RTA_PREFSRC,
 * RTA_OIF, RTA_PRIORITY, RTA_METRICS (pointer + length stored, not copied)
 * and RTA_TABLE (overrides the header's table).
 * NOTE(review): error returns are not visible in this listing.
 */
2278 static int rtm_to_fib6_config(struct sk_buff
*skb
, struct nlmsghdr
*nlh
,
2279 struct fib6_config
*cfg
)
2282 struct nlattr
*tb
[RTA_MAX
+1];
2285 err
= nlmsg_parse(nlh
, sizeof(*rtm
), tb
, RTA_MAX
, rtm_ipv6_policy
);
2290 rtm
= nlmsg_data(nlh
);
2291 memset(cfg
, 0, sizeof(*cfg
));
2293 cfg
->fc_table
= rtm
->rtm_table
;
2294 cfg
->fc_dst_len
= rtm
->rtm_dst_len
;
2295 cfg
->fc_src_len
= rtm
->rtm_src_len
;
2296 cfg
->fc_flags
= RTF_UP
;
2297 cfg
->fc_protocol
= rtm
->rtm_protocol
;
2299 if (rtm
->rtm_type
== RTN_UNREACHABLE
)
2300 cfg
->fc_flags
|= RTF_REJECT
;
2302 if (rtm
->rtm_type
== RTN_LOCAL
)
2303 cfg
->fc_flags
|= RTF_LOCAL
;
2305 cfg
->fc_nlinfo
.pid
= NETLINK_CB(skb
).pid
;
2306 cfg
->fc_nlinfo
.nlh
= nlh
;
2307 cfg
->fc_nlinfo
.nl_net
= sock_net(skb
->sk
);
2309 if (tb
[RTA_GATEWAY
]) {
2310 nla_memcpy(&cfg
->fc_gateway
, tb
[RTA_GATEWAY
], 16);
2311 cfg
->fc_flags
|= RTF_GATEWAY
;
2315 int plen
= (rtm
->rtm_dst_len
+ 7) >> 3;
2317 if (nla_len(tb
[RTA_DST
]) < plen
)
2320 nla_memcpy(&cfg
->fc_dst
, tb
[RTA_DST
], plen
);
2324 int plen
= (rtm
->rtm_src_len
+ 7) >> 3;
2326 if (nla_len(tb
[RTA_SRC
]) < plen
)
2329 nla_memcpy(&cfg
->fc_src
, tb
[RTA_SRC
], plen
);
2332 if (tb
[RTA_PREFSRC
])
2333 nla_memcpy(&cfg
->fc_prefsrc
, tb
[RTA_PREFSRC
], 16);
2336 cfg
->fc_ifindex
= nla_get_u32(tb
[RTA_OIF
]);
2338 if (tb
[RTA_PRIORITY
])
2339 cfg
->fc_metric
= nla_get_u32(tb
[RTA_PRIORITY
]);
2341 if (tb
[RTA_METRICS
]) {
2342 cfg
->fc_mx
= nla_data(tb
[RTA_METRICS
]);
2343 cfg
->fc_mx_len
= nla_len(tb
[RTA_METRICS
]);
2347 cfg
->fc_table
= nla_get_u32(tb
[RTA_TABLE
]);
/*
 * rtnetlink handler for route deletion: converts the request with
 * rtm_to_fib6_config() and passes the result to ip6_route_del().
 */
2354 static int inet6_rtm_delroute(struct sk_buff
*skb
, struct nlmsghdr
* nlh
, void *arg
)
2356 struct fib6_config cfg
;
2359 err
= rtm_to_fib6_config(skb
, nlh
, &cfg
);
2363 return ip6_route_del(&cfg
);
/*
 * rtnetlink handler for route creation: converts the request with
 * rtm_to_fib6_config() and passes the result to ip6_route_add().
 */
2366 static int inet6_rtm_newroute(struct sk_buff
*skb
, struct nlmsghdr
* nlh
, void *arg
)
2368 struct fib6_config cfg
;
2371 err
= rtm_to_fib6_config(skb
, nlh
, &cfg
);
2375 return ip6_route_add(&cfg
);
/*
 * Upper bound on the netlink message size needed to describe one route:
 * the aligned rtmsg header plus room for every attribute rt6_fill_node() may
 * emit (four 16-byte addresses, four u32 attributes, RTAX_MAX u32 metrics
 * and one rta_cacheinfo).  inet6_rt_notify() treats -EMSGSIZE from
 * rt6_fill_node() as a bug in this function.
 */
2378 static inline size_t rt6_nlmsg_size(void)
2380 return NLMSG_ALIGN(sizeof(struct rtmsg
))
2381 + nla_total_size(16) /* RTA_SRC */
2382 + nla_total_size(16) /* RTA_DST */
2383 + nla_total_size(16) /* RTA_GATEWAY */
2384 + nla_total_size(16) /* RTA_PREFSRC */
2385 + nla_total_size(4) /* RTA_TABLE */
2386 + nla_total_size(4) /* RTA_IIF */
2387 + nla_total_size(4) /* RTA_OIF */
2388 + nla_total_size(4) /* RTA_PRIORITY */
2389 + RTAX_MAX
* nla_total_size(4) /* RTA_METRICS */
2390 + nla_total_size(sizeof(struct rta_cacheinfo
));
/*
 * Serialise route @rt into @skb as a netlink message of @type.
 * @dst / @src: when given, reported as /128 RTA_DST / RTA_SRC instead of the
 *              route's own keys (RTA_SRC only with CONFIG_IPV6_SUBTREES).
 * @iif:        reported as RTA_IIF on the (not fully visible) input path;
 *              multicast destinations go through ip6mr_get_route() when
 *              CONFIG_IPV6_MROUTE is enabled.
 * @prefix:     when set, routes without RTF_PREFIX_RT are skipped and
 *              treated as success.
 * The header carries family, prefix lengths, table (also as RTA_TABLE),
 * a type derived from RTF_REJECT / RTF_LOCAL / loopback device, universe
 * scope, and a protocol that RTF_DYNAMIC, RTF_ADDRCONF and RTF_DEFAULT
 * override (in that order of precedence).  RTF_CACHE sets RTM_F_CLONED.
 * Further attributes: RTA_PREFSRC, metrics, RTA_GATEWAY (from the
 * neighbour's primary key), RTA_OIF, RTA_PRIORITY and cache info (expiry,
 * peer timestamp data, dst.error).
 * Ends with nlmsg_end() on success; the nla_put_failure path cancels the
 * message with nlmsg_cancel().
 * NOTE(review): several conditions and the error return value are not
 * visible in this listing.
 */
2393 static int rt6_fill_node(struct net
*net
,
2394 struct sk_buff
*skb
, struct rt6_info
*rt
,
2395 struct in6_addr
*dst
, struct in6_addr
*src
,
2396 int iif
, int type
, u32 pid
, u32 seq
,
2397 int prefix
, int nowait
, unsigned int flags
)
2399 const struct inet_peer
*peer
;
2401 struct nlmsghdr
*nlh
;
2404 struct neighbour
*n
;
2407 if (prefix
) { /* user wants prefix routes only */
2408 if (!(rt
->rt6i_flags
& RTF_PREFIX_RT
)) {
2409 /* success since this is not a prefix route */
2414 nlh
= nlmsg_put(skb
, pid
, seq
, type
, sizeof(*rtm
), flags
);
2418 rtm
= nlmsg_data(nlh
);
2419 rtm
->rtm_family
= AF_INET6
;
2420 rtm
->rtm_dst_len
= rt
->rt6i_dst
.plen
;
2421 rtm
->rtm_src_len
= rt
->rt6i_src
.plen
;
2424 table
= rt
->rt6i_table
->tb6_id
;
2426 table
= RT6_TABLE_UNSPEC
;
2427 rtm
->rtm_table
= table
;
2428 if (nla_put_u32(skb
, RTA_TABLE
, table
))
2429 goto nla_put_failure
;
2430 if (rt
->rt6i_flags
& RTF_REJECT
)
2431 rtm
->rtm_type
= RTN_UNREACHABLE
;
2432 else if (rt
->rt6i_flags
& RTF_LOCAL
)
2433 rtm
->rtm_type
= RTN_LOCAL
;
2434 else if (rt
->dst
.dev
&& (rt
->dst
.dev
->flags
& IFF_LOOPBACK
))
2435 rtm
->rtm_type
= RTN_LOCAL
;
2437 rtm
->rtm_type
= RTN_UNICAST
;
2439 rtm
->rtm_scope
= RT_SCOPE_UNIVERSE
;
2440 rtm
->rtm_protocol
= rt
->rt6i_protocol
;
2441 if (rt
->rt6i_flags
& RTF_DYNAMIC
)
2442 rtm
->rtm_protocol
= RTPROT_REDIRECT
;
2443 else if (rt
->rt6i_flags
& RTF_ADDRCONF
)
2444 rtm
->rtm_protocol
= RTPROT_KERNEL
;
2445 else if (rt
->rt6i_flags
& RTF_DEFAULT
)
2446 rtm
->rtm_protocol
= RTPROT_RA
;
2448 if (rt
->rt6i_flags
& RTF_CACHE
)
2449 rtm
->rtm_flags
|= RTM_F_CLONED
;
2452 if (nla_put(skb
, RTA_DST
, 16, dst
))
2453 goto nla_put_failure
;
2454 rtm
->rtm_dst_len
= 128;
2455 } else if (rtm
->rtm_dst_len
)
2456 if (nla_put(skb
, RTA_DST
, 16, &rt
->rt6i_dst
.addr
))
2457 goto nla_put_failure
;
2458 #ifdef CONFIG_IPV6_SUBTREES
2460 if (nla_put(skb
, RTA_SRC
, 16, src
))
2461 goto nla_put_failure
;
2462 rtm
->rtm_src_len
= 128;
2463 } else if (rtm
->rtm_src_len
&&
2464 nla_put(skb
, RTA_SRC
, 16, &rt
->rt6i_src
.addr
))
2465 goto nla_put_failure
;
2468 #ifdef CONFIG_IPV6_MROUTE
2469 if (ipv6_addr_is_multicast(&rt
->rt6i_dst
.addr
)) {
2470 int err
= ip6mr_get_route(net
, skb
, rtm
, nowait
);
2475 goto nla_put_failure
;
2477 if (err
== -EMSGSIZE
)
2478 goto nla_put_failure
;
2483 if (nla_put_u32(skb
, RTA_IIF
, iif
))
2484 goto nla_put_failure
;
2486 struct in6_addr saddr_buf
;
2487 if (ip6_route_get_saddr(net
, rt
, dst
, 0, &saddr_buf
) == 0 &&
2488 nla_put(skb
, RTA_PREFSRC
, 16, &saddr_buf
))
2489 goto nla_put_failure
;
2492 if (rt
->rt6i_prefsrc
.plen
) {
2493 struct in6_addr saddr_buf
;
2494 saddr_buf
= rt
->rt6i_prefsrc
.addr
;
2495 if (nla_put(skb
, RTA_PREFSRC
, 16, &saddr_buf
))
2496 goto nla_put_failure
;
2499 if (rtnetlink_put_metrics(skb
, dst_metrics_ptr(&rt
->dst
)) < 0)
2500 goto nla_put_failure
;
2503 n
= dst_get_neighbour_noref(&rt
->dst
);
2505 if (nla_put(skb
, RTA_GATEWAY
, 16, &n
->primary_key
) < 0) {
2507 goto nla_put_failure
;
2513 nla_put_u32(skb
, RTA_OIF
, rt
->dst
.dev
->ifindex
))
2514 goto nla_put_failure
;
2515 if (nla_put_u32(skb
, RTA_PRIORITY
, rt
->rt6i_metric
))
2516 goto nla_put_failure
;
2517 if (!(rt
->rt6i_flags
& RTF_EXPIRES
))
2519 else if (rt
->dst
.expires
- jiffies
< INT_MAX
)
2520 expires
= rt
->dst
.expires
- jiffies
;
2524 peer
= rt
->rt6i_peer
;
2526 if (peer
&& peer
->tcp_ts_stamp
) {
2528 tsage
= get_seconds() - peer
->tcp_ts_stamp
;
2531 if (rtnl_put_cacheinfo(skb
, &rt
->dst
, 0, ts
, tsage
,
2532 expires
, rt
->dst
.error
) < 0)
2533 goto nla_put_failure
;
2535 return nlmsg_end(skb
, nlh
);
2538 nlmsg_cancel(skb
, nlh
);
/*
 * Dump callback: emit @rt as an RTM_NEWROUTE message (NLM_F_MULTI) into the
 * dump skb described by @p_arg (struct rt6_rtnl_dump_arg).
 * The "prefix routes only" filter is enabled when the request carries a full
 * rtmsg header with RTM_F_PREFIX set in rtm_flags.
 */
2542 int rt6_dump_route(struct rt6_info
*rt
, void *p_arg
)
2544 struct rt6_rtnl_dump_arg
*arg
= (struct rt6_rtnl_dump_arg
*) p_arg
;
2547 if (nlmsg_len(arg
->cb
->nlh
) >= sizeof(struct rtmsg
)) {
2548 struct rtmsg
*rtm
= nlmsg_data(arg
->cb
->nlh
);
2549 prefix
= (rtm
->rtm_flags
& RTM_F_PREFIX
) != 0;
2553 return rt6_fill_node(arg
->net
,
2554 arg
->skb
, rt
, NULL
, NULL
, 0, RTM_NEWROUTE
,
2555 NETLINK_CB(arg
->cb
->skb
).pid
, arg
->cb
->nlh
->nlmsg_seq
,
2556 prefix
, 0, NLM_F_MULTI
);
/*
 * rtnetlink handler for "get route" requests.
 * Builds a flowi6 from the optional RTA_SRC / RTA_DST (each must be at least
 * sizeof(struct in6_addr)), RTA_IIF and RTA_OIF attributes.  With an input
 * interface the device is resolved by index and the lookup goes through
 * ip6_route_input_lookup() (RT6_LOOKUP_F_HAS_SADDR is added for a specified
 * source); the other visible path uses ip6_route_output() with flowi6_oif.
 * The result is written into a freshly allocated skb with rt6_fill_node()
 * as RTM_NEWROUTE and unicast back to the requester; the route's dst is
 * attached to the reply skb via skb_dst_set().
 * NOTE(review): error paths and their return values are mostly not visible
 * in this listing.
 */
2559 static int inet6_rtm_getroute(struct sk_buff
*in_skb
, struct nlmsghdr
* nlh
, void *arg
)
2561 struct net
*net
= sock_net(in_skb
->sk
);
2562 struct nlattr
*tb
[RTA_MAX
+1];
2563 struct rt6_info
*rt
;
2564 struct sk_buff
*skb
;
2567 int err
, iif
= 0, oif
= 0;
2569 err
= nlmsg_parse(nlh
, sizeof(*rtm
), tb
, RTA_MAX
, rtm_ipv6_policy
);
2574 memset(&fl6
, 0, sizeof(fl6
));
2577 if (nla_len(tb
[RTA_SRC
]) < sizeof(struct in6_addr
))
2580 fl6
.saddr
= *(struct in6_addr
*)nla_data(tb
[RTA_SRC
]);
2584 if (nla_len(tb
[RTA_DST
]) < sizeof(struct in6_addr
))
2587 fl6
.daddr
= *(struct in6_addr
*)nla_data(tb
[RTA_DST
]);
2591 iif
= nla_get_u32(tb
[RTA_IIF
]);
2594 oif
= nla_get_u32(tb
[RTA_OIF
]);
2597 struct net_device
*dev
;
2600 dev
= __dev_get_by_index(net
, iif
);
2606 fl6
.flowi6_iif
= iif
;
2608 if (!ipv6_addr_any(&fl6
.saddr
))
2609 flags
|= RT6_LOOKUP_F_HAS_SADDR
;
2611 rt
= (struct rt6_info
*)ip6_route_input_lookup(net
, dev
, &fl6
,
2614 fl6
.flowi6_oif
= oif
;
2616 rt
= (struct rt6_info
*)ip6_route_output(net
, NULL
, &fl6
);
2619 skb
= alloc_skb(NLMSG_GOODSIZE
, GFP_KERNEL
);
2621 dst_release(&rt
->dst
);
2626 /* Reserve room for dummy headers, this skb can pass
2627 through good chunk of routing engine.
2629 skb_reset_mac_header(skb
);
2630 skb_reserve(skb
, MAX_HEADER
+ sizeof(struct ipv6hdr
));
2632 skb_dst_set(skb
, &rt
->dst
);
2634 err
= rt6_fill_node(net
, skb
, rt
, &fl6
.daddr
, &fl6
.saddr
, iif
,
2635 RTM_NEWROUTE
, NETLINK_CB(in_skb
).pid
,
2636 nlh
->nlmsg_seq
, 0, 0, 0);
2642 err
= rtnl_unicast(skb
, net
, NETLINK_CB(in_skb
).pid
);
/*
 * Broadcast a route change (@event) for @rt to the RTNLGRP_IPV6_ROUTE
 * netlink group.  The message is sized with rt6_nlmsg_size(), filled by
 * rt6_fill_node() and sent with rtnl_notify(); the sequence number comes
 * from info->nlh when present, else 0.  -EMSGSIZE from rt6_fill_node()
 * triggers a WARN_ON as it indicates an undersized rt6_nlmsg_size().
 * rtnl_set_sk_err() reports the error to listeners - presumably only on the
 * failure path, which is not fully visible here.
 */
2647 void inet6_rt_notify(int event
, struct rt6_info
*rt
, struct nl_info
*info
)
2649 struct sk_buff
*skb
;
2650 struct net
*net
= info
->nl_net
;
2655 seq
= info
->nlh
? info
->nlh
->nlmsg_seq
: 0;
2657 skb
= nlmsg_new(rt6_nlmsg_size(), gfp_any());
2661 err
= rt6_fill_node(net
, skb
, rt
, NULL
, NULL
, 0,
2662 event
, info
->pid
, seq
, 0, 0, 0);
2664 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2665 WARN_ON(err
== -EMSGSIZE
);
2669 rtnl_notify(skb
, net
, info
->pid
, RTNLGRP_IPV6_ROUTE
,
2670 info
->nlh
, gfp_any());
2674 rtnl_set_sk_err(net
, RTNLGRP_IPV6_ROUTE
, err
);
/*
 * Netdevice notifier: when a loopback device registers, bind the
 * namespace's special route entries to it.  ip6_null_entry - and, with
 * CONFIG_IPV6_MULTIPLE_TABLES, ip6_prohibit_entry and ip6_blk_hole_entry -
 * get dst.dev = dev and an idev obtained via in6_dev_get(dev).
 */
2677 static int ip6_route_dev_notify(struct notifier_block
*this,
2678 unsigned long event
, void *data
)
2680 struct net_device
*dev
= (struct net_device
*)data
;
2681 struct net
*net
= dev_net(dev
);
2683 if (event
== NETDEV_REGISTER
&& (dev
->flags
& IFF_LOOPBACK
)) {
2684 net
->ipv6
.ip6_null_entry
->dst
.dev
= dev
;
2685 net
->ipv6
.ip6_null_entry
->rt6i_idev
= in6_dev_get(dev
);
2686 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2687 net
->ipv6
.ip6_prohibit_entry
->dst
.dev
= dev
;
2688 net
->ipv6
.ip6_prohibit_entry
->rt6i_idev
= in6_dev_get(dev
);
2689 net
->ipv6
.ip6_blk_hole_entry
->dst
.dev
= dev
;
2690 net
->ipv6
.ip6_blk_hole_entry
->rt6i_idev
= in6_dev_get(dev
);
2701 #ifdef CONFIG_PROC_FS
/*
 * Print one route as a line of the proc route listing; @p_arg is the
 * seq_file.  Columns: destination and prefix length; source and prefix
 * length (all zeros without CONFIG_IPV6_SUBTREES); next hop taken from the
 * route's neighbour (all zeros on the other path); then metric, reference
 * count, use count, flags and device name (empty when there is no device).
 */
2712 static int rt6_info_route(struct rt6_info
*rt
, void *p_arg
)
2714 struct seq_file
*m
= p_arg
;
2715 struct neighbour
*n
;
2717 seq_printf(m
, "%pi6 %02x ", &rt
->rt6i_dst
.addr
, rt
->rt6i_dst
.plen
);
2719 #ifdef CONFIG_IPV6_SUBTREES
2720 seq_printf(m
, "%pi6 %02x ", &rt
->rt6i_src
.addr
, rt
->rt6i_src
.plen
);
2722 seq_puts(m
, "00000000000000000000000000000000 00 ");
2725 n
= dst_get_neighbour_noref(&rt
->dst
);
2727 seq_printf(m
, "%pi6", n
->primary_key
);
2729 seq_puts(m
, "00000000000000000000000000000000");
2732 seq_printf(m
, " %08x %08x %08x %08x %8s\n",
2733 rt
->rt6i_metric
, atomic_read(&rt
->dst
.__refcnt
),
2734 rt
->dst
.__use
, rt
->rt6i_flags
,
2735 rt
->dst
.dev
? rt
->dst
.dev
->name
: "");
/*
 * seq_file show handler for the route listing: prints every route of the
 * namespace stored in m->private by running rt6_info_route() through
 * fib6_clean_all_ro().
 */
2739 static int ipv6_route_show(struct seq_file
*m
, void *v
)
2741 struct net
*net
= (struct net
*)m
->private;
2742 fib6_clean_all_ro(net
, rt6_info_route
, 0, m
);
/* open() handler for the route listing: a namespace-aware single_open bound to ipv6_route_show(). */
2746 static int ipv6_route_open(struct inode
*inode
, struct file
*file
)
2748 return single_open_net(inode
, file
, ipv6_route_show
);
/*
 * File operations for the proc route listing: opened via ipv6_route_open(),
 * seekable with seq_lseek and released with single_release_net.
 */
2751 static const struct file_operations ipv6_route_proc_fops
= {
2752 .owner
= THIS_MODULE
,
2753 .open
= ipv6_route_open
,
2755 .llseek
= seq_lseek
,
2756 .release
= single_release_net
,
/*
 * seq_file show handler for the routing statistics: prints seven hex fields
 * for the namespace in seq->private - fib_nodes, fib_route_nodes,
 * fib_rt_alloc, fib_rt_entries, fib_rt_cache, the current dst entry count
 * (dst_entries_get_slow) and fib_discarded_routes.
 */
2759 static int rt6_stats_seq_show(struct seq_file
*seq
, void *v
)
2761 struct net
*net
= (struct net
*)seq
->private;
2762 seq_printf(seq
, "%04x %04x %04x %04x %04x %04x %04x\n",
2763 net
->ipv6
.rt6_stats
->fib_nodes
,
2764 net
->ipv6
.rt6_stats
->fib_route_nodes
,
2765 net
->ipv6
.rt6_stats
->fib_rt_alloc
,
2766 net
->ipv6
.rt6_stats
->fib_rt_entries
,
2767 net
->ipv6
.rt6_stats
->fib_rt_cache
,
2768 dst_entries_get_slow(&net
->ipv6
.ip6_dst_ops
),
2769 net
->ipv6
.rt6_stats
->fib_discarded_routes
);
/* open() handler for the routing statistics file: a namespace-aware single_open bound to rt6_stats_seq_show(). */
2774 static int rt6_stats_seq_open(struct inode
*inode
, struct file
*file
)
2776 return single_open_net(inode
, file
, rt6_stats_seq_show
);
/*
 * File operations for the routing statistics file: opened via
 * rt6_stats_seq_open(), seekable with seq_lseek and released with
 * single_release_net.
 */
2779 static const struct file_operations rt6_stats_seq_fops
= {
2780 .owner
= THIS_MODULE
,
2781 .open
= rt6_stats_seq_open
,
2783 .llseek
= seq_lseek
,
2784 .release
= single_release_net
,
2786 #endif /* CONFIG_PROC_FS */
2788 #ifdef CONFIG_SYSCTL
/*
 * sysctl handler for the "flush" entry: stores the written value through
 * proc_dointvec() and runs the FIB garbage collector for the namespace kept
 * in ctl->extra1.  A delay <= 0 is passed to fib6_run_gc() as ~0UL.
 * NOTE(review): 'delay' is read from flush_delay before proc_dointvec()
 * runs, so the gc call uses the value from before this write - confirm this
 * is intended.  The write-only check and return value are not visible in
 * this listing.
 */
2791 int ipv6_sysctl_rtcache_flush(ctl_table
*ctl
, int write
,
2792 void __user
*buffer
, size_t *lenp
, loff_t
*ppos
)
2799 net
= (struct net
*)ctl
->extra1
;
2800 delay
= net
->ipv6
.sysctl
.flush_delay
;
2801 proc_dointvec(ctl
, write
, buffer
, lenp
, ppos
);
2802 fib6_run_gc(delay
<= 0 ? ~0UL : (unsigned long)delay
, net
);
/*
 * Template for the per-namespace IPv6 route sysctl table; the .data pointers
 * here reference init_net and are re-pointed per namespace by
 * ipv6_route_sysctl_init(), which addresses entries by index - keep the
 * entry order in sync with that function.
 * Entries, in order: flush, gc_thresh, max_size, gc_min_interval,
 * gc_timeout, gc_interval, gc_elasticity, mtu_expires, min_adv_mss,
 * gc_min_interval_ms.  gc_min_interval and gc_min_interval_ms expose the
 * same variable through different handlers (jiffies vs. ms_jiffies).
 */
2806 ctl_table ipv6_route_table_template
[] = {
2808 .procname
= "flush",
2809 .data
= &init_net
.ipv6
.sysctl
.flush_delay
,
2810 .maxlen
= sizeof(int),
2812 .proc_handler
= ipv6_sysctl_rtcache_flush
2815 .procname
= "gc_thresh",
2816 .data
= &ip6_dst_ops_template
.gc_thresh
,
2817 .maxlen
= sizeof(int),
2819 .proc_handler
= proc_dointvec
,
2822 .procname
= "max_size",
2823 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_max_size
,
2824 .maxlen
= sizeof(int),
2826 .proc_handler
= proc_dointvec
,
2829 .procname
= "gc_min_interval",
2830 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_gc_min_interval
,
2831 .maxlen
= sizeof(int),
2833 .proc_handler
= proc_dointvec_jiffies
,
2836 .procname
= "gc_timeout",
2837 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_gc_timeout
,
2838 .maxlen
= sizeof(int),
2840 .proc_handler
= proc_dointvec_jiffies
,
2843 .procname
= "gc_interval",
2844 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_gc_interval
,
2845 .maxlen
= sizeof(int),
2847 .proc_handler
= proc_dointvec_jiffies
,
2850 .procname
= "gc_elasticity",
2851 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_gc_elasticity
,
2852 .maxlen
= sizeof(int),
2854 .proc_handler
= proc_dointvec
,
2857 .procname
= "mtu_expires",
2858 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_mtu_expires
,
2859 .maxlen
= sizeof(int),
2861 .proc_handler
= proc_dointvec_jiffies
,
2864 .procname
= "min_adv_mss",
2865 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_min_advmss
,
2866 .maxlen
= sizeof(int),
2868 .proc_handler
= proc_dointvec
,
2871 .procname
= "gc_min_interval_ms",
2872 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_gc_min_interval
,
2873 .maxlen
= sizeof(int),
2875 .proc_handler
= proc_dointvec_ms_jiffies
,
/*
 * Build the route sysctl table for one network namespace.
 *
 * net: namespace whose fields the new table should expose.
 *
 * Duplicates ipv6_route_table_template with kmemdup() and repoints the
 * .data member of entries 0..9 at the corresponding fields of 'net'.
 * Entry 0 ("flush") additionally gets extra1 = net, which is where
 * ipv6_sysctl_rtcache_flush() finds the namespace.
 *
 * The indices used below must stay in step with the entry order of
 * ipv6_route_table_template.
 *
 * NOTE(review): the allocation-failure check and the return statement are
 * not part of this extract; presumably the duplicated table (or NULL) is
 * returned - confirm against the full source.
 */
2880 struct ctl_table
* __net_init
ipv6_route_sysctl_init(struct net
*net
)
2882 struct ctl_table
*table
;
2884 table
= kmemdup(ipv6_route_table_template
,
2885 sizeof(ipv6_route_table_template
),
/* "flush" */
2889 table
[0].data
= &net
->ipv6
.sysctl
.flush_delay
;
2890 table
[0].extra1
= net
;
/* "gc_thresh" lives in the per-namespace dst ops, not in .sysctl */
2891 table
[1].data
= &net
->ipv6
.ip6_dst_ops
.gc_thresh
;
/* "max_size" */
2892 table
[2].data
= &net
->ipv6
.sysctl
.ip6_rt_max_size
;
/* "gc_min_interval" */
2893 table
[3].data
= &net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
;
/* "gc_timeout" */
2894 table
[4].data
= &net
->ipv6
.sysctl
.ip6_rt_gc_timeout
;
/* "gc_interval" */
2895 table
[5].data
= &net
->ipv6
.sysctl
.ip6_rt_gc_interval
;
/* "gc_elasticity" */
2896 table
[6].data
= &net
->ipv6
.sysctl
.ip6_rt_gc_elasticity
;
/* "mtu_expires" */
2897 table
[7].data
= &net
->ipv6
.sysctl
.ip6_rt_mtu_expires
;
/* "min_adv_mss" */
2898 table
[8].data
= &net
->ipv6
.sysctl
.ip6_rt_min_advmss
;
/* "gc_min_interval_ms": same field as entry 3 */
2899 table
[9].data
= &net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
;
/*
 * Per-namespace initialisation of the IPv6 routing state (installed as
 * the .init hook of ip6_route_net_ops).
 *
 * net: namespace being set up.
 *
 * Steps visible in this extract:
 *  - copy ip6_dst_ops_template into net->ipv6.ip6_dst_ops and run
 *    dst_entries_init() on it;
 *  - kmemdup() the null-entry template, make its dst.path point at
 *    itself, attach the per-namespace dst ops and initialise its metrics
 *    from ip6_template_metrics;
 *  - with CONFIG_IPV6_MULTIPLE_TABLES, do the same for the prohibit and
 *    blackhole entries;
 *  - load the default values of the route sysctls and ip6_rt_gc_expire.
 *
 * On failure the goto labels at the end release what was already set up,
 * in reverse order of acquisition.
 *
 * NOTE(review): the return statements, the remaining kmemdup() arguments
 * and some label/#endif lines are missing from this extract.
 */
2906 static int __net_init
ip6_route_net_init(struct net
*net
)
/* Start from the global template; each namespace owns its own dst ops. */
2910 memcpy(&net
->ipv6
.ip6_dst_ops
, &ip6_dst_ops_template
,
2911 sizeof(net
->ipv6
.ip6_dst_ops
));
2913 if (dst_entries_init(&net
->ipv6
.ip6_dst_ops
) < 0)
2914 goto out_ip6_dst_ops
;
/* Per-namespace copy of the null entry. */
2916 net
->ipv6
.ip6_null_entry
= kmemdup(&ip6_null_entry_template
,
2917 sizeof(*net
->ipv6
.ip6_null_entry
),
2919 if (!net
->ipv6
.ip6_null_entry
)
2920 goto out_ip6_dst_entries
;
/* The entry is its own dst path. */
2921 net
->ipv6
.ip6_null_entry
->dst
.path
=
2922 (struct dst_entry
*)net
->ipv6
.ip6_null_entry
;
2923 net
->ipv6
.ip6_null_entry
->dst
.ops
= &net
->ipv6
.ip6_dst_ops
;
2924 dst_init_metrics(&net
->ipv6
.ip6_null_entry
->dst
,
2925 ip6_template_metrics
, true);
2927 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
/* Per-namespace copy of the prohibit entry, set up like the null entry. */
2928 net
->ipv6
.ip6_prohibit_entry
= kmemdup(&ip6_prohibit_entry_template
,
2929 sizeof(*net
->ipv6
.ip6_prohibit_entry
),
2931 if (!net
->ipv6
.ip6_prohibit_entry
)
2932 goto out_ip6_null_entry
;
2933 net
->ipv6
.ip6_prohibit_entry
->dst
.path
=
2934 (struct dst_entry
*)net
->ipv6
.ip6_prohibit_entry
;
2935 net
->ipv6
.ip6_prohibit_entry
->dst
.ops
= &net
->ipv6
.ip6_dst_ops
;
2936 dst_init_metrics(&net
->ipv6
.ip6_prohibit_entry
->dst
,
2937 ip6_template_metrics
, true);
/* Per-namespace copy of the blackhole entry, set up like the null entry. */
2939 net
->ipv6
.ip6_blk_hole_entry
= kmemdup(&ip6_blk_hole_entry_template
,
2940 sizeof(*net
->ipv6
.ip6_blk_hole_entry
),
2942 if (!net
->ipv6
.ip6_blk_hole_entry
)
2943 goto out_ip6_prohibit_entry
;
2944 net
->ipv6
.ip6_blk_hole_entry
->dst
.path
=
2945 (struct dst_entry
*)net
->ipv6
.ip6_blk_hole_entry
;
2946 net
->ipv6
.ip6_blk_hole_entry
->dst
.ops
= &net
->ipv6
.ip6_dst_ops
;
2947 dst_init_metrics(&net
->ipv6
.ip6_blk_hole_entry
->dst
,
2948 ip6_template_metrics
, true);
/*
 * Default values for the fields exposed through
 * ipv6_route_table_template.
 */
2951 net
->ipv6
.sysctl
.flush_delay
= 0;
2952 net
->ipv6
.sysctl
.ip6_rt_max_size
= 4096;
2953 net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
= HZ
/ 2;
2954 net
->ipv6
.sysctl
.ip6_rt_gc_timeout
= 60*HZ
;
2955 net
->ipv6
.sysctl
.ip6_rt_gc_interval
= 30*HZ
;
2956 net
->ipv6
.sysctl
.ip6_rt_gc_elasticity
= 9;
2957 net
->ipv6
.sysctl
.ip6_rt_mtu_expires
= 10*60*HZ
;
2958 net
->ipv6
.sysctl
.ip6_rt_min_advmss
= IPV6_MIN_MTU
- 20 - 40;
2960 net
->ipv6
.ip6_rt_gc_expire
= 30*HZ
;
/* Error unwinding: each label frees what was allocated before the failure. */
2966 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2967 out_ip6_prohibit_entry
:
2968 kfree(net
->ipv6
.ip6_prohibit_entry
);
2970 kfree(net
->ipv6
.ip6_null_entry
);
2972 out_ip6_dst_entries
:
2973 dst_entries_destroy(&net
->ipv6
.ip6_dst_ops
);
/*
 * Per-namespace teardown (installed as the .exit hook of
 * ip6_route_net_ops).
 *
 * net: namespace being torn down.
 *
 * Frees the entries allocated with kmemdup() in ip6_route_net_init() -
 * the null entry always, the prohibit and blackhole entries only with
 * CONFIG_IPV6_MULTIPLE_TABLES - and then destroys the namespace's dst
 * entries accounting.
 */
2978 static void __net_exit
ip6_route_net_exit(struct net
*net
)
2980 kfree(net
->ipv6
.ip6_null_entry
);
2981 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2982 kfree(net
->ipv6
.ip6_prohibit_entry
);
2983 kfree(net
->ipv6
.ip6_blk_hole_entry
);
2985 dst_entries_destroy(&net
->ipv6
.ip6_dst_ops
);
/*
 * Late per-namespace initialisation (installed as the .init hook of
 * ip6_route_net_late_ops).
 *
 * net: namespace being set up.
 *
 * With CONFIG_PROC_FS, creates the "ipv6_route" proc entry (mode argument
 * 0) and the "rt6_stats" proc entry (mode argument S_IRUGO).
 *
 * NOTE(review): the results of proc_net_fops_create() are not captured in
 * the lines visible here; the return statement is missing from this
 * extract.
 */
2988 static int __net_init
ip6_route_net_init_late(struct net
*net
)
2990 #ifdef CONFIG_PROC_FS
2991 proc_net_fops_create(net
, "ipv6_route", 0, &ipv6_route_proc_fops
);
2992 proc_net_fops_create(net
, "rt6_stats", S_IRUGO
, &rt6_stats_seq_fops
);
/*
 * Late per-namespace teardown (installed as the .exit hook of
 * ip6_route_net_late_ops).
 *
 * net: namespace being torn down.
 *
 * With CONFIG_PROC_FS, removes the "ipv6_route" and "rt6_stats" proc
 * entries, i.e. the two names created by ip6_route_net_init_late().
 */
2997 static void __net_exit
ip6_route_net_exit_late(struct net
*net
)
2999 #ifdef CONFIG_PROC_FS
3000 proc_net_remove(net
, "ipv6_route");
3001 proc_net_remove(net
, "rt6_stats");
/*
 * Per-namespace hooks for the core routing state: ip6_route_net_init on
 * setup, ip6_route_net_exit on teardown.  Registered from
 * ip6_route_init() with register_pernet_subsys().
 */
3005 static struct pernet_operations ip6_route_net_ops
= {
3006 .init
= ip6_route_net_init
,
3007 .exit
= ip6_route_net_exit
,
/*
 * Per-namespace hooks for the late stage (the proc entries):
 * ip6_route_net_init_late on setup, ip6_route_net_exit_late on teardown.
 * Registered from ip6_route_init() after ip6_route_net_ops.
 */
3010 static struct pernet_operations ip6_route_net_late_ops
= {
3011 .init
= ip6_route_net_init_late
,
3012 .exit
= ip6_route_net_exit_late
,
/*
 * Netdevice notifier block whose callback is ip6_route_dev_notify.
 * Registered in ip6_route_init() and unregistered in ip6_route_cleanup().
 */
3015 static struct notifier_block ip6_route_dev_notifier
= {
3016 .notifier_call
= ip6_route_dev_notify
,
/*
 * One-time initialisation of the IPv6 routing layer.
 *
 * Sequence visible in this extract:
 *  1. create the "ip6_dst_cache" slab cache, sized for struct rt6_info,
 *     and store it in ip6_dst_ops_template;
 *  2. dst_entries_init() for ip6_dst_blackhole_ops;
 *  3. register ip6_route_net_ops;
 *  4. let ip6_dst_blackhole_ops use the same slab cache;
 *  5. attach init_net's loopback device to init_net's special route
 *     entries;
 *  6. fib6_rules_init();
 *  7. register ip6_route_net_late_ops;
 *  8. register the RTM_NEWROUTE / RTM_DELROUTE / RTM_GETROUTE handlers;
 *  9. register the netdevice notifier.
 *
 * The labels at the end undo these steps in reverse order when a later
 * step fails.
 *
 * NOTE(review): several lines (the 'if (ret)' tests, a few intermediate
 * calls and labels, and the return statements) are missing from this
 * extract, so the list above is not necessarily complete.
 */
3020 int __init
ip6_route_init(void)
3025 ip6_dst_ops_template
.kmem_cachep
=
3026 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info
), 0,
3027 SLAB_HWCACHE_ALIGN
, NULL
);
3028 if (!ip6_dst_ops_template
.kmem_cachep
)
3031 ret
= dst_entries_init(&ip6_dst_blackhole_ops
);
3033 goto out_kmem_cache
;
3035 ret
= register_pernet_subsys(&ip6_route_net_ops
);
3037 goto out_dst_entries
;
/* The blackhole ops allocate from the same slab cache as regular routes. */
3039 ip6_dst_blackhole_ops
.kmem_cachep
= ip6_dst_ops_template
.kmem_cachep
;
3041 /* Registering of the loopback is done before this portion of code,
3042 * the loopback reference in rt6_info will not be taken, do it
3043 * manually for init_net */
3044 init_net
.ipv6
.ip6_null_entry
->dst
.dev
= init_net
.loopback_dev
;
3045 init_net
.ipv6
.ip6_null_entry
->rt6i_idev
= in6_dev_get(init_net
.loopback_dev
);
3046 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3047 init_net
.ipv6
.ip6_prohibit_entry
->dst
.dev
= init_net
.loopback_dev
;
3048 init_net
.ipv6
.ip6_prohibit_entry
->rt6i_idev
= in6_dev_get(init_net
.loopback_dev
);
3049 init_net
.ipv6
.ip6_blk_hole_entry
->dst
.dev
= init_net
.loopback_dev
;
3050 init_net
.ipv6
.ip6_blk_hole_entry
->rt6i_idev
= in6_dev_get(init_net
.loopback_dev
);
/*
 * NOTE(review): the call whose failure leads to the goto below is not
 * visible in this extract.
 */
3054 goto out_register_subsys
;
3060 ret
= fib6_rules_init();
3064 ret
= register_pernet_subsys(&ip6_route_net_late_ops
);
/*
 * 'fib6_rules_init' here is a goto label, not a call: the label shares
 * its spelling with the function invoked above.
 */
3066 goto fib6_rules_init
;
/* A non-zero result from any of the three registrations aborts the init. */
3069 if (__rtnl_register(PF_INET6
, RTM_NEWROUTE
, inet6_rtm_newroute
, NULL
, NULL
) ||
3070 __rtnl_register(PF_INET6
, RTM_DELROUTE
, inet6_rtm_delroute
, NULL
, NULL
) ||
3071 __rtnl_register(PF_INET6
, RTM_GETROUTE
, inet6_rtm_getroute
, NULL
, NULL
))
3072 goto out_register_late_subsys
;
3074 ret
= register_netdevice_notifier(&ip6_route_dev_notifier
);
/*
 * NOTE(review): this path shares its label with the rtnl registration
 * failure above; no unregistration of the rtnl handlers is visible in
 * this extract - confirm whether one is needed.
 */
3076 goto out_register_late_subsys
;
/* Error unwinding, most recently completed step first. */
3081 out_register_late_subsys
:
3082 unregister_pernet_subsys(&ip6_route_net_late_ops
);
3084 fib6_rules_cleanup();
3089 out_register_subsys
:
3090 unregister_pernet_subsys(&ip6_route_net_ops
);
3092 dst_entries_destroy(&ip6_dst_blackhole_ops
);
3094 kmem_cache_destroy(ip6_dst_ops_template
.kmem_cachep
);
/*
 * Tear down what ip6_route_init() set up, in reverse order: the netdevice
 * notifier, the late per-namespace ops, the fib6 rules, the core
 * per-namespace ops, the blackhole dst entries accounting and finally the
 * slab cache held in ip6_dst_ops_template.
 *
 * NOTE(review): original lines 3103-3104 are missing from this extract,
 * so further cleanup calls may sit between fib6_rules_cleanup() and the
 * unregistration of ip6_route_net_ops.
 */
3098 void ip6_route_cleanup(void)
3100 unregister_netdevice_notifier(&ip6_route_dev_notifier
);
3101 unregister_pernet_subsys(&ip6_route_net_late_ops
);
3102 fib6_rules_cleanup();
3105 unregister_pernet_subsys(&ip6_route_net_ops
);
3106 dst_entries_destroy(&ip6_dst_blackhole_ops
);
3107 kmem_cache_destroy(ip6_dst_ops_template
.kmem_cachep
);