net: Allow userns root to control ipv6
[deliverable/linux.git] / net / ipv6 / route.c
1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14 /* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
23 * Ville Nuorvala
24 * Fixed routing subtrees.
25 */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
61
62 #include <asm/uaccess.h>
63
64 #ifdef CONFIG_SYSCTL
65 #include <linux/sysctl.h>
66 #endif
67
68 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
69 const struct in6_addr *dest);
70 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
71 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
72 static unsigned int ip6_mtu(const struct dst_entry *dst);
73 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
74 static void ip6_dst_destroy(struct dst_entry *);
75 static void ip6_dst_ifdown(struct dst_entry *,
76 struct net_device *dev, int how);
77 static int ip6_dst_gc(struct dst_ops *ops);
78
79 static int ip6_pkt_discard(struct sk_buff *skb);
80 static int ip6_pkt_discard_out(struct sk_buff *skb);
81 static void ip6_link_failure(struct sk_buff *skb);
82 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
83 struct sk_buff *skb, u32 mtu);
84 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
85 struct sk_buff *skb);
86
87 #ifdef CONFIG_IPV6_ROUTE_INFO
88 static struct rt6_info *rt6_add_route_info(struct net *net,
89 const struct in6_addr *prefix, int prefixlen,
90 const struct in6_addr *gwaddr, int ifindex,
91 unsigned int pref);
92 static struct rt6_info *rt6_get_route_info(struct net *net,
93 const struct in6_addr *prefix, int prefixlen,
94 const struct in6_addr *gwaddr, int ifindex);
95 #endif
96
/* Copy-on-write the dst metrics into the inet_peer's private metrics
 * array.  Only host routes (DST_HOST) get writable metrics; for all
 * others the shared read-only metrics are kept and NULL is returned.
 */
static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
{
	struct rt6_info *rt = (struct rt6_info *) dst;
	struct inet_peer *peer;
	u32 *p = NULL;

	if (!(rt->dst.flags & DST_HOST))
		return NULL;

	peer = rt6_get_peer_create(rt);
	if (peer) {
		u32 *old_p = __DST_METRICS_PTR(old);
		unsigned long prev, new;

		p = peer->metrics;
		/* First user of this peer's metrics: seed them from the
		 * current (read-only) values.
		 */
		if (inet_metrics_new(peer))
			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);

		new = (unsigned long) p;
		prev = cmpxchg(&dst->_metrics, old, new);

		if (prev != old) {
			/* Lost the race: another CPU installed metrics
			 * first.  Use theirs, unless they are read-only.
			 */
			p = __DST_METRICS_PTR(prev);
			if (prev & DST_METRICS_READ_ONLY)
				p = NULL;
		}
	}
	return p;
}
126
127 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
128 struct sk_buff *skb,
129 const void *daddr)
130 {
131 struct in6_addr *p = &rt->rt6i_gateway;
132
133 if (!ipv6_addr_any(p))
134 return (const void *) p;
135 else if (skb)
136 return &ipv6_hdr(skb)->daddr;
137 return daddr;
138 }
139
/* dst_ops->neigh_lookup: find (or create) the ndisc neighbour entry
 * for this route's next hop on dst->dev.
 */
static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
					  struct sk_buff *skb,
					  const void *daddr)
{
	struct rt6_info *rt = (struct rt6_info *) dst;
	struct neighbour *n;

	daddr = choose_neigh_daddr(rt, skb, daddr);
	n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
	if (n)
		return n;
	/* Not cached yet - create it (may return ERR_PTR on failure). */
	return neigh_create(&nd_tbl, daddr, dst->dev);
}
153
/* Attach a neighbour entry for rt's gateway to rt->n.
 * Returns 0 on success or a negative errno if neigh_create() fails.
 */
static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
{
	struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
	if (!n) {
		n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
		if (IS_ERR(n))
			return PTR_ERR(n);
	}
	/* Reference obtained from lookup/create is kept by the route. */
	rt->n = n;

	return 0;
}
166
/* Template for the per-namespace IPv6 dst_ops (copied into
 * net->ipv6.ip6_dst_ops at namespace init).
 */
static struct dst_ops ip6_dst_ops_template = {
	.family			= AF_INET6,
	.protocol		= cpu_to_be16(ETH_P_IPV6),
	.gc			= ip6_dst_gc,
	.gc_thresh		= 1024,
	.check			= ip6_dst_check,
	.default_advmss		= ip6_default_advmss,
	.mtu			= ip6_mtu,
	.cow_metrics		= ipv6_cow_metrics,
	.destroy		= ip6_dst_destroy,
	.ifdown			= ip6_dst_ifdown,
	.negative_advice	= ip6_negative_advice,
	.link_failure		= ip6_link_failure,
	.update_pmtu		= ip6_rt_update_pmtu,
	.redirect		= rt6_do_redirect,
	.local_out		= __ip6_local_out,
	.neigh_lookup		= ip6_neigh_lookup,
};
185
186 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
187 {
188 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
189
190 return mtu ? : dst->dev->mtu;
191 }
192
/* Blackhole routes intentionally ignore PMTU updates. */
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					 struct sk_buff *skb, u32 mtu)
{
}
197
/* Blackhole routes intentionally ignore redirects. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}
202
/* Blackhole routes never get writable metrics. */
static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
					 unsigned long old)
{
	return NULL;
}
208
/* dst_ops for blackhole copies made by ip6_blackhole_route(); these
 * dsts drop packets and ignore PMTU/redirect events.
 */
static struct dst_ops ip6_dst_blackhole_ops = {
	.family			= AF_INET6,
	.protocol		= cpu_to_be16(ETH_P_IPV6),
	.destroy		= ip6_dst_destroy,
	.check			= ip6_dst_check,
	.mtu			= ip6_blackhole_mtu,
	.default_advmss		= ip6_default_advmss,
	.update_pmtu		= ip6_rt_blackhole_update_pmtu,
	.redirect		= ip6_rt_blackhole_redirect,
	.cow_metrics		= ip6_rt_blackhole_cow_metrics,
	.neigh_lookup		= ip6_neigh_lookup,
};
221
/* Initial metrics for the template routes below (hop limit 0 means
 * "use the per-device default").
 */
static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 0,
};
225
/* Template for the per-namespace null route: rejects traffic with
 * -ENETUNREACH.  Used as the "no route" result everywhere.
 */
static const struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -ENETUNREACH,
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};
240
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/* Template for the "prohibit" route: rejects traffic with -EACCES
 * (administratively prohibited).
 */
static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/* Template for the "blackhole" route: silently discards traffic
 * (dst_discard) with error -EINVAL.
 */
static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
277
/* Allocate an rt6_info from the namespace's IPv6 dst_ops and
 * zero-initialize the rt6-specific tail of the structure.
 * @table may be NULL, in which case the namespace-wide peer base
 * is used instead of the table's.
 */
static inline struct rt6_info *ip6_dst_alloc(struct net *net,
					     struct net_device *dev,
					     int flags,
					     struct fib6_table *table)
{
	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
					0, DST_OBSOLETE_FORCE_CHK, flags);

	if (rt) {
		struct dst_entry *dst = &rt->dst;

		/* Clear everything past the embedded dst_entry. */
		memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
		rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
		rt->rt6i_genid = rt_genid(net);
		INIT_LIST_HEAD(&rt->rt6i_siblings);
		rt->rt6i_nsiblings = 0;
	}
	return rt;
}
298
/* dst_ops->destroy: drop every reference the rt6_info holds
 * (neighbour, metrics, inet6_dev, parent "from" dst, inet_peer).
 */
static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;

	if (rt->n)
		neigh_release(rt->n);

	/* Non-host routes own their metrics array (see ipv6_cow_metrics). */
	if (!(rt->dst.flags & DST_HOST))
		dst_destroy_metrics_generic(dst);

	if (idev) {
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
	}

	/* A clone without its own expiry borrows dst->from's; release it. */
	if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
		dst_release(dst->from);

	if (rt6_has_peer(rt)) {
		struct inet_peer *peer = rt6_peer_ptr(rt);
		inet_putpeer(peer);
	}
}
323
/* Bind an inet_peer for rt's destination address to the route.
 * @create: passed through to inet_getpeer_v6 - whether to allocate
 * a new peer if none exists.  Loses the race gracefully if another
 * CPU bound a peer first.
 */
void rt6_bind_peer(struct rt6_info *rt, int create)
{
	struct inet_peer_base *base;
	struct inet_peer *peer;

	base = inetpeer_base_ptr(rt->_rt6i_peer);
	if (!base)
		return;

	peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
	if (peer) {
		/* rt6_set_peer fails if a peer is already set; drop ours. */
		if (!rt6_set_peer(rt, peer))
			inet_putpeer(peer);
	}
}
339
/* dst_ops->ifdown: a device is going away.  Re-point the route's
 * inet6_dev and neighbour at the namespace loopback device so the
 * dst stays valid until its last reference is dropped.
 */
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			   int how)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *loopback_dev =
		dev_net(dev)->loopback_dev;

	if (dev != loopback_dev) {
		if (idev && idev->dev == dev) {
			struct inet6_dev *loopback_idev =
				in6_dev_get(loopback_dev);
			if (loopback_idev) {
				rt->rt6i_idev = loopback_idev;
				in6_dev_put(idev);
			}
		}
		if (rt->n && rt->n->dev == dev) {
			/* Migrate the neighbour's device reference too. */
			rt->n->dev = loopback_dev;
			dev_hold(loopback_dev);
			dev_put(dev);
		}
	}
}
364
365 static bool rt6_check_expired(const struct rt6_info *rt)
366 {
367 if (rt->rt6i_flags & RTF_EXPIRES) {
368 if (time_after(jiffies, rt->dst.expires))
369 return true;
370 } else if (rt->dst.from) {
371 return rt6_check_expired((struct rt6_info *) rt->dst.from);
372 }
373 return false;
374 }
375
376 static bool rt6_need_strict(const struct in6_addr *daddr)
377 {
378 return ipv6_addr_type(daddr) &
379 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
380 }
381
/* Multipath route selection:
 * Hash based function using packet header and flowlabel.
 * Adapted from fib_info_hashfn()
 */
static int rt6_info_hash_nhsfn(unsigned int candidate_count,
			       const struct flowi6 *fl6)
{
	unsigned int val = fl6->flowi6_proto;

	val ^= (__force u32)fl6->daddr.s6_addr32[0];
	val ^= (__force u32)fl6->daddr.s6_addr32[1];
	val ^= (__force u32)fl6->daddr.s6_addr32[2];
	val ^= (__force u32)fl6->daddr.s6_addr32[3];

	val ^= (__force u32)fl6->saddr.s6_addr32[0];
	val ^= (__force u32)fl6->saddr.s6_addr32[1];
	val ^= (__force u32)fl6->saddr.s6_addr32[2];
	val ^= (__force u32)fl6->saddr.s6_addr32[3];

	/* Ports/ICMP fields are only meaningful if the transport header
	 * is not encapsulated.
	 */
	switch (fl6->flowi6_proto) {
	case IPPROTO_UDP:
	case IPPROTO_TCP:
	case IPPROTO_SCTP:
		val ^= (__force u16)fl6->fl6_sport;
		val ^= (__force u16)fl6->fl6_dport;
		break;

	case IPPROTO_ICMPV6:
		val ^= (__force u16)fl6->fl6_icmp_type;
		val ^= (__force u16)fl6->fl6_icmp_code;
		break;
	}
	/* RFC 6438 recommends using the flow label for ECMP hashing. */
	val ^= (__force u32)fl6->flowlabel;

	/* Final mixing; this function may need tuning. */
	val = val ^ (val >> 7) ^ (val >> 12);
	return val % candidate_count;
}
422
423 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
424 struct flowi6 *fl6)
425 {
426 struct rt6_info *sibling, *next_sibling;
427 int route_choosen;
428
429 route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
430 /* Don't change the route, if route_choosen == 0
431 * (siblings does not include ourself)
432 */
433 if (route_choosen)
434 list_for_each_entry_safe(sibling, next_sibling,
435 &match->rt6i_siblings, rt6i_siblings) {
436 route_choosen--;
437 if (route_choosen == 0) {
438 match = sibling;
439 break;
440 }
441 }
442 return match;
443 }
444
445 /*
446 * Route lookup. Any table->tb6_lock is implied.
447 */
448
/* Walk the list of routes with the same prefix and pick the one
 * matching the requested output interface (@oif) and/or source
 * address (@saddr).  Loopback routes are remembered as a fallback.
 * Returns the input @rt unchanged when no constraint applies, or
 * the null entry when a strict interface match fails.
 */
static inline struct rt6_info *rt6_device_match(struct net *net,
						struct rt6_info *rt,
						const struct in6_addr *saddr,
						int oif,
						int flags)
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

	if (!oif && ipv6_addr_any(saddr))
		goto out;

	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
		struct net_device *dev = sprt->dst.dev;

		if (oif) {
			if (dev->ifindex == oif)
				return sprt;
			if (dev->flags & IFF_LOOPBACK) {
				/* Loopback route: acceptable only if its
				 * underlying idev matches oif, or as a
				 * last-resort local candidate.
				 */
				if (!sprt->rt6i_idev ||
				    sprt->rt6i_idev->dev->ifindex != oif) {
					if (flags & RT6_LOOKUP_F_IFACE && oif)
						continue;
					if (local && (!oif ||
						      local->rt6i_idev->dev->ifindex == oif))
						continue;
				}
				local = sprt;
			}
		} else {
			/* No oif: match on the source address instead. */
			if (ipv6_chk_addr(net, saddr, dev,
					  flags & RT6_LOOKUP_F_IFACE))
				return sprt;
		}
	}

	if (oif) {
		if (local)
			return local;

		if (flags & RT6_LOOKUP_F_IFACE)
			return net->ipv6.ip6_null_entry;
	}
out:
	return rt;
}
495
#ifdef CONFIG_IPV6_ROUTER_PREF
/* Probe a router's reachability by sending a unicast-solicit NS if
 * its neighbour entry has not been confirmed recently.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	neigh = rt ? rt->n : NULL;
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;
	read_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		/* Bump ->updated now to rate-limit further probes. */
		neigh->updated = jiffies;
		read_unlock_bh(&neigh->lock);

		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
	} else {
		read_unlock_bh(&neigh->lock);
	}
}
#else
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
532
533 /*
534 * Default Router Selection (RFC 2461 6.3.6)
535 */
536 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
537 {
538 struct net_device *dev = rt->dst.dev;
539 if (!oif || dev->ifindex == oif)
540 return 2;
541 if ((dev->flags & IFF_LOOPBACK) &&
542 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
543 return 1;
544 return 0;
545 }
546
/* Score the reachability of this route's next hop:
 * 2 = neighbour confirmed reachable, 1 = no gateway needed or state
 * unknown, 0 = no neighbour (or NUD_FAILED with router-pref enabled).
 */
static inline int rt6_check_neigh(struct rt6_info *rt)
{
	struct neighbour *neigh;
	int m;

	neigh = rt->n;
	if (rt->rt6i_flags & RTF_NONEXTHOP ||
	    !(rt->rt6i_flags & RTF_GATEWAY))
		m = 1;
	else if (neigh) {
		read_lock_bh(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			m = 2;
#ifdef CONFIG_IPV6_ROUTER_PREF
		else if (neigh->nud_state & NUD_FAILED)
			m = 0;
#endif
		else
			m = 1;
		read_unlock_bh(&neigh->lock);
	} else
		m = 0;
	return m;
}
571
/* Combined route score for default-router selection (RFC 2461 6.3.6):
 * device match in the low bits, RA preference (if configured) above,
 * -1 when a strict requirement (interface/reachability) is not met.
 */
static int rt6_score_route(struct rt6_info *rt, int oif,
			   int strict)
{
	int m, n;

	m = rt6_check_dev(rt, oif);
	if (!m && (strict & RT6_LOOKUP_F_IFACE))
		return -1;
#ifdef CONFIG_IPV6_ROUTER_PREF
	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
#endif
	n = rt6_check_neigh(rt);
	if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
		return -1;
	return m;
}
588
/* Compare @rt against the best candidate so far (@match, score *mpri)
 * and return the better of the two.  Losing routes are probed when
 * reachability is required, so they can win a future selection.
 */
static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
				   int *mpri, struct rt6_info *match)
{
	int m;

	if (rt6_check_expired(rt))
		goto out;

	m = rt6_score_route(rt, oif, strict);
	if (m < 0)
		goto out;

	if (m > *mpri) {
		/* New best: probe the previous winner instead. */
		if (strict & RT6_LOOKUP_F_REACHABLE)
			rt6_probe(match);
		*mpri = m;
		match = rt;
	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
		rt6_probe(rt);
	}

out:
	return match;
}
613
/* Scan all routes of the given @metric in round-robin order: first
 * from @rr_head to the end of the metric group, then from the leaf
 * back up to @rr_head.  Returns the best-scoring route or NULL.
 */
static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
				     struct rt6_info *rr_head,
				     u32 metric, int oif, int strict)
{
	struct rt6_info *rt, *match;
	int mpri = -1;

	match = NULL;
	for (rt = rr_head; rt && rt->rt6i_metric == metric;
	     rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match);
	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
	     rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match);

	return match;
}
631
/* Default router selection for a fib6 node: pick the best route at
 * the lowest metric, advancing the round-robin pointer (fn->rr_ptr)
 * when nothing matched so the next lookup tries a different router.
 */
static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
{
	struct rt6_info *match, *rt0;
	struct net *net;

	rt0 = fn->rr_ptr;
	if (!rt0)
		fn->rr_ptr = rt0 = fn->leaf;

	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);

	if (!match &&
	    (strict & RT6_LOOKUP_F_REACHABLE)) {
		struct rt6_info *next = rt0->dst.rt6_next;

		/* no entries matched; do round-robin */
		if (!next || next->rt6i_metric != rt0->rt6i_metric)
			next = fn->leaf;

		if (next != rt0)
			fn->rr_ptr = next;
	}

	net = dev_net(rt0->dst.dev);
	return match ? match : net->ipv6.ip6_null_entry;
}
658
#ifdef CONFIG_IPV6_ROUTE_INFO
/* Process a Route Information option received in a Router
 * Advertisement (RFC 4191): validate it, then add, refresh or delete
 * the corresponding RTF_ROUTEINFO route.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length:
	 * option length (in units of 8 octets) must cover the prefix.
	 */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* Prefix may be truncated in the option; zero-extend it.
		 * this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* Zero lifetime means: withdraw the route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime))
			rt6_clean_expires(rt);
		else
			rt6_set_expires(rt, jiffies + HZ * lifetime);

		ip6_rt_put(rt);
	}
	return 0;
}
#endif
731
/* Backtrack up the fib6 tree when the lookup at the current node
 * produced the null entry.  Relies on labels defined by the caller:
 * jumps to "restart" when a parent node with route info is found,
 * or to "out" at the tree root.  "fn" and "rt" must be in scope.
 */
#define BACKTRACK(__net, saddr)			\
do { \
	if (rt == __net->ipv6.ip6_null_entry) { \
		struct fib6_node *pn; \
		while (1) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
749
/* Simple (non-cloning) route lookup in one table, used by
 * ip6_route_lookup()/rt6_lookup().  Takes a use-reference on the
 * returned dst.  "restart"/"out" labels serve the BACKTRACK macro.
 */
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	rt = fn->leaf;
	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
		rt = rt6_multipath_select(rt, fl6);
	BACKTRACK(net, &fl6->saddr);
out:
	dst_use(&rt->dst, jiffies);
	read_unlock_bh(&table->tb6_lock);
	return rt;

}
771
/* Policy-routing-aware wrapper around ip6_pol_route_lookup(). */
struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
				    int flags)
{
	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
}
EXPORT_SYMBOL_GPL(ip6_route_lookup);
778
/* Convenience lookup by destination/source address.  Returns a
 * referenced rt6_info on success or NULL on error (never a dst
 * with ->error set).
 */
struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
			    const struct in6_addr *saddr, int oif, int strict)
{
	struct flowi6 fl6 = {
		.flowi6_oif = oif,
		.daddr = *daddr,
	};
	struct dst_entry *dst;
	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;

	if (saddr) {
		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	}

	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
	if (dst->error == 0)
		return (struct rt6_info *) dst;

	dst_release(dst);

	return NULL;
}

EXPORT_SYMBOL(rt6_lookup);
804
/* ip6_ins_rt is called with FREE table->tb6_lock.
   It takes new route entry, the addition fails by any reason the
   route is freed. In any case, if caller does not hold it, it may
   be destroyed.
 */

/* Insert @rt into its fib6 table under the table write lock. */
static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
{
	int err;
	struct fib6_table *table;

	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);
	err = fib6_add(&table->tb6_root, rt, info);
	write_unlock_bh(&table->tb6_lock);

	return err;
}
823
/* Insert @rt with default netlink info (namespace taken from the
 * route's device).  See the locking comment above __ip6_ins_rt().
 */
int ip6_ins_rt(struct rt6_info *rt)
{
	struct nl_info info = {
		.nl_net = dev_net(rt->dst.dev),
	};
	return __ip6_ins_rt(rt, &info);
}
831
/* Clone @ort into a host (RTF_CACHE) route for @daddr and bind a
 * neighbour to it.  On neighbour-table overflow, temporarily relaxes
 * the GC sysctls and retries once (only when not in softirq).
 * Returns the clone, or NULL on failure.
 */
static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
				      const struct in6_addr *daddr,
				      const struct in6_addr *saddr)
{
	struct rt6_info *rt;

	/*
	 *	Clone the route.
	 */

	rt = ip6_rt_copy(ort, daddr);

	if (rt) {
		/* Retrying GC from softirq context is not allowed. */
		int attempts = !in_softirq();

		if (!(rt->rt6i_flags & RTF_GATEWAY)) {
			if (ort->rt6i_dst.plen != 128 &&
			    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
				rt->rt6i_flags |= RTF_ANYCAST;
			rt->rt6i_gateway = *daddr;
		}

		rt->rt6i_flags |= RTF_CACHE;

#ifdef CONFIG_IPV6_SUBTREES
		if (rt->rt6i_src.plen && saddr) {
			rt->rt6i_src.addr = *saddr;
			rt->rt6i_src.plen = 128;
		}
#endif

	retry:
		if (rt6_bind_neighbour(rt, rt->dst.dev)) {
			struct net *net = dev_net(rt->dst.dev);
			int saved_rt_min_interval =
				net->ipv6.sysctl.ip6_rt_gc_min_interval;
			int saved_rt_elasticity =
				net->ipv6.sysctl.ip6_rt_gc_elasticity;

			if (attempts-- > 0) {
				/* Force an aggressive GC pass, then
				 * restore the saved sysctl values.
				 */
				net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
				net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;

				ip6_dst_gc(&net->ipv6.ip6_dst_ops);

				net->ipv6.sysctl.ip6_rt_gc_elasticity =
					saved_rt_elasticity;
				net->ipv6.sysctl.ip6_rt_gc_min_interval =
					saved_rt_min_interval;
				goto retry;
			}

			net_warn_ratelimited("Neighbour table overflow\n");
			dst_free(&rt->dst);
			return NULL;
		}
	}

	return rt;
}
892
/* Lightweight clone of @ort for @daddr: marks it RTF_CACHE and
 * shares the parent's neighbour (via neigh_clone) instead of
 * binding a new one.
 */
static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
					const struct in6_addr *daddr)
{
	struct rt6_info *rt = ip6_rt_copy(ort, daddr);

	if (rt) {
		rt->rt6i_flags |= RTF_CACHE;
		rt->n = neigh_clone(ort->n);
	}
	return rt;
}
904
/* Full route lookup with cloning: select a route, and if it is not
 * already a cached host route, create a per-destination clone and
 * insert it.  The table lock is dropped around the clone/insert, so
 * the whole operation restarts ("relookup") if insertion races with
 * another CPU.  Returns a route with a reference held (possibly the
 * null entry on failure).
 */
static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
				      struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt, *nrt;
	int strict = 0;
	int attempts = 3;
	int err;
	/* Hosts (non-forwarding) prefer reachable routers first. */
	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;

	strict |= flags & RT6_LOOKUP_F_IFACE;

relookup:
	read_lock_bh(&table->tb6_lock);

restart_2:
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);

restart:
	rt = rt6_select(fn, oif, strict | reachable);
	if (rt->rt6i_nsiblings && oif == 0)
		rt = rt6_multipath_select(rt, fl6);
	BACKTRACK(net, &fl6->saddr);
	if (rt == net->ipv6.ip6_null_entry ||
	    rt->rt6i_flags & RTF_CACHE)
		goto out;

	dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);

	/* Routes with a next hop get a COW clone with a bound
	 * neighbour; non-host routes get a cheap clone.
	 */
	if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
	else if (!(rt->dst.flags & DST_HOST))
		nrt = rt6_alloc_clone(rt, &fl6->daddr);
	else
		goto out2;

	ip6_rt_put(rt);
	rt = nrt ? : net->ipv6.ip6_null_entry;

	dst_hold(&rt->dst);
	if (nrt) {
		err = ip6_ins_rt(nrt);
		if (!err)
			goto out2;
	}

	if (--attempts <= 0)
		goto out2;

	/*
	 * Race condition! In the gap, when table->tb6_lock was
	 * released someone could insert this route.  Relookup.
	 */
	ip6_rt_put(rt);
	goto relookup;

out:
	/* Nothing found with the reachability requirement: retry
	 * without it before giving up.
	 */
	if (reachable) {
		reachable = 0;
		goto restart_2;
	}
	dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);
out2:
	rt->dst.lastuse = jiffies;
	rt->dst.__use++;

	return rt;
}
975
/* Input-path policy lookup: oif is the incoming interface. */
static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
					    struct flowi6 *fl6, int flags)
{
	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
}
981
/* Input route lookup; scoped destinations require a strict interface
 * match except on PIM register devices.
 */
static struct dst_entry *ip6_route_input_lookup(struct net *net,
						struct net_device *dev,
						struct flowi6 *fl6, int flags)
{
	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
		flags |= RT6_LOOKUP_F_IFACE;

	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
}
991
/* Route an incoming packet: build a flowi6 from its IPv6 header and
 * attach the resulting dst to the skb.
 */
void ip6_route_input(struct sk_buff *skb)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	struct net *net = dev_net(skb->dev);
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct flowi6 fl6 = {
		.flowi6_iif = skb->dev->ifindex,
		.daddr = iph->daddr,
		.saddr = iph->saddr,
		/* Traffic class + flow label from the first word. */
		.flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
		.flowi6_mark = skb->mark,
		.flowi6_proto = iph->nexthdr,
	};

	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
}
1008
/* Output-path policy lookup: oif is the outgoing interface. */
static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
}
1014
/* Route an outgoing flow.  @sk may be NULL; when present, its bound
 * device and source-address preferences constrain the lookup.
 */
struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
				    struct flowi6 *fl6)
{
	int flags = 0;

	/* Locally generated traffic: iif is the loopback interface. */
	fl6->flowi6_iif = LOOPBACK_IFINDEX;

	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
		flags |= RT6_LOOKUP_F_IFACE;

	if (!ipv6_addr_any(&fl6->saddr))
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	else if (sk)
		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);

	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
}

EXPORT_SYMBOL(ip6_route_output);
1034
/* Make a blackhole copy of @dst_orig (used e.g. by xfrm when a dst
 * must keep existing but stop passing traffic).  Consumes the
 * reference on @dst_orig.  Returns the new dst or ERR_PTR(-ENOMEM).
 */
struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
	struct dst_entry *new = NULL;

	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
	if (rt) {
		new = &rt->dst;

		memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
		rt6_init_peer(rt, net->ipv6.peers);

		new->__use = 1;
		new->input = dst_discard;
		new->output = dst_discard;

		if (dst_metrics_read_only(&ort->dst))
			new->_metrics = ort->dst._metrics;
		else
			dst_copy_metrics(new, &ort->dst);
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
			in6_dev_hold(rt->rt6i_idev);

		rt->rt6i_gateway = ort->rt6i_gateway;
		rt->rt6i_flags = ort->rt6i_flags;
		rt6_clean_expires(rt);
		rt->rt6i_metric = 0;

		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif

		/* NOTE(review): dst_free() here appears to hand the new dst
		 * to the dst GC while the caller still holds the initial
		 * reference from dst_alloc(..., 1, ...) - presumably the
		 * historical "allocated but not hashed" pattern; confirm
		 * against dst_alloc/dst_free semantics of this kernel.
		 */
		dst_free(new);
	}

	dst_release(dst_orig);
	return new ? new : ERR_PTR(-ENOMEM);
}
1075
1076 /*
1077 * Destination cache support functions
1078 */
1079
1080 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1081 {
1082 struct rt6_info *rt;
1083
1084 rt = (struct rt6_info *) dst;
1085
1086 /* All IPV6 dsts are created with ->obsolete set to the value
1087 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1088 * into this function always.
1089 */
1090 if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
1091 return NULL;
1092
1093 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
1094 return dst;
1095
1096 return NULL;
1097 }
1098
/* dst_ops->negative_advice: the caller had trouble with this dst.
 * Expired cached routes are deleted; non-cached routes are simply
 * released so a fresh lookup happens next time.
 */
static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *) dst;

	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			if (rt6_check_expired(rt)) {
				ip6_del_rt(rt);
				dst = NULL;
			}
		} else {
			dst_release(dst);
			dst = NULL;
		}
	}
	return dst;
}
1116
/* dst_ops->link_failure: report unreachability to the sender and
 * invalidate the route (expire cached clones immediately; bump the
 * node serial for default routes so ip6_dst_check() fails).
 */
static void ip6_link_failure(struct sk_buff *skb)
{
	struct rt6_info *rt;

	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);

	rt = (struct rt6_info *) skb_dst(skb);
	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE)
			rt6_update_expires(rt, 0);
		else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
			rt->rt6i_node->fn_sernum = -1;
	}
}
1131
/* dst_ops->update_pmtu: record a smaller path MTU on a host route.
 * MTUs below IPV6_MIN_MTU are clamped and RTAX_FEATURE_ALLFRAG set
 * (fragment everything, per RFC 2460 handling of tiny PMTUs).
 */
static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			       struct sk_buff *skb, u32 mtu)
{
	struct rt6_info *rt6 = (struct rt6_info*)dst;

	dst_confirm(dst);
	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
		struct net *net = dev_net(dst->dev);

		rt6->rt6i_flags |= RTF_MODIFIED;
		if (mtu < IPV6_MIN_MTU) {
			u32 features = dst_metric(dst, RTAX_FEATURES);
			mtu = IPV6_MIN_MTU;
			features |= RTAX_FEATURE_ALLFRAG;
			dst_metric_set(dst, RTAX_FEATURES, features);
		}
		dst_metric_set(dst, RTAX_MTU, mtu);
		/* Learned PMTU entries expire after ip6_rt_mtu_expires. */
		rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
	}
}
1152
/* Update the PMTU for the flow described by the packet in @skb
 * (as used when handling an ICMPv6 Packet Too Big).
 * @mtu is network byte order, as taken from the ICMP message.
 */
void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
		     int oif, u32 mark)
{
	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
	struct dst_entry *dst;
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_oif = oif;
	fl6.flowi6_mark = mark;
	fl6.flowi6_flags = 0;
	fl6.daddr = iph->daddr;
	fl6.saddr = iph->saddr;
	fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;

	dst = ip6_route_output(net, NULL, &fl6);
	if (!dst->error)
		ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
	dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1174
/* Socket flavour of ip6_update_pmtu(): takes oif/mark from @sk. */
void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
{
	ip6_update_pmtu(skb, sock_net(sk), mtu,
			sk->sk_bound_dev_if, sk->sk_mark);
}
EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1181
/* Apply an ICMPv6 redirect to the route for the flow described by
 * the packet in @skb.
 */
void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
{
	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
	struct dst_entry *dst;
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_oif = oif;
	fl6.flowi6_mark = mark;
	fl6.flowi6_flags = 0;
	fl6.daddr = iph->daddr;
	fl6.saddr = iph->saddr;
	fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;

	dst = ip6_route_output(net, NULL, &fl6);
	if (!dst->error)
		rt6_do_redirect(dst, NULL, skb);
	dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_redirect);
1202
1203 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1204 {
1205 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1206 }
1207 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1208
1209 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1210 {
1211 struct net_device *dev = dst->dev;
1212 unsigned int mtu = dst_mtu(dst);
1213 struct net *net = dev_net(dev);
1214
1215 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1216
1217 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1218 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1219
1220 /*
1221 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1222 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1223 * IPV6_MAXPLEN is also valid and means: "any MSS,
1224 * rely only on pmtu discovery"
1225 */
1226 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1227 mtu = IPV6_MAXPLEN;
1228 return mtu;
1229 }
1230
1231 static unsigned int ip6_mtu(const struct dst_entry *dst)
1232 {
1233 struct inet6_dev *idev;
1234 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1235
1236 if (mtu)
1237 return mtu;
1238
1239 mtu = IPV6_MIN_MTU;
1240
1241 rcu_read_lock();
1242 idev = __in6_dev_get(dst->dev);
1243 if (idev)
1244 mtu = idev->cnf.mtu6;
1245 rcu_read_unlock();
1246
1247 return mtu;
1248 }
1249
/* Singly-linked (via dst->next) list of dsts handed out by
 * icmp6_dst_alloc(); reclaimed by icmp6_dst_gc() / icmp6_clean_all().
 * Protected by icmp6_dst_lock.
 */
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
1252
/* Allocate a throw-away host route for sending an ICMPv6 message on
 * @dev towards fl6->daddr.  The dst is chained on icmp6_dst_gc_list so
 * it can be reclaimed later, and is finally passed through xfrm_lookup().
 * Returns the dst or an ERR_PTR on failure.
 */
struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
				  struct neighbour *neigh,
				  struct flowi6 *fl6)
{
	struct dst_entry *dst;
	struct rt6_info *rt;
	struct inet6_dev *idev = in6_dev_get(dev);
	struct net *net = dev_net(dev);

	if (unlikely(!idev))
		return ERR_PTR(-ENODEV);

	rt = ip6_dst_alloc(net, dev, 0, NULL);
	if (unlikely(!rt)) {
		in6_dev_put(idev);
		dst = ERR_PTR(-ENOMEM);
		goto out;
	}

	/* Use the caller-supplied neighbour if any, else resolve one. */
	if (neigh)
		neigh_hold(neigh);
	else {
		neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
		if (IS_ERR(neigh)) {
			in6_dev_put(idev);
			dst_free(&rt->dst);
			return ERR_CAST(neigh);
		}
	}

	rt->dst.flags |= DST_HOST;
	rt->dst.output  = ip6_output;
	rt->n = neigh;
	atomic_set(&rt->dst.__refcnt, 1);
	rt->rt6i_dst.addr = fl6->daddr;
	rt->rt6i_dst.plen = 128;
	rt->rt6i_idev     = idev;
	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);

	/* Chain onto the ICMP dst GC list so icmp6_dst_gc() can free it
	 * once the refcount drops.
	 */
	spin_lock_bh(&icmp6_dst_lock);
	rt->dst.next = icmp6_dst_gc_list;
	icmp6_dst_gc_list = &rt->dst;
	spin_unlock_bh(&icmp6_dst_lock);

	fib6_force_start_gc(net);

	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);

out:
	return dst;
}
1304
1305 int icmp6_dst_gc(void)
1306 {
1307 struct dst_entry *dst, **pprev;
1308 int more = 0;
1309
1310 spin_lock_bh(&icmp6_dst_lock);
1311 pprev = &icmp6_dst_gc_list;
1312
1313 while ((dst = *pprev) != NULL) {
1314 if (!atomic_read(&dst->__refcnt)) {
1315 *pprev = dst->next;
1316 dst_free(dst);
1317 } else {
1318 pprev = &dst->next;
1319 ++more;
1320 }
1321 }
1322
1323 spin_unlock_bh(&icmp6_dst_lock);
1324
1325 return more;
1326 }
1327
1328 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1329 void *arg)
1330 {
1331 struct dst_entry *dst, **pprev;
1332
1333 spin_lock_bh(&icmp6_dst_lock);
1334 pprev = &icmp6_dst_gc_list;
1335 while ((dst = *pprev) != NULL) {
1336 struct rt6_info *rt = (struct rt6_info *) dst;
1337 if (func(rt, arg)) {
1338 *pprev = dst->next;
1339 dst_free(dst);
1340 } else {
1341 pprev = &dst->next;
1342 }
1343 }
1344 spin_unlock_bh(&icmp6_dst_lock);
1345 }
1346
/* dst_ops->gc hook for the per-netns IPv6 dst cache.  Runs fib6 GC when
 * either the minimum interval has elapsed or the cache exceeds
 * ip6_rt_max_size; ip6_rt_gc_expire grows on pressure and decays by the
 * elasticity sysctl.  Returns non-zero when the cache is still over the
 * limit (telling the dst layer that allocation should fail).
 */
static int ip6_dst_gc(struct dst_ops *ops)
{
	unsigned long now = jiffies;
	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
	int entries;

	entries = dst_entries_get_fast(ops);
	/* Rate-limit GC unless the cache is over the hard limit. */
	if (time_after(rt_last_gc + rt_min_interval, now) &&
	    entries <= rt_max_size)
		goto out;

	net->ipv6.ip6_rt_gc_expire++;
	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
	net->ipv6.ip6_rt_last_gc = now;
	entries = dst_entries_get_slow(ops);
	/* GC helped: reset the aggressiveness to half the timeout. */
	if (entries < ops->gc_thresh)
		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
out:
	/* Exponential decay of GC aggressiveness. */
	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
	return entries > rt_max_size;
}
1373
1374 int ip6_dst_hoplimit(struct dst_entry *dst)
1375 {
1376 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1377 if (hoplimit == 0) {
1378 struct net_device *dev = dst->dev;
1379 struct inet6_dev *idev;
1380
1381 rcu_read_lock();
1382 idev = __in6_dev_get(dev);
1383 if (idev)
1384 hoplimit = idev->cnf.hop_limit;
1385 else
1386 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1387 rcu_read_unlock();
1388 }
1389 return hoplimit;
1390 }
1391 EXPORT_SYMBOL(ip6_dst_hoplimit);
1392
/*
 *	Route addition / deletion
 */
1396
/* Add the route described by @cfg to the FIB table selected by
 * cfg->fc_table.  Returns 0 on success or a negative errno; on any
 * failure every reference taken here (dev, idev, rt) is released.
 * On success ownership of @rt passes to the FIB tree via __ip6_ins_rt().
 */
int ip6_route_add(struct fib6_config *cfg)
{
	int err;
	struct net *net = cfg->fc_nlinfo.nl_net;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;

	/* IPv6 prefix lengths cannot exceed 128 bits. */
	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
		return -EINVAL;
#ifndef CONFIG_IPV6_SUBTREES
	/* Source-prefix routing requires fib subtree support. */
	if (cfg->fc_src_len)
		return -EINVAL;
#endif
	if (cfg->fc_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(net, cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	err = -ENOBUFS;
	/* Without NLM_F_CREATE only look up an existing table, but fall
	 * back to creating one (with a warning) for compatibility.
	 */
	if (cfg->fc_nlinfo.nlh &&
	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
		table = fib6_get_table(net, cfg->fc_table);
		if (!table) {
			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
			table = fib6_new_table(net, cfg->fc_table);
		}
	} else {
		table = fib6_new_table(net, cfg->fc_table);
	}

	if (!table)
		goto out;

	rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);

	if (!rt) {
		err = -ENOMEM;
		goto out;
	}

	if (cfg->fc_flags & RTF_EXPIRES)
		rt6_set_expires(rt, jiffies +
				clock_t_to_jiffies(cfg->fc_expires));
	else
		rt6_clean_expires(rt);

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	/* Choose the input handler from the destination type. */
	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->dst.input = ip6_mc_input;
	else if (cfg->fc_flags & RTF_LOCAL)
		rt->dst.input = ip6_input;
	else
		rt->dst.input = ip6_forward;

	rt->dst.output = ip6_output;

	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128)
		rt->dst.flags |= DST_HOST;

	/* Non-host routes with metrics need their own writable metrics
	 * array (host routes get one via dst_metric_set later).
	 */
	if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
		u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
		if (!metrics) {
			err = -ENOMEM;
			goto out;
		}
		dst_init_metrics(&rt->dst, metrics, 0);
	}
#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags & IFF_LOOPBACK) &&
	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
	     !(cfg->fc_flags & RTF_LOCAL))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != net->loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = net->loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->dst.output = ip6_pkt_discard_out;
		rt->dst.input = ip6_pkt_discard;
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		/* Map the route type to the error reported to senders. */
		switch (cfg->fc_type) {
		case RTN_BLACKHOLE:
			rt->dst.error = -EINVAL;
			break;
		case RTN_PROHIBIT:
			rt->dst.error = -EACCES;
			break;
		case RTN_THROW:
			rt->dst.error = -EAGAIN;
			break;
		default:
			rt->dst.error = -ENETUNREACH;
			break;
		}
		goto install_route;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		const struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &cfg->fc_gateway;
		rt->rt6i_gateway = *gw_addr;
		gwa_type = ipv6_addr_type(gw_addr);

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt;

			/* IPv6 strictly inhibits using not link-local
			   addresses as nexthop address.
			   Otherwise, router will not able to send redirects.
			   It is very good, but in some (rare!) circumstances
			   (SIT, PtP, NBMA NOARP links) it is handy to allow
			   some exceptions. --ANK
			 */
			err = -EINVAL;
			if (!(gwa_type & IPV6_ADDR_UNICAST))
				goto out;

			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);

			err = -EHOSTUNREACH;
			if (!grt)
				goto out;
			if (dev) {
				if (dev != grt->dst.dev) {
					ip6_rt_put(grt);
					goto out;
				}
			} else {
				/* Inherit device from the gateway route. */
				dev = grt->dst.dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			/* The gateway itself must be directly reachable. */
			if (!(grt->rt6i_flags & RTF_GATEWAY))
				err = 0;
			ip6_rt_put(grt);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (!dev || (dev->flags & IFF_LOOPBACK))
			goto out;
	}

	err = -ENODEV;
	if (!dev)
		goto out;

	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
		/* Preferred source must be an address on the device. */
		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
			err = -EINVAL;
			goto out;
		}
		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
		rt->rt6i_prefsrc.plen = 128;
	} else
		rt->rt6i_prefsrc.plen = 0;

	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
		err = rt6_bind_neighbour(rt, dev);
		if (err)
			goto out;
	}

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	/* Apply any user-supplied metrics (RTAX_*) from the netlink attr. */
	if (cfg->fc_mx) {
		struct nlattr *nla;
		int remaining;

		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
			int type = nla_type(nla);

			if (type) {
				if (type > RTAX_MAX) {
					err = -EINVAL;
					goto out;
				}

				dst_metric_set(&rt->dst, type, nla_get_u32(nla));
			}
		}
	}

	rt->dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;

	cfg->fc_nlinfo.nl_net = dev_net(dev);

	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);

out:
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_free(&rt->dst);
	return err;
}
1638
1639 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1640 {
1641 int err;
1642 struct fib6_table *table;
1643 struct net *net = dev_net(rt->dst.dev);
1644
1645 if (rt == net->ipv6.ip6_null_entry) {
1646 err = -ENOENT;
1647 goto out;
1648 }
1649
1650 table = rt->rt6i_table;
1651 write_lock_bh(&table->tb6_lock);
1652 err = fib6_del(rt, info);
1653 write_unlock_bh(&table->tb6_lock);
1654
1655 out:
1656 ip6_rt_put(rt);
1657 return err;
1658 }
1659
1660 int ip6_del_rt(struct rt6_info *rt)
1661 {
1662 struct nl_info info = {
1663 .nl_net = dev_net(rt->dst.dev),
1664 };
1665 return __ip6_del_rt(rt, &info);
1666 }
1667
/* Delete the first route in the table matching @cfg (destination /
 * source prefix, optional ifindex, gateway and metric filters).
 * Returns 0 on success, -ESRCH when nothing matched.
 */
static int ip6_route_del(struct fib6_config *cfg)
{
	struct fib6_table *table;
	struct fib6_node *fn;
	struct rt6_info *rt;
	int err = -ESRCH;

	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
	if (!table)
		return err;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_locate(&table->tb6_root,
			 &cfg->fc_dst, cfg->fc_dst_len,
			 &cfg->fc_src, cfg->fc_src_len);

	if (fn) {
		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
			if (cfg->fc_ifindex &&
			    (!rt->dst.dev ||
			     rt->dst.dev->ifindex != cfg->fc_ifindex))
				continue;
			if (cfg->fc_flags & RTF_GATEWAY &&
			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
				continue;
			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
				continue;
			/* Take a reference before dropping the table lock;
			 * __ip6_del_rt() re-takes it for writing and
			 * consumes this reference.
			 */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);

			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
		}
	}
	read_unlock_bh(&table->tb6_lock);

	return err;
}
1706
/* Process a validated-so-far ICMPv6 Redirect carried in @skb against the
 * route @dst: verify the ND message and options, update the neighbour
 * cache, and install a RTF_CACHE clone pointing at the new first hop.
 * All failures are silent except for ratelimited debug messages.
 */
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	struct netevent_redirect netevent;
	struct rt6_info *rt, *nrt = NULL;
	const struct in6_addr *target;
	struct ndisc_options ndopts;
	const struct in6_addr *dest;
	struct neighbour *old_neigh;
	struct inet6_dev *in6_dev;
	struct neighbour *neigh;
	struct icmp6hdr *icmph;
	int optlen, on_link;
	u8 *lladdr;

	/* Option length = ICMPv6 payload minus header and the two
	 * addresses (target, destination) mandated by RFC 4861.
	 */
	optlen = skb->tail - skb->transport_header;
	optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);

	if (optlen < 0) {
		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
		return;
	}

	icmph = icmp6_hdr(skb);
	target = (const struct in6_addr *) (icmph + 1);
	dest = target + 1;

	if (ipv6_addr_is_multicast(dest)) {
		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
		return;
	}

	/* target == dest means the destination itself is on-link;
	 * otherwise target must be a link-local router address.
	 */
	on_link = 0;
	if (ipv6_addr_equal(dest, target)) {
		on_link = 1;
	} else if (ipv6_addr_type(target) !=
		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
		return;
	}

	in6_dev = __in6_dev_get(skb->dev);
	if (!in6_dev)
		return;
	/* Routers must not accept redirects; hosts may disable them. */
	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
		return;

	/* RFC2461 8.1:
	 *	The IP source address of the Redirect MUST be the same as the current
	 *	first-hop router for the specified ICMP Destination Address.
	 */

	if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
		return;
	}

	lladdr = NULL;
	if (ndopts.nd_opts_tgt_lladdr) {
		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
					     skb->dev);
		if (!lladdr) {
			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
			return;
		}
	}

	rt = (struct rt6_info *) dst;
	if (rt == net->ipv6.ip6_null_entry) {
		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
		return;
	}

	/* Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);

	neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
	if (!neigh)
		return;

	/* Duplicate redirect: silently ignore. */
	old_neigh = rt->n;
	if (neigh == old_neigh)
		goto out;

	/*
	 *	We have finally decided to accept it.
	 */

	neigh_update(neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER))
		     );

	/* Install a cached host route via the new next hop. */
	nrt = ip6_rt_copy(rt, dest);
	if (!nrt)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
	nrt->n = neigh_clone(neigh);

	if (ip6_ins_rt(nrt))
		goto out;

	netevent.old = &rt->dst;
	netevent.old_neigh = old_neigh;
	netevent.new = &nrt->dst;
	netevent.new_neigh = neigh;
	netevent.daddr = dest;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

	/* The superseded cached route is removed. */
	if (rt->rt6i_flags & RTF_CACHE) {
		rt = (struct rt6_info *) dst_clone(&rt->dst);
		ip6_del_rt(rt);
	}

out:
	neigh_release(neigh);
}
1835
1836 /*
1837 * Misc support functions
1838 */
1839
/* Clone @ort as a host route (plen 128) to @dest, copying metrics,
 * device references, gateway and flags.  rt6i_metric is reset to 0 so
 * the clone sorts before its origin.  Returns NULL on allocation
 * failure.
 */
static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
				    const struct in6_addr *dest)
{
	struct net *net = dev_net(ort->dst.dev);
	struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
					    ort->rt6i_table);

	if (rt) {
		rt->dst.input = ort->dst.input;
		rt->dst.output = ort->dst.output;
		rt->dst.flags |= DST_HOST;

		rt->rt6i_dst.addr = *dest;
		rt->rt6i_dst.plen = 128;
		dst_copy_metrics(&rt->dst, &ort->dst);
		rt->dst.error = ort->dst.error;
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
			in6_dev_hold(rt->rt6i_idev);
		rt->dst.lastuse = jiffies;

		rt->rt6i_gateway = ort->rt6i_gateway;
		rt->rt6i_flags = ort->rt6i_flags;
		/* Clones of addrconf default routes track their parent's
		 * expiry via rt6i_from; others get no expiry.
		 */
		if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
		    (RTF_DEFAULT | RTF_ADDRCONF))
			rt6_set_from(rt, ort);
		else
			rt6_clean_expires(rt);
		rt->rt6i_metric = 0;

#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif
		memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
		rt->rt6i_table = ort->rt6i_table;
	}
	return rt;
}
1878
1879 #ifdef CONFIG_IPV6_ROUTE_INFO
/* Find an existing RA Route Information route for @prefix/@prefixlen
 * learned from @gwaddr on @ifindex in the RT6_TABLE_INFO table.
 * Returns the route with a reference held, or NULL.
 */
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex)
{
	struct fib6_node *fn;
	struct rt6_info *rt = NULL;
	struct fib6_table *table;

	table = fib6_get_table(net, RT6_TABLE_INFO);
	if (!table)
		return NULL;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
	if (!fn)
		goto out;

	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->dst.dev->ifindex != ifindex)
			continue;
		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
			continue;
		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
			continue;
		dst_hold(&rt->dst);
		break;
	}
out:
	read_unlock_bh(&table->tb6_lock);
	return rt;
}
1911
1912 static struct rt6_info *rt6_add_route_info(struct net *net,
1913 const struct in6_addr *prefix, int prefixlen,
1914 const struct in6_addr *gwaddr, int ifindex,
1915 unsigned int pref)
1916 {
1917 struct fib6_config cfg = {
1918 .fc_table = RT6_TABLE_INFO,
1919 .fc_metric = IP6_RT_PRIO_USER,
1920 .fc_ifindex = ifindex,
1921 .fc_dst_len = prefixlen,
1922 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1923 RTF_UP | RTF_PREF(pref),
1924 .fc_nlinfo.portid = 0,
1925 .fc_nlinfo.nlh = NULL,
1926 .fc_nlinfo.nl_net = net,
1927 };
1928
1929 cfg.fc_dst = *prefix;
1930 cfg.fc_gateway = *gwaddr;
1931
1932 /* We should treat it as a default route if prefix length is 0. */
1933 if (!prefixlen)
1934 cfg.fc_flags |= RTF_DEFAULT;
1935
1936 ip6_route_add(&cfg);
1937
1938 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1939 }
1940 #endif
1941
/* Find the RA-learned default route via gateway @addr on @dev in the
 * RT6_TABLE_DFLT table.  Returns the route with a reference held, or
 * NULL.
 */
struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
{
	struct rt6_info *rt;
	struct fib6_table *table;

	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
	if (!table)
		return NULL;

	read_lock_bh(&table->tb6_lock);
	for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
		if (dev == rt->dst.dev &&
		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
			break;
	}
	if (rt)
		dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);
	return rt;
}
1963
1964 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1965 struct net_device *dev,
1966 unsigned int pref)
1967 {
1968 struct fib6_config cfg = {
1969 .fc_table = RT6_TABLE_DFLT,
1970 .fc_metric = IP6_RT_PRIO_USER,
1971 .fc_ifindex = dev->ifindex,
1972 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1973 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1974 .fc_nlinfo.portid = 0,
1975 .fc_nlinfo.nlh = NULL,
1976 .fc_nlinfo.nl_net = dev_net(dev),
1977 };
1978
1979 cfg.fc_gateway = *gwaddr;
1980
1981 ip6_route_add(&cfg);
1982
1983 return rt6_get_dflt_router(gwaddr, dev);
1984 }
1985
/* Delete every RA-learned (RTF_DEFAULT or RTF_ADDRCONF) route from the
 * default-router table.  The walk restarts from the top after each
 * deletion because the table lock must be dropped to call ip6_del_rt().
 */
void rt6_purge_dflt_routers(struct net *net)
{
	struct rt6_info *rt;
	struct fib6_table *table;

	/* NOTE: Keep consistent with rt6_get_dflt_router */
	table = fib6_get_table(net, RT6_TABLE_DFLT);
	if (!table)
		return;

restart:
	read_lock_bh(&table->tb6_lock);
	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
			/* Hold a ref across the unlock; ip6_del_rt()
			 * consumes it.
			 */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			ip6_del_rt(rt);
			goto restart;
		}
	}
	read_unlock_bh(&table->tb6_lock);
}
2008
2009 static void rtmsg_to_fib6_config(struct net *net,
2010 struct in6_rtmsg *rtmsg,
2011 struct fib6_config *cfg)
2012 {
2013 memset(cfg, 0, sizeof(*cfg));
2014
2015 cfg->fc_table = RT6_TABLE_MAIN;
2016 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2017 cfg->fc_metric = rtmsg->rtmsg_metric;
2018 cfg->fc_expires = rtmsg->rtmsg_info;
2019 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2020 cfg->fc_src_len = rtmsg->rtmsg_src_len;
2021 cfg->fc_flags = rtmsg->rtmsg_flags;
2022
2023 cfg->fc_nlinfo.nl_net = net;
2024
2025 cfg->fc_dst = rtmsg->rtmsg_dst;
2026 cfg->fc_src = rtmsg->rtmsg_src;
2027 cfg->fc_gateway = rtmsg->rtmsg_gateway;
2028 }
2029
/* Legacy SIOCADDRT/SIOCDELRT ioctl entry point.  Requires
 * CAP_NET_ADMIN in the netns' owning user namespace.  Returns 0 on
 * success, -EPERM/-EFAULT/-EINVAL or the route operation's error.
 */
int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct fib6_config cfg;
	struct in6_rtmsg rtmsg;
	int err;

	switch(cmd) {
	case SIOCADDRT:		/* Add a route */
	case SIOCDELRT:		/* Delete a route */
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			return -EPERM;
		/* copy_from_user() returns the number of bytes NOT copied. */
		err = copy_from_user(&rtmsg, arg,
				     sizeof(struct in6_rtmsg));
		if (err)
			return -EFAULT;

		rtmsg_to_fib6_config(net, &rtmsg, &cfg);

		rtnl_lock();
		switch (cmd) {
		case SIOCADDRT:
			err = ip6_route_add(&cfg);
			break;
		case SIOCDELRT:
			err = ip6_route_del(&cfg);
			break;
		default:
			err = -EINVAL;
		}
		rtnl_unlock();

		return err;
	}

	return -EINVAL;
}
2066
2067 /*
2068 * Drop the packet on the floor
2069 */
2070
/* Drop @skb, bump the matching no-route SNMP counter and send an ICMPv6
 * Destination Unreachable with the given @code back to the sender.
 * Always returns 0.
 */
static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
{
	int type;
	struct dst_entry *dst = skb_dst(skb);
	switch (ipstats_mib_noroutes) {
	case IPSTATS_MIB_INNOROUTES:
		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
		/* Unspecified destination counts as an address error,
		 * not a routing failure.
		 */
		if (type == IPV6_ADDR_ANY) {
			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
				      IPSTATS_MIB_INADDRERRORS);
			break;
		}
		/* FALLTHROUGH */
	case IPSTATS_MIB_OUTNOROUTES:
		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
			      ipstats_mib_noroutes);
		break;
	}
	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
	kfree_skb(skb);
	return 0;
}
2093
/* dst input handler for reject routes on the forwarding path. */
static int ip6_pkt_discard(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}
2098
/* dst output handler for reject routes on locally generated traffic. */
static int ip6_pkt_discard_out(struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
}
2104
2105 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2106
/* dst input handler for prohibit routes (policy routing). */
static int ip6_pkt_prohibit(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}
2111
/* dst output handler for prohibit routes on locally generated traffic. */
static int ip6_pkt_prohibit_out(struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
}
2117
2118 #endif
2119
2120 /*
2121 * Allocate a dst for local (unicast / anycast) address.
2122 */
2123
/* Allocate a local (RTF_LOCAL) or anycast (RTF_ANYCAST) host route for
 * @addr on @idev, hosted on the loopback device.  Returns the route
 * with refcount 1, or an ERR_PTR on failure.
 */
struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
				    const struct in6_addr *addr,
				    bool anycast)
{
	struct net *net = dev_net(idev->dev);
	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
	int err;

	if (!rt) {
		net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
		return ERR_PTR(-ENOMEM);
	}

	/* Reference for rt->rt6i_idev, released when the dst dies. */
	in6_dev_hold(idev);

	rt->dst.flags |= DST_HOST;
	rt->dst.input = ip6_input;
	rt->dst.output = ip6_output;
	rt->rt6i_idev = idev;

	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
	if (anycast)
		rt->rt6i_flags |= RTF_ANYCAST;
	else
		rt->rt6i_flags |= RTF_LOCAL;
	err = rt6_bind_neighbour(rt, rt->dst.dev);
	if (err) {
		dst_free(&rt->dst);
		return ERR_PTR(err);
	}

	rt->rt6i_dst.addr = *addr;
	rt->rt6i_dst.plen = 128;
	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);

	atomic_set(&rt->dst.__refcnt, 1);

	return rt;
}
2163
2164 int ip6_route_get_saddr(struct net *net,
2165 struct rt6_info *rt,
2166 const struct in6_addr *daddr,
2167 unsigned int prefs,
2168 struct in6_addr *saddr)
2169 {
2170 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2171 int err = 0;
2172 if (rt->rt6i_prefsrc.plen)
2173 *saddr = rt->rt6i_prefsrc.addr;
2174 else
2175 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2176 daddr, prefs, saddr);
2177 return err;
2178 }
2179
/* remove deleted ip from prefsrc entries */
struct arg_dev_net_ip {
	struct net_device *dev;	/* only match routes on this dev; NULL = any */
	struct net *net;	/* netns being cleaned */
	struct in6_addr *addr;	/* the address being removed */
};
2186
2187 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2188 {
2189 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2190 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2191 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2192
2193 if (((void *)rt->dst.dev == dev || !dev) &&
2194 rt != net->ipv6.ip6_null_entry &&
2195 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2196 /* remove prefsrc entry */
2197 rt->rt6i_prefsrc.plen = 0;
2198 }
2199 return 0;
2200 }
2201
2202 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2203 {
2204 struct net *net = dev_net(ifp->idev->dev);
2205 struct arg_dev_net_ip adni = {
2206 .dev = ifp->idev->dev,
2207 .net = net,
2208 .addr = &ifp->addr,
2209 };
2210 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2211 }
2212
/* Argument bundle for fib6_ifdown(): routes on @dev (NULL = all
 * devices) in @net are to be removed.
 */
struct arg_dev_net {
	struct net_device *dev;
	struct net *net;
};
2217
2218 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2219 {
2220 const struct arg_dev_net *adn = arg;
2221 const struct net_device *dev = adn->dev;
2222
2223 if ((rt->dst.dev == dev || !dev) &&
2224 rt != adn->net->ipv6.ip6_null_entry)
2225 return -1;
2226
2227 return 0;
2228 }
2229
/* Remove all routes (FIB and cached ICMP dsts) referencing @dev, e.g.
 * when the device goes down or is unregistered.  A NULL @dev flushes
 * every route in @net.
 */
void rt6_ifdown(struct net *net, struct net_device *dev)
{
	struct arg_dev_net adn = {
		.dev = dev,
		.net = net,
	};

	fib6_clean_all(net, fib6_ifdown, 0, &adn);
	icmp6_clean_all(fib6_ifdown, &adn);
}
2240
/* Argument bundle for rt6_mtu_change_route(): the device whose MTU
 * changed and its new value.
 */
struct rt6_mtu_change_arg {
	struct net_device *dev;
	unsigned int mtu;
};
2245
/* fib6_clean_all() callback invoked when a device's MTU changes:
 * update the route's cached PMTU metric where appropriate.  Always
 * returns 0 (never deletes routes).
 */
static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
	struct inet6_dev *idev;

	/* In IPv6 pmtu discovery is not optional,
	   so that RTAX_MTU lock cannot disable it.
	   We still use this lock to block changes
	   caused by addrconf/ndisc.
	*/

	idev = __in6_dev_get(arg->dev);
	if (!idev)
		return 0;

	/* For administrative MTU increase, there is no way to discover
	   IPv6 PMTU increase, so PMTU increase should be updated here.
	   Since RFC 1981 doesn't include administrative MTU increase
	   update PMTU increase is a MUST. (i.e. jumbo frame)
	 */
	/*
	   If new MTU is less than route PMTU, this new MTU will be the
	   lowest MTU in the path, update the route PMTU to reflect PMTU
	   decreases; if new MTU is greater than route PMTU, and the
	   old MTU is the lowest MTU in the path, update the route PMTU
	   to reflect the increase. In this case if the other nodes' MTU
	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
	   PMTU discouvery.
	 */
	if (rt->dst.dev == arg->dev &&
	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
	    (dst_mtu(&rt->dst) >= arg->mtu ||
	     (dst_mtu(&rt->dst) < arg->mtu &&
	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
		dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
	}
	return 0;
}
2284
2285 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2286 {
2287 struct rt6_mtu_change_arg arg = {
2288 .dev = dev,
2289 .mtu = mtu,
2290 };
2291
2292 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2293 }
2294
/* Netlink attribute validation policy for RTM_NEWROUTE/RTM_DELROUTE
 * messages (attributes without an entry are accepted unvalidated).
 */
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
	[RTA_OIF]               = { .type = NLA_U32 },
	[RTA_IIF]		= { .type = NLA_U32 },
	[RTA_PRIORITY]          = { .type = NLA_U32 },
	[RTA_METRICS]           = { .type = NLA_NESTED },
	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
};
2303
/* Parse an RTM_NEWROUTE/RTM_DELROUTE netlink message into @cfg.
 * Returns 0 on success or a negative errno on malformed input.
 * Note: fc_mx/fc_mp point into the skb's attribute data and are only
 * valid while @skb is alive.
 */
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
			      struct fib6_config *cfg)
{
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	int err;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	rtm = nlmsg_data(nlh);
	memset(cfg, 0, sizeof(*cfg));

	cfg->fc_table = rtm->rtm_table;
	cfg->fc_dst_len = rtm->rtm_dst_len;
	cfg->fc_src_len = rtm->rtm_src_len;
	cfg->fc_flags = RTF_UP;
	cfg->fc_protocol = rtm->rtm_protocol;
	cfg->fc_type = rtm->rtm_type;

	/* All of these route types become RTF_REJECT routes; fc_type
	 * keeps the distinction for the dst.error selection.
	 */
	if (rtm->rtm_type == RTN_UNREACHABLE ||
	    rtm->rtm_type == RTN_BLACKHOLE ||
	    rtm->rtm_type == RTN_PROHIBIT ||
	    rtm->rtm_type == RTN_THROW)
		cfg->fc_flags |= RTF_REJECT;

	if (rtm->rtm_type == RTN_LOCAL)
		cfg->fc_flags |= RTF_LOCAL;

	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
	cfg->fc_nlinfo.nlh = nlh;
	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);

	if (tb[RTA_GATEWAY]) {
		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
		cfg->fc_flags |= RTF_GATEWAY;
	}

	if (tb[RTA_DST]) {
		/* Attribute may carry only the significant prefix bytes. */
		int plen = (rtm->rtm_dst_len + 7) >> 3;

		if (nla_len(tb[RTA_DST]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
	}

	if (tb[RTA_SRC]) {
		int plen = (rtm->rtm_src_len + 7) >> 3;

		if (nla_len(tb[RTA_SRC]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
	}

	if (tb[RTA_PREFSRC])
		nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);

	if (tb[RTA_OIF])
		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_PRIORITY])
		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);

	if (tb[RTA_METRICS]) {
		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
	}

	if (tb[RTA_TABLE])
		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);

	if (tb[RTA_MULTIPATH]) {
		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
	}

	err = 0;
errout:
	return err;
}
2388
/* Add (@add != 0) or delete every next hop of an RTA_MULTIPATH route.
 * If an add fails part-way, the loop restarts in delete mode to roll
 * back the next hops already installed.  Returns the last error seen
 * (0 on full success).
 */
static int ip6_route_multipath(struct fib6_config *cfg, int add)
{
	struct fib6_config r_cfg;
	struct rtnexthop *rtnh;
	int remaining;
	int attrlen;
	int err = 0, last_err = 0;

beginning:
	rtnh = (struct rtnexthop *)cfg->fc_mp;
	remaining = cfg->fc_mp_len;

	/* Parse a Multipath Entry */
	while (rtnh_ok(rtnh, remaining)) {
		/* Each next hop is submitted as an independent route
		 * based on the shared config.
		 */
		memcpy(&r_cfg, cfg, sizeof(*cfg));
		if (rtnh->rtnh_ifindex)
			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);

			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
			if (nla) {
				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
				r_cfg.fc_flags |= RTF_GATEWAY;
			}
		}
		err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
		if (err) {
			last_err = err;
			/* If we are trying to remove a route, do not stop the
			 * loop when ip6_route_del() fails (because next hop is
			 * already gone), we should try to remove all next hops.
			 */
			if (add) {
				/* If add fails, we should try to delete all
				 * next hops that have been already added.
				 */
				add = 0;
				goto beginning;
			}
		}
		/* Because each route is added like a single route we remove
		 * this flag after the first nexthop (if there is a collision,
		 * we have already fail to add the first nexthop:
		 * fib6_add_rt2node() has reject it).
		 */
		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
		rtnh = rtnh_next(rtnh, &remaining);
	}

	return last_err;
}
2443
2444 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2445 {
2446 struct fib6_config cfg;
2447 int err;
2448
2449 if (!capable(CAP_NET_ADMIN))
2450 return -EPERM;
2451
2452 err = rtm_to_fib6_config(skb, nlh, &cfg);
2453 if (err < 0)
2454 return err;
2455
2456 if (cfg.fc_mp)
2457 return ip6_route_multipath(&cfg, 0);
2458 else
2459 return ip6_route_del(&cfg);
2460 }
2461
2462 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2463 {
2464 struct fib6_config cfg;
2465 int err;
2466
2467 if (!capable(CAP_NET_ADMIN))
2468 return -EPERM;
2469
2470 err = rtm_to_fib6_config(skb, nlh, &cfg);
2471 if (err < 0)
2472 return err;
2473
2474 if (cfg.fc_mp)
2475 return ip6_route_multipath(&cfg, 1);
2476 else
2477 return ip6_route_add(&cfg);
2478 }
2479
2480 static inline size_t rt6_nlmsg_size(void)
2481 {
2482 return NLMSG_ALIGN(sizeof(struct rtmsg))
2483 + nla_total_size(16) /* RTA_SRC */
2484 + nla_total_size(16) /* RTA_DST */
2485 + nla_total_size(16) /* RTA_GATEWAY */
2486 + nla_total_size(16) /* RTA_PREFSRC */
2487 + nla_total_size(4) /* RTA_TABLE */
2488 + nla_total_size(4) /* RTA_IIF */
2489 + nla_total_size(4) /* RTA_OIF */
2490 + nla_total_size(4) /* RTA_PRIORITY */
2491 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2492 + nla_total_size(sizeof(struct rta_cacheinfo));
2493 }
2494
/* Serialize one route @rt as an rtmsg netlink record into @skb.
 *
 * @dst/@src: when non-NULL (RTM_GETROUTE replies) the looked-up /128
 *	addresses are reported instead of the route's own prefixes.
 * @iif: incoming interface index for input-path lookups, 0 otherwise.
 * @prefix: non-zero means the caller wants RTF_PREFIX_RT routes only;
 *	other routes are skipped with return value 1.
 * @nowait: forwarded to ip6mr_get_route() for unresolved mcast routes.
 *
 * Returns the nlmsg_end() result on success, 1 when skipped, 0 when a
 * multicast route was queued for resolution, or -EMSGSIZE if @skb ran
 * out of room (the partial message is cancelled).
 */
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 portid, u32 seq,
			 int prefix, int nowait, unsigned int flags)
{
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires;
	u32 table;
	struct neighbour *n;

	if (prefix) {	/* user wants prefix routes only */
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	if (rt->rt6i_table)
		table = rt->rt6i_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	rtm->rtm_table = table;
	if (nla_put_u32(skb, RTA_TABLE, table))
		goto nla_put_failure;
	if (rt->rt6i_flags & RTF_REJECT) {
		/* Map the dst error back to the route type that created it. */
		switch (rt->dst.error) {
		case -EINVAL:
			rtm->rtm_type = RTN_BLACKHOLE;
			break;
		case -EACCES:
			rtm->rtm_type = RTN_PROHIBIT;
			break;
		case -EAGAIN:
			rtm->rtm_type = RTN_THROW;
			break;
		default:
			rtm->rtm_type = RTN_UNREACHABLE;
			break;
		}
	}
	else if (rt->rt6i_flags & RTF_LOCAL)
		rtm->rtm_type = RTN_LOCAL;
	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = rt->rt6i_protocol;
	if (rt->rt6i_flags & RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & RTF_ADDRCONF) {
		/* RA-learned default/routeinfo routes vs. autoconf prefixes */
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
			rtm->rtm_protocol = RTPROT_RA;
		else
			rtm->rtm_protocol = RTPROT_KERNEL;
	}

	if (rt->rt6i_flags & RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	if (dst) {
		if (nla_put(skb, RTA_DST, 16, dst))
			goto nla_put_failure;
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
			goto nla_put_failure;
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		if (nla_put(skb, RTA_SRC, 16, src))
			goto nla_put_failure;
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len &&
		   nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
		goto nla_put_failure;
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		/* Multicast destinations are resolved via the mroute table;
		 * err == 0 means the reply was queued for later completion.
		 */
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			int err = ip6mr_get_route(net, skb, rtm, nowait);
			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					if (err == -EMSGSIZE)
						goto nla_put_failure;
				}
			}
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, iif))
				goto nla_put_failure;
	} else if (dst) {
		struct in6_addr saddr_buf;
		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
		    nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
			goto nla_put_failure;
	}

	if (rt->rt6i_prefsrc.plen) {
		struct in6_addr saddr_buf;
		saddr_buf = rt->rt6i_prefsrc.addr;
		if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
			goto nla_put_failure;
	}

	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
		goto nla_put_failure;

	n = rt->n;
	if (n) {
		if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0)
			goto nla_put_failure;
	}

	if (rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;
	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
		goto nla_put_failure;

	/* Report remaining lifetime (jiffies) only for expiring routes. */
	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2641
2642 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2643 {
2644 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2645 int prefix;
2646
2647 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2648 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2649 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2650 } else
2651 prefix = 0;
2652
2653 return rt6_fill_node(arg->net,
2654 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2655 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2656 prefix, 0, NLM_F_MULTI);
2657 }
2658
/* RTM_GETROUTE handler: resolve the route for the requested
 * source/destination pair and unicast an RTM_NEWROUTE reply back to the
 * requester.
 *
 * With RTA_IIF the lookup runs as if a packet arrived on that interface
 * (input path); otherwise an output-path lookup with the optional
 * RTA_OIF is performed.
 */
static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[RTA_MAX+1];
	struct rt6_info *rt;
	struct sk_buff *skb;
	struct rtmsg *rtm;
	struct flowi6 fl6;
	int err, iif = 0, oif = 0;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	memset(&fl6, 0, sizeof(fl6));

	if (tb[RTA_SRC]) {
		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
			goto errout;

		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
	}

	if (tb[RTA_DST]) {
		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
			goto errout;

		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
	}

	if (tb[RTA_IIF])
		iif = nla_get_u32(tb[RTA_IIF]);

	if (tb[RTA_OIF])
		oif = nla_get_u32(tb[RTA_OIF]);

	if (iif) {
		/* Input-path lookup: the incoming device must exist. */
		struct net_device *dev;
		int flags = 0;

		dev = __dev_get_by_index(net, iif);
		if (!dev) {
			err = -ENODEV;
			goto errout;
		}

		fl6.flowi6_iif = iif;

		if (!ipv6_addr_any(&fl6.saddr))
			flags |= RT6_LOOKUP_F_HAS_SADDR;

		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
							       flags);
	} else {
		fl6.flowi6_oif = oif;

		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
	}

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb) {
		/* drop the reference taken by the lookup */
		ip6_rt_put(rt);
		err = -ENOBUFS;
		goto errout;
	}

	/* Reserve room for dummy headers, this skb can pass
	   through good chunk of routing engine.
	 */
	skb_reset_mac_header(skb);
	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));

	/* The skb now owns the route reference; freed with the skb. */
	skb_dst_set(skb, &rt->dst);

	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
			    nlh->nlmsg_seq, 0, 0, 0);
	if (err < 0) {
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
	return err;
}
2746
2747 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2748 {
2749 struct sk_buff *skb;
2750 struct net *net = info->nl_net;
2751 u32 seq;
2752 int err;
2753
2754 err = -ENOBUFS;
2755 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2756
2757 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2758 if (!skb)
2759 goto errout;
2760
2761 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2762 event, info->portid, seq, 0, 0, 0);
2763 if (err < 0) {
2764 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2765 WARN_ON(err == -EMSGSIZE);
2766 kfree_skb(skb);
2767 goto errout;
2768 }
2769 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2770 info->nlh, gfp_any());
2771 return;
2772 errout:
2773 if (err < 0)
2774 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2775 }
2776
2777 static int ip6_route_dev_notify(struct notifier_block *this,
2778 unsigned long event, void *data)
2779 {
2780 struct net_device *dev = (struct net_device *)data;
2781 struct net *net = dev_net(dev);
2782
2783 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2784 net->ipv6.ip6_null_entry->dst.dev = dev;
2785 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2786 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2787 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2788 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2789 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2790 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2791 #endif
2792 }
2793
2794 return NOTIFY_OK;
2795 }
2796
2797 /*
2798 * /proc
2799 */
2800
2801 #ifdef CONFIG_PROC_FS
2802
/* Cursor for the old read_proc-style /proc interface.
 * NOTE(review): nothing in this file references it any more — the
 * seq_file based ipv6_route_proc_fops replaced it; candidate for removal.
 */
struct rt6_proc_arg
{
	char *buffer;	/* output buffer supplied by the read */
	int offset;	/* file offset of the first wanted byte */
	int length;	/* size of buffer */
	int skip;	/* entries already consumed */
	int len;	/* bytes produced so far */
};
2811
2812 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2813 {
2814 struct seq_file *m = p_arg;
2815 struct neighbour *n;
2816
2817 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2818
2819 #ifdef CONFIG_IPV6_SUBTREES
2820 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2821 #else
2822 seq_puts(m, "00000000000000000000000000000000 00 ");
2823 #endif
2824 n = rt->n;
2825 if (n) {
2826 seq_printf(m, "%pi6", n->primary_key);
2827 } else {
2828 seq_puts(m, "00000000000000000000000000000000");
2829 }
2830 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2831 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2832 rt->dst.__use, rt->rt6i_flags,
2833 rt->dst.dev ? rt->dst.dev->name : "");
2834 return 0;
2835 }
2836
2837 static int ipv6_route_show(struct seq_file *m, void *v)
2838 {
2839 struct net *net = (struct net *)m->private;
2840 fib6_clean_all_ro(net, rt6_info_route, 0, m);
2841 return 0;
2842 }
2843
/* open() for /proc/net/ipv6_route: single-shot seq_file bound to the
 * inode's network namespace.
 */
static int ipv6_route_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, ipv6_route_show);
}
2848
/* File operations for /proc/net/ipv6_route. */
static const struct file_operations ipv6_route_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= ipv6_route_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release_net,
};
2856
/* seq_file show handler for /proc/net/rt6_stats: one line of hex
 * counters describing the namespace's FIB tree and dst-cache usage.
 */
static int rt6_stats_seq_show(struct seq_file *seq, void *v)
{
	struct net *net = (struct net *)seq->private;
	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
		   net->ipv6.rt6_stats->fib_nodes,
		   net->ipv6.rt6_stats->fib_route_nodes,
		   net->ipv6.rt6_stats->fib_rt_alloc,
		   net->ipv6.rt6_stats->fib_rt_entries,
		   net->ipv6.rt6_stats->fib_rt_cache,
		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
		   net->ipv6.rt6_stats->fib_discarded_routes);

	return 0;
}
2871
/* open() for /proc/net/rt6_stats: single-shot seq_file bound to the
 * inode's network namespace.
 */
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, rt6_stats_seq_show);
}
2876
/* File operations for /proc/net/rt6_stats. */
static const struct file_operations rt6_stats_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release_net,
};
2884 #endif /* CONFIG_PROC_FS */
2885
2886 #ifdef CONFIG_SYSCTL
2887
2888 static
2889 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2890 void __user *buffer, size_t *lenp, loff_t *ppos)
2891 {
2892 struct net *net;
2893 int delay;
2894 if (!write)
2895 return -EINVAL;
2896
2897 net = (struct net *)ctl->extra1;
2898 delay = net->ipv6.sysctl.flush_delay;
2899 proc_dointvec(ctl, write, buffer, lenp, ppos);
2900 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2901 return 0;
2902 }
2903
/* Template for the per-namespace /proc/sys/net/ipv6/route/ directory.
 * ipv6_route_sysctl_init() kmemdup()s this array and repoints each
 * entry's .data (and, for "flush", .extra1) at the namespace's own
 * storage — so the entry order here must stay in sync with the
 * table[N] indices used there.
 */
ctl_table ipv6_route_table_template[] = {
	{
		/* write-only trigger: flush the route cache after a delay */
		.procname	=	"flush",
		.data		=	&init_net.ipv6.sysctl.flush_delay,
		.maxlen		=	sizeof(int),
		.mode		=	0200,
		.proc_handler	=	ipv6_sysctl_rtcache_flush
	},
	{
		.procname	=	"gc_thresh",
		.data		=	&ip6_dst_ops_template.gc_thresh,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"max_size",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"gc_min_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_timeout",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_elasticity",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"mtu_expires",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"min_adv_mss",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		/* same variable as gc_min_interval, in milliseconds */
		.procname	=	"gc_min_interval_ms",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_ms_jiffies,
	},
	{ }
};
2977
/* Build the per-namespace route sysctl table: a kmemdup()'d copy of
 * ipv6_route_table_template with every .data pointer redirected at
 * @net's own storage.  Returns NULL on allocation failure (callers
 * treat that as "no sysctls for this namespace").
 */
struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(ipv6_route_table_template,
			sizeof(ipv6_route_table_template),
			GFP_KERNEL);

	if (table) {
		/* indices must match the template's entry order */
		table[0].data = &net->ipv6.sysctl.flush_delay;
		table[0].extra1 = net;	/* flush handler needs the netns */
		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;

		/* Don't export sysctls to unprivileged users */
		if (net->user_ns != &init_user_ns)
			table[0].procname = NULL;
	}

	return table;
}
3006 #endif
3007
/* Per-namespace constructor for the IPv6 routing core: clone the dst
 * ops template, allocate the always-present null (and, with multiple
 * tables, prohibit/blackhole) template routes, and seed the GC sysctl
 * defaults.  Returns 0 or -ENOMEM, unwinding through the out_* labels
 * in reverse order of construction on failure.
 */
static int __net_init ip6_route_net_init(struct net *net)
{
	int ret = -ENOMEM;

	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
	       sizeof(net->ipv6.ip6_dst_ops));

	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
		goto out_ip6_dst_ops;

	/* "no route" dst returned when a lookup finds nothing */
	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
					   sizeof(*net->ipv6.ip6_null_entry),
					   GFP_KERNEL);
	if (!net->ipv6.ip6_null_entry)
		goto out_ip6_dst_entries;
	net->ipv6.ip6_null_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_null_entry;
	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
			 ip6_template_metrics, true);

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
					       sizeof(*net->ipv6.ip6_prohibit_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_prohibit_entry)
		goto out_ip6_null_entry;
	net->ipv6.ip6_prohibit_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
			 ip6_template_metrics, true);

	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
					       sizeof(*net->ipv6.ip6_blk_hole_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_blk_hole_entry)
		goto out_ip6_prohibit_entry;
	net->ipv6.ip6_blk_hole_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
			 ip6_template_metrics, true);
#endif

	/* defaults for the tunables in ipv6_route_table_template */
	net->ipv6.sysctl.flush_delay = 0;
	net->ipv6.sysctl.ip6_rt_max_size = 4096;
	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;

	net->ipv6.ip6_rt_gc_expire = 30*HZ;

	ret = 0;
out:
	return ret;

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
out_ip6_prohibit_entry:
	kfree(net->ipv6.ip6_prohibit_entry);
out_ip6_null_entry:
	kfree(net->ipv6.ip6_null_entry);
#endif
out_ip6_dst_entries:
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
out_ip6_dst_ops:
	goto out;
}
3079
3080 static void __net_exit ip6_route_net_exit(struct net *net)
3081 {
3082 kfree(net->ipv6.ip6_null_entry);
3083 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3084 kfree(net->ipv6.ip6_prohibit_entry);
3085 kfree(net->ipv6.ip6_blk_hole_entry);
3086 #endif
3087 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3088 }
3089
3090 static int __net_init ip6_route_net_init_late(struct net *net)
3091 {
3092 #ifdef CONFIG_PROC_FS
3093 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
3094 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
3095 #endif
3096 return 0;
3097 }
3098
/* Late per-namespace teardown: remove the /proc/net files created in
 * ip6_route_net_init_late().
 */
static void __net_exit ip6_route_net_exit_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ipv6_route");
	proc_net_remove(net, "rt6_stats");
#endif
}
3106
/* Per-namespace lifecycle of the core IPv6 routing state. */
static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};
3111
3112 static int __net_init ipv6_inetpeer_init(struct net *net)
3113 {
3114 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3115
3116 if (!bp)
3117 return -ENOMEM;
3118 inet_peer_base_init(bp);
3119 net->ipv6.peers = bp;
3120 return 0;
3121 }
3122
3123 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3124 {
3125 struct inet_peer_base *bp = net->ipv6.peers;
3126
3127 net->ipv6.peers = NULL;
3128 inetpeer_invalidate_tree(bp);
3129 kfree(bp);
3130 }
3131
/* Per-namespace lifecycle of the IPv6 inetpeer storage. */
static struct pernet_operations ipv6_inetpeer_ops = {
	.init	=	ipv6_inetpeer_init,
	.exit	=	ipv6_inetpeer_exit,
};
3136
/* Per-namespace lifecycle of the route /proc files (registered last). */
static struct pernet_operations ip6_route_net_late_ops = {
	.init = ip6_route_net_init_late,
	.exit = ip6_route_net_exit_late,
};
3141
/* Hooks loopback registration to wire up the template routes. */
static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = 0,
};
3146
/* Boot-time initialization of the IPv6 routing subsystem: dst cache,
 * pernet hooks, fib6/xfrm6/fib6-rules, rtnetlink handlers and the
 * device notifier.  On any failure everything already set up is torn
 * down via the goto chain (in reverse order of registration).
 */
int __init ip6_route_init(void)
{
	int ret;

	ret = -ENOMEM;
	ip6_dst_ops_template.kmem_cachep =
		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!ip6_dst_ops_template.kmem_cachep)
		goto out;

	ret = dst_entries_init(&ip6_dst_blackhole_ops);
	if (ret)
		goto out_kmem_cache;

	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
	if (ret)
		goto out_dst_entries;

	ret = register_pernet_subsys(&ip6_route_net_ops);
	if (ret)
		goto out_register_inetpeer;

	/* blackhole dsts share the regular rt6_info slab */
	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;

	/* Registering of the loopback is done before this portion of code,
	 * the loopback reference in rt6_info will not be taken, do it
	 * manually for init_net */
	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #endif
	ret = fib6_init();
	if (ret)
		goto out_register_subsys;

	ret = xfrm6_init();
	if (ret)
		goto out_fib6_init;

	ret = fib6_rules_init();
	if (ret)
		goto xfrm6_init;

	ret = register_pernet_subsys(&ip6_route_net_late_ops);
	if (ret)
		goto fib6_rules_init;

	ret = -ENOBUFS;
	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
		goto out_register_late_subsys;

	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
	if (ret)
		goto out_register_late_subsys;

out:
	return ret;

out_register_late_subsys:
	unregister_pernet_subsys(&ip6_route_net_late_ops);
fib6_rules_init:
	fib6_rules_cleanup();
xfrm6_init:
	xfrm6_fini();
out_fib6_init:
	fib6_gc_cleanup();
out_register_subsys:
	unregister_pernet_subsys(&ip6_route_net_ops);
out_register_inetpeer:
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
out_dst_entries:
	dst_entries_destroy(&ip6_dst_blackhole_ops);
out_kmem_cache:
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
	goto out;
}
3230
/* Module unload: tear everything down in the reverse order of
 * ip6_route_init().
 */
void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}