ipv6: Stop /128 route from disappearing after pmtu update
[deliverable/linux.git] / net / ipv6 / route.c
1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14 /* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
23 * Ville Nuorvala
24 * Fixed routing subtrees.
25 */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
61
62 #include <asm/uaccess.h>
63
64 #ifdef CONFIG_SYSCTL
65 #include <linux/sysctl.h>
66 #endif
67
/*
 * Result of checking a route's next hop.  Every failure is negative so
 * callers can test "< 0"; find_match() skips a route on FAIL_HARD and
 * turns FAIL_DO_RR into the lowest valid score plus a round-robin request.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,
	RT6_NUD_FAIL_PROBE	= -2,
	RT6_NUD_FAIL_DO_RR	= -1,
	RT6_NUD_SUCCEED		= 1,
};
74
75 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
76 const struct in6_addr *dest);
77 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
78 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
79 static unsigned int ip6_mtu(const struct dst_entry *dst);
80 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
81 static void ip6_dst_destroy(struct dst_entry *);
82 static void ip6_dst_ifdown(struct dst_entry *,
83 struct net_device *dev, int how);
84 static int ip6_dst_gc(struct dst_ops *ops);
85
86 static int ip6_pkt_discard(struct sk_buff *skb);
87 static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
88 static int ip6_pkt_prohibit(struct sk_buff *skb);
89 static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
90 static void ip6_link_failure(struct sk_buff *skb);
91 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
92 struct sk_buff *skb, u32 mtu);
93 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
94 struct sk_buff *skb);
95 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
96
#ifdef CONFIG_IPV6_ROUTE_INFO
/* Helpers used by rt6_route_rcv() to add / find a Route Information route. */
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   int ifindex,
					   unsigned int pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   int ifindex);
#endif
106
107 static void rt6_bind_peer(struct rt6_info *rt, int create)
108 {
109 struct inet_peer_base *base;
110 struct inet_peer *peer;
111
112 base = inetpeer_base_ptr(rt->_rt6i_peer);
113 if (!base)
114 return;
115
116 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
117 if (peer) {
118 if (!rt6_set_peer(rt, peer))
119 inet_putpeer(peer);
120 }
121 }
122
123 static struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create)
124 {
125 if (rt6_has_peer(rt))
126 return rt6_peer_ptr(rt);
127
128 rt6_bind_peer(rt, create);
129 return (rt6_has_peer(rt) ? rt6_peer_ptr(rt) : NULL);
130 }
131
/* Convenience wrapper: fetch the route's peer with the create flag set. */
static struct inet_peer *rt6_get_peer_create(struct rt6_info *rt)
{
	const int create = 1;

	return __rt6_get_peer(rt, create);
}
136
137 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
138 {
139 struct rt6_info *rt = (struct rt6_info *) dst;
140 struct inet_peer *peer;
141 u32 *p = NULL;
142
143 if (!(rt->dst.flags & DST_HOST))
144 return dst_cow_metrics_generic(dst, old);
145
146 peer = rt6_get_peer_create(rt);
147 if (peer) {
148 u32 *old_p = __DST_METRICS_PTR(old);
149 unsigned long prev, new;
150
151 p = peer->metrics;
152 if (inet_metrics_new(peer) ||
153 (old & DST_METRICS_FORCE_OVERWRITE))
154 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
155
156 new = (unsigned long) p;
157 prev = cmpxchg(&dst->_metrics, old, new);
158
159 if (prev != old) {
160 p = __DST_METRICS_PTR(prev);
161 if (prev & DST_METRICS_READ_ONLY)
162 p = NULL;
163 }
164 }
165 return p;
166 }
167
168 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
169 struct sk_buff *skb,
170 const void *daddr)
171 {
172 struct in6_addr *p = &rt->rt6i_gateway;
173
174 if (!ipv6_addr_any(p))
175 return (const void *) p;
176 else if (skb)
177 return &ipv6_hdr(skb)->daddr;
178 return daddr;
179 }
180
181 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
182 struct sk_buff *skb,
183 const void *daddr)
184 {
185 struct rt6_info *rt = (struct rt6_info *) dst;
186 struct neighbour *n;
187
188 daddr = choose_neigh_daddr(rt, skb, daddr);
189 n = __ipv6_neigh_lookup(dst->dev, daddr);
190 if (n)
191 return n;
192 return neigh_create(&nd_tbl, daddr, dst->dev);
193 }
194
195 static struct dst_ops ip6_dst_ops_template = {
196 .family = AF_INET6,
197 .gc = ip6_dst_gc,
198 .gc_thresh = 1024,
199 .check = ip6_dst_check,
200 .default_advmss = ip6_default_advmss,
201 .mtu = ip6_mtu,
202 .cow_metrics = ipv6_cow_metrics,
203 .destroy = ip6_dst_destroy,
204 .ifdown = ip6_dst_ifdown,
205 .negative_advice = ip6_negative_advice,
206 .link_failure = ip6_link_failure,
207 .update_pmtu = ip6_rt_update_pmtu,
208 .redirect = rt6_do_redirect,
209 .local_out = __ip6_local_out,
210 .neigh_lookup = ip6_neigh_lookup,
211 };
212
213 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
214 {
215 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
216
217 return mtu ? : dst->dev->mtu;
218 }
219
220 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
221 struct sk_buff *skb, u32 mtu)
222 {
223 }
224
/* Blackhole routes ignore redirects; the callback is deliberately empty. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
}
229
230 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
231 unsigned long old)
232 {
233 return NULL;
234 }
235
236 static struct dst_ops ip6_dst_blackhole_ops = {
237 .family = AF_INET6,
238 .destroy = ip6_dst_destroy,
239 .check = ip6_dst_check,
240 .mtu = ip6_blackhole_mtu,
241 .default_advmss = ip6_default_advmss,
242 .update_pmtu = ip6_rt_blackhole_update_pmtu,
243 .redirect = ip6_rt_blackhole_redirect,
244 .cow_metrics = ip6_rt_blackhole_cow_metrics,
245 .neigh_lookup = ip6_neigh_lookup,
246 };
247
248 static const u32 ip6_template_metrics[RTAX_MAX] = {
249 [RTAX_HOPLIMIT - 1] = 0,
250 };
251
252 static const struct rt6_info ip6_null_entry_template = {
253 .dst = {
254 .__refcnt = ATOMIC_INIT(1),
255 .__use = 1,
256 .obsolete = DST_OBSOLETE_FORCE_CHK,
257 .error = -ENETUNREACH,
258 .input = ip6_pkt_discard,
259 .output = ip6_pkt_discard_out,
260 },
261 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
262 .rt6i_protocol = RTPROT_KERNEL,
263 .rt6i_metric = ~(u32) 0,
264 .rt6i_ref = ATOMIC_INIT(1),
265 };
266
267 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
268
269 static const struct rt6_info ip6_prohibit_entry_template = {
270 .dst = {
271 .__refcnt = ATOMIC_INIT(1),
272 .__use = 1,
273 .obsolete = DST_OBSOLETE_FORCE_CHK,
274 .error = -EACCES,
275 .input = ip6_pkt_prohibit,
276 .output = ip6_pkt_prohibit_out,
277 },
278 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
279 .rt6i_protocol = RTPROT_KERNEL,
280 .rt6i_metric = ~(u32) 0,
281 .rt6i_ref = ATOMIC_INIT(1),
282 };
283
284 static const struct rt6_info ip6_blk_hole_entry_template = {
285 .dst = {
286 .__refcnt = ATOMIC_INIT(1),
287 .__use = 1,
288 .obsolete = DST_OBSOLETE_FORCE_CHK,
289 .error = -EINVAL,
290 .input = dst_discard,
291 .output = dst_discard_sk,
292 },
293 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
294 .rt6i_protocol = RTPROT_KERNEL,
295 .rt6i_metric = ~(u32) 0,
296 .rt6i_ref = ATOMIC_INIT(1),
297 };
298
299 #endif
300
301 /* allocate dst with ip6_dst_ops */
302 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
303 struct net_device *dev,
304 int flags,
305 struct fib6_table *table)
306 {
307 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
308 0, DST_OBSOLETE_FORCE_CHK, flags);
309
310 if (rt) {
311 struct dst_entry *dst = &rt->dst;
312
313 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
314 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
315 INIT_LIST_HEAD(&rt->rt6i_siblings);
316 }
317 return rt;
318 }
319
320 static void ip6_dst_destroy(struct dst_entry *dst)
321 {
322 struct rt6_info *rt = (struct rt6_info *)dst;
323 struct inet6_dev *idev = rt->rt6i_idev;
324 struct dst_entry *from = dst->from;
325
326 if (!(rt->dst.flags & DST_HOST))
327 dst_destroy_metrics_generic(dst);
328
329 if (idev) {
330 rt->rt6i_idev = NULL;
331 in6_dev_put(idev);
332 }
333
334 dst->from = NULL;
335 dst_release(from);
336
337 if (rt6_has_peer(rt)) {
338 struct inet_peer *peer = rt6_peer_ptr(rt);
339 inet_putpeer(peer);
340 }
341 }
342
343 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
344 int how)
345 {
346 struct rt6_info *rt = (struct rt6_info *)dst;
347 struct inet6_dev *idev = rt->rt6i_idev;
348 struct net_device *loopback_dev =
349 dev_net(dev)->loopback_dev;
350
351 if (dev != loopback_dev) {
352 if (idev && idev->dev == dev) {
353 struct inet6_dev *loopback_idev =
354 in6_dev_get(loopback_dev);
355 if (loopback_idev) {
356 rt->rt6i_idev = loopback_idev;
357 in6_dev_put(idev);
358 }
359 }
360 }
361 }
362
363 static bool rt6_check_expired(const struct rt6_info *rt)
364 {
365 if (rt->rt6i_flags & RTF_EXPIRES) {
366 if (time_after(jiffies, rt->dst.expires))
367 return true;
368 } else if (rt->dst.from) {
369 return rt6_check_expired((struct rt6_info *) rt->dst.from);
370 }
371 return false;
372 }
373
374 /* Multipath route selection:
375 * Hash based function using packet header and flowlabel.
376 * Adapted from fib_info_hashfn()
377 */
378 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
379 const struct flowi6 *fl6)
380 {
381 unsigned int val = fl6->flowi6_proto;
382
383 val ^= ipv6_addr_hash(&fl6->daddr);
384 val ^= ipv6_addr_hash(&fl6->saddr);
385
386 /* Work only if this not encapsulated */
387 switch (fl6->flowi6_proto) {
388 case IPPROTO_UDP:
389 case IPPROTO_TCP:
390 case IPPROTO_SCTP:
391 val ^= (__force u16)fl6->fl6_sport;
392 val ^= (__force u16)fl6->fl6_dport;
393 break;
394
395 case IPPROTO_ICMPV6:
396 val ^= (__force u16)fl6->fl6_icmp_type;
397 val ^= (__force u16)fl6->fl6_icmp_code;
398 break;
399 }
400 /* RFC6438 recommands to use flowlabel */
401 val ^= (__force u32)fl6->flowlabel;
402
403 /* Perhaps, we need to tune, this function? */
404 val = val ^ (val >> 7) ^ (val >> 12);
405 return val % candidate_count;
406 }
407
408 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
409 struct flowi6 *fl6, int oif,
410 int strict)
411 {
412 struct rt6_info *sibling, *next_sibling;
413 int route_choosen;
414
415 route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
416 /* Don't change the route, if route_choosen == 0
417 * (siblings does not include ourself)
418 */
419 if (route_choosen)
420 list_for_each_entry_safe(sibling, next_sibling,
421 &match->rt6i_siblings, rt6i_siblings) {
422 route_choosen--;
423 if (route_choosen == 0) {
424 if (rt6_score_route(sibling, oif, strict) < 0)
425 break;
426 match = sibling;
427 break;
428 }
429 }
430 return match;
431 }
432
433 /*
434 * Route lookup. Any table->tb6_lock is implied.
435 */
436
437 static inline struct rt6_info *rt6_device_match(struct net *net,
438 struct rt6_info *rt,
439 const struct in6_addr *saddr,
440 int oif,
441 int flags)
442 {
443 struct rt6_info *local = NULL;
444 struct rt6_info *sprt;
445
446 if (!oif && ipv6_addr_any(saddr))
447 goto out;
448
449 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
450 struct net_device *dev = sprt->dst.dev;
451
452 if (oif) {
453 if (dev->ifindex == oif)
454 return sprt;
455 if (dev->flags & IFF_LOOPBACK) {
456 if (!sprt->rt6i_idev ||
457 sprt->rt6i_idev->dev->ifindex != oif) {
458 if (flags & RT6_LOOKUP_F_IFACE && oif)
459 continue;
460 if (local && (!oif ||
461 local->rt6i_idev->dev->ifindex == oif))
462 continue;
463 }
464 local = sprt;
465 }
466 } else {
467 if (ipv6_chk_addr(net, saddr, dev,
468 flags & RT6_LOOKUP_F_IFACE))
469 return sprt;
470 }
471 }
472
473 if (oif) {
474 if (local)
475 return local;
476
477 if (flags & RT6_LOOKUP_F_IFACE)
478 return net->ipv6.ip6_null_entry;
479 }
480 out:
481 return rt;
482 }
483
484 #ifdef CONFIG_IPV6_ROUTER_PREF
485 struct __rt6_probe_work {
486 struct work_struct work;
487 struct in6_addr target;
488 struct net_device *dev;
489 };
490
491 static void rt6_probe_deferred(struct work_struct *w)
492 {
493 struct in6_addr mcaddr;
494 struct __rt6_probe_work *work =
495 container_of(w, struct __rt6_probe_work, work);
496
497 addrconf_addr_solict_mult(&work->target, &mcaddr);
498 ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
499 dev_put(work->dev);
500 kfree(work);
501 }
502
503 static void rt6_probe(struct rt6_info *rt)
504 {
505 struct neighbour *neigh;
506 /*
507 * Okay, this does not seem to be appropriate
508 * for now, however, we need to check if it
509 * is really so; aka Router Reachability Probing.
510 *
511 * Router Reachability Probe MUST be rate-limited
512 * to no more than one per minute.
513 */
514 if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
515 return;
516 rcu_read_lock_bh();
517 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
518 if (neigh) {
519 write_lock(&neigh->lock);
520 if (neigh->nud_state & NUD_VALID)
521 goto out;
522 }
523
524 if (!neigh ||
525 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
526 struct __rt6_probe_work *work;
527
528 work = kmalloc(sizeof(*work), GFP_ATOMIC);
529
530 if (neigh && work)
531 __neigh_set_probe_once(neigh);
532
533 if (neigh)
534 write_unlock(&neigh->lock);
535
536 if (work) {
537 INIT_WORK(&work->work, rt6_probe_deferred);
538 work->target = rt->rt6i_gateway;
539 dev_hold(rt->dst.dev);
540 work->dev = rt->dst.dev;
541 schedule_work(&work->work);
542 }
543 } else {
544 out:
545 write_unlock(&neigh->lock);
546 }
547 rcu_read_unlock_bh();
548 }
549 #else
/* Without CONFIG_IPV6_ROUTER_PREF router probing is compiled out. */
static inline void rt6_probe(struct rt6_info *rt)
{
	/* intentionally empty */
}
553 #endif
554
555 /*
556 * Default Router Selection (RFC 2461 6.3.6)
557 */
558 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
559 {
560 struct net_device *dev = rt->dst.dev;
561 if (!oif || dev->ifindex == oif)
562 return 2;
563 if ((dev->flags & IFF_LOOPBACK) &&
564 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
565 return 1;
566 return 0;
567 }
568
569 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
570 {
571 struct neighbour *neigh;
572 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
573
574 if (rt->rt6i_flags & RTF_NONEXTHOP ||
575 !(rt->rt6i_flags & RTF_GATEWAY))
576 return RT6_NUD_SUCCEED;
577
578 rcu_read_lock_bh();
579 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
580 if (neigh) {
581 read_lock(&neigh->lock);
582 if (neigh->nud_state & NUD_VALID)
583 ret = RT6_NUD_SUCCEED;
584 #ifdef CONFIG_IPV6_ROUTER_PREF
585 else if (!(neigh->nud_state & NUD_FAILED))
586 ret = RT6_NUD_SUCCEED;
587 else
588 ret = RT6_NUD_FAIL_PROBE;
589 #endif
590 read_unlock(&neigh->lock);
591 } else {
592 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
593 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
594 }
595 rcu_read_unlock_bh();
596
597 return ret;
598 }
599
600 static int rt6_score_route(struct rt6_info *rt, int oif,
601 int strict)
602 {
603 int m;
604
605 m = rt6_check_dev(rt, oif);
606 if (!m && (strict & RT6_LOOKUP_F_IFACE))
607 return RT6_NUD_FAIL_HARD;
608 #ifdef CONFIG_IPV6_ROUTER_PREF
609 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
610 #endif
611 if (strict & RT6_LOOKUP_F_REACHABLE) {
612 int n = rt6_check_neigh(rt);
613 if (n < 0)
614 return n;
615 }
616 return m;
617 }
618
619 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
620 int *mpri, struct rt6_info *match,
621 bool *do_rr)
622 {
623 int m;
624 bool match_do_rr = false;
625
626 if (rt6_check_expired(rt))
627 goto out;
628
629 m = rt6_score_route(rt, oif, strict);
630 if (m == RT6_NUD_FAIL_DO_RR) {
631 match_do_rr = true;
632 m = 0; /* lowest valid score */
633 } else if (m == RT6_NUD_FAIL_HARD) {
634 goto out;
635 }
636
637 if (strict & RT6_LOOKUP_F_REACHABLE)
638 rt6_probe(rt);
639
640 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
641 if (m > *mpri) {
642 *do_rr = match_do_rr;
643 *mpri = m;
644 match = rt;
645 }
646 out:
647 return match;
648 }
649
650 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
651 struct rt6_info *rr_head,
652 u32 metric, int oif, int strict,
653 bool *do_rr)
654 {
655 struct rt6_info *rt, *match, *cont;
656 int mpri = -1;
657
658 match = NULL;
659 cont = NULL;
660 for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
661 if (rt->rt6i_metric != metric) {
662 cont = rt;
663 break;
664 }
665
666 match = find_match(rt, oif, strict, &mpri, match, do_rr);
667 }
668
669 for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
670 if (rt->rt6i_metric != metric) {
671 cont = rt;
672 break;
673 }
674
675 match = find_match(rt, oif, strict, &mpri, match, do_rr);
676 }
677
678 if (match || !cont)
679 return match;
680
681 for (rt = cont; rt; rt = rt->dst.rt6_next)
682 match = find_match(rt, oif, strict, &mpri, match, do_rr);
683
684 return match;
685 }
686
687 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
688 {
689 struct rt6_info *match, *rt0;
690 struct net *net;
691 bool do_rr = false;
692
693 rt0 = fn->rr_ptr;
694 if (!rt0)
695 fn->rr_ptr = rt0 = fn->leaf;
696
697 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
698 &do_rr);
699
700 if (do_rr) {
701 struct rt6_info *next = rt0->dst.rt6_next;
702
703 /* no entries matched; do round-robin */
704 if (!next || next->rt6i_metric != rt0->rt6i_metric)
705 next = fn->leaf;
706
707 if (next != rt0)
708 fn->rr_ptr = next;
709 }
710
711 net = dev_net(rt0->dst.dev);
712 return match ? match : net->ipv6.ip6_null_entry;
713 }
714
715 #ifdef CONFIG_IPV6_ROUTE_INFO
716 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
717 const struct in6_addr *gwaddr)
718 {
719 struct net *net = dev_net(dev);
720 struct route_info *rinfo = (struct route_info *) opt;
721 struct in6_addr prefix_buf, *prefix;
722 unsigned int pref;
723 unsigned long lifetime;
724 struct rt6_info *rt;
725
726 if (len < sizeof(struct route_info)) {
727 return -EINVAL;
728 }
729
730 /* Sanity check for prefix_len and length */
731 if (rinfo->length > 3) {
732 return -EINVAL;
733 } else if (rinfo->prefix_len > 128) {
734 return -EINVAL;
735 } else if (rinfo->prefix_len > 64) {
736 if (rinfo->length < 2) {
737 return -EINVAL;
738 }
739 } else if (rinfo->prefix_len > 0) {
740 if (rinfo->length < 1) {
741 return -EINVAL;
742 }
743 }
744
745 pref = rinfo->route_pref;
746 if (pref == ICMPV6_ROUTER_PREF_INVALID)
747 return -EINVAL;
748
749 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
750
751 if (rinfo->length == 3)
752 prefix = (struct in6_addr *)rinfo->prefix;
753 else {
754 /* this function is safe */
755 ipv6_addr_prefix(&prefix_buf,
756 (struct in6_addr *)rinfo->prefix,
757 rinfo->prefix_len);
758 prefix = &prefix_buf;
759 }
760
761 if (rinfo->prefix_len == 0)
762 rt = rt6_get_dflt_router(gwaddr, dev);
763 else
764 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
765 gwaddr, dev->ifindex);
766
767 if (rt && !lifetime) {
768 ip6_del_rt(rt);
769 rt = NULL;
770 }
771
772 if (!rt && lifetime)
773 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
774 pref);
775 else if (rt)
776 rt->rt6i_flags = RTF_ROUTEINFO |
777 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
778
779 if (rt) {
780 if (!addrconf_finite_timeout(lifetime))
781 rt6_clean_expires(rt);
782 else
783 rt6_set_expires(rt, jiffies + HZ * lifetime);
784
785 ip6_rt_put(rt);
786 }
787 return 0;
788 }
789 #endif
790
791 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
792 struct in6_addr *saddr)
793 {
794 struct fib6_node *pn;
795 while (1) {
796 if (fn->fn_flags & RTN_TL_ROOT)
797 return NULL;
798 pn = fn->parent;
799 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
800 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
801 else
802 fn = pn;
803 if (fn->fn_flags & RTN_RTINFO)
804 return fn;
805 }
806 }
807
808 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
809 struct fib6_table *table,
810 struct flowi6 *fl6, int flags)
811 {
812 struct fib6_node *fn;
813 struct rt6_info *rt;
814
815 read_lock_bh(&table->tb6_lock);
816 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
817 restart:
818 rt = fn->leaf;
819 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
820 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
821 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
822 if (rt == net->ipv6.ip6_null_entry) {
823 fn = fib6_backtrack(fn, &fl6->saddr);
824 if (fn)
825 goto restart;
826 }
827 dst_use(&rt->dst, jiffies);
828 read_unlock_bh(&table->tb6_lock);
829 return rt;
830
831 }
832
833 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
834 int flags)
835 {
836 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
837 }
838 EXPORT_SYMBOL_GPL(ip6_route_lookup);
839
840 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
841 const struct in6_addr *saddr, int oif, int strict)
842 {
843 struct flowi6 fl6 = {
844 .flowi6_oif = oif,
845 .daddr = *daddr,
846 };
847 struct dst_entry *dst;
848 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
849
850 if (saddr) {
851 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
852 flags |= RT6_LOOKUP_F_HAS_SADDR;
853 }
854
855 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
856 if (dst->error == 0)
857 return (struct rt6_info *) dst;
858
859 dst_release(dst);
860
861 return NULL;
862 }
863 EXPORT_SYMBOL(rt6_lookup);
864
865 /* ip6_ins_rt is called with FREE table->tb6_lock.
866 It takes new route entry, the addition fails by any reason the
867 route is freed. In any case, if caller does not hold it, it may
868 be destroyed.
869 */
870
871 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
872 struct mx6_config *mxc)
873 {
874 int err;
875 struct fib6_table *table;
876
877 table = rt->rt6i_table;
878 write_lock_bh(&table->tb6_lock);
879 err = fib6_add(&table->tb6_root, rt, info, mxc);
880 write_unlock_bh(&table->tb6_lock);
881
882 return err;
883 }
884
885 int ip6_ins_rt(struct rt6_info *rt)
886 {
887 struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
888 struct mx6_config mxc = { .mx = NULL, };
889
890 return __ip6_ins_rt(rt, &info, &mxc);
891 }
892
893 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
894 const struct in6_addr *daddr,
895 const struct in6_addr *saddr)
896 {
897 struct rt6_info *rt;
898
899 /*
900 * Clone the route.
901 */
902
903 rt = ip6_rt_copy(ort, daddr);
904
905 if (rt) {
906 if (ort->rt6i_dst.plen != 128 &&
907 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
908 rt->rt6i_flags |= RTF_ANYCAST;
909
910 rt->rt6i_flags |= RTF_CACHE;
911
912 #ifdef CONFIG_IPV6_SUBTREES
913 if (rt->rt6i_src.plen && saddr) {
914 rt->rt6i_src.addr = *saddr;
915 rt->rt6i_src.plen = 128;
916 }
917 #endif
918 }
919
920 return rt;
921 }
922
923 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
924 const struct in6_addr *daddr)
925 {
926 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
927
928 if (rt)
929 rt->rt6i_flags |= RTF_CACHE;
930 return rt;
931 }
932
933 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
934 struct flowi6 *fl6, int flags)
935 {
936 struct fib6_node *fn, *saved_fn;
937 struct rt6_info *rt, *nrt;
938 int strict = 0;
939 int attempts = 3;
940 int err;
941
942 strict |= flags & RT6_LOOKUP_F_IFACE;
943 if (net->ipv6.devconf_all->forwarding == 0)
944 strict |= RT6_LOOKUP_F_REACHABLE;
945
946 redo_fib6_lookup_lock:
947 read_lock_bh(&table->tb6_lock);
948
949 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
950 saved_fn = fn;
951
952 redo_rt6_select:
953 rt = rt6_select(fn, oif, strict);
954 if (rt->rt6i_nsiblings)
955 rt = rt6_multipath_select(rt, fl6, oif, strict);
956 if (rt == net->ipv6.ip6_null_entry) {
957 fn = fib6_backtrack(fn, &fl6->saddr);
958 if (fn)
959 goto redo_rt6_select;
960 else if (strict & RT6_LOOKUP_F_REACHABLE) {
961 /* also consider unreachable route */
962 strict &= ~RT6_LOOKUP_F_REACHABLE;
963 fn = saved_fn;
964 goto redo_rt6_select;
965 } else {
966 dst_hold(&rt->dst);
967 read_unlock_bh(&table->tb6_lock);
968 goto out2;
969 }
970 }
971
972 dst_hold(&rt->dst);
973 read_unlock_bh(&table->tb6_lock);
974
975 if (rt->rt6i_flags & RTF_CACHE)
976 goto out2;
977
978 if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
979 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
980 else if (!(rt->dst.flags & DST_HOST) || !(rt->dst.flags & RTF_LOCAL))
981 nrt = rt6_alloc_clone(rt, &fl6->daddr);
982 else
983 goto out2;
984
985 ip6_rt_put(rt);
986 rt = nrt ? : net->ipv6.ip6_null_entry;
987
988 dst_hold(&rt->dst);
989 if (nrt) {
990 err = ip6_ins_rt(nrt);
991 if (!err)
992 goto out2;
993 }
994
995 if (--attempts <= 0)
996 goto out2;
997
998 /*
999 * Race condition! In the gap, when table->tb6_lock was
1000 * released someone could insert this route. Relookup.
1001 */
1002 ip6_rt_put(rt);
1003 goto redo_fib6_lookup_lock;
1004
1005 out2:
1006 rt->dst.lastuse = jiffies;
1007 rt->dst.__use++;
1008
1009 return rt;
1010 }
1011
1012 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
1013 struct flowi6 *fl6, int flags)
1014 {
1015 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
1016 }
1017
1018 static struct dst_entry *ip6_route_input_lookup(struct net *net,
1019 struct net_device *dev,
1020 struct flowi6 *fl6, int flags)
1021 {
1022 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1023 flags |= RT6_LOOKUP_F_IFACE;
1024
1025 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1026 }
1027
1028 void ip6_route_input(struct sk_buff *skb)
1029 {
1030 const struct ipv6hdr *iph = ipv6_hdr(skb);
1031 struct net *net = dev_net(skb->dev);
1032 int flags = RT6_LOOKUP_F_HAS_SADDR;
1033 struct flowi6 fl6 = {
1034 .flowi6_iif = skb->dev->ifindex,
1035 .daddr = iph->daddr,
1036 .saddr = iph->saddr,
1037 .flowlabel = ip6_flowinfo(iph),
1038 .flowi6_mark = skb->mark,
1039 .flowi6_proto = iph->nexthdr,
1040 };
1041
1042 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1043 }
1044
1045 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1046 struct flowi6 *fl6, int flags)
1047 {
1048 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1049 }
1050
1051 struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
1052 struct flowi6 *fl6)
1053 {
1054 int flags = 0;
1055
1056 fl6->flowi6_iif = LOOPBACK_IFINDEX;
1057
1058 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
1059 flags |= RT6_LOOKUP_F_IFACE;
1060
1061 if (!ipv6_addr_any(&fl6->saddr))
1062 flags |= RT6_LOOKUP_F_HAS_SADDR;
1063 else if (sk)
1064 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1065
1066 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1067 }
1068 EXPORT_SYMBOL(ip6_route_output);
1069
1070 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1071 {
1072 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1073 struct dst_entry *new = NULL;
1074
1075 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1076 if (rt) {
1077 new = &rt->dst;
1078
1079 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1080 rt6_init_peer(rt, net->ipv6.peers);
1081
1082 new->__use = 1;
1083 new->input = dst_discard;
1084 new->output = dst_discard_sk;
1085
1086 if (dst_metrics_read_only(&ort->dst))
1087 new->_metrics = ort->dst._metrics;
1088 else
1089 dst_copy_metrics(new, &ort->dst);
1090 rt->rt6i_idev = ort->rt6i_idev;
1091 if (rt->rt6i_idev)
1092 in6_dev_hold(rt->rt6i_idev);
1093
1094 rt->rt6i_gateway = ort->rt6i_gateway;
1095 rt->rt6i_flags = ort->rt6i_flags;
1096 rt->rt6i_metric = 0;
1097
1098 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1099 #ifdef CONFIG_IPV6_SUBTREES
1100 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1101 #endif
1102
1103 dst_free(new);
1104 }
1105
1106 dst_release(dst_orig);
1107 return new ? new : ERR_PTR(-ENOMEM);
1108 }
1109
1110 /*
1111 * Destination cache support functions
1112 */
1113
1114 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1115 {
1116 struct rt6_info *rt;
1117
1118 rt = (struct rt6_info *) dst;
1119
1120 /* All IPV6 dsts are created with ->obsolete set to the value
1121 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1122 * into this function always.
1123 */
1124 if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1125 return NULL;
1126
1127 if (rt6_check_expired(rt))
1128 return NULL;
1129
1130 return dst;
1131 }
1132
1133 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1134 {
1135 struct rt6_info *rt = (struct rt6_info *) dst;
1136
1137 if (rt) {
1138 if (rt->rt6i_flags & RTF_CACHE) {
1139 if (rt6_check_expired(rt)) {
1140 ip6_del_rt(rt);
1141 dst = NULL;
1142 }
1143 } else {
1144 dst_release(dst);
1145 dst = NULL;
1146 }
1147 }
1148 return dst;
1149 }
1150
1151 static void ip6_link_failure(struct sk_buff *skb)
1152 {
1153 struct rt6_info *rt;
1154
1155 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1156
1157 rt = (struct rt6_info *) skb_dst(skb);
1158 if (rt) {
1159 if (rt->rt6i_flags & RTF_CACHE) {
1160 dst_hold(&rt->dst);
1161 if (ip6_del_rt(rt))
1162 dst_free(&rt->dst);
1163 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1164 rt->rt6i_node->fn_sernum = -1;
1165 }
1166 }
1167 }
1168
1169 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1170 struct sk_buff *skb, u32 mtu)
1171 {
1172 struct rt6_info *rt6 = (struct rt6_info *)dst;
1173
1174 dst_confirm(dst);
1175 if (mtu < dst_mtu(dst) && (rt6->rt6i_flags & RTF_CACHE)) {
1176 struct net *net = dev_net(dst->dev);
1177
1178 rt6->rt6i_flags |= RTF_MODIFIED;
1179 if (mtu < IPV6_MIN_MTU)
1180 mtu = IPV6_MIN_MTU;
1181
1182 dst_metric_set(dst, RTAX_MTU, mtu);
1183 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1184 }
1185 }
1186
1187 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1188 int oif, u32 mark)
1189 {
1190 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1191 struct dst_entry *dst;
1192 struct flowi6 fl6;
1193
1194 memset(&fl6, 0, sizeof(fl6));
1195 fl6.flowi6_oif = oif;
1196 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1197 fl6.daddr = iph->daddr;
1198 fl6.saddr = iph->saddr;
1199 fl6.flowlabel = ip6_flowinfo(iph);
1200
1201 dst = ip6_route_output(net, NULL, &fl6);
1202 if (!dst->error)
1203 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1204 dst_release(dst);
1205 }
1206 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1207
1208 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1209 {
1210 ip6_update_pmtu(skb, sock_net(sk), mtu,
1211 sk->sk_bound_dev_if, sk->sk_mark);
1212 }
1213 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1214
1215 /* Handle redirects */
/*
 * Lookup key used for redirects: a flow plus the gateway to match against.
 * ip6_route_redirect() passes &fl6 to the rule lookup and
 * __ip6_route_redirect() casts that pointer back to this type, so fl6 has
 * to stay the first member.
 */
1216 struct ip6rd_flowi {
1217 struct flowi6 fl6;
1218 struct in6_addr gateway;
1219 };
1220
1221 static struct rt6_info *__ip6_route_redirect(struct net *net,
1222 struct fib6_table *table,
1223 struct flowi6 *fl6,
1224 int flags)
1225 {
1226 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1227 struct rt6_info *rt;
1228 struct fib6_node *fn;
1229
1230 /* Get the "current" route for this destination and
1231 * check if the redirect has come from approriate router.
1232 *
1233 * RFC 4861 specifies that redirects should only be
1234 * accepted if they come from the nexthop to the target.
1235 * Due to the way the routes are chosen, this notion
1236 * is a bit fuzzy and one might need to check all possible
1237 * routes.
1238 */
1239
1240 read_lock_bh(&table->tb6_lock);
1241 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1242 restart:
1243 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1244 if (rt6_check_expired(rt))
1245 continue;
1246 if (rt->dst.error)
1247 break;
1248 if (!(rt->rt6i_flags & RTF_GATEWAY))
1249 continue;
1250 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1251 continue;
1252 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1253 continue;
1254 break;
1255 }
1256
1257 if (!rt)
1258 rt = net->ipv6.ip6_null_entry;
1259 else if (rt->dst.error) {
1260 rt = net->ipv6.ip6_null_entry;
1261 goto out;
1262 }
1263
1264 if (rt == net->ipv6.ip6_null_entry) {
1265 fn = fib6_backtrack(fn, &fl6->saddr);
1266 if (fn)
1267 goto restart;
1268 }
1269
1270 out:
1271 dst_hold(&rt->dst);
1272
1273 read_unlock_bh(&table->tb6_lock);
1274
1275 return rt;
1276 };
1277
1278 static struct dst_entry *ip6_route_redirect(struct net *net,
1279 const struct flowi6 *fl6,
1280 const struct in6_addr *gateway)
1281 {
1282 int flags = RT6_LOOKUP_F_HAS_SADDR;
1283 struct ip6rd_flowi rdfl;
1284
1285 rdfl.fl6 = *fl6;
1286 rdfl.gateway = *gateway;
1287
1288 return fib6_rule_lookup(net, &rdfl.fl6,
1289 flags, __ip6_route_redirect);
1290 }
1291
1292 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1293 {
1294 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1295 struct dst_entry *dst;
1296 struct flowi6 fl6;
1297
1298 memset(&fl6, 0, sizeof(fl6));
1299 fl6.flowi6_iif = LOOPBACK_IFINDEX;
1300 fl6.flowi6_oif = oif;
1301 fl6.flowi6_mark = mark;
1302 fl6.daddr = iph->daddr;
1303 fl6.saddr = iph->saddr;
1304 fl6.flowlabel = ip6_flowinfo(iph);
1305
1306 dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1307 rt6_do_redirect(dst, NULL, skb);
1308 dst_release(dst);
1309 }
1310 EXPORT_SYMBOL_GPL(ip6_redirect);
1311
1312 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1313 u32 mark)
1314 {
1315 const struct ipv6hdr *iph = ipv6_hdr(skb);
1316 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1317 struct dst_entry *dst;
1318 struct flowi6 fl6;
1319
1320 memset(&fl6, 0, sizeof(fl6));
1321 fl6.flowi6_iif = LOOPBACK_IFINDEX;
1322 fl6.flowi6_oif = oif;
1323 fl6.flowi6_mark = mark;
1324 fl6.daddr = msg->dest;
1325 fl6.saddr = iph->daddr;
1326
1327 dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1328 rt6_do_redirect(dst, NULL, skb);
1329 dst_release(dst);
1330 }
1331
1332 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1333 {
1334 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1335 }
1336 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1337
1338 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1339 {
1340 struct net_device *dev = dst->dev;
1341 unsigned int mtu = dst_mtu(dst);
1342 struct net *net = dev_net(dev);
1343
1344 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1345
1346 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1347 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1348
1349 /*
1350 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1351 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1352 * IPV6_MAXPLEN is also valid and means: "any MSS,
1353 * rely only on pmtu discovery"
1354 */
1355 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1356 mtu = IPV6_MAXPLEN;
1357 return mtu;
1358 }
1359
1360 static unsigned int ip6_mtu(const struct dst_entry *dst)
1361 {
1362 struct inet6_dev *idev;
1363 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1364
1365 if (mtu)
1366 goto out;
1367
1368 mtu = IPV6_MIN_MTU;
1369
1370 rcu_read_lock();
1371 idev = __in6_dev_get(dst->dev);
1372 if (idev)
1373 mtu = idev->cnf.mtu6;
1374 rcu_read_unlock();
1375
1376 out:
1377 return min_t(unsigned int, mtu, IP6_MAX_MTU);
1378 }
1379
/*
 * Singly linked list (chained through dst_entry.next) of the dst entries
 * created by icmp6_dst_alloc().  icmp6_dst_lock is held around every
 * insertion, walk and removal done on the list in this file.
 */
1380 static struct dst_entry *icmp6_dst_gc_list;
1381 static DEFINE_SPINLOCK(icmp6_dst_lock);
1382
1383 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1384 struct flowi6 *fl6)
1385 {
1386 struct dst_entry *dst;
1387 struct rt6_info *rt;
1388 struct inet6_dev *idev = in6_dev_get(dev);
1389 struct net *net = dev_net(dev);
1390
1391 if (unlikely(!idev))
1392 return ERR_PTR(-ENODEV);
1393
1394 rt = ip6_dst_alloc(net, dev, 0, NULL);
1395 if (unlikely(!rt)) {
1396 in6_dev_put(idev);
1397 dst = ERR_PTR(-ENOMEM);
1398 goto out;
1399 }
1400
1401 rt->dst.flags |= DST_HOST;
1402 rt->dst.output = ip6_output;
1403 atomic_set(&rt->dst.__refcnt, 1);
1404 rt->rt6i_gateway = fl6->daddr;
1405 rt->rt6i_dst.addr = fl6->daddr;
1406 rt->rt6i_dst.plen = 128;
1407 rt->rt6i_idev = idev;
1408 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1409
1410 spin_lock_bh(&icmp6_dst_lock);
1411 rt->dst.next = icmp6_dst_gc_list;
1412 icmp6_dst_gc_list = &rt->dst;
1413 spin_unlock_bh(&icmp6_dst_lock);
1414
1415 fib6_force_start_gc(net);
1416
1417 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1418
1419 out:
1420 return dst;
1421 }
1422
1423 int icmp6_dst_gc(void)
1424 {
1425 struct dst_entry *dst, **pprev;
1426 int more = 0;
1427
1428 spin_lock_bh(&icmp6_dst_lock);
1429 pprev = &icmp6_dst_gc_list;
1430
1431 while ((dst = *pprev) != NULL) {
1432 if (!atomic_read(&dst->__refcnt)) {
1433 *pprev = dst->next;
1434 dst_free(dst);
1435 } else {
1436 pprev = &dst->next;
1437 ++more;
1438 }
1439 }
1440
1441 spin_unlock_bh(&icmp6_dst_lock);
1442
1443 return more;
1444 }
1445
1446 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1447 void *arg)
1448 {
1449 struct dst_entry *dst, **pprev;
1450
1451 spin_lock_bh(&icmp6_dst_lock);
1452 pprev = &icmp6_dst_gc_list;
1453 while ((dst = *pprev) != NULL) {
1454 struct rt6_info *rt = (struct rt6_info *) dst;
1455 if (func(rt, arg)) {
1456 *pprev = dst->next;
1457 dst_free(dst);
1458 } else {
1459 pprev = &dst->next;
1460 }
1461 }
1462 spin_unlock_bh(&icmp6_dst_lock);
1463 }
1464
1465 static int ip6_dst_gc(struct dst_ops *ops)
1466 {
1467 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1468 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1469 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1470 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1471 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1472 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1473 int entries;
1474
1475 entries = dst_entries_get_fast(ops);
1476 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1477 entries <= rt_max_size)
1478 goto out;
1479
1480 net->ipv6.ip6_rt_gc_expire++;
1481 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1482 entries = dst_entries_get_slow(ops);
1483 if (entries < ops->gc_thresh)
1484 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1485 out:
1486 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1487 return entries > rt_max_size;
1488 }
1489
1490 static int ip6_convert_metrics(struct mx6_config *mxc,
1491 const struct fib6_config *cfg)
1492 {
1493 struct nlattr *nla;
1494 int remaining;
1495 u32 *mp;
1496
1497 if (!cfg->fc_mx)
1498 return 0;
1499
1500 mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1501 if (unlikely(!mp))
1502 return -ENOMEM;
1503
1504 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1505 int type = nla_type(nla);
1506
1507 if (type) {
1508 u32 val;
1509
1510 if (unlikely(type > RTAX_MAX))
1511 goto err;
1512 if (type == RTAX_CC_ALGO) {
1513 char tmp[TCP_CA_NAME_MAX];
1514
1515 nla_strlcpy(tmp, nla, sizeof(tmp));
1516 val = tcp_ca_get_key_by_name(tmp);
1517 if (val == TCP_CA_UNSPEC)
1518 goto err;
1519 } else {
1520 val = nla_get_u32(nla);
1521 }
1522
1523 mp[type - 1] = val;
1524 __set_bit(type - 1, mxc->mx_valid);
1525 }
1526 }
1527
1528 mxc->mx = mp;
1529
1530 return 0;
1531 err:
1532 kfree(mp);
1533 return -EINVAL;
1534 }
1535
1536 int ip6_route_add(struct fib6_config *cfg)
1537 {
1538 int err;
1539 struct net *net = cfg->fc_nlinfo.nl_net;
1540 struct rt6_info *rt = NULL;
1541 struct net_device *dev = NULL;
1542 struct inet6_dev *idev = NULL;
1543 struct fib6_table *table;
1544 struct mx6_config mxc = { .mx = NULL, };
1545 int addr_type;
1546
1547 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1548 return -EINVAL;
1549 #ifndef CONFIG_IPV6_SUBTREES
1550 if (cfg->fc_src_len)
1551 return -EINVAL;
1552 #endif
1553 if (cfg->fc_ifindex) {
1554 err = -ENODEV;
1555 dev = dev_get_by_index(net, cfg->fc_ifindex);
1556 if (!dev)
1557 goto out;
1558 idev = in6_dev_get(dev);
1559 if (!idev)
1560 goto out;
1561 }
1562
1563 if (cfg->fc_metric == 0)
1564 cfg->fc_metric = IP6_RT_PRIO_USER;
1565
1566 err = -ENOBUFS;
1567 if (cfg->fc_nlinfo.nlh &&
1568 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1569 table = fib6_get_table(net, cfg->fc_table);
1570 if (!table) {
1571 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1572 table = fib6_new_table(net, cfg->fc_table);
1573 }
1574 } else {
1575 table = fib6_new_table(net, cfg->fc_table);
1576 }
1577
1578 if (!table)
1579 goto out;
1580
1581 rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table);
1582
1583 if (!rt) {
1584 err = -ENOMEM;
1585 goto out;
1586 }
1587
1588 if (cfg->fc_flags & RTF_EXPIRES)
1589 rt6_set_expires(rt, jiffies +
1590 clock_t_to_jiffies(cfg->fc_expires));
1591 else
1592 rt6_clean_expires(rt);
1593
1594 if (cfg->fc_protocol == RTPROT_UNSPEC)
1595 cfg->fc_protocol = RTPROT_BOOT;
1596 rt->rt6i_protocol = cfg->fc_protocol;
1597
1598 addr_type = ipv6_addr_type(&cfg->fc_dst);
1599
1600 if (addr_type & IPV6_ADDR_MULTICAST)
1601 rt->dst.input = ip6_mc_input;
1602 else if (cfg->fc_flags & RTF_LOCAL)
1603 rt->dst.input = ip6_input;
1604 else
1605 rt->dst.input = ip6_forward;
1606
1607 rt->dst.output = ip6_output;
1608
1609 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1610 rt->rt6i_dst.plen = cfg->fc_dst_len;
1611 if (rt->rt6i_dst.plen == 128) {
1612 rt->dst.flags |= DST_HOST;
1613 dst_metrics_set_force_overwrite(&rt->dst);
1614 }
1615
1616 #ifdef CONFIG_IPV6_SUBTREES
1617 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1618 rt->rt6i_src.plen = cfg->fc_src_len;
1619 #endif
1620
1621 rt->rt6i_metric = cfg->fc_metric;
1622
1623 /* We cannot add true routes via loopback here,
1624 they would result in kernel looping; promote them to reject routes
1625 */
1626 if ((cfg->fc_flags & RTF_REJECT) ||
1627 (dev && (dev->flags & IFF_LOOPBACK) &&
1628 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1629 !(cfg->fc_flags & RTF_LOCAL))) {
1630 /* hold loopback dev/idev if we haven't done so. */
1631 if (dev != net->loopback_dev) {
1632 if (dev) {
1633 dev_put(dev);
1634 in6_dev_put(idev);
1635 }
1636 dev = net->loopback_dev;
1637 dev_hold(dev);
1638 idev = in6_dev_get(dev);
1639 if (!idev) {
1640 err = -ENODEV;
1641 goto out;
1642 }
1643 }
1644 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1645 switch (cfg->fc_type) {
1646 case RTN_BLACKHOLE:
1647 rt->dst.error = -EINVAL;
1648 rt->dst.output = dst_discard_sk;
1649 rt->dst.input = dst_discard;
1650 break;
1651 case RTN_PROHIBIT:
1652 rt->dst.error = -EACCES;
1653 rt->dst.output = ip6_pkt_prohibit_out;
1654 rt->dst.input = ip6_pkt_prohibit;
1655 break;
1656 case RTN_THROW:
1657 default:
1658 rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1659 : -ENETUNREACH;
1660 rt->dst.output = ip6_pkt_discard_out;
1661 rt->dst.input = ip6_pkt_discard;
1662 break;
1663 }
1664 goto install_route;
1665 }
1666
1667 if (cfg->fc_flags & RTF_GATEWAY) {
1668 const struct in6_addr *gw_addr;
1669 int gwa_type;
1670
1671 gw_addr = &cfg->fc_gateway;
1672 rt->rt6i_gateway = *gw_addr;
1673 gwa_type = ipv6_addr_type(gw_addr);
1674
1675 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1676 struct rt6_info *grt;
1677
1678 /* IPv6 strictly inhibits using not link-local
1679 addresses as nexthop address.
1680 Otherwise, router will not able to send redirects.
1681 It is very good, but in some (rare!) circumstances
1682 (SIT, PtP, NBMA NOARP links) it is handy to allow
1683 some exceptions. --ANK
1684 */
1685 err = -EINVAL;
1686 if (!(gwa_type & IPV6_ADDR_UNICAST))
1687 goto out;
1688
1689 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1690
1691 err = -EHOSTUNREACH;
1692 if (!grt)
1693 goto out;
1694 if (dev) {
1695 if (dev != grt->dst.dev) {
1696 ip6_rt_put(grt);
1697 goto out;
1698 }
1699 } else {
1700 dev = grt->dst.dev;
1701 idev = grt->rt6i_idev;
1702 dev_hold(dev);
1703 in6_dev_hold(grt->rt6i_idev);
1704 }
1705 if (!(grt->rt6i_flags & RTF_GATEWAY))
1706 err = 0;
1707 ip6_rt_put(grt);
1708
1709 if (err)
1710 goto out;
1711 }
1712 err = -EINVAL;
1713 if (!dev || (dev->flags & IFF_LOOPBACK))
1714 goto out;
1715 }
1716
1717 err = -ENODEV;
1718 if (!dev)
1719 goto out;
1720
1721 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1722 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1723 err = -EINVAL;
1724 goto out;
1725 }
1726 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1727 rt->rt6i_prefsrc.plen = 128;
1728 } else
1729 rt->rt6i_prefsrc.plen = 0;
1730
1731 rt->rt6i_flags = cfg->fc_flags;
1732
1733 install_route:
1734 rt->dst.dev = dev;
1735 rt->rt6i_idev = idev;
1736 rt->rt6i_table = table;
1737
1738 cfg->fc_nlinfo.nl_net = dev_net(dev);
1739
1740 err = ip6_convert_metrics(&mxc, cfg);
1741 if (err)
1742 goto out;
1743
1744 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
1745
1746 kfree(mxc.mx);
1747 return err;
1748 out:
1749 if (dev)
1750 dev_put(dev);
1751 if (idev)
1752 in6_dev_put(idev);
1753 if (rt)
1754 dst_free(&rt->dst);
1755 return err;
1756 }
1757
1758 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1759 {
1760 int err;
1761 struct fib6_table *table;
1762 struct net *net = dev_net(rt->dst.dev);
1763
1764 if (rt == net->ipv6.ip6_null_entry) {
1765 err = -ENOENT;
1766 goto out;
1767 }
1768
1769 table = rt->rt6i_table;
1770 write_lock_bh(&table->tb6_lock);
1771 err = fib6_del(rt, info);
1772 write_unlock_bh(&table->tb6_lock);
1773
1774 out:
1775 ip6_rt_put(rt);
1776 return err;
1777 }
1778
1779 int ip6_del_rt(struct rt6_info *rt)
1780 {
1781 struct nl_info info = {
1782 .nl_net = dev_net(rt->dst.dev),
1783 };
1784 return __ip6_del_rt(rt, &info);
1785 }
1786
1787 static int ip6_route_del(struct fib6_config *cfg)
1788 {
1789 struct fib6_table *table;
1790 struct fib6_node *fn;
1791 struct rt6_info *rt;
1792 int err = -ESRCH;
1793
1794 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1795 if (!table)
1796 return err;
1797
1798 read_lock_bh(&table->tb6_lock);
1799
1800 fn = fib6_locate(&table->tb6_root,
1801 &cfg->fc_dst, cfg->fc_dst_len,
1802 &cfg->fc_src, cfg->fc_src_len);
1803
1804 if (fn) {
1805 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1806 if ((rt->rt6i_flags & RTF_CACHE) &&
1807 !(cfg->fc_flags & RTF_CACHE))
1808 continue;
1809 if (cfg->fc_ifindex &&
1810 (!rt->dst.dev ||
1811 rt->dst.dev->ifindex != cfg->fc_ifindex))
1812 continue;
1813 if (cfg->fc_flags & RTF_GATEWAY &&
1814 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1815 continue;
1816 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1817 continue;
1818 dst_hold(&rt->dst);
1819 read_unlock_bh(&table->tb6_lock);
1820
1821 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1822 }
1823 }
1824 read_unlock_bh(&table->tb6_lock);
1825
1826 return err;
1827 }
1828
1829 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1830 {
1831 struct net *net = dev_net(skb->dev);
1832 struct netevent_redirect netevent;
1833 struct rt6_info *rt, *nrt = NULL;
1834 struct ndisc_options ndopts;
1835 struct inet6_dev *in6_dev;
1836 struct neighbour *neigh;
1837 struct rd_msg *msg;
1838 int optlen, on_link;
1839 u8 *lladdr;
1840
1841 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
1842 optlen -= sizeof(*msg);
1843
1844 if (optlen < 0) {
1845 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1846 return;
1847 }
1848
1849 msg = (struct rd_msg *)icmp6_hdr(skb);
1850
1851 if (ipv6_addr_is_multicast(&msg->dest)) {
1852 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1853 return;
1854 }
1855
1856 on_link = 0;
1857 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
1858 on_link = 1;
1859 } else if (ipv6_addr_type(&msg->target) !=
1860 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1861 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1862 return;
1863 }
1864
1865 in6_dev = __in6_dev_get(skb->dev);
1866 if (!in6_dev)
1867 return;
1868 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1869 return;
1870
1871 /* RFC2461 8.1:
1872 * The IP source address of the Redirect MUST be the same as the current
1873 * first-hop router for the specified ICMP Destination Address.
1874 */
1875
1876 if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
1877 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1878 return;
1879 }
1880
1881 lladdr = NULL;
1882 if (ndopts.nd_opts_tgt_lladdr) {
1883 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1884 skb->dev);
1885 if (!lladdr) {
1886 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1887 return;
1888 }
1889 }
1890
1891 rt = (struct rt6_info *) dst;
1892 if (rt == net->ipv6.ip6_null_entry) {
1893 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1894 return;
1895 }
1896
1897 /* Redirect received -> path was valid.
1898 * Look, redirects are sent only in response to data packets,
1899 * so that this nexthop apparently is reachable. --ANK
1900 */
1901 dst_confirm(&rt->dst);
1902
1903 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
1904 if (!neigh)
1905 return;
1906
1907 /*
1908 * We have finally decided to accept it.
1909 */
1910
1911 neigh_update(neigh, lladdr, NUD_STALE,
1912 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1913 NEIGH_UPDATE_F_OVERRIDE|
1914 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1915 NEIGH_UPDATE_F_ISROUTER))
1916 );
1917
1918 nrt = ip6_rt_copy(rt, &msg->dest);
1919 if (!nrt)
1920 goto out;
1921
1922 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1923 if (on_link)
1924 nrt->rt6i_flags &= ~RTF_GATEWAY;
1925
1926 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1927
1928 if (ip6_ins_rt(nrt))
1929 goto out;
1930
1931 netevent.old = &rt->dst;
1932 netevent.new = &nrt->dst;
1933 netevent.daddr = &msg->dest;
1934 netevent.neigh = neigh;
1935 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1936
1937 if (rt->rt6i_flags & RTF_CACHE) {
1938 rt = (struct rt6_info *) dst_clone(&rt->dst);
1939 ip6_del_rt(rt);
1940 }
1941
1942 out:
1943 neigh_release(neigh);
1944 }
1945
1946 /*
1947 * Misc support functions
1948 */
1949
1950 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1951 const struct in6_addr *dest)
1952 {
1953 struct net *net = dev_net(ort->dst.dev);
1954 struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1955 ort->rt6i_table);
1956
1957 if (rt) {
1958 rt->dst.input = ort->dst.input;
1959 rt->dst.output = ort->dst.output;
1960 rt->dst.flags |= DST_HOST;
1961
1962 rt->rt6i_dst.addr = *dest;
1963 rt->rt6i_dst.plen = 128;
1964 dst_copy_metrics(&rt->dst, &ort->dst);
1965 rt->dst.error = ort->dst.error;
1966 rt->rt6i_idev = ort->rt6i_idev;
1967 if (rt->rt6i_idev)
1968 in6_dev_hold(rt->rt6i_idev);
1969 rt->dst.lastuse = jiffies;
1970
1971 if (ort->rt6i_flags & RTF_GATEWAY)
1972 rt->rt6i_gateway = ort->rt6i_gateway;
1973 else
1974 rt->rt6i_gateway = *dest;
1975 rt->rt6i_flags = ort->rt6i_flags;
1976 rt6_set_from(rt, ort);
1977 rt->rt6i_metric = 0;
1978
1979 #ifdef CONFIG_IPV6_SUBTREES
1980 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1981 #endif
1982 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1983 rt->rt6i_table = ort->rt6i_table;
1984 }
1985 return rt;
1986 }
1987
1988 #ifdef CONFIG_IPV6_ROUTE_INFO
1989 static struct rt6_info *rt6_get_route_info(struct net *net,
1990 const struct in6_addr *prefix, int prefixlen,
1991 const struct in6_addr *gwaddr, int ifindex)
1992 {
1993 struct fib6_node *fn;
1994 struct rt6_info *rt = NULL;
1995 struct fib6_table *table;
1996
1997 table = fib6_get_table(net, RT6_TABLE_INFO);
1998 if (!table)
1999 return NULL;
2000
2001 read_lock_bh(&table->tb6_lock);
2002 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
2003 if (!fn)
2004 goto out;
2005
2006 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2007 if (rt->dst.dev->ifindex != ifindex)
2008 continue;
2009 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2010 continue;
2011 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2012 continue;
2013 dst_hold(&rt->dst);
2014 break;
2015 }
2016 out:
2017 read_unlock_bh(&table->tb6_lock);
2018 return rt;
2019 }
2020
2021 static struct rt6_info *rt6_add_route_info(struct net *net,
2022 const struct in6_addr *prefix, int prefixlen,
2023 const struct in6_addr *gwaddr, int ifindex,
2024 unsigned int pref)
2025 {
2026 struct fib6_config cfg = {
2027 .fc_table = RT6_TABLE_INFO,
2028 .fc_metric = IP6_RT_PRIO_USER,
2029 .fc_ifindex = ifindex,
2030 .fc_dst_len = prefixlen,
2031 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2032 RTF_UP | RTF_PREF(pref),
2033 .fc_nlinfo.portid = 0,
2034 .fc_nlinfo.nlh = NULL,
2035 .fc_nlinfo.nl_net = net,
2036 };
2037
2038 cfg.fc_dst = *prefix;
2039 cfg.fc_gateway = *gwaddr;
2040
2041 /* We should treat it as a default route if prefix length is 0. */
2042 if (!prefixlen)
2043 cfg.fc_flags |= RTF_DEFAULT;
2044
2045 ip6_route_add(&cfg);
2046
2047 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
2048 }
2049 #endif
2050
2051 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2052 {
2053 struct rt6_info *rt;
2054 struct fib6_table *table;
2055
2056 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
2057 if (!table)
2058 return NULL;
2059
2060 read_lock_bh(&table->tb6_lock);
2061 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2062 if (dev == rt->dst.dev &&
2063 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2064 ipv6_addr_equal(&rt->rt6i_gateway, addr))
2065 break;
2066 }
2067 if (rt)
2068 dst_hold(&rt->dst);
2069 read_unlock_bh(&table->tb6_lock);
2070 return rt;
2071 }
2072
2073 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2074 struct net_device *dev,
2075 unsigned int pref)
2076 {
2077 struct fib6_config cfg = {
2078 .fc_table = RT6_TABLE_DFLT,
2079 .fc_metric = IP6_RT_PRIO_USER,
2080 .fc_ifindex = dev->ifindex,
2081 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2082 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2083 .fc_nlinfo.portid = 0,
2084 .fc_nlinfo.nlh = NULL,
2085 .fc_nlinfo.nl_net = dev_net(dev),
2086 };
2087
2088 cfg.fc_gateway = *gwaddr;
2089
2090 ip6_route_add(&cfg);
2091
2092 return rt6_get_dflt_router(gwaddr, dev);
2093 }
2094
2095 void rt6_purge_dflt_routers(struct net *net)
2096 {
2097 struct rt6_info *rt;
2098 struct fib6_table *table;
2099
2100 /* NOTE: Keep consistent with rt6_get_dflt_router */
2101 table = fib6_get_table(net, RT6_TABLE_DFLT);
2102 if (!table)
2103 return;
2104
2105 restart:
2106 read_lock_bh(&table->tb6_lock);
2107 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2108 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2109 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2110 dst_hold(&rt->dst);
2111 read_unlock_bh(&table->tb6_lock);
2112 ip6_del_rt(rt);
2113 goto restart;
2114 }
2115 }
2116 read_unlock_bh(&table->tb6_lock);
2117 }
2118
2119 static void rtmsg_to_fib6_config(struct net *net,
2120 struct in6_rtmsg *rtmsg,
2121 struct fib6_config *cfg)
2122 {
2123 memset(cfg, 0, sizeof(*cfg));
2124
2125 cfg->fc_table = RT6_TABLE_MAIN;
2126 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2127 cfg->fc_metric = rtmsg->rtmsg_metric;
2128 cfg->fc_expires = rtmsg->rtmsg_info;
2129 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2130 cfg->fc_src_len = rtmsg->rtmsg_src_len;
2131 cfg->fc_flags = rtmsg->rtmsg_flags;
2132
2133 cfg->fc_nlinfo.nl_net = net;
2134
2135 cfg->fc_dst = rtmsg->rtmsg_dst;
2136 cfg->fc_src = rtmsg->rtmsg_src;
2137 cfg->fc_gateway = rtmsg->rtmsg_gateway;
2138 }
2139
2140 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2141 {
2142 struct fib6_config cfg;
2143 struct in6_rtmsg rtmsg;
2144 int err;
2145
2146 switch (cmd) {
2147 case SIOCADDRT: /* Add a route */
2148 case SIOCDELRT: /* Delete a route */
2149 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2150 return -EPERM;
2151 err = copy_from_user(&rtmsg, arg,
2152 sizeof(struct in6_rtmsg));
2153 if (err)
2154 return -EFAULT;
2155
2156 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2157
2158 rtnl_lock();
2159 switch (cmd) {
2160 case SIOCADDRT:
2161 err = ip6_route_add(&cfg);
2162 break;
2163 case SIOCDELRT:
2164 err = ip6_route_del(&cfg);
2165 break;
2166 default:
2167 err = -EINVAL;
2168 }
2169 rtnl_unlock();
2170
2171 return err;
2172 }
2173
2174 return -EINVAL;
2175 }
2176
2177 /*
2178 * Drop the packet on the floor
2179 */
2180
2181 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2182 {
2183 int type;
2184 struct dst_entry *dst = skb_dst(skb);
2185 switch (ipstats_mib_noroutes) {
2186 case IPSTATS_MIB_INNOROUTES:
2187 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2188 if (type == IPV6_ADDR_ANY) {
2189 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2190 IPSTATS_MIB_INADDRERRORS);
2191 break;
2192 }
2193 /* FALLTHROUGH */
2194 case IPSTATS_MIB_OUTNOROUTES:
2195 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2196 ipstats_mib_noroutes);
2197 break;
2198 }
2199 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2200 kfree_skb(skb);
2201 return 0;
2202 }
2203
2204 static int ip6_pkt_discard(struct sk_buff *skb)
2205 {
2206 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2207 }
2208
2209 static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
2210 {
2211 skb->dev = skb_dst(skb)->dev;
2212 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2213 }
2214
2215 static int ip6_pkt_prohibit(struct sk_buff *skb)
2216 {
2217 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2218 }
2219
2220 static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
2221 {
2222 skb->dev = skb_dst(skb)->dev;
2223 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2224 }
2225
2226 /*
2227 * Allocate a dst for local (unicast / anycast) address.
2228 */
2229
2230 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2231 const struct in6_addr *addr,
2232 bool anycast)
2233 {
2234 struct net *net = dev_net(idev->dev);
2235 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2236 DST_NOCOUNT, NULL);
2237 if (!rt)
2238 return ERR_PTR(-ENOMEM);
2239
2240 in6_dev_hold(idev);
2241
2242 rt->dst.flags |= DST_HOST;
2243 rt->dst.input = ip6_input;
2244 rt->dst.output = ip6_output;
2245 rt->rt6i_idev = idev;
2246
2247 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2248 if (anycast)
2249 rt->rt6i_flags |= RTF_ANYCAST;
2250 else
2251 rt->rt6i_flags |= RTF_LOCAL;
2252
2253 rt->rt6i_gateway = *addr;
2254 rt->rt6i_dst.addr = *addr;
2255 rt->rt6i_dst.plen = 128;
2256 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2257
2258 atomic_set(&rt->dst.__refcnt, 1);
2259
2260 return rt;
2261 }
2262
2263 int ip6_route_get_saddr(struct net *net,
2264 struct rt6_info *rt,
2265 const struct in6_addr *daddr,
2266 unsigned int prefs,
2267 struct in6_addr *saddr)
2268 {
2269 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry *)rt);
2270 int err = 0;
2271 if (rt->rt6i_prefsrc.plen)
2272 *saddr = rt->rt6i_prefsrc.addr;
2273 else
2274 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2275 daddr, prefs, saddr);
2276 return err;
2277 }
2278
2279 /* remove deleted ip from prefsrc entries */
/*
 * Argument bundle handed to fib6_remove_prefsrc() through fib6_clean_all().
 * A NULL dev makes the callback match routes on any device.
 */
2280 struct arg_dev_net_ip {
2281 struct net_device *dev; /* device the routes must use, or NULL */
2282 struct net *net; /* namespace whose ip6_null_entry is skipped */
2283 struct in6_addr *addr; /* preferred source address being removed */
2284 };
2285
2286 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2287 {
2288 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2289 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2290 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2291
2292 if (((void *)rt->dst.dev == dev || !dev) &&
2293 rt != net->ipv6.ip6_null_entry &&
2294 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2295 /* remove prefsrc entry */
2296 rt->rt6i_prefsrc.plen = 0;
2297 }
2298 return 0;
2299 }
2300
2301 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2302 {
2303 struct net *net = dev_net(ifp->idev->dev);
2304 struct arg_dev_net_ip adni = {
2305 .dev = ifp->idev->dev,
2306 .net = net,
2307 .addr = &ifp->addr,
2308 };
2309 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2310 }
2311
/*
 * Flag combinations that fib6_clean_tohost() tests as a whole.
 * RTF_RA_ROUTER is a subset of the flags rt6_add_dflt_router() installs;
 * RTF_CACHE_GATEWAY matches cached routes that go through a gateway, such
 * as the ones rt6_do_redirect() creates for off-link redirects.
 */
2312 #define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2313 #define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
2314
2315 /* Remove routers and update dst entries when gateway turn into host. */
2316 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2317 {
2318 struct in6_addr *gateway = (struct in6_addr *)arg;
2319
2320 if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2321 ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2322 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2323 return -1;
2324 }
2325 return 0;
2326 }
2327
2328 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2329 {
2330 fib6_clean_all(net, fib6_clean_tohost, gateway);
2331 }
2332
/*
 * Argument bundle handed to fib6_ifdown(): the device going down (NULL
 * matches every device) and the namespace whose ip6_null_entry is spared.
 */
2333 struct arg_dev_net {
2334 struct net_device *dev;
2335 struct net *net;
2336 };
2337
2338 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2339 {
2340 const struct arg_dev_net *adn = arg;
2341 const struct net_device *dev = adn->dev;
2342
2343 if ((rt->dst.dev == dev || !dev) &&
2344 rt != adn->net->ipv6.ip6_null_entry)
2345 return -1;
2346
2347 return 0;
2348 }
2349
2350 void rt6_ifdown(struct net *net, struct net_device *dev)
2351 {
2352 struct arg_dev_net adn = {
2353 .dev = dev,
2354 .net = net,
2355 };
2356
2357 fib6_clean_all(net, fib6_ifdown, &adn);
2358 icmp6_clean_all(fib6_ifdown, &adn);
2359 }
2360
/*
 * Argument bundle handed to rt6_mtu_change_route(): the device whose MTU
 * changed and the new MTU value.
 */
2361 struct rt6_mtu_change_arg {
2362 struct net_device *dev;
2363 unsigned int mtu;
2364 };
2365
2366 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2367 {
2368 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2369 struct inet6_dev *idev;
2370
2371 /* In IPv6 pmtu discovery is not optional,
2372 so that RTAX_MTU lock cannot disable it.
2373 We still use this lock to block changes
2374 caused by addrconf/ndisc.
2375 */
2376
2377 idev = __in6_dev_get(arg->dev);
2378 if (!idev)
2379 return 0;
2380
2381 /* For administrative MTU increase, there is no way to discover
2382 IPv6 PMTU increase, so PMTU increase should be updated here.
2383 Since RFC 1981 doesn't include administrative MTU increase
2384 update PMTU increase is a MUST. (i.e. jumbo frame)
2385 */
2386 /*
2387 If new MTU is less than route PMTU, this new MTU will be the
2388 lowest MTU in the path, update the route PMTU to reflect PMTU
2389 decreases; if new MTU is greater than route PMTU, and the
2390 old MTU is the lowest MTU in the path, update the route PMTU
2391 to reflect the increase. In this case if the other nodes' MTU
2392 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2393 PMTU discouvery.
2394 */
2395 if (rt->dst.dev == arg->dev &&
2396 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2397 (dst_mtu(&rt->dst) >= arg->mtu ||
2398 (dst_mtu(&rt->dst) < arg->mtu &&
2399 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2400 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2401 }
2402 return 0;
2403 }
2404
2405 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2406 {
2407 struct rt6_mtu_change_arg arg = {
2408 .dev = dev,
2409 .mtu = mtu,
2410 };
2411
2412 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2413 }
2414
2415 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2416 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
2417 [RTA_OIF] = { .type = NLA_U32 },
2418 [RTA_IIF] = { .type = NLA_U32 },
2419 [RTA_PRIORITY] = { .type = NLA_U32 },
2420 [RTA_METRICS] = { .type = NLA_NESTED },
2421 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
2422 [RTA_PREF] = { .type = NLA_U8 },
2423 };
2424
2425 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2426 struct fib6_config *cfg)
2427 {
2428 struct rtmsg *rtm;
2429 struct nlattr *tb[RTA_MAX+1];
2430 unsigned int pref;
2431 int err;
2432
2433 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2434 if (err < 0)
2435 goto errout;
2436
2437 err = -EINVAL;
2438 rtm = nlmsg_data(nlh);
2439 memset(cfg, 0, sizeof(*cfg));
2440
2441 cfg->fc_table = rtm->rtm_table;
2442 cfg->fc_dst_len = rtm->rtm_dst_len;
2443 cfg->fc_src_len = rtm->rtm_src_len;
2444 cfg->fc_flags = RTF_UP;
2445 cfg->fc_protocol = rtm->rtm_protocol;
2446 cfg->fc_type = rtm->rtm_type;
2447
2448 if (rtm->rtm_type == RTN_UNREACHABLE ||
2449 rtm->rtm_type == RTN_BLACKHOLE ||
2450 rtm->rtm_type == RTN_PROHIBIT ||
2451 rtm->rtm_type == RTN_THROW)
2452 cfg->fc_flags |= RTF_REJECT;
2453
2454 if (rtm->rtm_type == RTN_LOCAL)
2455 cfg->fc_flags |= RTF_LOCAL;
2456
2457 if (rtm->rtm_flags & RTM_F_CLONED)
2458 cfg->fc_flags |= RTF_CACHE;
2459
2460 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2461 cfg->fc_nlinfo.nlh = nlh;
2462 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2463
2464 if (tb[RTA_GATEWAY]) {
2465 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
2466 cfg->fc_flags |= RTF_GATEWAY;
2467 }
2468
2469 if (tb[RTA_DST]) {
2470 int plen = (rtm->rtm_dst_len + 7) >> 3;
2471
2472 if (nla_len(tb[RTA_DST]) < plen)
2473 goto errout;
2474
2475 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2476 }
2477
2478 if (tb[RTA_SRC]) {
2479 int plen = (rtm->rtm_src_len + 7) >> 3;
2480
2481 if (nla_len(tb[RTA_SRC]) < plen)
2482 goto errout;
2483
2484 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2485 }
2486
2487 if (tb[RTA_PREFSRC])
2488 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
2489
2490 if (tb[RTA_OIF])
2491 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2492
2493 if (tb[RTA_PRIORITY])
2494 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2495
2496 if (tb[RTA_METRICS]) {
2497 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2498 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2499 }
2500
2501 if (tb[RTA_TABLE])
2502 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2503
2504 if (tb[RTA_MULTIPATH]) {
2505 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2506 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2507 }
2508
2509 if (tb[RTA_PREF]) {
2510 pref = nla_get_u8(tb[RTA_PREF]);
2511 if (pref != ICMPV6_ROUTER_PREF_LOW &&
2512 pref != ICMPV6_ROUTER_PREF_HIGH)
2513 pref = ICMPV6_ROUTER_PREF_MEDIUM;
2514 cfg->fc_flags |= RTF_PREF(pref);
2515 }
2516
2517 err = 0;
2518 errout:
2519 return err;
2520 }
2521
2522 static int ip6_route_multipath(struct fib6_config *cfg, int add)
2523 {
2524 struct fib6_config r_cfg;
2525 struct rtnexthop *rtnh;
2526 int remaining;
2527 int attrlen;
2528 int err = 0, last_err = 0;
2529
2530 beginning:
2531 rtnh = (struct rtnexthop *)cfg->fc_mp;
2532 remaining = cfg->fc_mp_len;
2533
2534 /* Parse a Multipath Entry */
2535 while (rtnh_ok(rtnh, remaining)) {
2536 memcpy(&r_cfg, cfg, sizeof(*cfg));
2537 if (rtnh->rtnh_ifindex)
2538 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2539
2540 attrlen = rtnh_attrlen(rtnh);
2541 if (attrlen > 0) {
2542 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2543
2544 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2545 if (nla) {
2546 r_cfg.fc_gateway = nla_get_in6_addr(nla);
2547 r_cfg.fc_flags |= RTF_GATEWAY;
2548 }
2549 }
2550 err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2551 if (err) {
2552 last_err = err;
2553 /* If we are trying to remove a route, do not stop the
2554 * loop when ip6_route_del() fails (because next hop is
2555 * already gone), we should try to remove all next hops.
2556 */
2557 if (add) {
2558 /* If add fails, we should try to delete all
2559 * next hops that have been already added.
2560 */
2561 add = 0;
2562 goto beginning;
2563 }
2564 }
2565 /* Because each route is added like a single route we remove
2566 * this flag after the first nexthop (if there is a collision,
2567 * we have already fail to add the first nexthop:
2568 * fib6_add_rt2node() has reject it).
2569 */
2570 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
2571 rtnh = rtnh_next(rtnh, &remaining);
2572 }
2573
2574 return last_err;
2575 }
2576
2577 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2578 {
2579 struct fib6_config cfg;
2580 int err;
2581
2582 err = rtm_to_fib6_config(skb, nlh, &cfg);
2583 if (err < 0)
2584 return err;
2585
2586 if (cfg.fc_mp)
2587 return ip6_route_multipath(&cfg, 0);
2588 else
2589 return ip6_route_del(&cfg);
2590 }
2591
2592 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2593 {
2594 struct fib6_config cfg;
2595 int err;
2596
2597 err = rtm_to_fib6_config(skb, nlh, &cfg);
2598 if (err < 0)
2599 return err;
2600
2601 if (cfg.fc_mp)
2602 return ip6_route_multipath(&cfg, 1);
2603 else
2604 return ip6_route_add(&cfg);
2605 }
2606
2607 static inline size_t rt6_nlmsg_size(void)
2608 {
2609 return NLMSG_ALIGN(sizeof(struct rtmsg))
2610 + nla_total_size(16) /* RTA_SRC */
2611 + nla_total_size(16) /* RTA_DST */
2612 + nla_total_size(16) /* RTA_GATEWAY */
2613 + nla_total_size(16) /* RTA_PREFSRC */
2614 + nla_total_size(4) /* RTA_TABLE */
2615 + nla_total_size(4) /* RTA_IIF */
2616 + nla_total_size(4) /* RTA_OIF */
2617 + nla_total_size(4) /* RTA_PRIORITY */
2618 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2619 + nla_total_size(sizeof(struct rta_cacheinfo))
2620 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
2621 + nla_total_size(1); /* RTA_PREF */
2622 }
2623
2624 static int rt6_fill_node(struct net *net,
2625 struct sk_buff *skb, struct rt6_info *rt,
2626 struct in6_addr *dst, struct in6_addr *src,
2627 int iif, int type, u32 portid, u32 seq,
2628 int prefix, int nowait, unsigned int flags)
2629 {
2630 struct rtmsg *rtm;
2631 struct nlmsghdr *nlh;
2632 long expires;
2633 u32 table;
2634
2635 if (prefix) { /* user wants prefix routes only */
2636 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2637 /* success since this is not a prefix route */
2638 return 1;
2639 }
2640 }
2641
2642 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2643 if (!nlh)
2644 return -EMSGSIZE;
2645
2646 rtm = nlmsg_data(nlh);
2647 rtm->rtm_family = AF_INET6;
2648 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2649 rtm->rtm_src_len = rt->rt6i_src.plen;
2650 rtm->rtm_tos = 0;
2651 if (rt->rt6i_table)
2652 table = rt->rt6i_table->tb6_id;
2653 else
2654 table = RT6_TABLE_UNSPEC;
2655 rtm->rtm_table = table;
2656 if (nla_put_u32(skb, RTA_TABLE, table))
2657 goto nla_put_failure;
2658 if (rt->rt6i_flags & RTF_REJECT) {
2659 switch (rt->dst.error) {
2660 case -EINVAL:
2661 rtm->rtm_type = RTN_BLACKHOLE;
2662 break;
2663 case -EACCES:
2664 rtm->rtm_type = RTN_PROHIBIT;
2665 break;
2666 case -EAGAIN:
2667 rtm->rtm_type = RTN_THROW;
2668 break;
2669 default:
2670 rtm->rtm_type = RTN_UNREACHABLE;
2671 break;
2672 }
2673 }
2674 else if (rt->rt6i_flags & RTF_LOCAL)
2675 rtm->rtm_type = RTN_LOCAL;
2676 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2677 rtm->rtm_type = RTN_LOCAL;
2678 else
2679 rtm->rtm_type = RTN_UNICAST;
2680 rtm->rtm_flags = 0;
2681 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2682 rtm->rtm_protocol = rt->rt6i_protocol;
2683 if (rt->rt6i_flags & RTF_DYNAMIC)
2684 rtm->rtm_protocol = RTPROT_REDIRECT;
2685 else if (rt->rt6i_flags & RTF_ADDRCONF) {
2686 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2687 rtm->rtm_protocol = RTPROT_RA;
2688 else
2689 rtm->rtm_protocol = RTPROT_KERNEL;
2690 }
2691
2692 if (rt->rt6i_flags & RTF_CACHE)
2693 rtm->rtm_flags |= RTM_F_CLONED;
2694
2695 if (dst) {
2696 if (nla_put_in6_addr(skb, RTA_DST, dst))
2697 goto nla_put_failure;
2698 rtm->rtm_dst_len = 128;
2699 } else if (rtm->rtm_dst_len)
2700 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
2701 goto nla_put_failure;
2702 #ifdef CONFIG_IPV6_SUBTREES
2703 if (src) {
2704 if (nla_put_in6_addr(skb, RTA_SRC, src))
2705 goto nla_put_failure;
2706 rtm->rtm_src_len = 128;
2707 } else if (rtm->rtm_src_len &&
2708 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
2709 goto nla_put_failure;
2710 #endif
2711 if (iif) {
2712 #ifdef CONFIG_IPV6_MROUTE
2713 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2714 int err = ip6mr_get_route(net, skb, rtm, nowait);
2715 if (err <= 0) {
2716 if (!nowait) {
2717 if (err == 0)
2718 return 0;
2719 goto nla_put_failure;
2720 } else {
2721 if (err == -EMSGSIZE)
2722 goto nla_put_failure;
2723 }
2724 }
2725 } else
2726 #endif
2727 if (nla_put_u32(skb, RTA_IIF, iif))
2728 goto nla_put_failure;
2729 } else if (dst) {
2730 struct in6_addr saddr_buf;
2731 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2732 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
2733 goto nla_put_failure;
2734 }
2735
2736 if (rt->rt6i_prefsrc.plen) {
2737 struct in6_addr saddr_buf;
2738 saddr_buf = rt->rt6i_prefsrc.addr;
2739 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
2740 goto nla_put_failure;
2741 }
2742
2743 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2744 goto nla_put_failure;
2745
2746 if (rt->rt6i_flags & RTF_GATEWAY) {
2747 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
2748 goto nla_put_failure;
2749 }
2750
2751 if (rt->dst.dev &&
2752 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2753 goto nla_put_failure;
2754 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2755 goto nla_put_failure;
2756
2757 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2758
2759 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2760 goto nla_put_failure;
2761
2762 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
2763 goto nla_put_failure;
2764
2765 nlmsg_end(skb, nlh);
2766 return 0;
2767
2768 nla_put_failure:
2769 nlmsg_cancel(skb, nlh);
2770 return -EMSGSIZE;
2771 }
2772
2773 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2774 {
2775 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2776 int prefix;
2777
2778 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2779 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2780 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2781 } else
2782 prefix = 0;
2783
2784 return rt6_fill_node(arg->net,
2785 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2786 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2787 prefix, 0, NLM_F_MULTI);
2788 }
2789
2790 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
2791 {
2792 struct net *net = sock_net(in_skb->sk);
2793 struct nlattr *tb[RTA_MAX+1];
2794 struct rt6_info *rt;
2795 struct sk_buff *skb;
2796 struct rtmsg *rtm;
2797 struct flowi6 fl6;
2798 int err, iif = 0, oif = 0;
2799
2800 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2801 if (err < 0)
2802 goto errout;
2803
2804 err = -EINVAL;
2805 memset(&fl6, 0, sizeof(fl6));
2806
2807 if (tb[RTA_SRC]) {
2808 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2809 goto errout;
2810
2811 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2812 }
2813
2814 if (tb[RTA_DST]) {
2815 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2816 goto errout;
2817
2818 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2819 }
2820
2821 if (tb[RTA_IIF])
2822 iif = nla_get_u32(tb[RTA_IIF]);
2823
2824 if (tb[RTA_OIF])
2825 oif = nla_get_u32(tb[RTA_OIF]);
2826
2827 if (tb[RTA_MARK])
2828 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
2829
2830 if (iif) {
2831 struct net_device *dev;
2832 int flags = 0;
2833
2834 dev = __dev_get_by_index(net, iif);
2835 if (!dev) {
2836 err = -ENODEV;
2837 goto errout;
2838 }
2839
2840 fl6.flowi6_iif = iif;
2841
2842 if (!ipv6_addr_any(&fl6.saddr))
2843 flags |= RT6_LOOKUP_F_HAS_SADDR;
2844
2845 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2846 flags);
2847 } else {
2848 fl6.flowi6_oif = oif;
2849
2850 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2851 }
2852
2853 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2854 if (!skb) {
2855 ip6_rt_put(rt);
2856 err = -ENOBUFS;
2857 goto errout;
2858 }
2859
2860 /* Reserve room for dummy headers, this skb can pass
2861 through good chunk of routing engine.
2862 */
2863 skb_reset_mac_header(skb);
2864 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2865
2866 skb_dst_set(skb, &rt->dst);
2867
2868 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2869 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2870 nlh->nlmsg_seq, 0, 0, 0);
2871 if (err < 0) {
2872 kfree_skb(skb);
2873 goto errout;
2874 }
2875
2876 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2877 errout:
2878 return err;
2879 }
2880
2881 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2882 {
2883 struct sk_buff *skb;
2884 struct net *net = info->nl_net;
2885 u32 seq;
2886 int err;
2887
2888 err = -ENOBUFS;
2889 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2890
2891 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2892 if (!skb)
2893 goto errout;
2894
2895 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2896 event, info->portid, seq, 0, 0, 0);
2897 if (err < 0) {
2898 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2899 WARN_ON(err == -EMSGSIZE);
2900 kfree_skb(skb);
2901 goto errout;
2902 }
2903 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2904 info->nlh, gfp_any());
2905 return;
2906 errout:
2907 if (err < 0)
2908 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2909 }
2910
2911 static int ip6_route_dev_notify(struct notifier_block *this,
2912 unsigned long event, void *ptr)
2913 {
2914 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2915 struct net *net = dev_net(dev);
2916
2917 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2918 net->ipv6.ip6_null_entry->dst.dev = dev;
2919 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2920 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2921 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2922 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2923 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2924 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2925 #endif
2926 }
2927
2928 return NOTIFY_OK;
2929 }
2930
2931 /*
2932 * /proc
2933 */
2934
2935 #ifdef CONFIG_PROC_FS
2936
2937 static const struct file_operations ipv6_route_proc_fops = {
2938 .owner = THIS_MODULE,
2939 .open = ipv6_route_open,
2940 .read = seq_read,
2941 .llseek = seq_lseek,
2942 .release = seq_release_net,
2943 };
2944
2945 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2946 {
2947 struct net *net = (struct net *)seq->private;
2948 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2949 net->ipv6.rt6_stats->fib_nodes,
2950 net->ipv6.rt6_stats->fib_route_nodes,
2951 net->ipv6.rt6_stats->fib_rt_alloc,
2952 net->ipv6.rt6_stats->fib_rt_entries,
2953 net->ipv6.rt6_stats->fib_rt_cache,
2954 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2955 net->ipv6.rt6_stats->fib_discarded_routes);
2956
2957 return 0;
2958 }
2959
2960 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2961 {
2962 return single_open_net(inode, file, rt6_stats_seq_show);
2963 }
2964
2965 static const struct file_operations rt6_stats_seq_fops = {
2966 .owner = THIS_MODULE,
2967 .open = rt6_stats_seq_open,
2968 .read = seq_read,
2969 .llseek = seq_lseek,
2970 .release = single_release_net,
2971 };
2972 #endif /* CONFIG_PROC_FS */
2973
2974 #ifdef CONFIG_SYSCTL
2975
2976 static
2977 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
2978 void __user *buffer, size_t *lenp, loff_t *ppos)
2979 {
2980 struct net *net;
2981 int delay;
2982 if (!write)
2983 return -EINVAL;
2984
2985 net = (struct net *)ctl->extra1;
2986 delay = net->ipv6.sysctl.flush_delay;
2987 proc_dointvec(ctl, write, buffer, lenp, ppos);
2988 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
2989 return 0;
2990 }
2991
2992 struct ctl_table ipv6_route_table_template[] = {
2993 {
2994 .procname = "flush",
2995 .data = &init_net.ipv6.sysctl.flush_delay,
2996 .maxlen = sizeof(int),
2997 .mode = 0200,
2998 .proc_handler = ipv6_sysctl_rtcache_flush
2999 },
3000 {
3001 .procname = "gc_thresh",
3002 .data = &ip6_dst_ops_template.gc_thresh,
3003 .maxlen = sizeof(int),
3004 .mode = 0644,
3005 .proc_handler = proc_dointvec,
3006 },
3007 {
3008 .procname = "max_size",
3009 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
3010 .maxlen = sizeof(int),
3011 .mode = 0644,
3012 .proc_handler = proc_dointvec,
3013 },
3014 {
3015 .procname = "gc_min_interval",
3016 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3017 .maxlen = sizeof(int),
3018 .mode = 0644,
3019 .proc_handler = proc_dointvec_jiffies,
3020 },
3021 {
3022 .procname = "gc_timeout",
3023 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
3024 .maxlen = sizeof(int),
3025 .mode = 0644,
3026 .proc_handler = proc_dointvec_jiffies,
3027 },
3028 {
3029 .procname = "gc_interval",
3030 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
3031 .maxlen = sizeof(int),
3032 .mode = 0644,
3033 .proc_handler = proc_dointvec_jiffies,
3034 },
3035 {
3036 .procname = "gc_elasticity",
3037 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
3038 .maxlen = sizeof(int),
3039 .mode = 0644,
3040 .proc_handler = proc_dointvec,
3041 },
3042 {
3043 .procname = "mtu_expires",
3044 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
3045 .maxlen = sizeof(int),
3046 .mode = 0644,
3047 .proc_handler = proc_dointvec_jiffies,
3048 },
3049 {
3050 .procname = "min_adv_mss",
3051 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
3052 .maxlen = sizeof(int),
3053 .mode = 0644,
3054 .proc_handler = proc_dointvec,
3055 },
3056 {
3057 .procname = "gc_min_interval_ms",
3058 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3059 .maxlen = sizeof(int),
3060 .mode = 0644,
3061 .proc_handler = proc_dointvec_ms_jiffies,
3062 },
3063 { }
3064 };
3065
3066 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3067 {
3068 struct ctl_table *table;
3069
3070 table = kmemdup(ipv6_route_table_template,
3071 sizeof(ipv6_route_table_template),
3072 GFP_KERNEL);
3073
3074 if (table) {
3075 table[0].data = &net->ipv6.sysctl.flush_delay;
3076 table[0].extra1 = net;
3077 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3078 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3079 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3080 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3081 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3082 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3083 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3084 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3085 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3086
3087 /* Don't export sysctls to unprivileged users */
3088 if (net->user_ns != &init_user_ns)
3089 table[0].procname = NULL;
3090 }
3091
3092 return table;
3093 }
3094 #endif
3095
3096 static int __net_init ip6_route_net_init(struct net *net)
3097 {
3098 int ret = -ENOMEM;
3099
3100 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3101 sizeof(net->ipv6.ip6_dst_ops));
3102
3103 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3104 goto out_ip6_dst_ops;
3105
3106 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3107 sizeof(*net->ipv6.ip6_null_entry),
3108 GFP_KERNEL);
3109 if (!net->ipv6.ip6_null_entry)
3110 goto out_ip6_dst_entries;
3111 net->ipv6.ip6_null_entry->dst.path =
3112 (struct dst_entry *)net->ipv6.ip6_null_entry;
3113 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3114 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3115 ip6_template_metrics, true);
3116
3117 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3118 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3119 sizeof(*net->ipv6.ip6_prohibit_entry),
3120 GFP_KERNEL);
3121 if (!net->ipv6.ip6_prohibit_entry)
3122 goto out_ip6_null_entry;
3123 net->ipv6.ip6_prohibit_entry->dst.path =
3124 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3125 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3126 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3127 ip6_template_metrics, true);
3128
3129 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3130 sizeof(*net->ipv6.ip6_blk_hole_entry),
3131 GFP_KERNEL);
3132 if (!net->ipv6.ip6_blk_hole_entry)
3133 goto out_ip6_prohibit_entry;
3134 net->ipv6.ip6_blk_hole_entry->dst.path =
3135 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3136 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3137 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3138 ip6_template_metrics, true);
3139 #endif
3140
3141 net->ipv6.sysctl.flush_delay = 0;
3142 net->ipv6.sysctl.ip6_rt_max_size = 4096;
3143 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3144 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3145 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3146 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3147 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3148 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3149
3150 net->ipv6.ip6_rt_gc_expire = 30*HZ;
3151
3152 ret = 0;
3153 out:
3154 return ret;
3155
3156 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3157 out_ip6_prohibit_entry:
3158 kfree(net->ipv6.ip6_prohibit_entry);
3159 out_ip6_null_entry:
3160 kfree(net->ipv6.ip6_null_entry);
3161 #endif
3162 out_ip6_dst_entries:
3163 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3164 out_ip6_dst_ops:
3165 goto out;
3166 }
3167
3168 static void __net_exit ip6_route_net_exit(struct net *net)
3169 {
3170 kfree(net->ipv6.ip6_null_entry);
3171 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3172 kfree(net->ipv6.ip6_prohibit_entry);
3173 kfree(net->ipv6.ip6_blk_hole_entry);
3174 #endif
3175 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3176 }
3177
3178 static int __net_init ip6_route_net_init_late(struct net *net)
3179 {
3180 #ifdef CONFIG_PROC_FS
3181 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3182 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3183 #endif
3184 return 0;
3185 }
3186
3187 static void __net_exit ip6_route_net_exit_late(struct net *net)
3188 {
3189 #ifdef CONFIG_PROC_FS
3190 remove_proc_entry("ipv6_route", net->proc_net);
3191 remove_proc_entry("rt6_stats", net->proc_net);
3192 #endif
3193 }
3194
3195 static struct pernet_operations ip6_route_net_ops = {
3196 .init = ip6_route_net_init,
3197 .exit = ip6_route_net_exit,
3198 };
3199
3200 static int __net_init ipv6_inetpeer_init(struct net *net)
3201 {
3202 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3203
3204 if (!bp)
3205 return -ENOMEM;
3206 inet_peer_base_init(bp);
3207 net->ipv6.peers = bp;
3208 return 0;
3209 }
3210
3211 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3212 {
3213 struct inet_peer_base *bp = net->ipv6.peers;
3214
3215 net->ipv6.peers = NULL;
3216 inetpeer_invalidate_tree(bp);
3217 kfree(bp);
3218 }
3219
3220 static struct pernet_operations ipv6_inetpeer_ops = {
3221 .init = ipv6_inetpeer_init,
3222 .exit = ipv6_inetpeer_exit,
3223 };
3224
3225 static struct pernet_operations ip6_route_net_late_ops = {
3226 .init = ip6_route_net_init_late,
3227 .exit = ip6_route_net_exit_late,
3228 };
3229
3230 static struct notifier_block ip6_route_dev_notifier = {
3231 .notifier_call = ip6_route_dev_notify,
3232 .priority = 0,
3233 };
3234
3235 int __init ip6_route_init(void)
3236 {
3237 int ret;
3238
3239 ret = -ENOMEM;
3240 ip6_dst_ops_template.kmem_cachep =
3241 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3242 SLAB_HWCACHE_ALIGN, NULL);
3243 if (!ip6_dst_ops_template.kmem_cachep)
3244 goto out;
3245
3246 ret = dst_entries_init(&ip6_dst_blackhole_ops);
3247 if (ret)
3248 goto out_kmem_cache;
3249
3250 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3251 if (ret)
3252 goto out_dst_entries;
3253
3254 ret = register_pernet_subsys(&ip6_route_net_ops);
3255 if (ret)
3256 goto out_register_inetpeer;
3257
3258 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3259
3260 /* Registering of the loopback is done before this portion of code,
3261 * the loopback reference in rt6_info will not be taken, do it
3262 * manually for init_net */
3263 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3264 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3265 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3266 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3267 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3268 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3269 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3270 #endif
3271 ret = fib6_init();
3272 if (ret)
3273 goto out_register_subsys;
3274
3275 ret = xfrm6_init();
3276 if (ret)
3277 goto out_fib6_init;
3278
3279 ret = fib6_rules_init();
3280 if (ret)
3281 goto xfrm6_init;
3282
3283 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3284 if (ret)
3285 goto fib6_rules_init;
3286
3287 ret = -ENOBUFS;
3288 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3289 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3290 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3291 goto out_register_late_subsys;
3292
3293 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3294 if (ret)
3295 goto out_register_late_subsys;
3296
3297 out:
3298 return ret;
3299
3300 out_register_late_subsys:
3301 unregister_pernet_subsys(&ip6_route_net_late_ops);
3302 fib6_rules_init:
3303 fib6_rules_cleanup();
3304 xfrm6_init:
3305 xfrm6_fini();
3306 out_fib6_init:
3307 fib6_gc_cleanup();
3308 out_register_subsys:
3309 unregister_pernet_subsys(&ip6_route_net_ops);
3310 out_register_inetpeer:
3311 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3312 out_dst_entries:
3313 dst_entries_destroy(&ip6_dst_blackhole_ops);
3314 out_kmem_cache:
3315 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3316 goto out;
3317 }
3318
3319 void ip6_route_cleanup(void)
3320 {
3321 unregister_netdevice_notifier(&ip6_route_dev_notifier);
3322 unregister_pernet_subsys(&ip6_route_net_late_ops);
3323 fib6_rules_cleanup();
3324 xfrm6_fini();
3325 fib6_gc_cleanup();
3326 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3327 unregister_pernet_subsys(&ip6_route_net_ops);
3328 dst_entries_destroy(&ip6_dst_blackhole_ops);
3329 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3330 }
This page took 0.092736 seconds and 6 git commands to generate.