ipv6: prevent fib6_run_gc() contention
[deliverable/linux.git] / net / ipv6 / route.c
1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14 /* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
23 * Ville Nuorvala
24 * Fixed routing subtrees.
25 */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
61
62 #include <asm/uaccess.h>
63
64 #ifdef CONFIG_SYSCTL
65 #include <linux/sysctl.h>
66 #endif
67
68 enum rt6_nud_state {
69 RT6_NUD_FAIL_HARD = -2,
70 RT6_NUD_FAIL_SOFT = -1,
71 RT6_NUD_SUCCEED = 1
72 };
73
74 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
75 const struct in6_addr *dest);
76 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
77 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
78 static unsigned int ip6_mtu(const struct dst_entry *dst);
79 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80 static void ip6_dst_destroy(struct dst_entry *);
81 static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
83 static int ip6_dst_gc(struct dst_ops *ops);
84
85 static int ip6_pkt_discard(struct sk_buff *skb);
86 static int ip6_pkt_discard_out(struct sk_buff *skb);
87 static void ip6_link_failure(struct sk_buff *skb);
88 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
89 struct sk_buff *skb, u32 mtu);
90 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
91 struct sk_buff *skb);
92 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
93
94 #ifdef CONFIG_IPV6_ROUTE_INFO
95 static struct rt6_info *rt6_add_route_info(struct net *net,
96 const struct in6_addr *prefix, int prefixlen,
97 const struct in6_addr *gwaddr, int ifindex,
98 unsigned int pref);
99 static struct rt6_info *rt6_get_route_info(struct net *net,
100 const struct in6_addr *prefix, int prefixlen,
101 const struct in6_addr *gwaddr, int ifindex);
102 #endif
103
104 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
105 {
106 struct rt6_info *rt = (struct rt6_info *) dst;
107 struct inet_peer *peer;
108 u32 *p = NULL;
109
110 if (!(rt->dst.flags & DST_HOST))
111 return NULL;
112
113 peer = rt6_get_peer_create(rt);
114 if (peer) {
115 u32 *old_p = __DST_METRICS_PTR(old);
116 unsigned long prev, new;
117
118 p = peer->metrics;
119 if (inet_metrics_new(peer))
120 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
121
122 new = (unsigned long) p;
123 prev = cmpxchg(&dst->_metrics, old, new);
124
125 if (prev != old) {
126 p = __DST_METRICS_PTR(prev);
127 if (prev & DST_METRICS_READ_ONLY)
128 p = NULL;
129 }
130 }
131 return p;
132 }
133
134 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
135 struct sk_buff *skb,
136 const void *daddr)
137 {
138 struct in6_addr *p = &rt->rt6i_gateway;
139
140 if (!ipv6_addr_any(p))
141 return (const void *) p;
142 else if (skb)
143 return &ipv6_hdr(skb)->daddr;
144 return daddr;
145 }
146
147 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
148 struct sk_buff *skb,
149 const void *daddr)
150 {
151 struct rt6_info *rt = (struct rt6_info *) dst;
152 struct neighbour *n;
153
154 daddr = choose_neigh_daddr(rt, skb, daddr);
155 n = __ipv6_neigh_lookup(dst->dev, daddr);
156 if (n)
157 return n;
158 return neigh_create(&nd_tbl, daddr, dst->dev);
159 }
160
161 static struct dst_ops ip6_dst_ops_template = {
162 .family = AF_INET6,
163 .protocol = cpu_to_be16(ETH_P_IPV6),
164 .gc = ip6_dst_gc,
165 .gc_thresh = 1024,
166 .check = ip6_dst_check,
167 .default_advmss = ip6_default_advmss,
168 .mtu = ip6_mtu,
169 .cow_metrics = ipv6_cow_metrics,
170 .destroy = ip6_dst_destroy,
171 .ifdown = ip6_dst_ifdown,
172 .negative_advice = ip6_negative_advice,
173 .link_failure = ip6_link_failure,
174 .update_pmtu = ip6_rt_update_pmtu,
175 .redirect = rt6_do_redirect,
176 .local_out = __ip6_local_out,
177 .neigh_lookup = ip6_neigh_lookup,
178 };
179
180 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
181 {
182 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
183
184 return mtu ? : dst->dev->mtu;
185 }
186
187 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
188 struct sk_buff *skb, u32 mtu)
189 {
190 }
191
192 static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
193 struct sk_buff *skb)
194 {
195 }
196
197 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
198 unsigned long old)
199 {
200 return NULL;
201 }
202
203 static struct dst_ops ip6_dst_blackhole_ops = {
204 .family = AF_INET6,
205 .protocol = cpu_to_be16(ETH_P_IPV6),
206 .destroy = ip6_dst_destroy,
207 .check = ip6_dst_check,
208 .mtu = ip6_blackhole_mtu,
209 .default_advmss = ip6_default_advmss,
210 .update_pmtu = ip6_rt_blackhole_update_pmtu,
211 .redirect = ip6_rt_blackhole_redirect,
212 .cow_metrics = ip6_rt_blackhole_cow_metrics,
213 .neigh_lookup = ip6_neigh_lookup,
214 };
215
216 static const u32 ip6_template_metrics[RTAX_MAX] = {
217 [RTAX_HOPLIMIT - 1] = 0,
218 };
219
220 static const struct rt6_info ip6_null_entry_template = {
221 .dst = {
222 .__refcnt = ATOMIC_INIT(1),
223 .__use = 1,
224 .obsolete = DST_OBSOLETE_FORCE_CHK,
225 .error = -ENETUNREACH,
226 .input = ip6_pkt_discard,
227 .output = ip6_pkt_discard_out,
228 },
229 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
230 .rt6i_protocol = RTPROT_KERNEL,
231 .rt6i_metric = ~(u32) 0,
232 .rt6i_ref = ATOMIC_INIT(1),
233 };
234
235 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
236
237 static int ip6_pkt_prohibit(struct sk_buff *skb);
238 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
239
240 static const struct rt6_info ip6_prohibit_entry_template = {
241 .dst = {
242 .__refcnt = ATOMIC_INIT(1),
243 .__use = 1,
244 .obsolete = DST_OBSOLETE_FORCE_CHK,
245 .error = -EACCES,
246 .input = ip6_pkt_prohibit,
247 .output = ip6_pkt_prohibit_out,
248 },
249 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
250 .rt6i_protocol = RTPROT_KERNEL,
251 .rt6i_metric = ~(u32) 0,
252 .rt6i_ref = ATOMIC_INIT(1),
253 };
254
255 static const struct rt6_info ip6_blk_hole_entry_template = {
256 .dst = {
257 .__refcnt = ATOMIC_INIT(1),
258 .__use = 1,
259 .obsolete = DST_OBSOLETE_FORCE_CHK,
260 .error = -EINVAL,
261 .input = dst_discard,
262 .output = dst_discard,
263 },
264 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
265 .rt6i_protocol = RTPROT_KERNEL,
266 .rt6i_metric = ~(u32) 0,
267 .rt6i_ref = ATOMIC_INIT(1),
268 };
269
270 #endif
271
272 /* allocate dst with ip6_dst_ops */
273 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
274 struct net_device *dev,
275 int flags,
276 struct fib6_table *table)
277 {
278 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
279 0, DST_OBSOLETE_FORCE_CHK, flags);
280
281 if (rt) {
282 struct dst_entry *dst = &rt->dst;
283
284 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
285 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
286 rt->rt6i_genid = rt_genid(net);
287 INIT_LIST_HEAD(&rt->rt6i_siblings);
288 rt->rt6i_nsiblings = 0;
289 }
290 return rt;
291 }
292
293 static void ip6_dst_destroy(struct dst_entry *dst)
294 {
295 struct rt6_info *rt = (struct rt6_info *)dst;
296 struct inet6_dev *idev = rt->rt6i_idev;
297 struct dst_entry *from = dst->from;
298
299 if (!(rt->dst.flags & DST_HOST))
300 dst_destroy_metrics_generic(dst);
301
302 if (idev) {
303 rt->rt6i_idev = NULL;
304 in6_dev_put(idev);
305 }
306
307 dst->from = NULL;
308 dst_release(from);
309
310 if (rt6_has_peer(rt)) {
311 struct inet_peer *peer = rt6_peer_ptr(rt);
312 inet_putpeer(peer);
313 }
314 }
315
316 void rt6_bind_peer(struct rt6_info *rt, int create)
317 {
318 struct inet_peer_base *base;
319 struct inet_peer *peer;
320
321 base = inetpeer_base_ptr(rt->_rt6i_peer);
322 if (!base)
323 return;
324
325 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
326 if (peer) {
327 if (!rt6_set_peer(rt, peer))
328 inet_putpeer(peer);
329 }
330 }
331
332 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
333 int how)
334 {
335 struct rt6_info *rt = (struct rt6_info *)dst;
336 struct inet6_dev *idev = rt->rt6i_idev;
337 struct net_device *loopback_dev =
338 dev_net(dev)->loopback_dev;
339
340 if (dev != loopback_dev) {
341 if (idev && idev->dev == dev) {
342 struct inet6_dev *loopback_idev =
343 in6_dev_get(loopback_dev);
344 if (loopback_idev) {
345 rt->rt6i_idev = loopback_idev;
346 in6_dev_put(idev);
347 }
348 }
349 }
350 }
351
352 static bool rt6_check_expired(const struct rt6_info *rt)
353 {
354 if (rt->rt6i_flags & RTF_EXPIRES) {
355 if (time_after(jiffies, rt->dst.expires))
356 return true;
357 } else if (rt->dst.from) {
358 return rt6_check_expired((struct rt6_info *) rt->dst.from);
359 }
360 return false;
361 }
362
363 static bool rt6_need_strict(const struct in6_addr *daddr)
364 {
365 return ipv6_addr_type(daddr) &
366 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
367 }
368
369 /* Multipath route selection:
370 * Hash-based function using the packet header and flow label.
371 * Adapted from fib_info_hashfn()
372 */
373 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
374 const struct flowi6 *fl6)
375 {
376 unsigned int val = fl6->flowi6_proto;
377
378 val ^= ipv6_addr_hash(&fl6->daddr);
379 val ^= ipv6_addr_hash(&fl6->saddr);
380
381 /* This works only if the flow is not encapsulated */
382 switch (fl6->flowi6_proto) {
383 case IPPROTO_UDP:
384 case IPPROTO_TCP:
385 case IPPROTO_SCTP:
386 val ^= (__force u16)fl6->fl6_sport;
387 val ^= (__force u16)fl6->fl6_dport;
388 break;
389
390 case IPPROTO_ICMPV6:
391 val ^= (__force u16)fl6->fl6_icmp_type;
392 val ^= (__force u16)fl6->fl6_icmp_code;
393 break;
394 }
395 /* RFC6438 recommends using the flow label */
396 val ^= (__force u32)fl6->flowlabel;
397
398 /* Perhaps we need to tune this function? */
399 val = val ^ (val >> 7) ^ (val >> 12);
400 return val % candidate_count;
401 }
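/* A minimal usage sketch (illustrative only, not part of this file): picking
 * a nexthop index for a flow. The flowi6 values below are made-up examples.
 *
 *	struct flowi6 fl6 = { .flowi6_proto = IPPROTO_UDP };
 *	int idx;
 *
 *	fl6.fl6_sport = htons(12345);
 *	fl6.fl6_dport = htons(53);
 *	idx = rt6_info_hash_nhsfn(rt->rt6i_nsiblings + 1, &fl6);
 *
 * The same addresses, ports and flow label always hash to the same idx, so a
 * given flow sticks to one of the equal-cost siblings.
 */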
402
403 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
404 struct flowi6 *fl6, int oif,
405 int strict)
406 {
407 struct rt6_info *sibling, *next_sibling;
408 int route_choosen;
409
410 route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
411 /* Don't change the route if route_choosen == 0
412 * (the siblings list does not include ourselves)
413 */
414 if (route_choosen)
415 list_for_each_entry_safe(sibling, next_sibling,
416 &match->rt6i_siblings, rt6i_siblings) {
417 route_choosen--;
418 if (route_choosen == 0) {
419 if (rt6_score_route(sibling, oif, strict) < 0)
420 break;
421 match = sibling;
422 break;
423 }
424 }
425 return match;
426 }
427
428 /*
429 * Route lookup. Any table->tb6_lock is implied.
430 */
431
432 static inline struct rt6_info *rt6_device_match(struct net *net,
433 struct rt6_info *rt,
434 const struct in6_addr *saddr,
435 int oif,
436 int flags)
437 {
438 struct rt6_info *local = NULL;
439 struct rt6_info *sprt;
440
441 if (!oif && ipv6_addr_any(saddr))
442 goto out;
443
444 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
445 struct net_device *dev = sprt->dst.dev;
446
447 if (oif) {
448 if (dev->ifindex == oif)
449 return sprt;
450 if (dev->flags & IFF_LOOPBACK) {
451 if (!sprt->rt6i_idev ||
452 sprt->rt6i_idev->dev->ifindex != oif) {
453 if (flags & RT6_LOOKUP_F_IFACE && oif)
454 continue;
455 if (local && (!oif ||
456 local->rt6i_idev->dev->ifindex == oif))
457 continue;
458 }
459 local = sprt;
460 }
461 } else {
462 if (ipv6_chk_addr(net, saddr, dev,
463 flags & RT6_LOOKUP_F_IFACE))
464 return sprt;
465 }
466 }
467
468 if (oif) {
469 if (local)
470 return local;
471
472 if (flags & RT6_LOOKUP_F_IFACE)
473 return net->ipv6.ip6_null_entry;
474 }
475 out:
476 return rt;
477 }
478
479 #ifdef CONFIG_IPV6_ROUTER_PREF
480 static void rt6_probe(struct rt6_info *rt)
481 {
482 struct neighbour *neigh;
483 /*
484 * Okay, this does not seem to be appropriate
485 * for now; however, we need to check whether it
486 * really is, aka Router Reachability Probing.
487 *
488 * Router Reachability Probe MUST be rate-limited
489 * to no more than one per minute.
490 */
491 if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
492 return;
493 rcu_read_lock_bh();
494 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
495 if (neigh) {
496 write_lock(&neigh->lock);
497 if (neigh->nud_state & NUD_VALID)
498 goto out;
499 }
500
501 if (!neigh ||
502 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
503 struct in6_addr mcaddr;
504 struct in6_addr *target;
505
506 if (neigh) {
507 neigh->updated = jiffies;
508 write_unlock(&neigh->lock);
509 }
510
511 target = (struct in6_addr *)&rt->rt6i_gateway;
512 addrconf_addr_solict_mult(target, &mcaddr);
513 ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
514 } else {
515 out:
516 write_unlock(&neigh->lock);
517 }
518 rcu_read_unlock_bh();
519 }
520 #else
521 static inline void rt6_probe(struct rt6_info *rt)
522 {
523 }
524 #endif
525
526 /*
527 * Default Router Selection (RFC 2461 6.3.6)
528 */
529 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
530 {
531 struct net_device *dev = rt->dst.dev;
532 if (!oif || dev->ifindex == oif)
533 return 2;
534 if ((dev->flags & IFF_LOOPBACK) &&
535 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
536 return 1;
537 return 0;
538 }
539
540 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
541 {
542 struct neighbour *neigh;
543 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
544
545 if (rt->rt6i_flags & RTF_NONEXTHOP ||
546 !(rt->rt6i_flags & RTF_GATEWAY))
547 return RT6_NUD_SUCCEED;
548
549 rcu_read_lock_bh();
550 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
551 if (neigh) {
552 read_lock(&neigh->lock);
553 if (neigh->nud_state & NUD_VALID)
554 ret = RT6_NUD_SUCCEED;
555 #ifdef CONFIG_IPV6_ROUTER_PREF
556 else if (!(neigh->nud_state & NUD_FAILED))
557 ret = RT6_NUD_SUCCEED;
558 #endif
559 read_unlock(&neigh->lock);
560 } else {
561 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
562 RT6_NUD_SUCCEED : RT6_NUD_FAIL_SOFT;
563 }
564 rcu_read_unlock_bh();
565
566 return ret;
567 }
568
569 static int rt6_score_route(struct rt6_info *rt, int oif,
570 int strict)
571 {
572 int m;
573
574 m = rt6_check_dev(rt, oif);
575 if (!m && (strict & RT6_LOOKUP_F_IFACE))
576 return RT6_NUD_FAIL_HARD;
577 #ifdef CONFIG_IPV6_ROUTER_PREF
578 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
579 #endif
580 if (strict & RT6_LOOKUP_F_REACHABLE) {
581 int n = rt6_check_neigh(rt);
582 if (n < 0)
583 return n;
584 }
585 return m;
586 }
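/* Illustrative note (assuming CONFIG_IPV6_ROUTER_PREF): the score packs the
 * device match from rt6_check_dev() (0, 1 or 2) into the low two bits and the
 * decoded router preference into bits 2 and up, so a route on the requested
 * oif with a "high" preference outranks one on the same oif with a "medium"
 * preference; find_match() below keeps the highest scorer.
 */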
587
588 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
589 int *mpri, struct rt6_info *match,
590 bool *do_rr)
591 {
592 int m;
593 bool match_do_rr = false;
594
595 if (rt6_check_expired(rt))
596 goto out;
597
598 m = rt6_score_route(rt, oif, strict);
599 if (m == RT6_NUD_FAIL_SOFT && !IS_ENABLED(CONFIG_IPV6_ROUTER_PREF)) {
600 match_do_rr = true;
601 m = 0; /* lowest valid score */
602 } else if (m < 0) {
603 goto out;
604 }
605
606 if (strict & RT6_LOOKUP_F_REACHABLE)
607 rt6_probe(rt);
608
609 if (m > *mpri) {
610 *do_rr = match_do_rr;
611 *mpri = m;
612 match = rt;
613 }
614 out:
615 return match;
616 }
617
618 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
619 struct rt6_info *rr_head,
620 u32 metric, int oif, int strict,
621 bool *do_rr)
622 {
623 struct rt6_info *rt, *match;
624 int mpri = -1;
625
626 match = NULL;
627 for (rt = rr_head; rt && rt->rt6i_metric == metric;
628 rt = rt->dst.rt6_next)
629 match = find_match(rt, oif, strict, &mpri, match, do_rr);
630 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
631 rt = rt->dst.rt6_next)
632 match = find_match(rt, oif, strict, &mpri, match, do_rr);
633
634 return match;
635 }
636
637 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
638 {
639 struct rt6_info *match, *rt0;
640 struct net *net;
641 bool do_rr = false;
642
643 rt0 = fn->rr_ptr;
644 if (!rt0)
645 fn->rr_ptr = rt0 = fn->leaf;
646
647 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
648 &do_rr);
649
650 if (do_rr) {
651 struct rt6_info *next = rt0->dst.rt6_next;
652
653 /* no entries matched; do round-robin */
654 if (!next || next->rt6i_metric != rt0->rt6i_metric)
655 next = fn->leaf;
656
657 if (next != rt0)
658 fn->rr_ptr = next;
659 }
660
661 net = dev_net(rt0->dst.dev);
662 return match ? match : net->ipv6.ip6_null_entry;
663 }
664
665 #ifdef CONFIG_IPV6_ROUTE_INFO
666 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
667 const struct in6_addr *gwaddr)
668 {
669 struct net *net = dev_net(dev);
670 struct route_info *rinfo = (struct route_info *) opt;
671 struct in6_addr prefix_buf, *prefix;
672 unsigned int pref;
673 unsigned long lifetime;
674 struct rt6_info *rt;
675
676 if (len < sizeof(struct route_info)) {
677 return -EINVAL;
678 }
679
680 /* Sanity check for prefix_len and length */
681 if (rinfo->length > 3) {
682 return -EINVAL;
683 } else if (rinfo->prefix_len > 128) {
684 return -EINVAL;
685 } else if (rinfo->prefix_len > 64) {
686 if (rinfo->length < 2) {
687 return -EINVAL;
688 }
689 } else if (rinfo->prefix_len > 0) {
690 if (rinfo->length < 1) {
691 return -EINVAL;
692 }
693 }
694
695 pref = rinfo->route_pref;
696 if (pref == ICMPV6_ROUTER_PREF_INVALID)
697 return -EINVAL;
698
699 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
700
701 if (rinfo->length == 3)
702 prefix = (struct in6_addr *)rinfo->prefix;
703 else {
704 /* this function is safe */
705 ipv6_addr_prefix(&prefix_buf,
706 (struct in6_addr *)rinfo->prefix,
707 rinfo->prefix_len);
708 prefix = &prefix_buf;
709 }
710
711 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
712 dev->ifindex);
713
714 if (rt && !lifetime) {
715 ip6_del_rt(rt);
716 rt = NULL;
717 }
718
719 if (!rt && lifetime)
720 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
721 pref);
722 else if (rt)
723 rt->rt6i_flags = RTF_ROUTEINFO |
724 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
725
726 if (rt) {
727 if (!addrconf_finite_timeout(lifetime))
728 rt6_clean_expires(rt);
729 else
730 rt6_set_expires(rt, jiffies + HZ * lifetime);
731
732 ip6_rt_put(rt);
733 }
734 return 0;
735 }
736 #endif
737
738 #define BACKTRACK(__net, saddr) \
739 do { \
740 if (rt == __net->ipv6.ip6_null_entry) { \
741 struct fib6_node *pn; \
742 while (1) { \
743 if (fn->fn_flags & RTN_TL_ROOT) \
744 goto out; \
745 pn = fn->parent; \
746 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
747 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
748 else \
749 fn = pn; \
750 if (fn->fn_flags & RTN_RTINFO) \
751 goto restart; \
752 } \
753 } \
754 } while (0)
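/* Note: BACKTRACK() expects the caller to provide local variables "fn" and
 * "rt", a "restart" label that re-runs the per-node route selection, and an
 * "out" label reached once the tree root is hit; see ip6_pol_route_lookup()
 * and ip6_pol_route() below for the two call sites.
 */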
755
756 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
757 struct fib6_table *table,
758 struct flowi6 *fl6, int flags)
759 {
760 struct fib6_node *fn;
761 struct rt6_info *rt;
762
763 read_lock_bh(&table->tb6_lock);
764 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
765 restart:
766 rt = fn->leaf;
767 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
768 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
769 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
770 BACKTRACK(net, &fl6->saddr);
771 out:
772 dst_use(&rt->dst, jiffies);
773 read_unlock_bh(&table->tb6_lock);
774 return rt;
775
776 }
777
778 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
779 int flags)
780 {
781 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
782 }
783 EXPORT_SYMBOL_GPL(ip6_route_lookup);
784
785 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
786 const struct in6_addr *saddr, int oif, int strict)
787 {
788 struct flowi6 fl6 = {
789 .flowi6_oif = oif,
790 .daddr = *daddr,
791 };
792 struct dst_entry *dst;
793 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
794
795 if (saddr) {
796 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
797 flags |= RT6_LOOKUP_F_HAS_SADDR;
798 }
799
800 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
801 if (dst->error == 0)
802 return (struct rt6_info *) dst;
803
804 dst_release(dst);
805
806 return NULL;
807 }
808
809 EXPORT_SYMBOL(rt6_lookup);
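/* Usage note: on success rt6_lookup() returns an rt6_info with a reference
 * taken (via dst_use() in ip6_pol_route_lookup()), so callers must drop it
 * with ip6_rt_put() when they are done; the gateway validation in
 * ip6_route_add() below is one example.
 */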
810
811 /* ip6_ins_rt is called with table->tb6_lock free (not held).
812 It takes a new route entry; if the addition fails for any reason, the
813 route is freed. In any case, if the caller does not hold a reference,
814 it may be destroyed.
815 */
816
817 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
818 {
819 int err;
820 struct fib6_table *table;
821
822 table = rt->rt6i_table;
823 write_lock_bh(&table->tb6_lock);
824 err = fib6_add(&table->tb6_root, rt, info);
825 write_unlock_bh(&table->tb6_lock);
826
827 return err;
828 }
829
830 int ip6_ins_rt(struct rt6_info *rt)
831 {
832 struct nl_info info = {
833 .nl_net = dev_net(rt->dst.dev),
834 };
835 return __ip6_ins_rt(rt, &info);
836 }
837
838 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
839 const struct in6_addr *daddr,
840 const struct in6_addr *saddr)
841 {
842 struct rt6_info *rt;
843
844 /*
845 * Clone the route.
846 */
847
848 rt = ip6_rt_copy(ort, daddr);
849
850 if (rt) {
851 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
852 if (ort->rt6i_dst.plen != 128 &&
853 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
854 rt->rt6i_flags |= RTF_ANYCAST;
855 rt->rt6i_gateway = *daddr;
856 }
857
858 rt->rt6i_flags |= RTF_CACHE;
859
860 #ifdef CONFIG_IPV6_SUBTREES
861 if (rt->rt6i_src.plen && saddr) {
862 rt->rt6i_src.addr = *saddr;
863 rt->rt6i_src.plen = 128;
864 }
865 #endif
866 }
867
868 return rt;
869 }
870
871 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
872 const struct in6_addr *daddr)
873 {
874 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
875
876 if (rt)
877 rt->rt6i_flags |= RTF_CACHE;
878 return rt;
879 }
880
881 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
882 struct flowi6 *fl6, int flags)
883 {
884 struct fib6_node *fn;
885 struct rt6_info *rt, *nrt;
886 int strict = 0;
887 int attempts = 3;
888 int err;
889 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
890
891 strict |= flags & RT6_LOOKUP_F_IFACE;
892
893 relookup:
894 read_lock_bh(&table->tb6_lock);
895
896 restart_2:
897 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
898
899 restart:
900 rt = rt6_select(fn, oif, strict | reachable);
901 if (rt->rt6i_nsiblings)
902 rt = rt6_multipath_select(rt, fl6, oif, strict | reachable);
903 BACKTRACK(net, &fl6->saddr);
904 if (rt == net->ipv6.ip6_null_entry ||
905 rt->rt6i_flags & RTF_CACHE)
906 goto out;
907
908 dst_hold(&rt->dst);
909 read_unlock_bh(&table->tb6_lock);
910
911 if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
912 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
913 else if (!(rt->dst.flags & DST_HOST))
914 nrt = rt6_alloc_clone(rt, &fl6->daddr);
915 else
916 goto out2;
917
918 ip6_rt_put(rt);
919 rt = nrt ? : net->ipv6.ip6_null_entry;
920
921 dst_hold(&rt->dst);
922 if (nrt) {
923 err = ip6_ins_rt(nrt);
924 if (!err)
925 goto out2;
926 }
927
928 if (--attempts <= 0)
929 goto out2;
930
931 /*
932 * Race condition! In the window while table->tb6_lock was
933 * released, someone could have inserted this route. Relookup.
934 */
935 ip6_rt_put(rt);
936 goto relookup;
937
938 out:
939 if (reachable) {
940 reachable = 0;
941 goto restart_2;
942 }
943 dst_hold(&rt->dst);
944 read_unlock_bh(&table->tb6_lock);
945 out2:
946 rt->dst.lastuse = jiffies;
947 rt->dst.__use++;
948
949 return rt;
950 }
951
952 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
953 struct flowi6 *fl6, int flags)
954 {
955 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
956 }
957
958 static struct dst_entry *ip6_route_input_lookup(struct net *net,
959 struct net_device *dev,
960 struct flowi6 *fl6, int flags)
961 {
962 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
963 flags |= RT6_LOOKUP_F_IFACE;
964
965 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
966 }
967
968 void ip6_route_input(struct sk_buff *skb)
969 {
970 const struct ipv6hdr *iph = ipv6_hdr(skb);
971 struct net *net = dev_net(skb->dev);
972 int flags = RT6_LOOKUP_F_HAS_SADDR;
973 struct flowi6 fl6 = {
974 .flowi6_iif = skb->dev->ifindex,
975 .daddr = iph->daddr,
976 .saddr = iph->saddr,
977 .flowlabel = ip6_flowinfo(iph),
978 .flowi6_mark = skb->mark,
979 .flowi6_proto = iph->nexthdr,
980 };
981
982 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
983 }
984
985 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
986 struct flowi6 *fl6, int flags)
987 {
988 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
989 }
990
991 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
992 struct flowi6 *fl6)
993 {
994 int flags = 0;
995
996 fl6->flowi6_iif = LOOPBACK_IFINDEX;
997
998 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
999 flags |= RT6_LOOKUP_F_IFACE;
1000
1001 if (!ipv6_addr_any(&fl6->saddr))
1002 flags |= RT6_LOOKUP_F_HAS_SADDR;
1003 else if (sk)
1004 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1005
1006 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1007 }
1008
1009 EXPORT_SYMBOL(ip6_route_output);
1010
1011 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1012 {
1013 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1014 struct dst_entry *new = NULL;
1015
1016 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1017 if (rt) {
1018 new = &rt->dst;
1019
1020 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1021 rt6_init_peer(rt, net->ipv6.peers);
1022
1023 new->__use = 1;
1024 new->input = dst_discard;
1025 new->output = dst_discard;
1026
1027 if (dst_metrics_read_only(&ort->dst))
1028 new->_metrics = ort->dst._metrics;
1029 else
1030 dst_copy_metrics(new, &ort->dst);
1031 rt->rt6i_idev = ort->rt6i_idev;
1032 if (rt->rt6i_idev)
1033 in6_dev_hold(rt->rt6i_idev);
1034
1035 rt->rt6i_gateway = ort->rt6i_gateway;
1036 rt->rt6i_flags = ort->rt6i_flags;
1037 rt->rt6i_metric = 0;
1038
1039 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1040 #ifdef CONFIG_IPV6_SUBTREES
1041 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1042 #endif
1043
1044 dst_free(new);
1045 }
1046
1047 dst_release(dst_orig);
1048 return new ? new : ERR_PTR(-ENOMEM);
1049 }
1050
1051 /*
1052 * Destination cache support functions
1053 */
1054
1055 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1056 {
1057 struct rt6_info *rt;
1058
1059 rt = (struct rt6_info *) dst;
1060
1061 /* All IPV6 dsts are created with ->obsolete set to the value
1062 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1063 * into this function always.
1064 */
1065 if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
1066 return NULL;
1067
1068 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
1069 return dst;
1070
1071 return NULL;
1072 }
1073
1074 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1075 {
1076 struct rt6_info *rt = (struct rt6_info *) dst;
1077
1078 if (rt) {
1079 if (rt->rt6i_flags & RTF_CACHE) {
1080 if (rt6_check_expired(rt)) {
1081 ip6_del_rt(rt);
1082 dst = NULL;
1083 }
1084 } else {
1085 dst_release(dst);
1086 dst = NULL;
1087 }
1088 }
1089 return dst;
1090 }
1091
1092 static void ip6_link_failure(struct sk_buff *skb)
1093 {
1094 struct rt6_info *rt;
1095
1096 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1097
1098 rt = (struct rt6_info *) skb_dst(skb);
1099 if (rt) {
1100 if (rt->rt6i_flags & RTF_CACHE) {
1101 dst_hold(&rt->dst);
1102 if (ip6_del_rt(rt))
1103 dst_free(&rt->dst);
1104 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1105 rt->rt6i_node->fn_sernum = -1;
1106 }
1107 }
1108 }
1109
1110 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1111 struct sk_buff *skb, u32 mtu)
1112 {
1113 struct rt6_info *rt6 = (struct rt6_info*)dst;
1114
1115 dst_confirm(dst);
1116 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1117 struct net *net = dev_net(dst->dev);
1118
1119 rt6->rt6i_flags |= RTF_MODIFIED;
1120 if (mtu < IPV6_MIN_MTU) {
1121 u32 features = dst_metric(dst, RTAX_FEATURES);
1122 mtu = IPV6_MIN_MTU;
1123 features |= RTAX_FEATURE_ALLFRAG;
1124 dst_metric_set(dst, RTAX_FEATURES, features);
1125 }
1126 dst_metric_set(dst, RTAX_MTU, mtu);
1127 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1128 }
1129 }
1130
1131 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1132 int oif, u32 mark)
1133 {
1134 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1135 struct dst_entry *dst;
1136 struct flowi6 fl6;
1137
1138 memset(&fl6, 0, sizeof(fl6));
1139 fl6.flowi6_oif = oif;
1140 fl6.flowi6_mark = mark;
1141 fl6.flowi6_flags = 0;
1142 fl6.daddr = iph->daddr;
1143 fl6.saddr = iph->saddr;
1144 fl6.flowlabel = ip6_flowinfo(iph);
1145
1146 dst = ip6_route_output(net, NULL, &fl6);
1147 if (!dst->error)
1148 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1149 dst_release(dst);
1150 }
1151 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1152
1153 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1154 {
1155 ip6_update_pmtu(skb, sock_net(sk), mtu,
1156 sk->sk_bound_dev_if, sk->sk_mark);
1157 }
1158 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1159
1160 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1161 {
1162 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1163 struct dst_entry *dst;
1164 struct flowi6 fl6;
1165
1166 memset(&fl6, 0, sizeof(fl6));
1167 fl6.flowi6_oif = oif;
1168 fl6.flowi6_mark = mark;
1169 fl6.flowi6_flags = 0;
1170 fl6.daddr = iph->daddr;
1171 fl6.saddr = iph->saddr;
1172 fl6.flowlabel = ip6_flowinfo(iph);
1173
1174 dst = ip6_route_output(net, NULL, &fl6);
1175 if (!dst->error)
1176 rt6_do_redirect(dst, NULL, skb);
1177 dst_release(dst);
1178 }
1179 EXPORT_SYMBOL_GPL(ip6_redirect);
1180
1181 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1182 {
1183 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1184 }
1185 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1186
1187 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1188 {
1189 struct net_device *dev = dst->dev;
1190 unsigned int mtu = dst_mtu(dst);
1191 struct net *net = dev_net(dev);
1192
1193 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1194
1195 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1196 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1197
1198 /*
1199 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1200 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1201 * IPV6_MAXPLEN is also valid and means: "any MSS,
1202 * rely only on pmtu discovery"
1203 */
1204 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1205 mtu = IPV6_MAXPLEN;
1206 return mtu;
1207 }
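/* Worked example: with a typical Ethernet MTU of 1500 the advertised MSS is
 * 1500 - 40 (struct ipv6hdr) - 20 (struct tcphdr) = 1440. The result is
 * clamped below by ip6_rt_min_advmss, and anything above
 * IPV6_MAXPLEN - sizeof(struct tcphdr) is reported as IPV6_MAXPLEN,
 * i.e. "any MSS, rely on PMTU discovery".
 */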
1208
1209 static unsigned int ip6_mtu(const struct dst_entry *dst)
1210 {
1211 struct inet6_dev *idev;
1212 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1213
1214 if (mtu)
1215 return mtu;
1216
1217 mtu = IPV6_MIN_MTU;
1218
1219 rcu_read_lock();
1220 idev = __in6_dev_get(dst->dev);
1221 if (idev)
1222 mtu = idev->cnf.mtu6;
1223 rcu_read_unlock();
1224
1225 return mtu;
1226 }
1227
1228 static struct dst_entry *icmp6_dst_gc_list;
1229 static DEFINE_SPINLOCK(icmp6_dst_lock);
1230
1231 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1232 struct flowi6 *fl6)
1233 {
1234 struct dst_entry *dst;
1235 struct rt6_info *rt;
1236 struct inet6_dev *idev = in6_dev_get(dev);
1237 struct net *net = dev_net(dev);
1238
1239 if (unlikely(!idev))
1240 return ERR_PTR(-ENODEV);
1241
1242 rt = ip6_dst_alloc(net, dev, 0, NULL);
1243 if (unlikely(!rt)) {
1244 in6_dev_put(idev);
1245 dst = ERR_PTR(-ENOMEM);
1246 goto out;
1247 }
1248
1249 rt->dst.flags |= DST_HOST;
1250 rt->dst.output = ip6_output;
1251 atomic_set(&rt->dst.__refcnt, 1);
1252 rt->rt6i_dst.addr = fl6->daddr;
1253 rt->rt6i_dst.plen = 128;
1254 rt->rt6i_idev = idev;
1255 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1256
1257 spin_lock_bh(&icmp6_dst_lock);
1258 rt->dst.next = icmp6_dst_gc_list;
1259 icmp6_dst_gc_list = &rt->dst;
1260 spin_unlock_bh(&icmp6_dst_lock);
1261
1262 fib6_force_start_gc(net);
1263
1264 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1265
1266 out:
1267 return dst;
1268 }
1269
1270 int icmp6_dst_gc(void)
1271 {
1272 struct dst_entry *dst, **pprev;
1273 int more = 0;
1274
1275 spin_lock_bh(&icmp6_dst_lock);
1276 pprev = &icmp6_dst_gc_list;
1277
1278 while ((dst = *pprev) != NULL) {
1279 if (!atomic_read(&dst->__refcnt)) {
1280 *pprev = dst->next;
1281 dst_free(dst);
1282 } else {
1283 pprev = &dst->next;
1284 ++more;
1285 }
1286 }
1287
1288 spin_unlock_bh(&icmp6_dst_lock);
1289
1290 return more;
1291 }
1292
1293 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1294 void *arg)
1295 {
1296 struct dst_entry *dst, **pprev;
1297
1298 spin_lock_bh(&icmp6_dst_lock);
1299 pprev = &icmp6_dst_gc_list;
1300 while ((dst = *pprev) != NULL) {
1301 struct rt6_info *rt = (struct rt6_info *) dst;
1302 if (func(rt, arg)) {
1303 *pprev = dst->next;
1304 dst_free(dst);
1305 } else {
1306 pprev = &dst->next;
1307 }
1308 }
1309 spin_unlock_bh(&icmp6_dst_lock);
1310 }
1311
1312 static int ip6_dst_gc(struct dst_ops *ops)
1313 {
1314 unsigned long now = jiffies;
1315 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1316 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1317 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1318 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1319 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1320 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1321 int entries;
1322
1323 entries = dst_entries_get_fast(ops);
1324 if (time_after(rt_last_gc + rt_min_interval, now) &&
1325 entries <= rt_max_size)
1326 goto out;
1327
1328 net->ipv6.ip6_rt_gc_expire++;
1329 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, entries > rt_max_size);
1330 net->ipv6.ip6_rt_last_gc = now;
1331 entries = dst_entries_get_slow(ops);
1332 if (entries < ops->gc_thresh)
1333 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1334 out:
1335 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1336 return entries > rt_max_size;
1337 }
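/* Behaviour sketch of the backoff above: a call that arrives within
 * ip6_rt_gc_min_interval of the last run, while the entry count is still at
 * or below ip6_rt_max_size, only decays ip6_rt_gc_expire and returns.
 * Otherwise ip6_rt_gc_expire is bumped and fib6_run_gc() runs; once the slow
 * entry count drops below gc_thresh, the expire value is reset to half of
 * ip6_rt_gc_timeout, and every call shrinks it by expire >> elasticity, so
 * GC aggressiveness ramps up under pressure and relaxes afterwards.
 */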
1338
1339 int ip6_dst_hoplimit(struct dst_entry *dst)
1340 {
1341 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1342 if (hoplimit == 0) {
1343 struct net_device *dev = dst->dev;
1344 struct inet6_dev *idev;
1345
1346 rcu_read_lock();
1347 idev = __in6_dev_get(dev);
1348 if (idev)
1349 hoplimit = idev->cnf.hop_limit;
1350 else
1351 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1352 rcu_read_unlock();
1353 }
1354 return hoplimit;
1355 }
1356 EXPORT_SYMBOL(ip6_dst_hoplimit);
1357
1358 /*
1359 *
1360 */
1361
1362 int ip6_route_add(struct fib6_config *cfg)
1363 {
1364 int err;
1365 struct net *net = cfg->fc_nlinfo.nl_net;
1366 struct rt6_info *rt = NULL;
1367 struct net_device *dev = NULL;
1368 struct inet6_dev *idev = NULL;
1369 struct fib6_table *table;
1370 int addr_type;
1371
1372 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1373 return -EINVAL;
1374 #ifndef CONFIG_IPV6_SUBTREES
1375 if (cfg->fc_src_len)
1376 return -EINVAL;
1377 #endif
1378 if (cfg->fc_ifindex) {
1379 err = -ENODEV;
1380 dev = dev_get_by_index(net, cfg->fc_ifindex);
1381 if (!dev)
1382 goto out;
1383 idev = in6_dev_get(dev);
1384 if (!idev)
1385 goto out;
1386 }
1387
1388 if (cfg->fc_metric == 0)
1389 cfg->fc_metric = IP6_RT_PRIO_USER;
1390
1391 err = -ENOBUFS;
1392 if (cfg->fc_nlinfo.nlh &&
1393 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1394 table = fib6_get_table(net, cfg->fc_table);
1395 if (!table) {
1396 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1397 table = fib6_new_table(net, cfg->fc_table);
1398 }
1399 } else {
1400 table = fib6_new_table(net, cfg->fc_table);
1401 }
1402
1403 if (!table)
1404 goto out;
1405
1406 rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1407
1408 if (!rt) {
1409 err = -ENOMEM;
1410 goto out;
1411 }
1412
1413 if (cfg->fc_flags & RTF_EXPIRES)
1414 rt6_set_expires(rt, jiffies +
1415 clock_t_to_jiffies(cfg->fc_expires));
1416 else
1417 rt6_clean_expires(rt);
1418
1419 if (cfg->fc_protocol == RTPROT_UNSPEC)
1420 cfg->fc_protocol = RTPROT_BOOT;
1421 rt->rt6i_protocol = cfg->fc_protocol;
1422
1423 addr_type = ipv6_addr_type(&cfg->fc_dst);
1424
1425 if (addr_type & IPV6_ADDR_MULTICAST)
1426 rt->dst.input = ip6_mc_input;
1427 else if (cfg->fc_flags & RTF_LOCAL)
1428 rt->dst.input = ip6_input;
1429 else
1430 rt->dst.input = ip6_forward;
1431
1432 rt->dst.output = ip6_output;
1433
1434 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1435 rt->rt6i_dst.plen = cfg->fc_dst_len;
1436 if (rt->rt6i_dst.plen == 128)
1437 rt->dst.flags |= DST_HOST;
1438
1439 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1440 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1441 if (!metrics) {
1442 err = -ENOMEM;
1443 goto out;
1444 }
1445 dst_init_metrics(&rt->dst, metrics, 0);
1446 }
1447 #ifdef CONFIG_IPV6_SUBTREES
1448 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1449 rt->rt6i_src.plen = cfg->fc_src_len;
1450 #endif
1451
1452 rt->rt6i_metric = cfg->fc_metric;
1453
1454 /* We cannot add true routes via loopback here;
1455 they would result in kernel looping, so promote them to reject routes.
1456 */
1457 if ((cfg->fc_flags & RTF_REJECT) ||
1458 (dev && (dev->flags & IFF_LOOPBACK) &&
1459 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1460 !(cfg->fc_flags & RTF_LOCAL))) {
1461 /* hold loopback dev/idev if we haven't done so. */
1462 if (dev != net->loopback_dev) {
1463 if (dev) {
1464 dev_put(dev);
1465 in6_dev_put(idev);
1466 }
1467 dev = net->loopback_dev;
1468 dev_hold(dev);
1469 idev = in6_dev_get(dev);
1470 if (!idev) {
1471 err = -ENODEV;
1472 goto out;
1473 }
1474 }
1475 rt->dst.output = ip6_pkt_discard_out;
1476 rt->dst.input = ip6_pkt_discard;
1477 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1478 switch (cfg->fc_type) {
1479 case RTN_BLACKHOLE:
1480 rt->dst.error = -EINVAL;
1481 break;
1482 case RTN_PROHIBIT:
1483 rt->dst.error = -EACCES;
1484 break;
1485 case RTN_THROW:
1486 rt->dst.error = -EAGAIN;
1487 break;
1488 default:
1489 rt->dst.error = -ENETUNREACH;
1490 break;
1491 }
1492 goto install_route;
1493 }
1494
1495 if (cfg->fc_flags & RTF_GATEWAY) {
1496 const struct in6_addr *gw_addr;
1497 int gwa_type;
1498
1499 gw_addr = &cfg->fc_gateway;
1500 rt->rt6i_gateway = *gw_addr;
1501 gwa_type = ipv6_addr_type(gw_addr);
1502
1503 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1504 struct rt6_info *grt;
1505
1506 /* IPv6 strictly prohibits using non-link-local
1507 addresses as a nexthop address.
1508 Otherwise, the router will not be able to send redirects.
1509 That is a good rule, but in some (rare!) circumstances
1510 (SIT, PtP, NBMA NOARP links) it is handy to allow
1511 some exceptions. --ANK
1512 */
1513 err = -EINVAL;
1514 if (!(gwa_type & IPV6_ADDR_UNICAST))
1515 goto out;
1516
1517 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1518
1519 err = -EHOSTUNREACH;
1520 if (!grt)
1521 goto out;
1522 if (dev) {
1523 if (dev != grt->dst.dev) {
1524 ip6_rt_put(grt);
1525 goto out;
1526 }
1527 } else {
1528 dev = grt->dst.dev;
1529 idev = grt->rt6i_idev;
1530 dev_hold(dev);
1531 in6_dev_hold(grt->rt6i_idev);
1532 }
1533 if (!(grt->rt6i_flags & RTF_GATEWAY))
1534 err = 0;
1535 ip6_rt_put(grt);
1536
1537 if (err)
1538 goto out;
1539 }
1540 err = -EINVAL;
1541 if (!dev || (dev->flags & IFF_LOOPBACK))
1542 goto out;
1543 }
1544
1545 err = -ENODEV;
1546 if (!dev)
1547 goto out;
1548
1549 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1550 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1551 err = -EINVAL;
1552 goto out;
1553 }
1554 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1555 rt->rt6i_prefsrc.plen = 128;
1556 } else
1557 rt->rt6i_prefsrc.plen = 0;
1558
1559 rt->rt6i_flags = cfg->fc_flags;
1560
1561 install_route:
1562 if (cfg->fc_mx) {
1563 struct nlattr *nla;
1564 int remaining;
1565
1566 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1567 int type = nla_type(nla);
1568
1569 if (type) {
1570 if (type > RTAX_MAX) {
1571 err = -EINVAL;
1572 goto out;
1573 }
1574
1575 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1576 }
1577 }
1578 }
1579
1580 rt->dst.dev = dev;
1581 rt->rt6i_idev = idev;
1582 rt->rt6i_table = table;
1583
1584 cfg->fc_nlinfo.nl_net = dev_net(dev);
1585
1586 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1587
1588 out:
1589 if (dev)
1590 dev_put(dev);
1591 if (idev)
1592 in6_dev_put(idev);
1593 if (rt)
1594 dst_free(&rt->dst);
1595 return err;
1596 }
1597
1598 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1599 {
1600 int err;
1601 struct fib6_table *table;
1602 struct net *net = dev_net(rt->dst.dev);
1603
1604 if (rt == net->ipv6.ip6_null_entry) {
1605 err = -ENOENT;
1606 goto out;
1607 }
1608
1609 table = rt->rt6i_table;
1610 write_lock_bh(&table->tb6_lock);
1611 err = fib6_del(rt, info);
1612 write_unlock_bh(&table->tb6_lock);
1613
1614 out:
1615 ip6_rt_put(rt);
1616 return err;
1617 }
1618
1619 int ip6_del_rt(struct rt6_info *rt)
1620 {
1621 struct nl_info info = {
1622 .nl_net = dev_net(rt->dst.dev),
1623 };
1624 return __ip6_del_rt(rt, &info);
1625 }
1626
1627 static int ip6_route_del(struct fib6_config *cfg)
1628 {
1629 struct fib6_table *table;
1630 struct fib6_node *fn;
1631 struct rt6_info *rt;
1632 int err = -ESRCH;
1633
1634 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1635 if (!table)
1636 return err;
1637
1638 read_lock_bh(&table->tb6_lock);
1639
1640 fn = fib6_locate(&table->tb6_root,
1641 &cfg->fc_dst, cfg->fc_dst_len,
1642 &cfg->fc_src, cfg->fc_src_len);
1643
1644 if (fn) {
1645 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1646 if (cfg->fc_ifindex &&
1647 (!rt->dst.dev ||
1648 rt->dst.dev->ifindex != cfg->fc_ifindex))
1649 continue;
1650 if (cfg->fc_flags & RTF_GATEWAY &&
1651 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1652 continue;
1653 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1654 continue;
1655 dst_hold(&rt->dst);
1656 read_unlock_bh(&table->tb6_lock);
1657
1658 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1659 }
1660 }
1661 read_unlock_bh(&table->tb6_lock);
1662
1663 return err;
1664 }
1665
1666 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1667 {
1668 struct net *net = dev_net(skb->dev);
1669 struct netevent_redirect netevent;
1670 struct rt6_info *rt, *nrt = NULL;
1671 struct ndisc_options ndopts;
1672 struct inet6_dev *in6_dev;
1673 struct neighbour *neigh;
1674 struct rd_msg *msg;
1675 int optlen, on_link;
1676 u8 *lladdr;
1677
1678 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
1679 optlen -= sizeof(*msg);
1680
1681 if (optlen < 0) {
1682 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1683 return;
1684 }
1685
1686 msg = (struct rd_msg *)icmp6_hdr(skb);
1687
1688 if (ipv6_addr_is_multicast(&msg->dest)) {
1689 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1690 return;
1691 }
1692
1693 on_link = 0;
1694 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
1695 on_link = 1;
1696 } else if (ipv6_addr_type(&msg->target) !=
1697 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1698 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1699 return;
1700 }
1701
1702 in6_dev = __in6_dev_get(skb->dev);
1703 if (!in6_dev)
1704 return;
1705 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1706 return;
1707
1708 /* RFC2461 8.1:
1709 * The IP source address of the Redirect MUST be the same as the current
1710 * first-hop router for the specified ICMP Destination Address.
1711 */
1712
1713 if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
1714 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1715 return;
1716 }
1717
1718 lladdr = NULL;
1719 if (ndopts.nd_opts_tgt_lladdr) {
1720 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1721 skb->dev);
1722 if (!lladdr) {
1723 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1724 return;
1725 }
1726 }
1727
1728 rt = (struct rt6_info *) dst;
1729 if (rt == net->ipv6.ip6_null_entry) {
1730 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1731 return;
1732 }
1733
1734 /* Redirect received -> path was valid.
1735 * Look, redirects are sent only in response to data packets,
1736 * so that this nexthop apparently is reachable. --ANK
1737 */
1738 dst_confirm(&rt->dst);
1739
1740 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
1741 if (!neigh)
1742 return;
1743
1744 /*
1745 * We have finally decided to accept it.
1746 */
1747
1748 neigh_update(neigh, lladdr, NUD_STALE,
1749 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1750 NEIGH_UPDATE_F_OVERRIDE|
1751 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1752 NEIGH_UPDATE_F_ISROUTER))
1753 );
1754
1755 nrt = ip6_rt_copy(rt, &msg->dest);
1756 if (!nrt)
1757 goto out;
1758
1759 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1760 if (on_link)
1761 nrt->rt6i_flags &= ~RTF_GATEWAY;
1762
1763 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1764
1765 if (ip6_ins_rt(nrt))
1766 goto out;
1767
1768 netevent.old = &rt->dst;
1769 netevent.new = &nrt->dst;
1770 netevent.daddr = &msg->dest;
1771 netevent.neigh = neigh;
1772 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1773
1774 if (rt->rt6i_flags & RTF_CACHE) {
1775 rt = (struct rt6_info *) dst_clone(&rt->dst);
1776 ip6_del_rt(rt);
1777 }
1778
1779 out:
1780 neigh_release(neigh);
1781 }
1782
1783 /*
1784 * Misc support functions
1785 */
1786
1787 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1788 const struct in6_addr *dest)
1789 {
1790 struct net *net = dev_net(ort->dst.dev);
1791 struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1792 ort->rt6i_table);
1793
1794 if (rt) {
1795 rt->dst.input = ort->dst.input;
1796 rt->dst.output = ort->dst.output;
1797 rt->dst.flags |= DST_HOST;
1798
1799 rt->rt6i_dst.addr = *dest;
1800 rt->rt6i_dst.plen = 128;
1801 dst_copy_metrics(&rt->dst, &ort->dst);
1802 rt->dst.error = ort->dst.error;
1803 rt->rt6i_idev = ort->rt6i_idev;
1804 if (rt->rt6i_idev)
1805 in6_dev_hold(rt->rt6i_idev);
1806 rt->dst.lastuse = jiffies;
1807
1808 rt->rt6i_gateway = ort->rt6i_gateway;
1809 rt->rt6i_flags = ort->rt6i_flags;
1810 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1811 (RTF_DEFAULT | RTF_ADDRCONF))
1812 rt6_set_from(rt, ort);
1813 rt->rt6i_metric = 0;
1814
1815 #ifdef CONFIG_IPV6_SUBTREES
1816 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1817 #endif
1818 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1819 rt->rt6i_table = ort->rt6i_table;
1820 }
1821 return rt;
1822 }
1823
1824 #ifdef CONFIG_IPV6_ROUTE_INFO
1825 static struct rt6_info *rt6_get_route_info(struct net *net,
1826 const struct in6_addr *prefix, int prefixlen,
1827 const struct in6_addr *gwaddr, int ifindex)
1828 {
1829 struct fib6_node *fn;
1830 struct rt6_info *rt = NULL;
1831 struct fib6_table *table;
1832
1833 table = fib6_get_table(net, RT6_TABLE_INFO);
1834 if (!table)
1835 return NULL;
1836
1837 read_lock_bh(&table->tb6_lock);
1838 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
1839 if (!fn)
1840 goto out;
1841
1842 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1843 if (rt->dst.dev->ifindex != ifindex)
1844 continue;
1845 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1846 continue;
1847 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1848 continue;
1849 dst_hold(&rt->dst);
1850 break;
1851 }
1852 out:
1853 read_unlock_bh(&table->tb6_lock);
1854 return rt;
1855 }
1856
1857 static struct rt6_info *rt6_add_route_info(struct net *net,
1858 const struct in6_addr *prefix, int prefixlen,
1859 const struct in6_addr *gwaddr, int ifindex,
1860 unsigned int pref)
1861 {
1862 struct fib6_config cfg = {
1863 .fc_table = RT6_TABLE_INFO,
1864 .fc_metric = IP6_RT_PRIO_USER,
1865 .fc_ifindex = ifindex,
1866 .fc_dst_len = prefixlen,
1867 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1868 RTF_UP | RTF_PREF(pref),
1869 .fc_nlinfo.portid = 0,
1870 .fc_nlinfo.nlh = NULL,
1871 .fc_nlinfo.nl_net = net,
1872 };
1873
1874 cfg.fc_dst = *prefix;
1875 cfg.fc_gateway = *gwaddr;
1876
1877 /* We should treat it as a default route if prefix length is 0. */
1878 if (!prefixlen)
1879 cfg.fc_flags |= RTF_DEFAULT;
1880
1881 ip6_route_add(&cfg);
1882
1883 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1884 }
1885 #endif
1886
1887 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1888 {
1889 struct rt6_info *rt;
1890 struct fib6_table *table;
1891
1892 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1893 if (!table)
1894 return NULL;
1895
1896 read_lock_bh(&table->tb6_lock);
1897 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1898 if (dev == rt->dst.dev &&
1899 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1900 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1901 break;
1902 }
1903 if (rt)
1904 dst_hold(&rt->dst);
1905 read_unlock_bh(&table->tb6_lock);
1906 return rt;
1907 }
1908
1909 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1910 struct net_device *dev,
1911 unsigned int pref)
1912 {
1913 struct fib6_config cfg = {
1914 .fc_table = RT6_TABLE_DFLT,
1915 .fc_metric = IP6_RT_PRIO_USER,
1916 .fc_ifindex = dev->ifindex,
1917 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1918 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1919 .fc_nlinfo.portid = 0,
1920 .fc_nlinfo.nlh = NULL,
1921 .fc_nlinfo.nl_net = dev_net(dev),
1922 };
1923
1924 cfg.fc_gateway = *gwaddr;
1925
1926 ip6_route_add(&cfg);
1927
1928 return rt6_get_dflt_router(gwaddr, dev);
1929 }
1930
1931 void rt6_purge_dflt_routers(struct net *net)
1932 {
1933 struct rt6_info *rt;
1934 struct fib6_table *table;
1935
1936 /* NOTE: Keep consistent with rt6_get_dflt_router */
1937 table = fib6_get_table(net, RT6_TABLE_DFLT);
1938 if (!table)
1939 return;
1940
1941 restart:
1942 read_lock_bh(&table->tb6_lock);
1943 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1944 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
1945 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
1946 dst_hold(&rt->dst);
1947 read_unlock_bh(&table->tb6_lock);
1948 ip6_del_rt(rt);
1949 goto restart;
1950 }
1951 }
1952 read_unlock_bh(&table->tb6_lock);
1953 }
1954
1955 static void rtmsg_to_fib6_config(struct net *net,
1956 struct in6_rtmsg *rtmsg,
1957 struct fib6_config *cfg)
1958 {
1959 memset(cfg, 0, sizeof(*cfg));
1960
1961 cfg->fc_table = RT6_TABLE_MAIN;
1962 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1963 cfg->fc_metric = rtmsg->rtmsg_metric;
1964 cfg->fc_expires = rtmsg->rtmsg_info;
1965 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1966 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1967 cfg->fc_flags = rtmsg->rtmsg_flags;
1968
1969 cfg->fc_nlinfo.nl_net = net;
1970
1971 cfg->fc_dst = rtmsg->rtmsg_dst;
1972 cfg->fc_src = rtmsg->rtmsg_src;
1973 cfg->fc_gateway = rtmsg->rtmsg_gateway;
1974 }
1975
1976 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1977 {
1978 struct fib6_config cfg;
1979 struct in6_rtmsg rtmsg;
1980 int err;
1981
1982 switch (cmd) {
1983 case SIOCADDRT: /* Add a route */
1984 case SIOCDELRT: /* Delete a route */
1985 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1986 return -EPERM;
1987 err = copy_from_user(&rtmsg, arg,
1988 sizeof(struct in6_rtmsg));
1989 if (err)
1990 return -EFAULT;
1991
1992 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1993
1994 rtnl_lock();
1995 switch (cmd) {
1996 case SIOCADDRT:
1997 err = ip6_route_add(&cfg);
1998 break;
1999 case SIOCDELRT:
2000 err = ip6_route_del(&cfg);
2001 break;
2002 default:
2003 err = -EINVAL;
2004 }
2005 rtnl_unlock();
2006
2007 return err;
2008 }
2009
2010 return -EINVAL;
2011 }
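/* A minimal userspace sketch (illustrative; addresses, prefix length and
 * interface name are made-up examples) of the SIOCADDRT path handled above.
 * It needs <linux/ipv6_route.h> for struct in6_rtmsg and the RTF_* flags,
 * plus <sys/ioctl.h>, <arpa/inet.h> and <net/if.h>:
 *
 *	struct in6_rtmsg rtm = { 0 };
 *	int fd = socket(AF_INET6, SOCK_DGRAM, 0);
 *
 *	inet_pton(AF_INET6, "2001:db8::", &rtm.rtmsg_dst);
 *	inet_pton(AF_INET6, "fe80::1", &rtm.rtmsg_gateway);
 *	rtm.rtmsg_dst_len = 32;
 *	rtm.rtmsg_metric = 1;
 *	rtm.rtmsg_ifindex = if_nametoindex("eth0");
 *	rtm.rtmsg_flags = RTF_UP | RTF_GATEWAY;
 *	ioctl(fd, SIOCADDRT, &rtm);	 |* ends up in ip6_route_add() *|
 */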
2012
2013 /*
2014 * Drop the packet on the floor
2015 */
2016
2017 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2018 {
2019 int type;
2020 struct dst_entry *dst = skb_dst(skb);
2021 switch (ipstats_mib_noroutes) {
2022 case IPSTATS_MIB_INNOROUTES:
2023 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2024 if (type == IPV6_ADDR_ANY) {
2025 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2026 IPSTATS_MIB_INADDRERRORS);
2027 break;
2028 }
2029 /* FALLTHROUGH */
2030 case IPSTATS_MIB_OUTNOROUTES:
2031 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2032 ipstats_mib_noroutes);
2033 break;
2034 }
2035 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2036 kfree_skb(skb);
2037 return 0;
2038 }
2039
2040 static int ip6_pkt_discard(struct sk_buff *skb)
2041 {
2042 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2043 }
2044
2045 static int ip6_pkt_discard_out(struct sk_buff *skb)
2046 {
2047 skb->dev = skb_dst(skb)->dev;
2048 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2049 }
2050
2051 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2052
2053 static int ip6_pkt_prohibit(struct sk_buff *skb)
2054 {
2055 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2056 }
2057
2058 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2059 {
2060 skb->dev = skb_dst(skb)->dev;
2061 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2062 }
2063
2064 #endif
2065
2066 /*
2067 * Allocate a dst for local (unicast / anycast) address.
2068 */
2069
2070 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2071 const struct in6_addr *addr,
2072 bool anycast)
2073 {
2074 struct net *net = dev_net(idev->dev);
2075 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2076
2077 if (!rt) {
2078 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2079 return ERR_PTR(-ENOMEM);
2080 }
2081
2082 in6_dev_hold(idev);
2083
2084 rt->dst.flags |= DST_HOST;
2085 rt->dst.input = ip6_input;
2086 rt->dst.output = ip6_output;
2087 rt->rt6i_idev = idev;
2088
2089 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2090 if (anycast)
2091 rt->rt6i_flags |= RTF_ANYCAST;
2092 else
2093 rt->rt6i_flags |= RTF_LOCAL;
2094
2095 rt->rt6i_dst.addr = *addr;
2096 rt->rt6i_dst.plen = 128;
2097 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2098
2099 atomic_set(&rt->dst.__refcnt, 1);
2100
2101 return rt;
2102 }
2103
2104 int ip6_route_get_saddr(struct net *net,
2105 struct rt6_info *rt,
2106 const struct in6_addr *daddr,
2107 unsigned int prefs,
2108 struct in6_addr *saddr)
2109 {
2110 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2111 int err = 0;
2112 if (rt->rt6i_prefsrc.plen)
2113 *saddr = rt->rt6i_prefsrc.addr;
2114 else
2115 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2116 daddr, prefs, saddr);
2117 return err;
2118 }
2119
2120 /* remove deleted ip from prefsrc entries */
2121 struct arg_dev_net_ip {
2122 struct net_device *dev;
2123 struct net *net;
2124 struct in6_addr *addr;
2125 };
2126
2127 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2128 {
2129 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2130 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2131 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2132
2133 if (((void *)rt->dst.dev == dev || !dev) &&
2134 rt != net->ipv6.ip6_null_entry &&
2135 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2136 /* remove prefsrc entry */
2137 rt->rt6i_prefsrc.plen = 0;
2138 }
2139 return 0;
2140 }
2141
2142 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2143 {
2144 struct net *net = dev_net(ifp->idev->dev);
2145 struct arg_dev_net_ip adni = {
2146 .dev = ifp->idev->dev,
2147 .net = net,
2148 .addr = &ifp->addr,
2149 };
2150 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2151 }
2152
2153 struct arg_dev_net {
2154 struct net_device *dev;
2155 struct net *net;
2156 };
2157
2158 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2159 {
2160 const struct arg_dev_net *adn = arg;
2161 const struct net_device *dev = adn->dev;
2162
2163 if ((rt->dst.dev == dev || !dev) &&
2164 rt != adn->net->ipv6.ip6_null_entry)
2165 return -1;
2166
2167 return 0;
2168 }
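/* A negative return from a fib6_clean_all() callback such as fib6_ifdown()
 * above asks the tree walker to delete the matching route; see
 * fib6_clean_node() in net/ipv6/ip6_fib.c for that convention.
 */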
2169
2170 void rt6_ifdown(struct net *net, struct net_device *dev)
2171 {
2172 struct arg_dev_net adn = {
2173 .dev = dev,
2174 .net = net,
2175 };
2176
2177 fib6_clean_all(net, fib6_ifdown, 0, &adn);
2178 icmp6_clean_all(fib6_ifdown, &adn);
2179 }
2180
2181 struct rt6_mtu_change_arg {
2182 struct net_device *dev;
2183 unsigned int mtu;
2184 };
2185
2186 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2187 {
2188 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2189 struct inet6_dev *idev;
2190
2191 /* In IPv6, PMTU discovery is not optional,
2192 so the RTAX_MTU lock cannot disable it.
2193 We still use this lock to block changes
2194 caused by addrconf/ndisc.
2195 */
2196
2197 idev = __in6_dev_get(arg->dev);
2198 if (!idev)
2199 return 0;
2200
2201 /* There is no way to discover an administrative MTU increase via
2202 IPv6 PMTU discovery, so such an increase must be propagated here.
2203 Since RFC 1981 doesn't cover administrative MTU increases,
2204 updating the PMTU on increase is a MUST (e.g. jumbo frames).
2205 */
2206 /*
2207 If the new MTU is less than the route PMTU, the new MTU will be
2208 the lowest MTU in the path; update the route PMTU to reflect the
2209 decrease. If the new MTU is greater than the route PMTU, and the
2210 old MTU was the lowest MTU in the path, update the route PMTU to
2211 reflect the increase. In that case, if another node on the path
2212 still has the lowest MTU, a PACKET TOO BIG message will trigger
2213 PMTU discovery again.
2214 */
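/* Worked example (illustrative numbers only; assumes idev->cnf.mtu6 still
 * holds the old device MTU when this runs): if the device MTU drops from
 * 1500 to 1280, every route over the device with PMTU >= 1280 is lowered
 * to 1280 by the test below.  If the MTU is later raised back to 1500,
 * only routes whose PMTU equals the old device MTU (1280) are raised;
 * a smaller PMTU learned from the path is kept.
 */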
2215 if (rt->dst.dev == arg->dev &&
2216 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2217 (dst_mtu(&rt->dst) >= arg->mtu ||
2218 (dst_mtu(&rt->dst) < arg->mtu &&
2219 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2220 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2221 }
2222 return 0;
2223 }
2224
2225 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2226 {
2227 struct rt6_mtu_change_arg arg = {
2228 .dev = dev,
2229 .mtu = mtu,
2230 };
2231
2232 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2233 }
2234
2235 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2236 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
2237 [RTA_OIF] = { .type = NLA_U32 },
2238 [RTA_IIF] = { .type = NLA_U32 },
2239 [RTA_PRIORITY] = { .type = NLA_U32 },
2240 [RTA_METRICS] = { .type = NLA_NESTED },
2241 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
2242 };
2243
2244 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2245 struct fib6_config *cfg)
2246 {
2247 struct rtmsg *rtm;
2248 struct nlattr *tb[RTA_MAX+1];
2249 int err;
2250
2251 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2252 if (err < 0)
2253 goto errout;
2254
2255 err = -EINVAL;
2256 rtm = nlmsg_data(nlh);
2257 memset(cfg, 0, sizeof(*cfg));
2258
2259 cfg->fc_table = rtm->rtm_table;
2260 cfg->fc_dst_len = rtm->rtm_dst_len;
2261 cfg->fc_src_len = rtm->rtm_src_len;
2262 cfg->fc_flags = RTF_UP;
2263 cfg->fc_protocol = rtm->rtm_protocol;
2264 cfg->fc_type = rtm->rtm_type;
2265
2266 if (rtm->rtm_type == RTN_UNREACHABLE ||
2267 rtm->rtm_type == RTN_BLACKHOLE ||
2268 rtm->rtm_type == RTN_PROHIBIT ||
2269 rtm->rtm_type == RTN_THROW)
2270 cfg->fc_flags |= RTF_REJECT;
2271
2272 if (rtm->rtm_type == RTN_LOCAL)
2273 cfg->fc_flags |= RTF_LOCAL;
2274
2275 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2276 cfg->fc_nlinfo.nlh = nlh;
2277 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2278
2279 if (tb[RTA_GATEWAY]) {
2280 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2281 cfg->fc_flags |= RTF_GATEWAY;
2282 }
2283
2284 if (tb[RTA_DST]) {
2285 int plen = (rtm->rtm_dst_len + 7) >> 3;
2286
2287 if (nla_len(tb[RTA_DST]) < plen)
2288 goto errout;
2289
2290 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2291 }
2292
2293 if (tb[RTA_SRC]) {
2294 int plen = (rtm->rtm_src_len + 7) >> 3;
2295
2296 if (nla_len(tb[RTA_SRC]) < plen)
2297 goto errout;
2298
2299 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2300 }
2301
2302 if (tb[RTA_PREFSRC])
2303 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2304
2305 if (tb[RTA_OIF])
2306 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2307
2308 if (tb[RTA_PRIORITY])
2309 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2310
2311 if (tb[RTA_METRICS]) {
2312 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2313 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2314 }
2315
2316 if (tb[RTA_TABLE])
2317 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2318
2319 if (tb[RTA_MULTIPATH]) {
2320 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2321 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2322 }
2323
2324 err = 0;
2325 errout:
2326 return err;
2327 }
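/* Illustrative mapping (assuming ordinary iproute2 usage):
 *
 *	ip -6 route add 2001:db8::/64 via fe80::1 dev eth0 metric 1024
 *
 * arrives as an RTM_NEWROUTE message with rtm_dst_len = 64 and RTA_DST,
 * RTA_GATEWAY, RTA_OIF and RTA_PRIORITY attributes, which the function
 * above copies into fc_dst/fc_dst_len, fc_gateway (plus RTF_GATEWAY),
 * fc_ifindex and fc_metric respectively.
 */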
2328
2329 static int ip6_route_multipath(struct fib6_config *cfg, int add)
2330 {
2331 struct fib6_config r_cfg;
2332 struct rtnexthop *rtnh;
2333 int remaining;
2334 int attrlen;
2335 int err = 0, last_err = 0;
2336
2337 beginning:
2338 rtnh = (struct rtnexthop *)cfg->fc_mp;
2339 remaining = cfg->fc_mp_len;
2340
2341 /* Parse a Multipath Entry */
2342 while (rtnh_ok(rtnh, remaining)) {
2343 memcpy(&r_cfg, cfg, sizeof(*cfg));
2344 if (rtnh->rtnh_ifindex)
2345 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2346
2347 attrlen = rtnh_attrlen(rtnh);
2348 if (attrlen > 0) {
2349 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2350
2351 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2352 if (nla) {
2353 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2354 r_cfg.fc_flags |= RTF_GATEWAY;
2355 }
2356 }
2357 err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2358 if (err) {
2359 last_err = err;
2360 /* If we are trying to remove a route, do not stop the
2361 * loop when ip6_route_del() fails (because the next hop
2362 * is already gone); we should try to remove all next hops.
2363 */
2364 if (add) {
2365 /* If add fails, we should try to delete all
2366 * next hops that have been already added.
2367 */
2368 add = 0;
2369 goto beginning;
2370 }
2371 }
2372 /* Because each route is added as a single route, we remove
2373 * this flag after the first nexthop (if there is a collision,
2374 * we have already failed to add the first nexthop:
2375 * fib6_add_rt2node() has rejected it).
2376 */
2377 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
2378 rtnh = rtnh_next(rtnh, &remaining);
2379 }
2380
2381 return last_err;
2382 }
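/* Sketch of the RTA_MULTIPATH payload walked above:
 *
 *	struct rtnexthop (len, flags, hops, ifindex)   <- first hop
 *	    optional nested attrs, e.g. RTA_GATEWAY    <- rtnh_attrs()
 *	struct rtnexthop ...                           <- next hop, etc.
 *
 * rtnh_ok()/rtnh_next() iterate this array, and each hop becomes an
 * independent ip6_route_add()/ip6_route_del() call on a copy of cfg.
 */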
2383
2384 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh)
2385 {
2386 struct fib6_config cfg;
2387 int err;
2388
2389 err = rtm_to_fib6_config(skb, nlh, &cfg);
2390 if (err < 0)
2391 return err;
2392
2393 if (cfg.fc_mp)
2394 return ip6_route_multipath(&cfg, 0);
2395 else
2396 return ip6_route_del(&cfg);
2397 }
2398
2399 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh)
2400 {
2401 struct fib6_config cfg;
2402 int err;
2403
2404 err = rtm_to_fib6_config(skb, nlh, &cfg);
2405 if (err < 0)
2406 return err;
2407
2408 if (cfg.fc_mp)
2409 return ip6_route_multipath(&cfg, 1);
2410 else
2411 return ip6_route_add(&cfg);
2412 }
2413
2414 static inline size_t rt6_nlmsg_size(void)
2415 {
2416 return NLMSG_ALIGN(sizeof(struct rtmsg))
2417 + nla_total_size(16) /* RTA_SRC */
2418 + nla_total_size(16) /* RTA_DST */
2419 + nla_total_size(16) /* RTA_GATEWAY */
2420 + nla_total_size(16) /* RTA_PREFSRC */
2421 + nla_total_size(4) /* RTA_TABLE */
2422 + nla_total_size(4) /* RTA_IIF */
2423 + nla_total_size(4) /* RTA_OIF */
2424 + nla_total_size(4) /* RTA_PRIORITY */
2425 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2426 + nla_total_size(sizeof(struct rta_cacheinfo));
2427 }
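/* This size is the worst-case estimate used when allocating notification
 * skbs in inet6_rt_notify() below; rt6_fill_node() returning -EMSGSIZE
 * against such an skb therefore indicates a bug in this estimate.
 */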
2428
2429 static int rt6_fill_node(struct net *net,
2430 struct sk_buff *skb, struct rt6_info *rt,
2431 struct in6_addr *dst, struct in6_addr *src,
2432 int iif, int type, u32 portid, u32 seq,
2433 int prefix, int nowait, unsigned int flags)
2434 {
2435 struct rtmsg *rtm;
2436 struct nlmsghdr *nlh;
2437 long expires;
2438 u32 table;
2439
2440 if (prefix) { /* user wants prefix routes only */
2441 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2442 /* success since this is not a prefix route */
2443 return 1;
2444 }
2445 }
2446
2447 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2448 if (!nlh)
2449 return -EMSGSIZE;
2450
2451 rtm = nlmsg_data(nlh);
2452 rtm->rtm_family = AF_INET6;
2453 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2454 rtm->rtm_src_len = rt->rt6i_src.plen;
2455 rtm->rtm_tos = 0;
2456 if (rt->rt6i_table)
2457 table = rt->rt6i_table->tb6_id;
2458 else
2459 table = RT6_TABLE_UNSPEC;
2460 rtm->rtm_table = table;
2461 if (nla_put_u32(skb, RTA_TABLE, table))
2462 goto nla_put_failure;
2463 if (rt->rt6i_flags & RTF_REJECT) {
2464 switch (rt->dst.error) {
2465 case -EINVAL:
2466 rtm->rtm_type = RTN_BLACKHOLE;
2467 break;
2468 case -EACCES:
2469 rtm->rtm_type = RTN_PROHIBIT;
2470 break;
2471 case -EAGAIN:
2472 rtm->rtm_type = RTN_THROW;
2473 break;
2474 default:
2475 rtm->rtm_type = RTN_UNREACHABLE;
2476 break;
2477 }
2478 }
2479 else if (rt->rt6i_flags & RTF_LOCAL)
2480 rtm->rtm_type = RTN_LOCAL;
2481 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2482 rtm->rtm_type = RTN_LOCAL;
2483 else
2484 rtm->rtm_type = RTN_UNICAST;
2485 rtm->rtm_flags = 0;
2486 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2487 rtm->rtm_protocol = rt->rt6i_protocol;
2488 if (rt->rt6i_flags & RTF_DYNAMIC)
2489 rtm->rtm_protocol = RTPROT_REDIRECT;
2490 else if (rt->rt6i_flags & RTF_ADDRCONF) {
2491 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2492 rtm->rtm_protocol = RTPROT_RA;
2493 else
2494 rtm->rtm_protocol = RTPROT_KERNEL;
2495 }
2496
2497 if (rt->rt6i_flags & RTF_CACHE)
2498 rtm->rtm_flags |= RTM_F_CLONED;
2499
2500 if (dst) {
2501 if (nla_put(skb, RTA_DST, 16, dst))
2502 goto nla_put_failure;
2503 rtm->rtm_dst_len = 128;
2504 } else if (rtm->rtm_dst_len)
2505 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2506 goto nla_put_failure;
2507 #ifdef CONFIG_IPV6_SUBTREES
2508 if (src) {
2509 if (nla_put(skb, RTA_SRC, 16, src))
2510 goto nla_put_failure;
2511 rtm->rtm_src_len = 128;
2512 } else if (rtm->rtm_src_len &&
2513 nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2514 goto nla_put_failure;
2515 #endif
2516 if (iif) {
2517 #ifdef CONFIG_IPV6_MROUTE
2518 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2519 int err = ip6mr_get_route(net, skb, rtm, nowait);
2520 if (err <= 0) {
2521 if (!nowait) {
2522 if (err == 0)
2523 return 0;
2524 goto nla_put_failure;
2525 } else {
2526 if (err == -EMSGSIZE)
2527 goto nla_put_failure;
2528 }
2529 }
2530 } else
2531 #endif
2532 if (nla_put_u32(skb, RTA_IIF, iif))
2533 goto nla_put_failure;
2534 } else if (dst) {
2535 struct in6_addr saddr_buf;
2536 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2537 nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2538 goto nla_put_failure;
2539 }
2540
2541 if (rt->rt6i_prefsrc.plen) {
2542 struct in6_addr saddr_buf;
2543 saddr_buf = rt->rt6i_prefsrc.addr;
2544 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2545 goto nla_put_failure;
2546 }
2547
2548 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2549 goto nla_put_failure;
2550
2551 if (rt->rt6i_flags & RTF_GATEWAY) {
2552 if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0)
2553 goto nla_put_failure;
2554 }
2555
2556 if (rt->dst.dev &&
2557 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2558 goto nla_put_failure;
2559 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2560 goto nla_put_failure;
2561
2562 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2563
2564 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2565 goto nla_put_failure;
2566
2567 return nlmsg_end(skb, nlh);
2568
2569 nla_put_failure:
2570 nlmsg_cancel(skb, nlh);
2571 return -EMSGSIZE;
2572 }
2573
2574 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2575 {
2576 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2577 int prefix;
2578
2579 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2580 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2581 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2582 } else
2583 prefix = 0;
2584
2585 return rt6_fill_node(arg->net,
2586 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2587 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2588 prefix, 0, NLM_F_MULTI);
2589 }
2590
2591 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh)
2592 {
2593 struct net *net = sock_net(in_skb->sk);
2594 struct nlattr *tb[RTA_MAX+1];
2595 struct rt6_info *rt;
2596 struct sk_buff *skb;
2597 struct rtmsg *rtm;
2598 struct flowi6 fl6;
2599 int err, iif = 0, oif = 0;
2600
2601 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2602 if (err < 0)
2603 goto errout;
2604
2605 err = -EINVAL;
2606 memset(&fl6, 0, sizeof(fl6));
2607
2608 if (tb[RTA_SRC]) {
2609 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2610 goto errout;
2611
2612 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2613 }
2614
2615 if (tb[RTA_DST]) {
2616 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2617 goto errout;
2618
2619 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2620 }
2621
2622 if (tb[RTA_IIF])
2623 iif = nla_get_u32(tb[RTA_IIF]);
2624
2625 if (tb[RTA_OIF])
2626 oif = nla_get_u32(tb[RTA_OIF]);
2627
2628 if (iif) {
2629 struct net_device *dev;
2630 int flags = 0;
2631
2632 dev = __dev_get_by_index(net, iif);
2633 if (!dev) {
2634 err = -ENODEV;
2635 goto errout;
2636 }
2637
2638 fl6.flowi6_iif = iif;
2639
2640 if (!ipv6_addr_any(&fl6.saddr))
2641 flags |= RT6_LOOKUP_F_HAS_SADDR;
2642
2643 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2644 flags);
2645 } else {
2646 fl6.flowi6_oif = oif;
2647
2648 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2649 }
2650
2651 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2652 if (!skb) {
2653 ip6_rt_put(rt);
2654 err = -ENOBUFS;
2655 goto errout;
2656 }
2657
2658 /* Reserve room for dummy headers; this skb can pass
2659 through a good chunk of the routing engine.
2660 */
2661 skb_reset_mac_header(skb);
2662 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2663
2664 skb_dst_set(skb, &rt->dst);
2665
2666 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2667 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2668 nlh->nlmsg_seq, 0, 0, 0);
2669 if (err < 0) {
2670 kfree_skb(skb);
2671 goto errout;
2672 }
2673
2674 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2675 errout:
2676 return err;
2677 }
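/* Usage sketch (assuming iproute2): a request such as
 *
 *	ip -6 route get 2001:db8::1
 *
 * is handled above: the destination arrives in RTA_DST, a route lookup is
 * performed, and the result is sent back to the requesting socket with
 * rtnl_unicast() as a single RTM_NEWROUTE message.
 */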
2678
2679 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2680 {
2681 struct sk_buff *skb;
2682 struct net *net = info->nl_net;
2683 u32 seq;
2684 int err;
2685
2686 err = -ENOBUFS;
2687 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2688
2689 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2690 if (!skb)
2691 goto errout;
2692
2693 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2694 event, info->portid, seq, 0, 0, 0);
2695 if (err < 0) {
2696 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2697 WARN_ON(err == -EMSGSIZE);
2698 kfree_skb(skb);
2699 goto errout;
2700 }
2701 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2702 info->nlh, gfp_any());
2703 return;
2704 errout:
2705 if (err < 0)
2706 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2707 }
2708
2709 static int ip6_route_dev_notify(struct notifier_block *this,
2710 unsigned long event, void *ptr)
2711 {
2712 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2713 struct net *net = dev_net(dev);
2714
2715 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2716 net->ipv6.ip6_null_entry->dst.dev = dev;
2717 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2718 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2719 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2720 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2721 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2722 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2723 #endif
2724 }
2725
2726 return NOTIFY_OK;
2727 }
2728
2729 /*
2730 * /proc
2731 */
2732
2733 #ifdef CONFIG_PROC_FS
2734
2735 struct rt6_proc_arg
2736 {
2737 char *buffer;
2738 int offset;
2739 int length;
2740 int skip;
2741 int len;
2742 };
2743
2744 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2745 {
2746 struct seq_file *m = p_arg;
2747
2748 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2749
2750 #ifdef CONFIG_IPV6_SUBTREES
2751 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2752 #else
2753 seq_puts(m, "00000000000000000000000000000000 00 ");
2754 #endif
2755 if (rt->rt6i_flags & RTF_GATEWAY) {
2756 seq_printf(m, "%pi6", &rt->rt6i_gateway);
2757 } else {
2758 seq_puts(m, "00000000000000000000000000000000");
2759 }
2760 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2761 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2762 rt->dst.__use, rt->rt6i_flags,
2763 rt->dst.dev ? rt->dst.dev->name : "");
2764 return 0;
2765 }
2766
2767 static int ipv6_route_show(struct seq_file *m, void *v)
2768 {
2769 struct net *net = (struct net *)m->private;
2770 fib6_clean_all_ro(net, rt6_info_route, 0, m);
2771 return 0;
2772 }
2773
2774 static int ipv6_route_open(struct inode *inode, struct file *file)
2775 {
2776 return single_open_net(inode, file, ipv6_route_show);
2777 }
2778
2779 static const struct file_operations ipv6_route_proc_fops = {
2780 .owner = THIS_MODULE,
2781 .open = ipv6_route_open,
2782 .read = seq_read,
2783 .llseek = seq_lseek,
2784 .release = single_release_net,
2785 };
2786
2787 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2788 {
2789 struct net *net = (struct net *)seq->private;
2790 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2791 net->ipv6.rt6_stats->fib_nodes,
2792 net->ipv6.rt6_stats->fib_route_nodes,
2793 net->ipv6.rt6_stats->fib_rt_alloc,
2794 net->ipv6.rt6_stats->fib_rt_entries,
2795 net->ipv6.rt6_stats->fib_rt_cache,
2796 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2797 net->ipv6.rt6_stats->fib_discarded_routes);
2798
2799 return 0;
2800 }
2801
2802 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2803 {
2804 return single_open_net(inode, file, rt6_stats_seq_show);
2805 }
2806
2807 static const struct file_operations rt6_stats_seq_fops = {
2808 .owner = THIS_MODULE,
2809 .open = rt6_stats_seq_open,
2810 .read = seq_read,
2811 .llseek = seq_lseek,
2812 .release = single_release_net,
2813 };
2814 #endif /* CONFIG_PROC_FS */
2815
2816 #ifdef CONFIG_SYSCTL
2817
2818 static
2819 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
2820 void __user *buffer, size_t *lenp, loff_t *ppos)
2821 {
2822 struct net *net;
2823 int delay;
2824 if (!write)
2825 return -EINVAL;
2826
2827 net = (struct net *)ctl->extra1;
2828 delay = net->ipv6.sysctl.flush_delay;
2829 proc_dointvec(ctl, write, buffer, lenp, ppos);
2830 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
2831 return 0;
2832 }
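/* Usage sketch: writing to /proc/sys/net/ipv6/route/flush, e.g.
 *
 *	echo 1 > /proc/sys/net/ipv6/route/flush
 *
 * invokes fib6_run_gc() to expire cached routes.  Note that the handler
 * passes the flush_delay value read before proc_dointvec() stores the
 * newly written one.
 */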
2833
2834 struct ctl_table ipv6_route_table_template[] = {
2835 {
2836 .procname = "flush",
2837 .data = &init_net.ipv6.sysctl.flush_delay,
2838 .maxlen = sizeof(int),
2839 .mode = 0200,
2840 .proc_handler = ipv6_sysctl_rtcache_flush
2841 },
2842 {
2843 .procname = "gc_thresh",
2844 .data = &ip6_dst_ops_template.gc_thresh,
2845 .maxlen = sizeof(int),
2846 .mode = 0644,
2847 .proc_handler = proc_dointvec,
2848 },
2849 {
2850 .procname = "max_size",
2851 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2852 .maxlen = sizeof(int),
2853 .mode = 0644,
2854 .proc_handler = proc_dointvec,
2855 },
2856 {
2857 .procname = "gc_min_interval",
2858 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2859 .maxlen = sizeof(int),
2860 .mode = 0644,
2861 .proc_handler = proc_dointvec_jiffies,
2862 },
2863 {
2864 .procname = "gc_timeout",
2865 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2866 .maxlen = sizeof(int),
2867 .mode = 0644,
2868 .proc_handler = proc_dointvec_jiffies,
2869 },
2870 {
2871 .procname = "gc_interval",
2872 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2873 .maxlen = sizeof(int),
2874 .mode = 0644,
2875 .proc_handler = proc_dointvec_jiffies,
2876 },
2877 {
2878 .procname = "gc_elasticity",
2879 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2880 .maxlen = sizeof(int),
2881 .mode = 0644,
2882 .proc_handler = proc_dointvec,
2883 },
2884 {
2885 .procname = "mtu_expires",
2886 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2887 .maxlen = sizeof(int),
2888 .mode = 0644,
2889 .proc_handler = proc_dointvec_jiffies,
2890 },
2891 {
2892 .procname = "min_adv_mss",
2893 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2894 .maxlen = sizeof(int),
2895 .mode = 0644,
2896 .proc_handler = proc_dointvec,
2897 },
2898 {
2899 .procname = "gc_min_interval_ms",
2900 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2901 .maxlen = sizeof(int),
2902 .mode = 0644,
2903 .proc_handler = proc_dointvec_ms_jiffies,
2904 },
2905 { }
2906 };
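/* Note: ipv6_route_sysctl_init() below duplicates this template for each
 * namespace and rebinds every .data pointer by array index, so the entry
 * order here must stay in sync with those assignments.
 */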
2907
2908 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2909 {
2910 struct ctl_table *table;
2911
2912 table = kmemdup(ipv6_route_table_template,
2913 sizeof(ipv6_route_table_template),
2914 GFP_KERNEL);
2915
2916 if (table) {
2917 table[0].data = &net->ipv6.sysctl.flush_delay;
2918 table[0].extra1 = net;
2919 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2920 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2921 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2922 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2923 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2924 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2925 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2926 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2927 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2928
2929 /* Don't export sysctls to unprivileged users */
2930 if (net->user_ns != &init_user_ns)
2931 table[0].procname = NULL;
2932 }
2933
2934 return table;
2935 }
2936 #endif
2937
2938 static int __net_init ip6_route_net_init(struct net *net)
2939 {
2940 int ret = -ENOMEM;
2941
2942 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2943 sizeof(net->ipv6.ip6_dst_ops));
2944
2945 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2946 goto out_ip6_dst_ops;
2947
2948 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2949 sizeof(*net->ipv6.ip6_null_entry),
2950 GFP_KERNEL);
2951 if (!net->ipv6.ip6_null_entry)
2952 goto out_ip6_dst_entries;
2953 net->ipv6.ip6_null_entry->dst.path =
2954 (struct dst_entry *)net->ipv6.ip6_null_entry;
2955 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2956 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2957 ip6_template_metrics, true);
2958
2959 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2960 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2961 sizeof(*net->ipv6.ip6_prohibit_entry),
2962 GFP_KERNEL);
2963 if (!net->ipv6.ip6_prohibit_entry)
2964 goto out_ip6_null_entry;
2965 net->ipv6.ip6_prohibit_entry->dst.path =
2966 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2967 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2968 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2969 ip6_template_metrics, true);
2970
2971 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2972 sizeof(*net->ipv6.ip6_blk_hole_entry),
2973 GFP_KERNEL);
2974 if (!net->ipv6.ip6_blk_hole_entry)
2975 goto out_ip6_prohibit_entry;
2976 net->ipv6.ip6_blk_hole_entry->dst.path =
2977 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2978 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2979 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2980 ip6_template_metrics, true);
2981 #endif
2982
2983 net->ipv6.sysctl.flush_delay = 0;
2984 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2985 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2986 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2987 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2988 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2989 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2990 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2991
2992 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2993
2994 ret = 0;
2995 out:
2996 return ret;
2997
2998 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2999 out_ip6_prohibit_entry:
3000 kfree(net->ipv6.ip6_prohibit_entry);
3001 out_ip6_null_entry:
3002 kfree(net->ipv6.ip6_null_entry);
3003 #endif
3004 out_ip6_dst_entries:
3005 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3006 out_ip6_dst_ops:
3007 goto out;
3008 }
3009
3010 static void __net_exit ip6_route_net_exit(struct net *net)
3011 {
3012 kfree(net->ipv6.ip6_null_entry);
3013 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3014 kfree(net->ipv6.ip6_prohibit_entry);
3015 kfree(net->ipv6.ip6_blk_hole_entry);
3016 #endif
3017 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3018 }
3019
3020 static int __net_init ip6_route_net_init_late(struct net *net)
3021 {
3022 #ifdef CONFIG_PROC_FS
3023 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3024 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3025 #endif
3026 return 0;
3027 }
3028
3029 static void __net_exit ip6_route_net_exit_late(struct net *net)
3030 {
3031 #ifdef CONFIG_PROC_FS
3032 remove_proc_entry("ipv6_route", net->proc_net);
3033 remove_proc_entry("rt6_stats", net->proc_net);
3034 #endif
3035 }
3036
3037 static struct pernet_operations ip6_route_net_ops = {
3038 .init = ip6_route_net_init,
3039 .exit = ip6_route_net_exit,
3040 };
3041
3042 static int __net_init ipv6_inetpeer_init(struct net *net)
3043 {
3044 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3045
3046 if (!bp)
3047 return -ENOMEM;
3048 inet_peer_base_init(bp);
3049 net->ipv6.peers = bp;
3050 return 0;
3051 }
3052
3053 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3054 {
3055 struct inet_peer_base *bp = net->ipv6.peers;
3056
3057 net->ipv6.peers = NULL;
3058 inetpeer_invalidate_tree(bp);
3059 kfree(bp);
3060 }
3061
3062 static struct pernet_operations ipv6_inetpeer_ops = {
3063 .init = ipv6_inetpeer_init,
3064 .exit = ipv6_inetpeer_exit,
3065 };
3066
3067 static struct pernet_operations ip6_route_net_late_ops = {
3068 .init = ip6_route_net_init_late,
3069 .exit = ip6_route_net_exit_late,
3070 };
3071
3072 static struct notifier_block ip6_route_dev_notifier = {
3073 .notifier_call = ip6_route_dev_notify,
3074 .priority = 0,
3075 };
3076
3077 int __init ip6_route_init(void)
3078 {
3079 int ret;
3080
3081 ret = -ENOMEM;
3082 ip6_dst_ops_template.kmem_cachep =
3083 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3084 SLAB_HWCACHE_ALIGN, NULL);
3085 if (!ip6_dst_ops_template.kmem_cachep)
3086 goto out;
3087
3088 ret = dst_entries_init(&ip6_dst_blackhole_ops);
3089 if (ret)
3090 goto out_kmem_cache;
3091
3092 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3093 if (ret)
3094 goto out_dst_entries;
3095
3096 ret = register_pernet_subsys(&ip6_route_net_ops);
3097 if (ret)
3098 goto out_register_inetpeer;
3099
3100 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3101
3102 /* The loopback device is registered before this portion of code runs,
3103 * so the loopback reference in rt6_info has not been taken; take it
3104 * manually for init_net */
3105 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3106 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3107 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3108 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3109 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3110 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3111 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3112 #endif
3113 ret = fib6_init();
3114 if (ret)
3115 goto out_register_subsys;
3116
3117 ret = xfrm6_init();
3118 if (ret)
3119 goto out_fib6_init;
3120
3121 ret = fib6_rules_init();
3122 if (ret)
3123 goto xfrm6_init;
3124
3125 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3126 if (ret)
3127 goto fib6_rules_init;
3128
3129 ret = -ENOBUFS;
3130 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3131 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3132 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3133 goto out_register_late_subsys;
3134
3135 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3136 if (ret)
3137 goto out_register_late_subsys;
3138
3139 out:
3140 return ret;
3141
3142 out_register_late_subsys:
3143 unregister_pernet_subsys(&ip6_route_net_late_ops);
3144 fib6_rules_init:
3145 fib6_rules_cleanup();
3146 xfrm6_init:
3147 xfrm6_fini();
3148 out_fib6_init:
3149 fib6_gc_cleanup();
3150 out_register_subsys:
3151 unregister_pernet_subsys(&ip6_route_net_ops);
3152 out_register_inetpeer:
3153 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3154 out_dst_entries:
3155 dst_entries_destroy(&ip6_dst_blackhole_ops);
3156 out_kmem_cache:
3157 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3158 goto out;
3159 }
3160
3161 void ip6_route_cleanup(void)
3162 {
3163 unregister_netdevice_notifier(&ip6_route_dev_notifier);
3164 unregister_pernet_subsys(&ip6_route_net_late_ops);
3165 fib6_rules_cleanup();
3166 xfrm6_fini();
3167 fib6_gc_cleanup();
3168 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3169 unregister_pernet_subsys(&ip6_route_net_ops);
3170 dst_entries_destroy(&ip6_dst_blackhole_ops);
3171 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3172 }