1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14 /* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
23 * Ville Nuorvala
24 * Fixed routing subtrees.
25 */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
61
62 #include <asm/uaccess.h>
63
64 #ifdef CONFIG_SYSCTL
65 #include <linux/sysctl.h>
66 #endif
67
68 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
69 const struct in6_addr *dest);
70 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
71 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
72 static unsigned int ip6_mtu(const struct dst_entry *dst);
73 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
74 static void ip6_dst_destroy(struct dst_entry *);
75 static void ip6_dst_ifdown(struct dst_entry *,
76 struct net_device *dev, int how);
77 static int ip6_dst_gc(struct dst_ops *ops);
78
79 static int ip6_pkt_discard(struct sk_buff *skb);
80 static int ip6_pkt_discard_out(struct sk_buff *skb);
81 static void ip6_link_failure(struct sk_buff *skb);
82 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
83 struct sk_buff *skb, u32 mtu);
84 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
85 struct sk_buff *skb);
86
87 #ifdef CONFIG_IPV6_ROUTE_INFO
88 static struct rt6_info *rt6_add_route_info(struct net *net,
89 const struct in6_addr *prefix, int prefixlen,
90 const struct in6_addr *gwaddr, int ifindex,
91 unsigned int pref);
92 static struct rt6_info *rt6_get_route_info(struct net *net,
93 const struct in6_addr *prefix, int prefixlen,
94 const struct in6_addr *gwaddr, int ifindex);
95 #endif
96
97 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
98 {
99 struct rt6_info *rt = (struct rt6_info *) dst;
100 struct inet_peer *peer;
101 u32 *p = NULL;
102
103 if (!(rt->dst.flags & DST_HOST))
104 return NULL;
105
106 peer = rt6_get_peer_create(rt);
107 if (peer) {
108 u32 *old_p = __DST_METRICS_PTR(old);
109 unsigned long prev, new;
110
111 p = peer->metrics;
112 if (inet_metrics_new(peer))
113 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
114
115 new = (unsigned long) p;
116 prev = cmpxchg(&dst->_metrics, old, new);
117
118 if (prev != old) {
119 p = __DST_METRICS_PTR(prev);
120 if (prev & DST_METRICS_READ_ONLY)
121 p = NULL;
122 }
123 }
124 return p;
125 }
126
127 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
128 struct sk_buff *skb,
129 const void *daddr)
130 {
131 struct in6_addr *p = &rt->rt6i_gateway;
132
133 if (!ipv6_addr_any(p))
134 return (const void *) p;
135 else if (skb)
136 return &ipv6_hdr(skb)->daddr;
137 return daddr;
138 }
139
140 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
141 struct sk_buff *skb,
142 const void *daddr)
143 {
144 struct rt6_info *rt = (struct rt6_info *) dst;
145 struct neighbour *n;
146
147 daddr = choose_neigh_daddr(rt, skb, daddr);
148 n = __ipv6_neigh_lookup(dst->dev, daddr);
149 if (n)
150 return n;
151 return neigh_create(&nd_tbl, daddr, dst->dev);
152 }
153
154 static struct dst_ops ip6_dst_ops_template = {
155 .family = AF_INET6,
156 .protocol = cpu_to_be16(ETH_P_IPV6),
157 .gc = ip6_dst_gc,
158 .gc_thresh = 1024,
159 .check = ip6_dst_check,
160 .default_advmss = ip6_default_advmss,
161 .mtu = ip6_mtu,
162 .cow_metrics = ipv6_cow_metrics,
163 .destroy = ip6_dst_destroy,
164 .ifdown = ip6_dst_ifdown,
165 .negative_advice = ip6_negative_advice,
166 .link_failure = ip6_link_failure,
167 .update_pmtu = ip6_rt_update_pmtu,
168 .redirect = rt6_do_redirect,
169 .local_out = __ip6_local_out,
170 .neigh_lookup = ip6_neigh_lookup,
171 };
172
173 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
174 {
175 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
176
177 return mtu ? : dst->dev->mtu;
178 }
179
180 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
181 struct sk_buff *skb, u32 mtu)
182 {
183 }
184
185 static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
186 struct sk_buff *skb)
187 {
188 }
189
190 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
191 unsigned long old)
192 {
193 return NULL;
194 }
195
196 static struct dst_ops ip6_dst_blackhole_ops = {
197 .family = AF_INET6,
198 .protocol = cpu_to_be16(ETH_P_IPV6),
199 .destroy = ip6_dst_destroy,
200 .check = ip6_dst_check,
201 .mtu = ip6_blackhole_mtu,
202 .default_advmss = ip6_default_advmss,
203 .update_pmtu = ip6_rt_blackhole_update_pmtu,
204 .redirect = ip6_rt_blackhole_redirect,
205 .cow_metrics = ip6_rt_blackhole_cow_metrics,
206 .neigh_lookup = ip6_neigh_lookup,
207 };
208
209 static const u32 ip6_template_metrics[RTAX_MAX] = {
210 [RTAX_HOPLIMIT - 1] = 0,
211 };
212
213 static const struct rt6_info ip6_null_entry_template = {
214 .dst = {
215 .__refcnt = ATOMIC_INIT(1),
216 .__use = 1,
217 .obsolete = DST_OBSOLETE_FORCE_CHK,
218 .error = -ENETUNREACH,
219 .input = ip6_pkt_discard,
220 .output = ip6_pkt_discard_out,
221 },
222 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
223 .rt6i_protocol = RTPROT_KERNEL,
224 .rt6i_metric = ~(u32) 0,
225 .rt6i_ref = ATOMIC_INIT(1),
226 };
227
228 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
229
230 static int ip6_pkt_prohibit(struct sk_buff *skb);
231 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
232
233 static const struct rt6_info ip6_prohibit_entry_template = {
234 .dst = {
235 .__refcnt = ATOMIC_INIT(1),
236 .__use = 1,
237 .obsolete = DST_OBSOLETE_FORCE_CHK,
238 .error = -EACCES,
239 .input = ip6_pkt_prohibit,
240 .output = ip6_pkt_prohibit_out,
241 },
242 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
243 .rt6i_protocol = RTPROT_KERNEL,
244 .rt6i_metric = ~(u32) 0,
245 .rt6i_ref = ATOMIC_INIT(1),
246 };
247
248 static const struct rt6_info ip6_blk_hole_entry_template = {
249 .dst = {
250 .__refcnt = ATOMIC_INIT(1),
251 .__use = 1,
252 .obsolete = DST_OBSOLETE_FORCE_CHK,
253 .error = -EINVAL,
254 .input = dst_discard,
255 .output = dst_discard,
256 },
257 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
258 .rt6i_protocol = RTPROT_KERNEL,
259 .rt6i_metric = ~(u32) 0,
260 .rt6i_ref = ATOMIC_INIT(1),
261 };
262
263 #endif
264
265 /* allocate dst with ip6_dst_ops */
266 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
267 struct net_device *dev,
268 int flags,
269 struct fib6_table *table)
270 {
271 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
272 0, DST_OBSOLETE_FORCE_CHK, flags);
273
274 if (rt) {
275 struct dst_entry *dst = &rt->dst;
276
277 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
278 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
279 rt->rt6i_genid = rt_genid(net);
280 INIT_LIST_HEAD(&rt->rt6i_siblings);
281 rt->rt6i_nsiblings = 0;
282 }
283 return rt;
284 }
285
286 static void ip6_dst_destroy(struct dst_entry *dst)
287 {
288 struct rt6_info *rt = (struct rt6_info *)dst;
289 struct inet6_dev *idev = rt->rt6i_idev;
290
291 if (!(rt->dst.flags & DST_HOST))
292 dst_destroy_metrics_generic(dst);
293
294 if (idev) {
295 rt->rt6i_idev = NULL;
296 in6_dev_put(idev);
297 }
298
299 if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
300 dst_release(dst->from);
301
302 if (rt6_has_peer(rt)) {
303 struct inet_peer *peer = rt6_peer_ptr(rt);
304 inet_putpeer(peer);
305 }
306 }
307
308 void rt6_bind_peer(struct rt6_info *rt, int create)
309 {
310 struct inet_peer_base *base;
311 struct inet_peer *peer;
312
313 base = inetpeer_base_ptr(rt->_rt6i_peer);
314 if (!base)
315 return;
316
317 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
318 if (peer) {
319 if (!rt6_set_peer(rt, peer))
320 inet_putpeer(peer);
321 }
322 }
323
324 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
325 int how)
326 {
327 struct rt6_info *rt = (struct rt6_info *)dst;
328 struct inet6_dev *idev = rt->rt6i_idev;
329 struct net_device *loopback_dev =
330 dev_net(dev)->loopback_dev;
331
332 if (dev != loopback_dev) {
333 if (idev && idev->dev == dev) {
334 struct inet6_dev *loopback_idev =
335 in6_dev_get(loopback_dev);
336 if (loopback_idev) {
337 rt->rt6i_idev = loopback_idev;
338 in6_dev_put(idev);
339 }
340 }
341 }
342 }
343
344 static bool rt6_check_expired(const struct rt6_info *rt)
345 {
346 if (rt->rt6i_flags & RTF_EXPIRES) {
347 if (time_after(jiffies, rt->dst.expires))
348 return true;
349 } else if (rt->dst.from) {
350 return rt6_check_expired((struct rt6_info *) rt->dst.from);
351 }
352 return false;
353 }
354
355 static bool rt6_need_strict(const struct in6_addr *daddr)
356 {
357 return ipv6_addr_type(daddr) &
358 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
359 }
360
361 /* Multipath route selection:
362 * Hash based function using packet header and flowlabel.
363 * Adapted from fib_info_hashfn()
364 */
365 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
366 const struct flowi6 *fl6)
367 {
368 unsigned int val = fl6->flowi6_proto;
369
370 val ^= ipv6_addr_hash(&fl6->daddr);
371 val ^= ipv6_addr_hash(&fl6->saddr);
372
373 /* Works only if this is not encapsulated */
374 switch (fl6->flowi6_proto) {
375 case IPPROTO_UDP:
376 case IPPROTO_TCP:
377 case IPPROTO_SCTP:
378 val ^= (__force u16)fl6->fl6_sport;
379 val ^= (__force u16)fl6->fl6_dport;
380 break;
381
382 case IPPROTO_ICMPV6:
383 val ^= (__force u16)fl6->fl6_icmp_type;
384 val ^= (__force u16)fl6->fl6_icmp_code;
385 break;
386 }
387 /* RFC6438 recommends using the flow label */
388 val ^= (__force u32)fl6->flowlabel;
389
390 /* Perhaps this function needs some tuning? */
391 val = val ^ (val >> 7) ^ (val >> 12);
392 return val % candidate_count;
393 }
394
395 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
396 struct flowi6 *fl6)
397 {
398 struct rt6_info *sibling, *next_sibling;
399 int route_choosen;
400
401 route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
402 /* Don't change the route if route_choosen == 0
403 * (the siblings list does not include ourselves)
404 */
405 if (route_choosen)
406 list_for_each_entry_safe(sibling, next_sibling,
407 &match->rt6i_siblings, rt6i_siblings) {
408 route_choosen--;
409 if (route_choosen == 0) {
410 match = sibling;
411 break;
412 }
413 }
414 return match;
415 }
416
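/* A worked example of the selection above (hypothetical values, for
 * illustration only): with match->rt6i_nsiblings == 2 the hash yields a
 * value in {0, 1, 2}; 0 keeps 'match' itself, while 1 or 2 walk that many
 * entries into match->rt6i_siblings. Because the hash covers the addresses,
 * the ports (or ICMP type/code) and the flow label, packets of a single
 * flow always map to the same ECMP nexthop.
 */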
417 /*
418 * Route lookup. Any table->tb6_lock is implied.
419 */
420
421 static inline struct rt6_info *rt6_device_match(struct net *net,
422 struct rt6_info *rt,
423 const struct in6_addr *saddr,
424 int oif,
425 int flags)
426 {
427 struct rt6_info *local = NULL;
428 struct rt6_info *sprt;
429
430 if (!oif && ipv6_addr_any(saddr))
431 goto out;
432
433 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
434 struct net_device *dev = sprt->dst.dev;
435
436 if (oif) {
437 if (dev->ifindex == oif)
438 return sprt;
439 if (dev->flags & IFF_LOOPBACK) {
440 if (!sprt->rt6i_idev ||
441 sprt->rt6i_idev->dev->ifindex != oif) {
442 if (flags & RT6_LOOKUP_F_IFACE && oif)
443 continue;
444 if (local && (!oif ||
445 local->rt6i_idev->dev->ifindex == oif))
446 continue;
447 }
448 local = sprt;
449 }
450 } else {
451 if (ipv6_chk_addr(net, saddr, dev,
452 flags & RT6_LOOKUP_F_IFACE))
453 return sprt;
454 }
455 }
456
457 if (oif) {
458 if (local)
459 return local;
460
461 if (flags & RT6_LOOKUP_F_IFACE)
462 return net->ipv6.ip6_null_entry;
463 }
464 out:
465 return rt;
466 }
467
468 #ifdef CONFIG_IPV6_ROUTER_PREF
469 static void rt6_probe(struct rt6_info *rt)
470 {
471 struct neighbour *neigh;
472 /*
473 * Okay, this does not seem to be appropriate
474 * for now; however, we need to check whether it
475 * really is, aka Router Reachability Probing.
476 *
477 * Router Reachability Probe MUST be rate-limited
478 * to no more than one per minute.
479 */
480 if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
481 return;
482 rcu_read_lock_bh();
483 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
484 if (neigh) {
485 write_lock(&neigh->lock);
486 if (neigh->nud_state & NUD_VALID)
487 goto out;
488 }
489
490 if (!neigh ||
491 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
492 struct in6_addr mcaddr;
493 struct in6_addr *target;
494
495 if (neigh) {
496 neigh->updated = jiffies;
497 write_unlock(&neigh->lock);
498 }
499
500 target = (struct in6_addr *)&rt->rt6i_gateway;
501 addrconf_addr_solict_mult(target, &mcaddr);
502 ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
503 } else {
504 out:
505 write_unlock(&neigh->lock);
506 }
507 rcu_read_unlock_bh();
508 }
509 #else
510 static inline void rt6_probe(struct rt6_info *rt)
511 {
512 }
513 #endif
514
515 /*
516 * Default Router Selection (RFC 2461 6.3.6)
517 */
518 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
519 {
520 struct net_device *dev = rt->dst.dev;
521 if (!oif || dev->ifindex == oif)
522 return 2;
523 if ((dev->flags & IFF_LOOPBACK) &&
524 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
525 return 1;
526 return 0;
527 }
528
529 static inline bool rt6_check_neigh(struct rt6_info *rt)
530 {
531 struct neighbour *neigh;
532 bool ret = false;
533
534 if (rt->rt6i_flags & RTF_NONEXTHOP ||
535 !(rt->rt6i_flags & RTF_GATEWAY))
536 return true;
537
538 rcu_read_lock_bh();
539 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
540 if (neigh) {
541 read_lock(&neigh->lock);
542 if (neigh->nud_state & NUD_VALID)
543 ret = true;
544 #ifdef CONFIG_IPV6_ROUTER_PREF
545 else if (!(neigh->nud_state & NUD_FAILED))
546 ret = true;
547 #endif
548 read_unlock(&neigh->lock);
549 }
550 rcu_read_unlock_bh();
551
552 return ret;
553 }
554
555 static int rt6_score_route(struct rt6_info *rt, int oif,
556 int strict)
557 {
558 int m;
559
560 m = rt6_check_dev(rt, oif);
561 if (!m && (strict & RT6_LOOKUP_F_IFACE))
562 return -1;
563 #ifdef CONFIG_IPV6_ROUTER_PREF
564 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
565 #endif
566 if (!rt6_check_neigh(rt) && (strict & RT6_LOOKUP_F_REACHABLE))
567 return -1;
568 return m;
569 }
570
571 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
572 int *mpri, struct rt6_info *match)
573 {
574 int m;
575
576 if (rt6_check_expired(rt))
577 goto out;
578
579 m = rt6_score_route(rt, oif, strict);
580 if (m < 0)
581 goto out;
582
583 if (m > *mpri) {
584 if (strict & RT6_LOOKUP_F_REACHABLE)
585 rt6_probe(match);
586 *mpri = m;
587 match = rt;
588 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
589 rt6_probe(rt);
590 }
591
592 out:
593 return match;
594 }
595
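/* Scoring summary (derived from the helpers above): rt6_score_route()
 * returns -1 to disqualify a route; otherwise bits 0-1 hold the interface
 * match from rt6_check_dev() (2 = exact oif match, 1 = loopback bound to
 * oif, 0 = neither) and, under CONFIG_IPV6_ROUTER_PREF, the decoded RA
 * preference is folded in from bit 2 upward. find_match() then keeps the
 * route with the highest score seen so far.
 */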
596 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
597 struct rt6_info *rr_head,
598 u32 metric, int oif, int strict)
599 {
600 struct rt6_info *rt, *match;
601 int mpri = -1;
602
603 match = NULL;
604 for (rt = rr_head; rt && rt->rt6i_metric == metric;
605 rt = rt->dst.rt6_next)
606 match = find_match(rt, oif, strict, &mpri, match);
607 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
608 rt = rt->dst.rt6_next)
609 match = find_match(rt, oif, strict, &mpri, match);
610
611 return match;
612 }
613
614 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
615 {
616 struct rt6_info *match, *rt0;
617 struct net *net;
618
619 rt0 = fn->rr_ptr;
620 if (!rt0)
621 fn->rr_ptr = rt0 = fn->leaf;
622
623 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
624
625 if (!match &&
626 (strict & RT6_LOOKUP_F_REACHABLE)) {
627 struct rt6_info *next = rt0->dst.rt6_next;
628
629 /* no entries matched; do round-robin */
630 if (!next || next->rt6i_metric != rt0->rt6i_metric)
631 next = fn->leaf;
632
633 if (next != rt0)
634 fn->rr_ptr = next;
635 }
636
637 net = dev_net(rt0->dst.dev);
638 return match ? match : net->ipv6.ip6_null_entry;
639 }
640
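/* Round-robin note: fn->rr_ptr remembers where the previous search started.
 * When no route passes the reachability check, rt6_select() advances the
 * pointer to the next sibling of equal metric (wrapping back to fn->leaf),
 * so later lookups probe a different candidate router, matching the
 * "round-robin the list" behaviour described in the changelog above.
 */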
641 #ifdef CONFIG_IPV6_ROUTE_INFO
642 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
643 const struct in6_addr *gwaddr)
644 {
645 struct net *net = dev_net(dev);
646 struct route_info *rinfo = (struct route_info *) opt;
647 struct in6_addr prefix_buf, *prefix;
648 unsigned int pref;
649 unsigned long lifetime;
650 struct rt6_info *rt;
651
652 if (len < sizeof(struct route_info)) {
653 return -EINVAL;
654 }
655
656 /* Sanity check for prefix_len and length */
657 if (rinfo->length > 3) {
658 return -EINVAL;
659 } else if (rinfo->prefix_len > 128) {
660 return -EINVAL;
661 } else if (rinfo->prefix_len > 64) {
662 if (rinfo->length < 2) {
663 return -EINVAL;
664 }
665 } else if (rinfo->prefix_len > 0) {
666 if (rinfo->length < 1) {
667 return -EINVAL;
668 }
669 }
670
671 pref = rinfo->route_pref;
672 if (pref == ICMPV6_ROUTER_PREF_INVALID)
673 return -EINVAL;
674
675 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
676
677 if (rinfo->length == 3)
678 prefix = (struct in6_addr *)rinfo->prefix;
679 else {
680 /* this function is safe */
681 ipv6_addr_prefix(&prefix_buf,
682 (struct in6_addr *)rinfo->prefix,
683 rinfo->prefix_len);
684 prefix = &prefix_buf;
685 }
686
687 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
688 dev->ifindex);
689
690 if (rt && !lifetime) {
691 ip6_del_rt(rt);
692 rt = NULL;
693 }
694
695 if (!rt && lifetime)
696 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
697 pref);
698 else if (rt)
699 rt->rt6i_flags = RTF_ROUTEINFO |
700 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
701
702 if (rt) {
703 if (!addrconf_finite_timeout(lifetime))
704 rt6_clean_expires(rt);
705 else
706 rt6_set_expires(rt, jiffies + HZ * lifetime);
707
708 ip6_rt_put(rt);
709 }
710 return 0;
711 }
712 #endif
713
714 #define BACKTRACK(__net, saddr) \
715 do { \
716 if (rt == __net->ipv6.ip6_null_entry) { \
717 struct fib6_node *pn; \
718 while (1) { \
719 if (fn->fn_flags & RTN_TL_ROOT) \
720 goto out; \
721 pn = fn->parent; \
722 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
723 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
724 else \
725 fn = pn; \
726 if (fn->fn_flags & RTN_RTINFO) \
727 goto restart; \
728 } \
729 } \
730 } while (0)
731
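/* The BACKTRACK() macro implements the fallback walk of the fib6 tree: when
 * the chosen route is the null entry it climbs from 'fn' towards the root,
 * re-doing the source-address lookup inside any subtree attached to a parent
 * node (routing by source prefix), and jumps back to the caller's restart:
 * label once a node carrying routes (RTN_RTINFO) is found; reaching the tree
 * root bails out through the caller's out: label instead.
 */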
732 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
733 struct fib6_table *table,
734 struct flowi6 *fl6, int flags)
735 {
736 struct fib6_node *fn;
737 struct rt6_info *rt;
738
739 read_lock_bh(&table->tb6_lock);
740 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
741 restart:
742 rt = fn->leaf;
743 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
744 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
745 rt = rt6_multipath_select(rt, fl6);
746 BACKTRACK(net, &fl6->saddr);
747 out:
748 dst_use(&rt->dst, jiffies);
749 read_unlock_bh(&table->tb6_lock);
750 return rt;
751
752 }
753
754 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
755 int flags)
756 {
757 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
758 }
759 EXPORT_SYMBOL_GPL(ip6_route_lookup);
760
761 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
762 const struct in6_addr *saddr, int oif, int strict)
763 {
764 struct flowi6 fl6 = {
765 .flowi6_oif = oif,
766 .daddr = *daddr,
767 };
768 struct dst_entry *dst;
769 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
770
771 if (saddr) {
772 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
773 flags |= RT6_LOOKUP_F_HAS_SADDR;
774 }
775
776 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
777 if (dst->error == 0)
778 return (struct rt6_info *) dst;
779
780 dst_release(dst);
781
782 return NULL;
783 }
784
785 EXPORT_SYMBOL(rt6_lookup);
786
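/* Minimal usage sketch for rt6_lookup() (hypothetical caller, not part of
 * this file):
 *
 *	struct rt6_info *rt = rt6_lookup(net, &daddr, NULL, ifindex, 0);
 *	if (rt) {
 *		... inspect rt->dst.dev, rt->rt6i_gateway ...
 *		ip6_rt_put(rt);
 *	}
 *
 * The entry comes back referenced (dst_use() in ip6_pol_route_lookup takes
 * the hold), so callers must release it; NULL is returned on lookup error.
 */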
787 /* ip6_ins_rt is called with FREE table->tb6_lock.
788 It takes a new route entry; if the addition fails for any reason, the
789 route is freed. In any case, if the caller does not hold a reference,
790 it may be destroyed.
791 */
792
793 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
794 {
795 int err;
796 struct fib6_table *table;
797
798 table = rt->rt6i_table;
799 write_lock_bh(&table->tb6_lock);
800 err = fib6_add(&table->tb6_root, rt, info);
801 write_unlock_bh(&table->tb6_lock);
802
803 return err;
804 }
805
806 int ip6_ins_rt(struct rt6_info *rt)
807 {
808 struct nl_info info = {
809 .nl_net = dev_net(rt->dst.dev),
810 };
811 return __ip6_ins_rt(rt, &info);
812 }
813
814 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
815 const struct in6_addr *daddr,
816 const struct in6_addr *saddr)
817 {
818 struct rt6_info *rt;
819
820 /*
821 * Clone the route.
822 */
823
824 rt = ip6_rt_copy(ort, daddr);
825
826 if (rt) {
827 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
828 if (ort->rt6i_dst.plen != 128 &&
829 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
830 rt->rt6i_flags |= RTF_ANYCAST;
831 rt->rt6i_gateway = *daddr;
832 }
833
834 rt->rt6i_flags |= RTF_CACHE;
835
836 #ifdef CONFIG_IPV6_SUBTREES
837 if (rt->rt6i_src.plen && saddr) {
838 rt->rt6i_src.addr = *saddr;
839 rt->rt6i_src.plen = 128;
840 }
841 #endif
842 }
843
844 return rt;
845 }
846
847 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
848 const struct in6_addr *daddr)
849 {
850 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
851
852 if (rt)
853 rt->rt6i_flags |= RTF_CACHE;
854 return rt;
855 }
856
857 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
858 struct flowi6 *fl6, int flags)
859 {
860 struct fib6_node *fn;
861 struct rt6_info *rt, *nrt;
862 int strict = 0;
863 int attempts = 3;
864 int err;
865 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
866
867 strict |= flags & RT6_LOOKUP_F_IFACE;
868
869 relookup:
870 read_lock_bh(&table->tb6_lock);
871
872 restart_2:
873 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
874
875 restart:
876 rt = rt6_select(fn, oif, strict | reachable);
877 if (rt->rt6i_nsiblings && oif == 0)
878 rt = rt6_multipath_select(rt, fl6);
879 BACKTRACK(net, &fl6->saddr);
880 if (rt == net->ipv6.ip6_null_entry ||
881 rt->rt6i_flags & RTF_CACHE)
882 goto out;
883
884 dst_hold(&rt->dst);
885 read_unlock_bh(&table->tb6_lock);
886
887 if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
888 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
889 else if (!(rt->dst.flags & DST_HOST))
890 nrt = rt6_alloc_clone(rt, &fl6->daddr);
891 else
892 goto out2;
893
894 ip6_rt_put(rt);
895 rt = nrt ? : net->ipv6.ip6_null_entry;
896
897 dst_hold(&rt->dst);
898 if (nrt) {
899 err = ip6_ins_rt(nrt);
900 if (!err)
901 goto out2;
902 }
903
904 if (--attempts <= 0)
905 goto out2;
906
907 /*
908 * Race condition! In the gap while table->tb6_lock was
909 * released, someone could have inserted this route. Relookup.
910 */
911 ip6_rt_put(rt);
912 goto relookup;
913
914 out:
915 if (reachable) {
916 reachable = 0;
917 goto restart_2;
918 }
919 dst_hold(&rt->dst);
920 read_unlock_bh(&table->tb6_lock);
921 out2:
922 rt->dst.lastuse = jiffies;
923 rt->dst.__use++;
924
925 return rt;
926 }
927
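/* Slow-path summary for ip6_pol_route(): a route that is neither RTF_CACHE
 * nor the null entry is copied before use. rt6_alloc_cow() handles routes
 * without a gateway (the destination itself becomes the nexthop and the
 * clone may gain RTF_ANYCAST), rt6_alloc_clone() handles the remaining
 * non-host routes, and the clone is inserted back into the table with
 * ip6_ins_rt(); if a concurrent insert wins, the lookup is retried
 * (bounded by the 'attempts' counter).
 */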
928 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
929 struct flowi6 *fl6, int flags)
930 {
931 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
932 }
933
934 static struct dst_entry *ip6_route_input_lookup(struct net *net,
935 struct net_device *dev,
936 struct flowi6 *fl6, int flags)
937 {
938 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
939 flags |= RT6_LOOKUP_F_IFACE;
940
941 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
942 }
943
944 void ip6_route_input(struct sk_buff *skb)
945 {
946 const struct ipv6hdr *iph = ipv6_hdr(skb);
947 struct net *net = dev_net(skb->dev);
948 int flags = RT6_LOOKUP_F_HAS_SADDR;
949 struct flowi6 fl6 = {
950 .flowi6_iif = skb->dev->ifindex,
951 .daddr = iph->daddr,
952 .saddr = iph->saddr,
953 .flowlabel = ip6_flowinfo(iph),
954 .flowi6_mark = skb->mark,
955 .flowi6_proto = iph->nexthdr,
956 };
957
958 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
959 }
960
961 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
962 struct flowi6 *fl6, int flags)
963 {
964 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
965 }
966
967 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
968 struct flowi6 *fl6)
969 {
970 int flags = 0;
971
972 fl6->flowi6_iif = LOOPBACK_IFINDEX;
973
974 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
975 flags |= RT6_LOOKUP_F_IFACE;
976
977 if (!ipv6_addr_any(&fl6->saddr))
978 flags |= RT6_LOOKUP_F_HAS_SADDR;
979 else if (sk)
980 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
981
982 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
983 }
984
985 EXPORT_SYMBOL(ip6_route_output);
986
987 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
988 {
989 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
990 struct dst_entry *new = NULL;
991
992 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
993 if (rt) {
994 new = &rt->dst;
995
996 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
997 rt6_init_peer(rt, net->ipv6.peers);
998
999 new->__use = 1;
1000 new->input = dst_discard;
1001 new->output = dst_discard;
1002
1003 if (dst_metrics_read_only(&ort->dst))
1004 new->_metrics = ort->dst._metrics;
1005 else
1006 dst_copy_metrics(new, &ort->dst);
1007 rt->rt6i_idev = ort->rt6i_idev;
1008 if (rt->rt6i_idev)
1009 in6_dev_hold(rt->rt6i_idev);
1010
1011 rt->rt6i_gateway = ort->rt6i_gateway;
1012 rt->rt6i_flags = ort->rt6i_flags;
1013 rt6_clean_expires(rt);
1014 rt->rt6i_metric = 0;
1015
1016 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1017 #ifdef CONFIG_IPV6_SUBTREES
1018 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1019 #endif
1020
1021 dst_free(new);
1022 }
1023
1024 dst_release(dst_orig);
1025 return new ? new : ERR_PTR(-ENOMEM);
1026 }
1027
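/* ip6_blackhole_route() duplicates an existing route into a dst backed by
 * ip6_dst_blackhole_ops: both input and output handlers are dst_discard,
 * and the update_pmtu/redirect/cow_metrics hooks above are no-ops, so the
 * copy can be handed to a caller without ever mutating shared routing state.
 */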
1028 /*
1029 * Destination cache support functions
1030 */
1031
1032 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1033 {
1034 struct rt6_info *rt;
1035
1036 rt = (struct rt6_info *) dst;
1037
1038 /* All IPv6 dsts are created with ->obsolete set to
1039 * DST_OBSOLETE_FORCE_CHK, which forces validation calls down
1040 * into this function in every case.
1041 */
1042 if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
1043 return NULL;
1044
1045 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
1046 return dst;
1047
1048 return NULL;
1049 }
1050
1051 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1052 {
1053 struct rt6_info *rt = (struct rt6_info *) dst;
1054
1055 if (rt) {
1056 if (rt->rt6i_flags & RTF_CACHE) {
1057 if (rt6_check_expired(rt)) {
1058 ip6_del_rt(rt);
1059 dst = NULL;
1060 }
1061 } else {
1062 dst_release(dst);
1063 dst = NULL;
1064 }
1065 }
1066 return dst;
1067 }
1068
1069 static void ip6_link_failure(struct sk_buff *skb)
1070 {
1071 struct rt6_info *rt;
1072
1073 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1074
1075 rt = (struct rt6_info *) skb_dst(skb);
1076 if (rt) {
1077 if (rt->rt6i_flags & RTF_CACHE)
1078 rt6_update_expires(rt, 0);
1079 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1080 rt->rt6i_node->fn_sernum = -1;
1081 }
1082 }
1083
1084 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1085 struct sk_buff *skb, u32 mtu)
1086 {
1087 struct rt6_info *rt6 = (struct rt6_info*)dst;
1088
1089 dst_confirm(dst);
1090 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1091 struct net *net = dev_net(dst->dev);
1092
1093 rt6->rt6i_flags |= RTF_MODIFIED;
1094 if (mtu < IPV6_MIN_MTU) {
1095 u32 features = dst_metric(dst, RTAX_FEATURES);
1096 mtu = IPV6_MIN_MTU;
1097 features |= RTAX_FEATURE_ALLFRAG;
1098 dst_metric_set(dst, RTAX_FEATURES, features);
1099 }
1100 dst_metric_set(dst, RTAX_MTU, mtu);
1101 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1102 }
1103 }
1104
1105 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1106 int oif, u32 mark)
1107 {
1108 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1109 struct dst_entry *dst;
1110 struct flowi6 fl6;
1111
1112 memset(&fl6, 0, sizeof(fl6));
1113 fl6.flowi6_oif = oif;
1114 fl6.flowi6_mark = mark;
1115 fl6.flowi6_flags = 0;
1116 fl6.daddr = iph->daddr;
1117 fl6.saddr = iph->saddr;
1118 fl6.flowlabel = ip6_flowinfo(iph);
1119
1120 dst = ip6_route_output(net, NULL, &fl6);
1121 if (!dst->error)
1122 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1123 dst_release(dst);
1124 }
1125 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1126
1127 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1128 {
1129 ip6_update_pmtu(skb, sock_net(sk), mtu,
1130 sk->sk_bound_dev_if, sk->sk_mark);
1131 }
1132 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1133
1134 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1135 {
1136 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1137 struct dst_entry *dst;
1138 struct flowi6 fl6;
1139
1140 memset(&fl6, 0, sizeof(fl6));
1141 fl6.flowi6_oif = oif;
1142 fl6.flowi6_mark = mark;
1143 fl6.flowi6_flags = 0;
1144 fl6.daddr = iph->daddr;
1145 fl6.saddr = iph->saddr;
1146 fl6.flowlabel = ip6_flowinfo(iph);
1147
1148 dst = ip6_route_output(net, NULL, &fl6);
1149 if (!dst->error)
1150 rt6_do_redirect(dst, NULL, skb);
1151 dst_release(dst);
1152 }
1153 EXPORT_SYMBOL_GPL(ip6_redirect);
1154
1155 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1156 {
1157 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1158 }
1159 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1160
1161 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1162 {
1163 struct net_device *dev = dst->dev;
1164 unsigned int mtu = dst_mtu(dst);
1165 struct net *net = dev_net(dev);
1166
1167 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1168
1169 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1170 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1171
1172 /*
1173 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1174 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1175 * IPV6_MAXPLEN is also valid and means: "any MSS,
1176 * rely only on pmtu discovery"
1177 */
1178 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1179 mtu = IPV6_MAXPLEN;
1180 return mtu;
1181 }
1182
1183 static unsigned int ip6_mtu(const struct dst_entry *dst)
1184 {
1185 struct inet6_dev *idev;
1186 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1187
1188 if (mtu)
1189 return mtu;
1190
1191 mtu = IPV6_MIN_MTU;
1192
1193 rcu_read_lock();
1194 idev = __in6_dev_get(dst->dev);
1195 if (idev)
1196 mtu = idev->cnf.mtu6;
1197 rcu_read_unlock();
1198
1199 return mtu;
1200 }
1201
1202 static struct dst_entry *icmp6_dst_gc_list;
1203 static DEFINE_SPINLOCK(icmp6_dst_lock);
1204
1205 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1206 struct flowi6 *fl6)
1207 {
1208 struct dst_entry *dst;
1209 struct rt6_info *rt;
1210 struct inet6_dev *idev = in6_dev_get(dev);
1211 struct net *net = dev_net(dev);
1212
1213 if (unlikely(!idev))
1214 return ERR_PTR(-ENODEV);
1215
1216 rt = ip6_dst_alloc(net, dev, 0, NULL);
1217 if (unlikely(!rt)) {
1218 in6_dev_put(idev);
1219 dst = ERR_PTR(-ENOMEM);
1220 goto out;
1221 }
1222
1223 rt->dst.flags |= DST_HOST;
1224 rt->dst.output = ip6_output;
1225 atomic_set(&rt->dst.__refcnt, 1);
1226 rt->rt6i_dst.addr = fl6->daddr;
1227 rt->rt6i_dst.plen = 128;
1228 rt->rt6i_idev = idev;
1229 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1230
1231 spin_lock_bh(&icmp6_dst_lock);
1232 rt->dst.next = icmp6_dst_gc_list;
1233 icmp6_dst_gc_list = &rt->dst;
1234 spin_unlock_bh(&icmp6_dst_lock);
1235
1236 fib6_force_start_gc(net);
1237
1238 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1239
1240 out:
1241 return dst;
1242 }
1243
1244 int icmp6_dst_gc(void)
1245 {
1246 struct dst_entry *dst, **pprev;
1247 int more = 0;
1248
1249 spin_lock_bh(&icmp6_dst_lock);
1250 pprev = &icmp6_dst_gc_list;
1251
1252 while ((dst = *pprev) != NULL) {
1253 if (!atomic_read(&dst->__refcnt)) {
1254 *pprev = dst->next;
1255 dst_free(dst);
1256 } else {
1257 pprev = &dst->next;
1258 ++more;
1259 }
1260 }
1261
1262 spin_unlock_bh(&icmp6_dst_lock);
1263
1264 return more;
1265 }
1266
1267 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1268 void *arg)
1269 {
1270 struct dst_entry *dst, **pprev;
1271
1272 spin_lock_bh(&icmp6_dst_lock);
1273 pprev = &icmp6_dst_gc_list;
1274 while ((dst = *pprev) != NULL) {
1275 struct rt6_info *rt = (struct rt6_info *) dst;
1276 if (func(rt, arg)) {
1277 *pprev = dst->next;
1278 dst_free(dst);
1279 } else {
1280 pprev = &dst->next;
1281 }
1282 }
1283 spin_unlock_bh(&icmp6_dst_lock);
1284 }
1285
1286 static int ip6_dst_gc(struct dst_ops *ops)
1287 {
1288 unsigned long now = jiffies;
1289 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1290 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1291 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1292 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1293 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1294 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1295 int entries;
1296
1297 entries = dst_entries_get_fast(ops);
1298 if (time_after(rt_last_gc + rt_min_interval, now) &&
1299 entries <= rt_max_size)
1300 goto out;
1301
1302 net->ipv6.ip6_rt_gc_expire++;
1303 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1304 net->ipv6.ip6_rt_last_gc = now;
1305 entries = dst_entries_get_slow(ops);
1306 if (entries < ops->gc_thresh)
1307 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1308 out:
1309 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1310 return entries > rt_max_size;
1311 }
1312
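/* GC pacing note: ip6_rt_gc_expire grows by one each time GC actually runs
 * and decays by (expire >> ip6_rt_gc_elasticity) on every call, so the
 * effective fib6 timeout shortens while dst entries stay above
 * ip6_rt_max_size and relaxes again afterwards; once the slow entry count
 * drops below gc_thresh it is reset to half of ip6_rt_gc_timeout.
 */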
1313 int ip6_dst_hoplimit(struct dst_entry *dst)
1314 {
1315 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1316 if (hoplimit == 0) {
1317 struct net_device *dev = dst->dev;
1318 struct inet6_dev *idev;
1319
1320 rcu_read_lock();
1321 idev = __in6_dev_get(dev);
1322 if (idev)
1323 hoplimit = idev->cnf.hop_limit;
1324 else
1325 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1326 rcu_read_unlock();
1327 }
1328 return hoplimit;
1329 }
1330 EXPORT_SYMBOL(ip6_dst_hoplimit);
1331
1332 /*
1333 * Add a route built from a fib6_config to the appropriate FIB table.
1334 */
1335
1336 int ip6_route_add(struct fib6_config *cfg)
1337 {
1338 int err;
1339 struct net *net = cfg->fc_nlinfo.nl_net;
1340 struct rt6_info *rt = NULL;
1341 struct net_device *dev = NULL;
1342 struct inet6_dev *idev = NULL;
1343 struct fib6_table *table;
1344 int addr_type;
1345
1346 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1347 return -EINVAL;
1348 #ifndef CONFIG_IPV6_SUBTREES
1349 if (cfg->fc_src_len)
1350 return -EINVAL;
1351 #endif
1352 if (cfg->fc_ifindex) {
1353 err = -ENODEV;
1354 dev = dev_get_by_index(net, cfg->fc_ifindex);
1355 if (!dev)
1356 goto out;
1357 idev = in6_dev_get(dev);
1358 if (!idev)
1359 goto out;
1360 }
1361
1362 if (cfg->fc_metric == 0)
1363 cfg->fc_metric = IP6_RT_PRIO_USER;
1364
1365 err = -ENOBUFS;
1366 if (cfg->fc_nlinfo.nlh &&
1367 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1368 table = fib6_get_table(net, cfg->fc_table);
1369 if (!table) {
1370 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1371 table = fib6_new_table(net, cfg->fc_table);
1372 }
1373 } else {
1374 table = fib6_new_table(net, cfg->fc_table);
1375 }
1376
1377 if (!table)
1378 goto out;
1379
1380 rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1381
1382 if (!rt) {
1383 err = -ENOMEM;
1384 goto out;
1385 }
1386
1387 if (cfg->fc_flags & RTF_EXPIRES)
1388 rt6_set_expires(rt, jiffies +
1389 clock_t_to_jiffies(cfg->fc_expires));
1390 else
1391 rt6_clean_expires(rt);
1392
1393 if (cfg->fc_protocol == RTPROT_UNSPEC)
1394 cfg->fc_protocol = RTPROT_BOOT;
1395 rt->rt6i_protocol = cfg->fc_protocol;
1396
1397 addr_type = ipv6_addr_type(&cfg->fc_dst);
1398
1399 if (addr_type & IPV6_ADDR_MULTICAST)
1400 rt->dst.input = ip6_mc_input;
1401 else if (cfg->fc_flags & RTF_LOCAL)
1402 rt->dst.input = ip6_input;
1403 else
1404 rt->dst.input = ip6_forward;
1405
1406 rt->dst.output = ip6_output;
1407
1408 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1409 rt->rt6i_dst.plen = cfg->fc_dst_len;
1410 if (rt->rt6i_dst.plen == 128)
1411 rt->dst.flags |= DST_HOST;
1412
1413 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1414 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1415 if (!metrics) {
1416 err = -ENOMEM;
1417 goto out;
1418 }
1419 dst_init_metrics(&rt->dst, metrics, 0);
1420 }
1421 #ifdef CONFIG_IPV6_SUBTREES
1422 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1423 rt->rt6i_src.plen = cfg->fc_src_len;
1424 #endif
1425
1426 rt->rt6i_metric = cfg->fc_metric;
1427
1428 /* We cannot add true routes via loopback here,
1429 they would result in kernel looping; promote them to reject routes
1430 */
1431 if ((cfg->fc_flags & RTF_REJECT) ||
1432 (dev && (dev->flags & IFF_LOOPBACK) &&
1433 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1434 !(cfg->fc_flags & RTF_LOCAL))) {
1435 /* hold loopback dev/idev if we haven't done so. */
1436 if (dev != net->loopback_dev) {
1437 if (dev) {
1438 dev_put(dev);
1439 in6_dev_put(idev);
1440 }
1441 dev = net->loopback_dev;
1442 dev_hold(dev);
1443 idev = in6_dev_get(dev);
1444 if (!idev) {
1445 err = -ENODEV;
1446 goto out;
1447 }
1448 }
1449 rt->dst.output = ip6_pkt_discard_out;
1450 rt->dst.input = ip6_pkt_discard;
1451 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1452 switch (cfg->fc_type) {
1453 case RTN_BLACKHOLE:
1454 rt->dst.error = -EINVAL;
1455 break;
1456 case RTN_PROHIBIT:
1457 rt->dst.error = -EACCES;
1458 break;
1459 case RTN_THROW:
1460 rt->dst.error = -EAGAIN;
1461 break;
1462 default:
1463 rt->dst.error = -ENETUNREACH;
1464 break;
1465 }
1466 goto install_route;
1467 }
1468
1469 if (cfg->fc_flags & RTF_GATEWAY) {
1470 const struct in6_addr *gw_addr;
1471 int gwa_type;
1472
1473 gw_addr = &cfg->fc_gateway;
1474 rt->rt6i_gateway = *gw_addr;
1475 gwa_type = ipv6_addr_type(gw_addr);
1476
1477 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1478 struct rt6_info *grt;
1479
1480 /* IPv6 strictly inhibits using non-link-local
1481 addresses as nexthop addresses.
1482 Otherwise, the router will not be able to send redirects.
1483 That is generally good, but in some (rare!) circumstances
1484 (SIT, PtP, NBMA NOARP links) it is handy to allow
1485 some exceptions. --ANK
1486 */
1487 err = -EINVAL;
1488 if (!(gwa_type & IPV6_ADDR_UNICAST))
1489 goto out;
1490
1491 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1492
1493 err = -EHOSTUNREACH;
1494 if (!grt)
1495 goto out;
1496 if (dev) {
1497 if (dev != grt->dst.dev) {
1498 ip6_rt_put(grt);
1499 goto out;
1500 }
1501 } else {
1502 dev = grt->dst.dev;
1503 idev = grt->rt6i_idev;
1504 dev_hold(dev);
1505 in6_dev_hold(grt->rt6i_idev);
1506 }
1507 if (!(grt->rt6i_flags & RTF_GATEWAY))
1508 err = 0;
1509 ip6_rt_put(grt);
1510
1511 if (err)
1512 goto out;
1513 }
1514 err = -EINVAL;
1515 if (!dev || (dev->flags & IFF_LOOPBACK))
1516 goto out;
1517 }
1518
1519 err = -ENODEV;
1520 if (!dev)
1521 goto out;
1522
1523 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1524 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1525 err = -EINVAL;
1526 goto out;
1527 }
1528 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1529 rt->rt6i_prefsrc.plen = 128;
1530 } else
1531 rt->rt6i_prefsrc.plen = 0;
1532
1533 rt->rt6i_flags = cfg->fc_flags;
1534
1535 install_route:
1536 if (cfg->fc_mx) {
1537 struct nlattr *nla;
1538 int remaining;
1539
1540 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1541 int type = nla_type(nla);
1542
1543 if (type) {
1544 if (type > RTAX_MAX) {
1545 err = -EINVAL;
1546 goto out;
1547 }
1548
1549 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1550 }
1551 }
1552 }
1553
1554 rt->dst.dev = dev;
1555 rt->rt6i_idev = idev;
1556 rt->rt6i_table = table;
1557
1558 cfg->fc_nlinfo.nl_net = dev_net(dev);
1559
1560 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1561
1562 out:
1563 if (dev)
1564 dev_put(dev);
1565 if (idev)
1566 in6_dev_put(idev);
1567 if (rt)
1568 dst_free(&rt->dst);
1569 return err;
1570 }
1571
1572 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1573 {
1574 int err;
1575 struct fib6_table *table;
1576 struct net *net = dev_net(rt->dst.dev);
1577
1578 if (rt == net->ipv6.ip6_null_entry) {
1579 err = -ENOENT;
1580 goto out;
1581 }
1582
1583 table = rt->rt6i_table;
1584 write_lock_bh(&table->tb6_lock);
1585 err = fib6_del(rt, info);
1586 write_unlock_bh(&table->tb6_lock);
1587
1588 out:
1589 ip6_rt_put(rt);
1590 return err;
1591 }
1592
1593 int ip6_del_rt(struct rt6_info *rt)
1594 {
1595 struct nl_info info = {
1596 .nl_net = dev_net(rt->dst.dev),
1597 };
1598 return __ip6_del_rt(rt, &info);
1599 }
1600
1601 static int ip6_route_del(struct fib6_config *cfg)
1602 {
1603 struct fib6_table *table;
1604 struct fib6_node *fn;
1605 struct rt6_info *rt;
1606 int err = -ESRCH;
1607
1608 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1609 if (!table)
1610 return err;
1611
1612 read_lock_bh(&table->tb6_lock);
1613
1614 fn = fib6_locate(&table->tb6_root,
1615 &cfg->fc_dst, cfg->fc_dst_len,
1616 &cfg->fc_src, cfg->fc_src_len);
1617
1618 if (fn) {
1619 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1620 if (cfg->fc_ifindex &&
1621 (!rt->dst.dev ||
1622 rt->dst.dev->ifindex != cfg->fc_ifindex))
1623 continue;
1624 if (cfg->fc_flags & RTF_GATEWAY &&
1625 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1626 continue;
1627 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1628 continue;
1629 dst_hold(&rt->dst);
1630 read_unlock_bh(&table->tb6_lock);
1631
1632 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1633 }
1634 }
1635 read_unlock_bh(&table->tb6_lock);
1636
1637 return err;
1638 }
1639
1640 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1641 {
1642 struct net *net = dev_net(skb->dev);
1643 struct netevent_redirect netevent;
1644 struct rt6_info *rt, *nrt = NULL;
1645 struct ndisc_options ndopts;
1646 struct inet6_dev *in6_dev;
1647 struct neighbour *neigh;
1648 struct rd_msg *msg;
1649 int optlen, on_link;
1650 u8 *lladdr;
1651
1652 optlen = skb->tail - skb->transport_header;
1653 optlen -= sizeof(*msg);
1654
1655 if (optlen < 0) {
1656 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1657 return;
1658 }
1659
1660 msg = (struct rd_msg *)icmp6_hdr(skb);
1661
1662 if (ipv6_addr_is_multicast(&msg->dest)) {
1663 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1664 return;
1665 }
1666
1667 on_link = 0;
1668 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
1669 on_link = 1;
1670 } else if (ipv6_addr_type(&msg->target) !=
1671 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1672 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1673 return;
1674 }
1675
1676 in6_dev = __in6_dev_get(skb->dev);
1677 if (!in6_dev)
1678 return;
1679 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1680 return;
1681
1682 /* RFC2461 8.1:
1683 * The IP source address of the Redirect MUST be the same as the current
1684 * first-hop router for the specified ICMP Destination Address.
1685 */
1686
1687 if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
1688 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1689 return;
1690 }
1691
1692 lladdr = NULL;
1693 if (ndopts.nd_opts_tgt_lladdr) {
1694 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1695 skb->dev);
1696 if (!lladdr) {
1697 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1698 return;
1699 }
1700 }
1701
1702 rt = (struct rt6_info *) dst;
1703 if (rt == net->ipv6.ip6_null_entry) {
1704 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1705 return;
1706 }
1707
1708 /* Redirect received -> path was valid.
1709 * Look, redirects are sent only in response to data packets,
1710 * so this nexthop is apparently reachable. --ANK
1711 */
1712 dst_confirm(&rt->dst);
1713
1714 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
1715 if (!neigh)
1716 return;
1717
1718 /*
1719 * We have finally decided to accept it.
1720 */
1721
1722 neigh_update(neigh, lladdr, NUD_STALE,
1723 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1724 NEIGH_UPDATE_F_OVERRIDE|
1725 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1726 NEIGH_UPDATE_F_ISROUTER))
1727 );
1728
1729 nrt = ip6_rt_copy(rt, &msg->dest);
1730 if (!nrt)
1731 goto out;
1732
1733 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1734 if (on_link)
1735 nrt->rt6i_flags &= ~RTF_GATEWAY;
1736
1737 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1738
1739 if (ip6_ins_rt(nrt))
1740 goto out;
1741
1742 netevent.old = &rt->dst;
1743 netevent.new = &nrt->dst;
1744 netevent.daddr = &msg->dest;
1745 netevent.neigh = neigh;
1746 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1747
1748 if (rt->rt6i_flags & RTF_CACHE) {
1749 rt = (struct rt6_info *) dst_clone(&rt->dst);
1750 ip6_del_rt(rt);
1751 }
1752
1753 out:
1754 neigh_release(neigh);
1755 }
1756
1757 /*
1758 * Misc support functions
1759 */
1760
1761 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1762 const struct in6_addr *dest)
1763 {
1764 struct net *net = dev_net(ort->dst.dev);
1765 struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1766 ort->rt6i_table);
1767
1768 if (rt) {
1769 rt->dst.input = ort->dst.input;
1770 rt->dst.output = ort->dst.output;
1771 rt->dst.flags |= DST_HOST;
1772
1773 rt->rt6i_dst.addr = *dest;
1774 rt->rt6i_dst.plen = 128;
1775 dst_copy_metrics(&rt->dst, &ort->dst);
1776 rt->dst.error = ort->dst.error;
1777 rt->rt6i_idev = ort->rt6i_idev;
1778 if (rt->rt6i_idev)
1779 in6_dev_hold(rt->rt6i_idev);
1780 rt->dst.lastuse = jiffies;
1781
1782 rt->rt6i_gateway = ort->rt6i_gateway;
1783 rt->rt6i_flags = ort->rt6i_flags;
1784 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1785 (RTF_DEFAULT | RTF_ADDRCONF))
1786 rt6_set_from(rt, ort);
1787 else
1788 rt6_clean_expires(rt);
1789 rt->rt6i_metric = 0;
1790
1791 #ifdef CONFIG_IPV6_SUBTREES
1792 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1793 #endif
1794 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1795 rt->rt6i_table = ort->rt6i_table;
1796 }
1797 return rt;
1798 }
1799
1800 #ifdef CONFIG_IPV6_ROUTE_INFO
1801 static struct rt6_info *rt6_get_route_info(struct net *net,
1802 const struct in6_addr *prefix, int prefixlen,
1803 const struct in6_addr *gwaddr, int ifindex)
1804 {
1805 struct fib6_node *fn;
1806 struct rt6_info *rt = NULL;
1807 struct fib6_table *table;
1808
1809 table = fib6_get_table(net, RT6_TABLE_INFO);
1810 if (!table)
1811 return NULL;
1812
1813 read_lock_bh(&table->tb6_lock);
1814 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
1815 if (!fn)
1816 goto out;
1817
1818 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1819 if (rt->dst.dev->ifindex != ifindex)
1820 continue;
1821 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1822 continue;
1823 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1824 continue;
1825 dst_hold(&rt->dst);
1826 break;
1827 }
1828 out:
1829 read_unlock_bh(&table->tb6_lock);
1830 return rt;
1831 }
1832
1833 static struct rt6_info *rt6_add_route_info(struct net *net,
1834 const struct in6_addr *prefix, int prefixlen,
1835 const struct in6_addr *gwaddr, int ifindex,
1836 unsigned int pref)
1837 {
1838 struct fib6_config cfg = {
1839 .fc_table = RT6_TABLE_INFO,
1840 .fc_metric = IP6_RT_PRIO_USER,
1841 .fc_ifindex = ifindex,
1842 .fc_dst_len = prefixlen,
1843 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1844 RTF_UP | RTF_PREF(pref),
1845 .fc_nlinfo.portid = 0,
1846 .fc_nlinfo.nlh = NULL,
1847 .fc_nlinfo.nl_net = net,
1848 };
1849
1850 cfg.fc_dst = *prefix;
1851 cfg.fc_gateway = *gwaddr;
1852
1853 /* We should treat it as a default route if prefix length is 0. */
1854 if (!prefixlen)
1855 cfg.fc_flags |= RTF_DEFAULT;
1856
1857 ip6_route_add(&cfg);
1858
1859 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1860 }
1861 #endif
1862
1863 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1864 {
1865 struct rt6_info *rt;
1866 struct fib6_table *table;
1867
1868 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1869 if (!table)
1870 return NULL;
1871
1872 read_lock_bh(&table->tb6_lock);
1873 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1874 if (dev == rt->dst.dev &&
1875 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1876 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1877 break;
1878 }
1879 if (rt)
1880 dst_hold(&rt->dst);
1881 read_unlock_bh(&table->tb6_lock);
1882 return rt;
1883 }
1884
1885 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1886 struct net_device *dev,
1887 unsigned int pref)
1888 {
1889 struct fib6_config cfg = {
1890 .fc_table = RT6_TABLE_DFLT,
1891 .fc_metric = IP6_RT_PRIO_USER,
1892 .fc_ifindex = dev->ifindex,
1893 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1894 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1895 .fc_nlinfo.portid = 0,
1896 .fc_nlinfo.nlh = NULL,
1897 .fc_nlinfo.nl_net = dev_net(dev),
1898 };
1899
1900 cfg.fc_gateway = *gwaddr;
1901
1902 ip6_route_add(&cfg);
1903
1904 return rt6_get_dflt_router(gwaddr, dev);
1905 }
1906
1907 void rt6_purge_dflt_routers(struct net *net)
1908 {
1909 struct rt6_info *rt;
1910 struct fib6_table *table;
1911
1912 /* NOTE: Keep consistent with rt6_get_dflt_router */
1913 table = fib6_get_table(net, RT6_TABLE_DFLT);
1914 if (!table)
1915 return;
1916
1917 restart:
1918 read_lock_bh(&table->tb6_lock);
1919 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1920 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1921 dst_hold(&rt->dst);
1922 read_unlock_bh(&table->tb6_lock);
1923 ip6_del_rt(rt);
1924 goto restart;
1925 }
1926 }
1927 read_unlock_bh(&table->tb6_lock);
1928 }
1929
1930 static void rtmsg_to_fib6_config(struct net *net,
1931 struct in6_rtmsg *rtmsg,
1932 struct fib6_config *cfg)
1933 {
1934 memset(cfg, 0, sizeof(*cfg));
1935
1936 cfg->fc_table = RT6_TABLE_MAIN;
1937 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1938 cfg->fc_metric = rtmsg->rtmsg_metric;
1939 cfg->fc_expires = rtmsg->rtmsg_info;
1940 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1941 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1942 cfg->fc_flags = rtmsg->rtmsg_flags;
1943
1944 cfg->fc_nlinfo.nl_net = net;
1945
1946 cfg->fc_dst = rtmsg->rtmsg_dst;
1947 cfg->fc_src = rtmsg->rtmsg_src;
1948 cfg->fc_gateway = rtmsg->rtmsg_gateway;
1949 }
1950
1951 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1952 {
1953 struct fib6_config cfg;
1954 struct in6_rtmsg rtmsg;
1955 int err;
1956
1957 switch (cmd) {
1958 case SIOCADDRT: /* Add a route */
1959 case SIOCDELRT: /* Delete a route */
1960 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1961 return -EPERM;
1962 err = copy_from_user(&rtmsg, arg,
1963 sizeof(struct in6_rtmsg));
1964 if (err)
1965 return -EFAULT;
1966
1967 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1968
1969 rtnl_lock();
1970 switch (cmd) {
1971 case SIOCADDRT:
1972 err = ip6_route_add(&cfg);
1973 break;
1974 case SIOCDELRT:
1975 err = ip6_route_del(&cfg);
1976 break;
1977 default:
1978 err = -EINVAL;
1979 }
1980 rtnl_unlock();
1981
1982 return err;
1983 }
1984
1985 return -EINVAL;
1986 }
1987
1988 /*
1989 * Drop the packet on the floor
1990 */
1991
1992 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1993 {
1994 int type;
1995 struct dst_entry *dst = skb_dst(skb);
1996 switch (ipstats_mib_noroutes) {
1997 case IPSTATS_MIB_INNOROUTES:
1998 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1999 if (type == IPV6_ADDR_ANY) {
2000 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2001 IPSTATS_MIB_INADDRERRORS);
2002 break;
2003 }
2004 /* FALLTHROUGH */
2005 case IPSTATS_MIB_OUTNOROUTES:
2006 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2007 ipstats_mib_noroutes);
2008 break;
2009 }
2010 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2011 kfree_skb(skb);
2012 return 0;
2013 }
2014
2015 static int ip6_pkt_discard(struct sk_buff *skb)
2016 {
2017 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2018 }
2019
2020 static int ip6_pkt_discard_out(struct sk_buff *skb)
2021 {
2022 skb->dev = skb_dst(skb)->dev;
2023 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2024 }
2025
2026 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2027
2028 static int ip6_pkt_prohibit(struct sk_buff *skb)
2029 {
2030 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2031 }
2032
2033 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2034 {
2035 skb->dev = skb_dst(skb)->dev;
2036 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2037 }
2038
2039 #endif
2040
2041 /*
2042 * Allocate a dst for local (unicast / anycast) address.
2043 */
2044
2045 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2046 const struct in6_addr *addr,
2047 bool anycast)
2048 {
2049 struct net *net = dev_net(idev->dev);
2050 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2051
2052 if (!rt) {
2053 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2054 return ERR_PTR(-ENOMEM);
2055 }
2056
2057 in6_dev_hold(idev);
2058
2059 rt->dst.flags |= DST_HOST;
2060 rt->dst.input = ip6_input;
2061 rt->dst.output = ip6_output;
2062 rt->rt6i_idev = idev;
2063
2064 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2065 if (anycast)
2066 rt->rt6i_flags |= RTF_ANYCAST;
2067 else
2068 rt->rt6i_flags |= RTF_LOCAL;
2069
2070 rt->rt6i_dst.addr = *addr;
2071 rt->rt6i_dst.plen = 128;
2072 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2073
2074 atomic_set(&rt->dst.__refcnt, 1);
2075
2076 return rt;
2077 }
2078
2079 int ip6_route_get_saddr(struct net *net,
2080 struct rt6_info *rt,
2081 const struct in6_addr *daddr,
2082 unsigned int prefs,
2083 struct in6_addr *saddr)
2084 {
2085 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2086 int err = 0;
2087 if (rt->rt6i_prefsrc.plen)
2088 *saddr = rt->rt6i_prefsrc.addr;
2089 else
2090 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2091 daddr, prefs, saddr);
2092 return err;
2093 }
2094
2095 /* remove deleted ip from prefsrc entries */
2096 struct arg_dev_net_ip {
2097 struct net_device *dev;
2098 struct net *net;
2099 struct in6_addr *addr;
2100 };
2101
2102 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2103 {
2104 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2105 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2106 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2107
2108 if (((void *)rt->dst.dev == dev || !dev) &&
2109 rt != net->ipv6.ip6_null_entry &&
2110 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2111 /* remove prefsrc entry */
2112 rt->rt6i_prefsrc.plen = 0;
2113 }
2114 return 0;
2115 }
2116
2117 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2118 {
2119 struct net *net = dev_net(ifp->idev->dev);
2120 struct arg_dev_net_ip adni = {
2121 .dev = ifp->idev->dev,
2122 .net = net,
2123 .addr = &ifp->addr,
2124 };
2125 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2126 }
2127
2128 struct arg_dev_net {
2129 struct net_device *dev;
2130 struct net *net;
2131 };
2132
2133 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2134 {
2135 const struct arg_dev_net *adn = arg;
2136 const struct net_device *dev = adn->dev;
2137
2138 if ((rt->dst.dev == dev || !dev) &&
2139 rt != adn->net->ipv6.ip6_null_entry)
2140 return -1;
2141
2142 return 0;
2143 }
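/* A negative return from a fib6_clean_all() callback asks the fib
 * walker to delete the route, so fib6_ifdown() above removes every
 * route bound to the departing device (or all routes when dev is NULL),
 * while always sparing the per-namespace null entry.  This relies on
 * the callback convention of fib6_clean_all(); see net/ipv6/ip6_fib.c.
 */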
2144
2145 void rt6_ifdown(struct net *net, struct net_device *dev)
2146 {
2147 struct arg_dev_net adn = {
2148 .dev = dev,
2149 .net = net,
2150 };
2151
2152 fib6_clean_all(net, fib6_ifdown, 0, &adn);
2153 icmp6_clean_all(fib6_ifdown, &adn);
2154 }
2155
2156 struct rt6_mtu_change_arg {
2157 struct net_device *dev;
2158 unsigned int mtu;
2159 };
2160
2161 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2162 {
2163 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2164 struct inet6_dev *idev;
2165
2166 /* In IPv6, PMTU discovery is not optional,
2167 so the RTAX_MTU lock cannot disable it.
2168 We still use this lock to block changes
2169 caused by addrconf/ndisc.
2170 */
2171
2172 idev = __in6_dev_get(arg->dev);
2173 if (!idev)
2174 return 0;
2175
2176 /* For an administrative MTU increase, there is no way to discover
2177 an IPv6 PMTU increase, so the PMTU increase must be applied here.
2178 Since RFC 1981 does not cover administratively raised MTUs
2179 (e.g. jumbo frames), updating the PMTU on an increase is a MUST.
2180 */
2181 /*
2182 If the new MTU is less than the route PMTU, the new MTU will be
2183 the lowest MTU in the path; update the route PMTU to reflect the
2184 decrease.  If the new MTU is greater than the route PMTU, and the
2185 old MTU was the lowest MTU in the path, update the route PMTU to
2186 reflect the increase.  In that case, if another node on the path
2187 still has the lowest MTU, a Packet Too Big message will trigger
2188 PMTU discovery again.
2189 */
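/* Worked example (illustrative values only): if the device MTU is raised
 * from 1500 to 9000, a route whose current MTU is 1500 is bumped to 9000
 * only because 1500 matches idev->cnf.mtu6 (still the pre-change value
 * when this walk runs, assuming the caller updates cnf.mtu6 afterwards);
 * a PMTU of 1280 learned from a remote Packet Too Big message is left
 * alone.  Lowering the device MTU to 1280 instead caps every route MTU
 * that exceeds it.
 */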
2190 if (rt->dst.dev == arg->dev &&
2191 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2192 (dst_mtu(&rt->dst) >= arg->mtu ||
2193 (dst_mtu(&rt->dst) < arg->mtu &&
2194 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2195 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2196 }
2197 return 0;
2198 }
2199
2200 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2201 {
2202 struct rt6_mtu_change_arg arg = {
2203 .dev = dev,
2204 .mtu = mtu,
2205 };
2206
2207 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2208 }
2209
2210 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2211 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
2212 [RTA_OIF] = { .type = NLA_U32 },
2213 [RTA_IIF] = { .type = NLA_U32 },
2214 [RTA_PRIORITY] = { .type = NLA_U32 },
2215 [RTA_METRICS] = { .type = NLA_NESTED },
2216 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
2217 };
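/* Attributes without an entry here (RTA_DST, RTA_SRC, RTA_PREFSRC,
 * RTA_TABLE) are still accepted; rtm_to_fib6_config() below checks the
 * RTA_DST/RTA_SRC payload lengths itself against the prefix lengths
 * announced in the rtmsg header.
 */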
2218
2219 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2220 struct fib6_config *cfg)
2221 {
2222 struct rtmsg *rtm;
2223 struct nlattr *tb[RTA_MAX+1];
2224 int err;
2225
2226 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2227 if (err < 0)
2228 goto errout;
2229
2230 err = -EINVAL;
2231 rtm = nlmsg_data(nlh);
2232 memset(cfg, 0, sizeof(*cfg));
2233
2234 cfg->fc_table = rtm->rtm_table;
2235 cfg->fc_dst_len = rtm->rtm_dst_len;
2236 cfg->fc_src_len = rtm->rtm_src_len;
2237 cfg->fc_flags = RTF_UP;
2238 cfg->fc_protocol = rtm->rtm_protocol;
2239 cfg->fc_type = rtm->rtm_type;
2240
2241 if (rtm->rtm_type == RTN_UNREACHABLE ||
2242 rtm->rtm_type == RTN_BLACKHOLE ||
2243 rtm->rtm_type == RTN_PROHIBIT ||
2244 rtm->rtm_type == RTN_THROW)
2245 cfg->fc_flags |= RTF_REJECT;
2246
2247 if (rtm->rtm_type == RTN_LOCAL)
2248 cfg->fc_flags |= RTF_LOCAL;
2249
2250 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2251 cfg->fc_nlinfo.nlh = nlh;
2252 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2253
2254 if (tb[RTA_GATEWAY]) {
2255 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2256 cfg->fc_flags |= RTF_GATEWAY;
2257 }
2258
2259 if (tb[RTA_DST]) {
2260 int plen = (rtm->rtm_dst_len + 7) >> 3;
2261
2262 if (nla_len(tb[RTA_DST]) < plen)
2263 goto errout;
2264
2265 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2266 }
2267
2268 if (tb[RTA_SRC]) {
2269 int plen = (rtm->rtm_src_len + 7) >> 3;
2270
2271 if (nla_len(tb[RTA_SRC]) < plen)
2272 goto errout;
2273
2274 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2275 }
2276
2277 if (tb[RTA_PREFSRC])
2278 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2279
2280 if (tb[RTA_OIF])
2281 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2282
2283 if (tb[RTA_PRIORITY])
2284 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2285
2286 if (tb[RTA_METRICS]) {
2287 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2288 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2289 }
2290
2291 if (tb[RTA_TABLE])
2292 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2293
2294 if (tb[RTA_MULTIPATH]) {
2295 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2296 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2297 }
2298
2299 err = 0;
2300 errout:
2301 return err;
2302 }
2303
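/* Each nexthop of an RTA_MULTIPATH request is added or removed below as
 * an individual route.  If an add fails, the loop restarts in delete
 * mode to roll back the nexthops already installed; if a delete fails,
 * the loop keeps going so the remaining nexthops are still tried.
 * For illustration only, such a request is what iproute2 generates for
 * a command like (syntax assumed, not taken from this file):
 *
 *   ip -6 route add 2001:db8::/64 \
 *           nexthop via fe80::1 dev eth0 \
 *           nexthop via fe80::2 dev eth1
 */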
2304 static int ip6_route_multipath(struct fib6_config *cfg, int add)
2305 {
2306 struct fib6_config r_cfg;
2307 struct rtnexthop *rtnh;
2308 int remaining;
2309 int attrlen;
2310 int err = 0, last_err = 0;
2311
2312 beginning:
2313 rtnh = (struct rtnexthop *)cfg->fc_mp;
2314 remaining = cfg->fc_mp_len;
2315
2316 /* Parse a Multipath Entry */
2317 while (rtnh_ok(rtnh, remaining)) {
2318 memcpy(&r_cfg, cfg, sizeof(*cfg));
2319 if (rtnh->rtnh_ifindex)
2320 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2321
2322 attrlen = rtnh_attrlen(rtnh);
2323 if (attrlen > 0) {
2324 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2325
2326 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2327 if (nla) {
2328 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2329 r_cfg.fc_flags |= RTF_GATEWAY;
2330 }
2331 }
2332 err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2333 if (err) {
2334 last_err = err;
2335 /* If we are trying to remove a route, do not stop the
2336 * loop when ip6_route_del() fails (the nexthop may
2337 * already be gone); we should try to remove all nexthops.
2338 */
2339 if (add) {
2340 /* If add fails, we should try to delete all
2341 * next hops that have been already added.
2342 */
2343 add = 0;
2344 goto beginning;
2345 }
2346 }
2347 /* Because each route is added as a single route, we remove
2348 * this flag after the first nexthop (if there is a collision,
2349 * we have already failed to add the first nexthop:
2350 * fib6_add_rt2node() has rejected it).
2351 */
2352 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
2353 rtnh = rtnh_next(rtnh, &remaining);
2354 }
2355
2356 return last_err;
2357 }
2358
2359 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2360 {
2361 struct fib6_config cfg;
2362 int err;
2363
2364 err = rtm_to_fib6_config(skb, nlh, &cfg);
2365 if (err < 0)
2366 return err;
2367
2368 if (cfg.fc_mp)
2369 return ip6_route_multipath(&cfg, 0);
2370 else
2371 return ip6_route_del(&cfg);
2372 }
2373
2374 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2375 {
2376 struct fib6_config cfg;
2377 int err;
2378
2379 err = rtm_to_fib6_config(skb, nlh, &cfg);
2380 if (err < 0)
2381 return err;
2382
2383 if (cfg.fc_mp)
2384 return ip6_route_multipath(&cfg, 1);
2385 else
2386 return ip6_route_add(&cfg);
2387 }
2388
2389 static inline size_t rt6_nlmsg_size(void)
2390 {
2391 return NLMSG_ALIGN(sizeof(struct rtmsg))
2392 + nla_total_size(16) /* RTA_SRC */
2393 + nla_total_size(16) /* RTA_DST */
2394 + nla_total_size(16) /* RTA_GATEWAY */
2395 + nla_total_size(16) /* RTA_PREFSRC */
2396 + nla_total_size(4) /* RTA_TABLE */
2397 + nla_total_size(4) /* RTA_IIF */
2398 + nla_total_size(4) /* RTA_OIF */
2399 + nla_total_size(4) /* RTA_PRIORITY */
2400 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2401 + nla_total_size(sizeof(struct rta_cacheinfo));
2402 }
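/* rt6_nlmsg_size() is a worst-case estimate covering every attribute
 * rt6_fill_node() can emit; inet6_rt_notify() below sizes its skb with
 * it, which is why an -EMSGSIZE from rt6_fill_node() is treated there
 * as a bug rather than a condition to retry.
 */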
2403
2404 static int rt6_fill_node(struct net *net,
2405 struct sk_buff *skb, struct rt6_info *rt,
2406 struct in6_addr *dst, struct in6_addr *src,
2407 int iif, int type, u32 portid, u32 seq,
2408 int prefix, int nowait, unsigned int flags)
2409 {
2410 struct rtmsg *rtm;
2411 struct nlmsghdr *nlh;
2412 long expires;
2413 u32 table;
2414
2415 if (prefix) { /* user wants prefix routes only */
2416 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2417 /* success since this is not a prefix route */
2418 return 1;
2419 }
2420 }
2421
2422 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2423 if (!nlh)
2424 return -EMSGSIZE;
2425
2426 rtm = nlmsg_data(nlh);
2427 rtm->rtm_family = AF_INET6;
2428 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2429 rtm->rtm_src_len = rt->rt6i_src.plen;
2430 rtm->rtm_tos = 0;
2431 if (rt->rt6i_table)
2432 table = rt->rt6i_table->tb6_id;
2433 else
2434 table = RT6_TABLE_UNSPEC;
2435 rtm->rtm_table = table;
2436 if (nla_put_u32(skb, RTA_TABLE, table))
2437 goto nla_put_failure;
2438 if (rt->rt6i_flags & RTF_REJECT) {
2439 switch (rt->dst.error) {
2440 case -EINVAL:
2441 rtm->rtm_type = RTN_BLACKHOLE;
2442 break;
2443 case -EACCES:
2444 rtm->rtm_type = RTN_PROHIBIT;
2445 break;
2446 case -EAGAIN:
2447 rtm->rtm_type = RTN_THROW;
2448 break;
2449 default:
2450 rtm->rtm_type = RTN_UNREACHABLE;
2451 break;
2452 }
2453 }
2454 else if (rt->rt6i_flags & RTF_LOCAL)
2455 rtm->rtm_type = RTN_LOCAL;
2456 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2457 rtm->rtm_type = RTN_LOCAL;
2458 else
2459 rtm->rtm_type = RTN_UNICAST;
2460 rtm->rtm_flags = 0;
2461 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2462 rtm->rtm_protocol = rt->rt6i_protocol;
2463 if (rt->rt6i_flags & RTF_DYNAMIC)
2464 rtm->rtm_protocol = RTPROT_REDIRECT;
2465 else if (rt->rt6i_flags & RTF_ADDRCONF) {
2466 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2467 rtm->rtm_protocol = RTPROT_RA;
2468 else
2469 rtm->rtm_protocol = RTPROT_KERNEL;
2470 }
2471
2472 if (rt->rt6i_flags & RTF_CACHE)
2473 rtm->rtm_flags |= RTM_F_CLONED;
2474
2475 if (dst) {
2476 if (nla_put(skb, RTA_DST, 16, dst))
2477 goto nla_put_failure;
2478 rtm->rtm_dst_len = 128;
2479 } else if (rtm->rtm_dst_len)
2480 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2481 goto nla_put_failure;
2482 #ifdef CONFIG_IPV6_SUBTREES
2483 if (src) {
2484 if (nla_put(skb, RTA_SRC, 16, src))
2485 goto nla_put_failure;
2486 rtm->rtm_src_len = 128;
2487 } else if (rtm->rtm_src_len &&
2488 nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2489 goto nla_put_failure;
2490 #endif
2491 if (iif) {
2492 #ifdef CONFIG_IPV6_MROUTE
2493 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2494 int err = ip6mr_get_route(net, skb, rtm, nowait);
2495 if (err <= 0) {
2496 if (!nowait) {
2497 if (err == 0)
2498 return 0;
2499 goto nla_put_failure;
2500 } else {
2501 if (err == -EMSGSIZE)
2502 goto nla_put_failure;
2503 }
2504 }
2505 } else
2506 #endif
2507 if (nla_put_u32(skb, RTA_IIF, iif))
2508 goto nla_put_failure;
2509 } else if (dst) {
2510 struct in6_addr saddr_buf;
2511 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2512 nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2513 goto nla_put_failure;
2514 }
2515
2516 if (rt->rt6i_prefsrc.plen) {
2517 struct in6_addr saddr_buf;
2518 saddr_buf = rt->rt6i_prefsrc.addr;
2519 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2520 goto nla_put_failure;
2521 }
2522
2523 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2524 goto nla_put_failure;
2525
2526 if (rt->rt6i_flags & RTF_GATEWAY) {
2527 if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0)
2528 goto nla_put_failure;
2529 }
2530
2531 if (rt->dst.dev &&
2532 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2533 goto nla_put_failure;
2534 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2535 goto nla_put_failure;
2536
2537 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2538
2539 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2540 goto nla_put_failure;
2541
2542 return nlmsg_end(skb, nlh);
2543
2544 nla_put_failure:
2545 nlmsg_cancel(skb, nlh);
2546 return -EMSGSIZE;
2547 }
2548
2549 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2550 {
2551 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2552 int prefix;
2553
2554 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2555 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2556 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2557 } else
2558 prefix = 0;
2559
2560 return rt6_fill_node(arg->net,
2561 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2562 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2563 prefix, 0, NLM_F_MULTI);
2564 }
2565
2566 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2567 {
2568 struct net *net = sock_net(in_skb->sk);
2569 struct nlattr *tb[RTA_MAX+1];
2570 struct rt6_info *rt;
2571 struct sk_buff *skb;
2572 struct rtmsg *rtm;
2573 struct flowi6 fl6;
2574 int err, iif = 0, oif = 0;
2575
2576 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2577 if (err < 0)
2578 goto errout;
2579
2580 err = -EINVAL;
2581 memset(&fl6, 0, sizeof(fl6));
2582
2583 if (tb[RTA_SRC]) {
2584 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2585 goto errout;
2586
2587 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2588 }
2589
2590 if (tb[RTA_DST]) {
2591 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2592 goto errout;
2593
2594 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2595 }
2596
2597 if (tb[RTA_IIF])
2598 iif = nla_get_u32(tb[RTA_IIF]);
2599
2600 if (tb[RTA_OIF])
2601 oif = nla_get_u32(tb[RTA_OIF]);
2602
2603 if (iif) {
2604 struct net_device *dev;
2605 int flags = 0;
2606
2607 dev = __dev_get_by_index(net, iif);
2608 if (!dev) {
2609 err = -ENODEV;
2610 goto errout;
2611 }
2612
2613 fl6.flowi6_iif = iif;
2614
2615 if (!ipv6_addr_any(&fl6.saddr))
2616 flags |= RT6_LOOKUP_F_HAS_SADDR;
2617
2618 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2619 flags);
2620 } else {
2621 fl6.flowi6_oif = oif;
2622
2623 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2624 }
2625
2626 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2627 if (!skb) {
2628 ip6_rt_put(rt);
2629 err = -ENOBUFS;
2630 goto errout;
2631 }
2632
2633 /* Reserve room for dummy headers; this skb can pass
2634 through a good chunk of the routing engine.
2635 */
2636 skb_reset_mac_header(skb);
2637 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2638
2639 skb_dst_set(skb, &rt->dst);
2640
2641 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2642 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2643 nlh->nlmsg_seq, 0, 0, 0);
2644 if (err < 0) {
2645 kfree_skb(skb);
2646 goto errout;
2647 }
2648
2649 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2650 errout:
2651 return err;
2652 }
2653
2654 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2655 {
2656 struct sk_buff *skb;
2657 struct net *net = info->nl_net;
2658 u32 seq;
2659 int err;
2660
2661 err = -ENOBUFS;
2662 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2663
2664 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2665 if (!skb)
2666 goto errout;
2667
2668 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2669 event, info->portid, seq, 0, 0, 0);
2670 if (err < 0) {
2671 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2672 WARN_ON(err == -EMSGSIZE);
2673 kfree_skb(skb);
2674 goto errout;
2675 }
2676 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2677 info->nlh, gfp_any());
2678 return;
2679 errout:
2680 if (err < 0)
2681 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2682 }
2683
2684 static int ip6_route_dev_notify(struct notifier_block *this,
2685 unsigned long event, void *data)
2686 {
2687 struct net_device *dev = (struct net_device *)data;
2688 struct net *net = dev_net(dev);
2689
2690 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2691 net->ipv6.ip6_null_entry->dst.dev = dev;
2692 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2693 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2694 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2695 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2696 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2697 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2698 #endif
2699 }
2700
2701 return NOTIFY_OK;
2702 }
2703
2704 /*
2705 * /proc
2706 */
2707
2708 #ifdef CONFIG_PROC_FS
2709
2710 struct rt6_proc_arg
2711 {
2712 char *buffer;
2713 int offset;
2714 int length;
2715 int skip;
2716 int len;
2717 };
2718
2719 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2720 {
2721 struct seq_file *m = p_arg;
2722
2723 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2724
2725 #ifdef CONFIG_IPV6_SUBTREES
2726 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2727 #else
2728 seq_puts(m, "00000000000000000000000000000000 00 ");
2729 #endif
2730 if (rt->rt6i_flags & RTF_GATEWAY) {
2731 seq_printf(m, "%pi6", &rt->rt6i_gateway);
2732 } else {
2733 seq_puts(m, "00000000000000000000000000000000");
2734 }
2735 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2736 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2737 rt->dst.__use, rt->rt6i_flags,
2738 rt->dst.dev ? rt->dst.dev->name : "");
2739 return 0;
2740 }
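/* Illustration only (values are made up): rt6_info_route() emits one
 * line per route into /proc/net/ipv6_route, of the form
 *
 *   fe800000000000000000000000000000 40 <src/plen> <gateway> 00000100
 *   00000001 00000000 00000001 eth0
 *
 * i.e. destination and prefix length, source and prefix length, gateway,
 * then metric, refcount, use count, flags and device name in hex
 * (shown wrapped here; the real output is a single line).
 */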
2741
2742 static int ipv6_route_show(struct seq_file *m, void *v)
2743 {
2744 struct net *net = (struct net *)m->private;
2745 fib6_clean_all_ro(net, rt6_info_route, 0, m);
2746 return 0;
2747 }
2748
2749 static int ipv6_route_open(struct inode *inode, struct file *file)
2750 {
2751 return single_open_net(inode, file, ipv6_route_show);
2752 }
2753
2754 static const struct file_operations ipv6_route_proc_fops = {
2755 .owner = THIS_MODULE,
2756 .open = ipv6_route_open,
2757 .read = seq_read,
2758 .llseek = seq_lseek,
2759 .release = single_release_net,
2760 };
2761
2762 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2763 {
2764 struct net *net = (struct net *)seq->private;
2765 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2766 net->ipv6.rt6_stats->fib_nodes,
2767 net->ipv6.rt6_stats->fib_route_nodes,
2768 net->ipv6.rt6_stats->fib_rt_alloc,
2769 net->ipv6.rt6_stats->fib_rt_entries,
2770 net->ipv6.rt6_stats->fib_rt_cache,
2771 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2772 net->ipv6.rt6_stats->fib_discarded_routes);
2773
2774 return 0;
2775 }
2776
2777 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2778 {
2779 return single_open_net(inode, file, rt6_stats_seq_show);
2780 }
2781
2782 static const struct file_operations rt6_stats_seq_fops = {
2783 .owner = THIS_MODULE,
2784 .open = rt6_stats_seq_open,
2785 .read = seq_read,
2786 .llseek = seq_lseek,
2787 .release = single_release_net,
2788 };
2789 #endif /* CONFIG_PROC_FS */
2790
2791 #ifdef CONFIG_SYSCTL
2792
2793 static
2794 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2795 void __user *buffer, size_t *lenp, loff_t *ppos)
2796 {
2797 struct net *net;
2798 int delay;
2799 if (!write)
2800 return -EINVAL;
2801
2802 net = (struct net *)ctl->extra1;
2803 delay = net->ipv6.sysctl.flush_delay;
2804 proc_dointvec(ctl, write, buffer, lenp, ppos);
2805 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2806 return 0;
2807 }
2808
2809 ctl_table ipv6_route_table_template[] = {
2810 {
2811 .procname = "flush",
2812 .data = &init_net.ipv6.sysctl.flush_delay,
2813 .maxlen = sizeof(int),
2814 .mode = 0200,
2815 .proc_handler = ipv6_sysctl_rtcache_flush
2816 },
2817 {
2818 .procname = "gc_thresh",
2819 .data = &ip6_dst_ops_template.gc_thresh,
2820 .maxlen = sizeof(int),
2821 .mode = 0644,
2822 .proc_handler = proc_dointvec,
2823 },
2824 {
2825 .procname = "max_size",
2826 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2827 .maxlen = sizeof(int),
2828 .mode = 0644,
2829 .proc_handler = proc_dointvec,
2830 },
2831 {
2832 .procname = "gc_min_interval",
2833 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2834 .maxlen = sizeof(int),
2835 .mode = 0644,
2836 .proc_handler = proc_dointvec_jiffies,
2837 },
2838 {
2839 .procname = "gc_timeout",
2840 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2841 .maxlen = sizeof(int),
2842 .mode = 0644,
2843 .proc_handler = proc_dointvec_jiffies,
2844 },
2845 {
2846 .procname = "gc_interval",
2847 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2848 .maxlen = sizeof(int),
2849 .mode = 0644,
2850 .proc_handler = proc_dointvec_jiffies,
2851 },
2852 {
2853 .procname = "gc_elasticity",
2854 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2855 .maxlen = sizeof(int),
2856 .mode = 0644,
2857 .proc_handler = proc_dointvec,
2858 },
2859 {
2860 .procname = "mtu_expires",
2861 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2862 .maxlen = sizeof(int),
2863 .mode = 0644,
2864 .proc_handler = proc_dointvec_jiffies,
2865 },
2866 {
2867 .procname = "min_adv_mss",
2868 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2869 .maxlen = sizeof(int),
2870 .mode = 0644,
2871 .proc_handler = proc_dointvec,
2872 },
2873 {
2874 .procname = "gc_min_interval_ms",
2875 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2876 .maxlen = sizeof(int),
2877 .mode = 0644,
2878 .proc_handler = proc_dointvec_ms_jiffies,
2879 },
2880 { }
2881 };
2882
2883 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2884 {
2885 struct ctl_table *table;
2886
2887 table = kmemdup(ipv6_route_table_template,
2888 sizeof(ipv6_route_table_template),
2889 GFP_KERNEL);
2890
2891 if (table) {
2892 table[0].data = &net->ipv6.sysctl.flush_delay;
2893 table[0].extra1 = net;
2894 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2895 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2896 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2897 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2898 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2899 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2900 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2901 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2902 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2903
2904 /* Don't export sysctls to unprivileged users */
2905 if (net->user_ns != &init_user_ns)
2906 table[0].procname = NULL;
2907 }
2908
2909 return table;
2910 }
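/* The table[N].data fixups above must stay in step with the ordering of
 * ipv6_route_table_template.  Once registered, these knobs appear under
 * /proc/sys/net/ipv6/route/ (path assumed from the usual registration in
 * sysctl_net_ipv6.c, not from this file), e.g.:
 *
 *   sysctl net.ipv6.route.max_size
 *   echo 8192 > /proc/sys/net/ipv6/route/max_size
 */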
2911 #endif
2912
2913 static int __net_init ip6_route_net_init(struct net *net)
2914 {
2915 int ret = -ENOMEM;
2916
2917 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2918 sizeof(net->ipv6.ip6_dst_ops));
2919
2920 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2921 goto out_ip6_dst_ops;
2922
2923 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2924 sizeof(*net->ipv6.ip6_null_entry),
2925 GFP_KERNEL);
2926 if (!net->ipv6.ip6_null_entry)
2927 goto out_ip6_dst_entries;
2928 net->ipv6.ip6_null_entry->dst.path =
2929 (struct dst_entry *)net->ipv6.ip6_null_entry;
2930 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2931 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2932 ip6_template_metrics, true);
2933
2934 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2935 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2936 sizeof(*net->ipv6.ip6_prohibit_entry),
2937 GFP_KERNEL);
2938 if (!net->ipv6.ip6_prohibit_entry)
2939 goto out_ip6_null_entry;
2940 net->ipv6.ip6_prohibit_entry->dst.path =
2941 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2942 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2943 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2944 ip6_template_metrics, true);
2945
2946 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2947 sizeof(*net->ipv6.ip6_blk_hole_entry),
2948 GFP_KERNEL);
2949 if (!net->ipv6.ip6_blk_hole_entry)
2950 goto out_ip6_prohibit_entry;
2951 net->ipv6.ip6_blk_hole_entry->dst.path =
2952 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2953 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2954 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2955 ip6_template_metrics, true);
2956 #endif
2957
2958 net->ipv6.sysctl.flush_delay = 0;
2959 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2960 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2961 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2962 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2963 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2964 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2965 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2966
2967 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2968
2969 ret = 0;
2970 out:
2971 return ret;
2972
2973 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2974 out_ip6_prohibit_entry:
2975 kfree(net->ipv6.ip6_prohibit_entry);
2976 out_ip6_null_entry:
2977 kfree(net->ipv6.ip6_null_entry);
2978 #endif
2979 out_ip6_dst_entries:
2980 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2981 out_ip6_dst_ops:
2982 goto out;
2983 }
2984
2985 static void __net_exit ip6_route_net_exit(struct net *net)
2986 {
2987 kfree(net->ipv6.ip6_null_entry);
2988 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2989 kfree(net->ipv6.ip6_prohibit_entry);
2990 kfree(net->ipv6.ip6_blk_hole_entry);
2991 #endif
2992 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2993 }
2994
2995 static int __net_init ip6_route_net_init_late(struct net *net)
2996 {
2997 #ifdef CONFIG_PROC_FS
2998 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
2999 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3000 #endif
3001 return 0;
3002 }
3003
3004 static void __net_exit ip6_route_net_exit_late(struct net *net)
3005 {
3006 #ifdef CONFIG_PROC_FS
3007 remove_proc_entry("ipv6_route", net->proc_net);
3008 remove_proc_entry("rt6_stats", net->proc_net);
3009 #endif
3010 }
3011
3012 static struct pernet_operations ip6_route_net_ops = {
3013 .init = ip6_route_net_init,
3014 .exit = ip6_route_net_exit,
3015 };
3016
3017 static int __net_init ipv6_inetpeer_init(struct net *net)
3018 {
3019 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3020
3021 if (!bp)
3022 return -ENOMEM;
3023 inet_peer_base_init(bp);
3024 net->ipv6.peers = bp;
3025 return 0;
3026 }
3027
3028 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3029 {
3030 struct inet_peer_base *bp = net->ipv6.peers;
3031
3032 net->ipv6.peers = NULL;
3033 inetpeer_invalidate_tree(bp);
3034 kfree(bp);
3035 }
3036
3037 static struct pernet_operations ipv6_inetpeer_ops = {
3038 .init = ipv6_inetpeer_init,
3039 .exit = ipv6_inetpeer_exit,
3040 };
3041
3042 static struct pernet_operations ip6_route_net_late_ops = {
3043 .init = ip6_route_net_init_late,
3044 .exit = ip6_route_net_exit_late,
3045 };
3046
3047 static struct notifier_block ip6_route_dev_notifier = {
3048 .notifier_call = ip6_route_dev_notify,
3049 .priority = 0,
3050 };
3051
3052 int __init ip6_route_init(void)
3053 {
3054 int ret;
3055
3056 ret = -ENOMEM;
3057 ip6_dst_ops_template.kmem_cachep =
3058 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3059 SLAB_HWCACHE_ALIGN, NULL);
3060 if (!ip6_dst_ops_template.kmem_cachep)
3061 goto out;
3062
3063 ret = dst_entries_init(&ip6_dst_blackhole_ops);
3064 if (ret)
3065 goto out_kmem_cache;
3066
3067 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3068 if (ret)
3069 goto out_dst_entries;
3070
3071 ret = register_pernet_subsys(&ip6_route_net_ops);
3072 if (ret)
3073 goto out_register_inetpeer;
3074
3075 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3076
3077 /* The loopback device is registered before this portion of code runs,
3078 * so the loopback reference in rt6_info is not taken there; take it
3079 * manually for init_net. */
3080 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3081 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3082 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3083 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3084 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3085 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3086 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3087 #endif
3088 ret = fib6_init();
3089 if (ret)
3090 goto out_register_subsys;
3091
3092 ret = xfrm6_init();
3093 if (ret)
3094 goto out_fib6_init;
3095
3096 ret = fib6_rules_init();
3097 if (ret)
3098 goto xfrm6_init;
3099
3100 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3101 if (ret)
3102 goto fib6_rules_init;
3103
3104 ret = -ENOBUFS;
3105 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3106 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3107 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3108 goto out_register_late_subsys;
3109
3110 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3111 if (ret)
3112 goto out_register_late_subsys;
3113
3114 out:
3115 return ret;
3116
3117 out_register_late_subsys:
3118 unregister_pernet_subsys(&ip6_route_net_late_ops);
3119 fib6_rules_init:
3120 fib6_rules_cleanup();
3121 xfrm6_init:
3122 xfrm6_fini();
3123 out_fib6_init:
3124 fib6_gc_cleanup();
3125 out_register_subsys:
3126 unregister_pernet_subsys(&ip6_route_net_ops);
3127 out_register_inetpeer:
3128 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3129 out_dst_entries:
3130 dst_entries_destroy(&ip6_dst_blackhole_ops);
3131 out_kmem_cache:
3132 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3133 goto out;
3134 }
3135
3136 void ip6_route_cleanup(void)
3137 {
3138 unregister_netdevice_notifier(&ip6_route_dev_notifier);
3139 unregister_pernet_subsys(&ip6_route_net_late_ops);
3140 fib6_rules_cleanup();
3141 xfrm6_fini();
3142 fib6_gc_cleanup();
3143 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3144 unregister_pernet_subsys(&ip6_route_net_ops);
3145 dst_entries_destroy(&ip6_dst_blackhole_ops);
3146 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3147 }