1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14 /* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
23 * Ville Nuorvala
24 * Fixed routing subtrees.
25 */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60
61 #include <asm/uaccess.h>
62
63 #ifdef CONFIG_SYSCTL
64 #include <linux/sysctl.h>
65 #endif
66
67 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
68 const struct in6_addr *dest);
69 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
70 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
71 static unsigned int ip6_mtu(const struct dst_entry *dst);
72 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
73 static void ip6_dst_destroy(struct dst_entry *);
74 static void ip6_dst_ifdown(struct dst_entry *,
75 struct net_device *dev, int how);
76 static int ip6_dst_gc(struct dst_ops *ops);
77
78 static int ip6_pkt_discard(struct sk_buff *skb);
79 static int ip6_pkt_discard_out(struct sk_buff *skb);
80 static void ip6_link_failure(struct sk_buff *skb);
81 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
82 struct sk_buff *skb, u32 mtu);
83 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
84 struct sk_buff *skb);
85
86 #ifdef CONFIG_IPV6_ROUTE_INFO
87 static struct rt6_info *rt6_add_route_info(struct net *net,
88 const struct in6_addr *prefix, int prefixlen,
89 const struct in6_addr *gwaddr, int ifindex,
90 unsigned int pref);
91 static struct rt6_info *rt6_get_route_info(struct net *net,
92 const struct in6_addr *prefix, int prefixlen,
93 const struct in6_addr *gwaddr, int ifindex);
94 #endif
95
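/*
 * dst metrics are copy-on-write here: host routes start out sharing a
 * read-only metrics array, and the first writer seeds the inet_peer's
 * private array from the old values and swaps it in with cmpxchg().  If
 * another CPU wins that race, we return whatever pointer the winner
 * installed, unless that pointer is still marked read-only.
 */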
96 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
97 {
98 struct rt6_info *rt = (struct rt6_info *) dst;
99 struct inet_peer *peer;
100 u32 *p = NULL;
101
102 if (!(rt->dst.flags & DST_HOST))
103 return NULL;
104
105 peer = rt6_get_peer_create(rt);
106 if (peer) {
107 u32 *old_p = __DST_METRICS_PTR(old);
108 unsigned long prev, new;
109
110 p = peer->metrics;
111 if (inet_metrics_new(peer))
112 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
113
114 new = (unsigned long) p;
115 prev = cmpxchg(&dst->_metrics, old, new);
116
117 if (prev != old) {
118 p = __DST_METRICS_PTR(prev);
119 if (prev & DST_METRICS_READ_ONLY)
120 p = NULL;
121 }
122 }
123 return p;
124 }
125
126 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
127 struct sk_buff *skb,
128 const void *daddr)
129 {
130 struct in6_addr *p = &rt->rt6i_gateway;
131
132 if (!ipv6_addr_any(p))
133 return (const void *) p;
134 else if (skb)
135 return &ipv6_hdr(skb)->daddr;
136 return daddr;
137 }
138
139 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
140 struct sk_buff *skb,
141 const void *daddr)
142 {
143 struct rt6_info *rt = (struct rt6_info *) dst;
144 struct neighbour *n;
145
146 daddr = choose_neigh_daddr(rt, skb, daddr);
147 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
148 if (n)
149 return n;
150 return neigh_create(&nd_tbl, daddr, dst->dev);
151 }
152
153 static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
154 {
155 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
156 if (!n) {
157 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
158 if (IS_ERR(n))
159 return PTR_ERR(n);
160 }
161 rt->n = n;
162
163 return 0;
164 }
165
166 static struct dst_ops ip6_dst_ops_template = {
167 .family = AF_INET6,
168 .protocol = cpu_to_be16(ETH_P_IPV6),
169 .gc = ip6_dst_gc,
170 .gc_thresh = 1024,
171 .check = ip6_dst_check,
172 .default_advmss = ip6_default_advmss,
173 .mtu = ip6_mtu,
174 .cow_metrics = ipv6_cow_metrics,
175 .destroy = ip6_dst_destroy,
176 .ifdown = ip6_dst_ifdown,
177 .negative_advice = ip6_negative_advice,
178 .link_failure = ip6_link_failure,
179 .update_pmtu = ip6_rt_update_pmtu,
180 .redirect = rt6_do_redirect,
181 .local_out = __ip6_local_out,
182 .neigh_lookup = ip6_neigh_lookup,
183 };
184
185 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
186 {
187 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
188
189 return mtu ? : dst->dev->mtu;
190 }
191
192 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
193 struct sk_buff *skb, u32 mtu)
194 {
195 }
196
197 static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
198 struct sk_buff *skb)
199 {
200 }
201
202 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
203 unsigned long old)
204 {
205 return NULL;
206 }
207
208 static struct dst_ops ip6_dst_blackhole_ops = {
209 .family = AF_INET6,
210 .protocol = cpu_to_be16(ETH_P_IPV6),
211 .destroy = ip6_dst_destroy,
212 .check = ip6_dst_check,
213 .mtu = ip6_blackhole_mtu,
214 .default_advmss = ip6_default_advmss,
215 .update_pmtu = ip6_rt_blackhole_update_pmtu,
216 .redirect = ip6_rt_blackhole_redirect,
217 .cow_metrics = ip6_rt_blackhole_cow_metrics,
218 .neigh_lookup = ip6_neigh_lookup,
219 };
220
221 static const u32 ip6_template_metrics[RTAX_MAX] = {
222 [RTAX_HOPLIMIT - 1] = 255,
223 };
224
225 static const struct rt6_info ip6_null_entry_template = {
226 .dst = {
227 .__refcnt = ATOMIC_INIT(1),
228 .__use = 1,
229 .obsolete = DST_OBSOLETE_FORCE_CHK,
230 .error = -ENETUNREACH,
231 .input = ip6_pkt_discard,
232 .output = ip6_pkt_discard_out,
233 },
234 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
235 .rt6i_protocol = RTPROT_KERNEL,
236 .rt6i_metric = ~(u32) 0,
237 .rt6i_ref = ATOMIC_INIT(1),
238 };
239
240 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
241
242 static int ip6_pkt_prohibit(struct sk_buff *skb);
243 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
244
245 static const struct rt6_info ip6_prohibit_entry_template = {
246 .dst = {
247 .__refcnt = ATOMIC_INIT(1),
248 .__use = 1,
249 .obsolete = DST_OBSOLETE_FORCE_CHK,
250 .error = -EACCES,
251 .input = ip6_pkt_prohibit,
252 .output = ip6_pkt_prohibit_out,
253 },
254 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
255 .rt6i_protocol = RTPROT_KERNEL,
256 .rt6i_metric = ~(u32) 0,
257 .rt6i_ref = ATOMIC_INIT(1),
258 };
259
260 static const struct rt6_info ip6_blk_hole_entry_template = {
261 .dst = {
262 .__refcnt = ATOMIC_INIT(1),
263 .__use = 1,
264 .obsolete = DST_OBSOLETE_FORCE_CHK,
265 .error = -EINVAL,
266 .input = dst_discard,
267 .output = dst_discard,
268 },
269 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
270 .rt6i_protocol = RTPROT_KERNEL,
271 .rt6i_metric = ~(u32) 0,
272 .rt6i_ref = ATOMIC_INIT(1),
273 };
274
275 #endif
276
277 /* allocate dst with ip6_dst_ops */
278 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
279 struct net_device *dev,
280 int flags,
281 struct fib6_table *table)
282 {
283 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
284 0, DST_OBSOLETE_FORCE_CHK, flags);
285
286 if (rt) {
287 struct dst_entry *dst = &rt->dst;
288
289 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
290 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
291 rt->rt6i_genid = rt_genid(net);
292 }
293 return rt;
294 }
295
296 static void ip6_dst_destroy(struct dst_entry *dst)
297 {
298 struct rt6_info *rt = (struct rt6_info *)dst;
299 struct inet6_dev *idev = rt->rt6i_idev;
300
301 if (rt->n)
302 neigh_release(rt->n);
303
304 if (!(rt->dst.flags & DST_HOST))
305 dst_destroy_metrics_generic(dst);
306
307 if (idev) {
308 rt->rt6i_idev = NULL;
309 in6_dev_put(idev);
310 }
311
312 if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
313 dst_release(dst->from);
314
315 if (rt6_has_peer(rt)) {
316 struct inet_peer *peer = rt6_peer_ptr(rt);
317 inet_putpeer(peer);
318 }
319 }
320
321 static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
322
323 static u32 rt6_peer_genid(void)
324 {
325 return atomic_read(&__rt6_peer_genid);
326 }
327
328 void rt6_bind_peer(struct rt6_info *rt, int create)
329 {
330 struct inet_peer_base *base;
331 struct inet_peer *peer;
332
333 base = inetpeer_base_ptr(rt->_rt6i_peer);
334 if (!base)
335 return;
336
337 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
338 if (peer) {
339 if (!rt6_set_peer(rt, peer))
340 inet_putpeer(peer);
341 else
342 rt->rt6i_peer_genid = rt6_peer_genid();
343 }
344 }
345
346 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
347 int how)
348 {
349 struct rt6_info *rt = (struct rt6_info *)dst;
350 struct inet6_dev *idev = rt->rt6i_idev;
351 struct net_device *loopback_dev =
352 dev_net(dev)->loopback_dev;
353
354 if (dev != loopback_dev) {
355 if (idev && idev->dev == dev) {
356 struct inet6_dev *loopback_idev =
357 in6_dev_get(loopback_dev);
358 if (loopback_idev) {
359 rt->rt6i_idev = loopback_idev;
360 in6_dev_put(idev);
361 }
362 }
363 if (rt->n && rt->n->dev == dev) {
364 rt->n->dev = loopback_dev;
365 dev_hold(loopback_dev);
366 dev_put(dev);
367 }
368 }
369 }
370
371 static bool rt6_check_expired(const struct rt6_info *rt)
372 {
373 if (rt->rt6i_flags & RTF_EXPIRES) {
374 if (time_after(jiffies, rt->dst.expires))
375 return true;
376 } else if (rt->dst.from) {
377 return rt6_check_expired((struct rt6_info *) rt->dst.from);
378 }
379 return false;
380 }
381
382 static bool rt6_need_strict(const struct in6_addr *daddr)
383 {
384 return ipv6_addr_type(daddr) &
385 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
386 }
387
388 /*
389 * Route lookup. The relevant table->tb6_lock is assumed to be held.
390 */
391
392 static inline struct rt6_info *rt6_device_match(struct net *net,
393 struct rt6_info *rt,
394 const struct in6_addr *saddr,
395 int oif,
396 int flags)
397 {
398 struct rt6_info *local = NULL;
399 struct rt6_info *sprt;
400
401 if (!oif && ipv6_addr_any(saddr))
402 goto out;
403
404 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
405 struct net_device *dev = sprt->dst.dev;
406
407 if (oif) {
408 if (dev->ifindex == oif)
409 return sprt;
410 if (dev->flags & IFF_LOOPBACK) {
411 if (!sprt->rt6i_idev ||
412 sprt->rt6i_idev->dev->ifindex != oif) {
413 if (flags & RT6_LOOKUP_F_IFACE && oif)
414 continue;
415 if (local && (!oif ||
416 local->rt6i_idev->dev->ifindex == oif))
417 continue;
418 }
419 local = sprt;
420 }
421 } else {
422 if (ipv6_chk_addr(net, saddr, dev,
423 flags & RT6_LOOKUP_F_IFACE))
424 return sprt;
425 }
426 }
427
428 if (oif) {
429 if (local)
430 return local;
431
432 if (flags & RT6_LOOKUP_F_IFACE)
433 return net->ipv6.ip6_null_entry;
434 }
435 out:
436 return rt;
437 }
438
439 #ifdef CONFIG_IPV6_ROUTER_PREF
440 static void rt6_probe(struct rt6_info *rt)
441 {
442 struct neighbour *neigh;
443 /*
444 * Okay, this may not be the most appropriate place
445 * for this, but we need to verify that the router is
446 * really reachable; aka Router Reachability Probing.
447 *
448 * Router Reachability Probe MUST be rate-limited
449 * to no more than one per minute.
450 */
451 neigh = rt ? rt->n : NULL;
452 if (!neigh || (neigh->nud_state & NUD_VALID))
453 return;
454 read_lock_bh(&neigh->lock);
455 if (!(neigh->nud_state & NUD_VALID) &&
456 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
457 struct in6_addr mcaddr;
458 struct in6_addr *target;
459
460 neigh->updated = jiffies;
461 read_unlock_bh(&neigh->lock);
462
463 target = (struct in6_addr *)&neigh->primary_key;
464 addrconf_addr_solict_mult(target, &mcaddr);
465 ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
466 } else {
467 read_unlock_bh(&neigh->lock);
468 }
469 }
470 #else
471 static inline void rt6_probe(struct rt6_info *rt)
472 {
473 }
474 #endif
475
476 /*
477 * Default Router Selection (RFC 2461 6.3.6)
478 */
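/*
 * Scoring sketch, read from the helpers below: rt6_check_dev() returns 2
 * when no oif is given or the device matches it, 1 for a loopback device
 * whose idev matches the oif, and 0 otherwise; rt6_check_neigh() returns 2
 * for a NUD_VALID neighbour, 0 when the neighbour is missing (or has
 * failed, with router preferences enabled), and 1 otherwise.
 * rt6_score_route() then ORs in the decoded router-preference bits
 * (shifted left by 2) and returns -1 when the strict flags demand an
 * interface match or reachability that the route cannot provide.
 */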
479 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
480 {
481 struct net_device *dev = rt->dst.dev;
482 if (!oif || dev->ifindex == oif)
483 return 2;
484 if ((dev->flags & IFF_LOOPBACK) &&
485 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
486 return 1;
487 return 0;
488 }
489
490 static inline int rt6_check_neigh(struct rt6_info *rt)
491 {
492 struct neighbour *neigh;
493 int m;
494
495 neigh = rt->n;
496 if (rt->rt6i_flags & RTF_NONEXTHOP ||
497 !(rt->rt6i_flags & RTF_GATEWAY))
498 m = 1;
499 else if (neigh) {
500 read_lock_bh(&neigh->lock);
501 if (neigh->nud_state & NUD_VALID)
502 m = 2;
503 #ifdef CONFIG_IPV6_ROUTER_PREF
504 else if (neigh->nud_state & NUD_FAILED)
505 m = 0;
506 #endif
507 else
508 m = 1;
509 read_unlock_bh(&neigh->lock);
510 } else
511 m = 0;
512 return m;
513 }
514
515 static int rt6_score_route(struct rt6_info *rt, int oif,
516 int strict)
517 {
518 int m, n;
519
520 m = rt6_check_dev(rt, oif);
521 if (!m && (strict & RT6_LOOKUP_F_IFACE))
522 return -1;
523 #ifdef CONFIG_IPV6_ROUTER_PREF
524 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
525 #endif
526 n = rt6_check_neigh(rt);
527 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
528 return -1;
529 return m;
530 }
531
532 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
533 int *mpri, struct rt6_info *match)
534 {
535 int m;
536
537 if (rt6_check_expired(rt))
538 goto out;
539
540 m = rt6_score_route(rt, oif, strict);
541 if (m < 0)
542 goto out;
543
544 if (m > *mpri) {
545 if (strict & RT6_LOOKUP_F_REACHABLE)
546 rt6_probe(match);
547 *mpri = m;
548 match = rt;
549 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
550 rt6_probe(rt);
551 }
552
553 out:
554 return match;
555 }
556
557 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
558 struct rt6_info *rr_head,
559 u32 metric, int oif, int strict)
560 {
561 struct rt6_info *rt, *match;
562 int mpri = -1;
563
564 match = NULL;
565 for (rt = rr_head; rt && rt->rt6i_metric == metric;
566 rt = rt->dst.rt6_next)
567 match = find_match(rt, oif, strict, &mpri, match);
568 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
569 rt = rt->dst.rt6_next)
570 match = find_match(rt, oif, strict, &mpri, match);
571
572 return match;
573 }
574
575 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
576 {
577 struct rt6_info *match, *rt0;
578 struct net *net;
579
580 rt0 = fn->rr_ptr;
581 if (!rt0)
582 fn->rr_ptr = rt0 = fn->leaf;
583
584 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
585
586 if (!match &&
587 (strict & RT6_LOOKUP_F_REACHABLE)) {
588 struct rt6_info *next = rt0->dst.rt6_next;
589
590 /* no entries matched; do round-robin */
591 if (!next || next->rt6i_metric != rt0->rt6i_metric)
592 next = fn->leaf;
593
594 if (next != rt0)
595 fn->rr_ptr = next;
596 }
597
598 net = dev_net(rt0->dst.dev);
599 return match ? match : net->ipv6.ip6_null_entry;
600 }
601
602 #ifdef CONFIG_IPV6_ROUTE_INFO
603 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
604 const struct in6_addr *gwaddr)
605 {
606 struct net *net = dev_net(dev);
607 struct route_info *rinfo = (struct route_info *) opt;
608 struct in6_addr prefix_buf, *prefix;
609 unsigned int pref;
610 unsigned long lifetime;
611 struct rt6_info *rt;
612
613 if (len < sizeof(struct route_info)) {
614 return -EINVAL;
615 }
616
617 /* Sanity check for prefix_len and length */
618 if (rinfo->length > 3) {
619 return -EINVAL;
620 } else if (rinfo->prefix_len > 128) {
621 return -EINVAL;
622 } else if (rinfo->prefix_len > 64) {
623 if (rinfo->length < 2) {
624 return -EINVAL;
625 }
626 } else if (rinfo->prefix_len > 0) {
627 if (rinfo->length < 1) {
628 return -EINVAL;
629 }
630 }
631
632 pref = rinfo->route_pref;
633 if (pref == ICMPV6_ROUTER_PREF_INVALID)
634 return -EINVAL;
635
636 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
637
638 if (rinfo->length == 3)
639 prefix = (struct in6_addr *)rinfo->prefix;
640 else {
641 /* this function is safe */
642 ipv6_addr_prefix(&prefix_buf,
643 (struct in6_addr *)rinfo->prefix,
644 rinfo->prefix_len);
645 prefix = &prefix_buf;
646 }
647
648 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
649 dev->ifindex);
650
651 if (rt && !lifetime) {
652 ip6_del_rt(rt);
653 rt = NULL;
654 }
655
656 if (!rt && lifetime)
657 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
658 pref);
659 else if (rt)
660 rt->rt6i_flags = RTF_ROUTEINFO |
661 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
662
663 if (rt) {
664 if (!addrconf_finite_timeout(lifetime))
665 rt6_clean_expires(rt);
666 else
667 rt6_set_expires(rt, jiffies + HZ * lifetime);
668
669 dst_release(&rt->dst);
670 }
671 return 0;
672 }
673 #endif
674
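/*
 * The BACKTRACK() macro below implements the tree backtracking shared by
 * the lookup routines: when the match so far is the null entry, walk back
 * up the fib6 tree, descending into a parent's source-routed subtree where
 * one exists, until a node carrying route info (RTN_RTINFO) is reached,
 * then jump to the caller's "restart" label; hitting the tree root jumps
 * to "out" instead.  Callers therefore look roughly like this (sketch):
 *
 *	restart:
 *		rt = <match at fn>;
 *		BACKTRACK(net, &fl6->saddr);
 *	out:
 *		<take a reference and unlock>
 */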
675 #define BACKTRACK(__net, saddr) \
676 do { \
677 if (rt == __net->ipv6.ip6_null_entry) { \
678 struct fib6_node *pn; \
679 while (1) { \
680 if (fn->fn_flags & RTN_TL_ROOT) \
681 goto out; \
682 pn = fn->parent; \
683 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
684 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
685 else \
686 fn = pn; \
687 if (fn->fn_flags & RTN_RTINFO) \
688 goto restart; \
689 } \
690 } \
691 } while (0)
692
693 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
694 struct fib6_table *table,
695 struct flowi6 *fl6, int flags)
696 {
697 struct fib6_node *fn;
698 struct rt6_info *rt;
699
700 read_lock_bh(&table->tb6_lock);
701 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
702 restart:
703 rt = fn->leaf;
704 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
705 BACKTRACK(net, &fl6->saddr);
706 out:
707 dst_use(&rt->dst, jiffies);
708 read_unlock_bh(&table->tb6_lock);
709 return rt;
710
711 }
712
713 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
714 int flags)
715 {
716 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
717 }
718 EXPORT_SYMBOL_GPL(ip6_route_lookup);
719
720 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
721 const struct in6_addr *saddr, int oif, int strict)
722 {
723 struct flowi6 fl6 = {
724 .flowi6_oif = oif,
725 .daddr = *daddr,
726 };
727 struct dst_entry *dst;
728 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
729
730 if (saddr) {
731 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
732 flags |= RT6_LOOKUP_F_HAS_SADDR;
733 }
734
735 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
736 if (dst->error == 0)
737 return (struct rt6_info *) dst;
738
739 dst_release(dst);
740
741 return NULL;
742 }
743
744 EXPORT_SYMBOL(rt6_lookup);
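/*
 * rt6_lookup() returns its result with the dst reference held, so callers
 * must drop it with dst_release() when done.  A minimal usage sketch,
 * mirroring the gateway validation in ip6_route_add() further down:
 *
 *	struct rt6_info *grt;
 *
 *	grt = rt6_lookup(net, gw_addr, NULL, ifindex, 1);
 *	if (grt) {
 *		... inspect grt->dst.dev and grt->rt6i_flags ...
 *		dst_release(&grt->dst);
 *	}
 */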
745
746 /* ip6_ins_rt is called with table->tb6_lock NOT held (free).
747 It takes a new route entry; if the addition fails for any reason,
748 the route is freed. In any case, if the caller does not hold a
749 reference, it may be destroyed.
750 */
751
752 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
753 {
754 int err;
755 struct fib6_table *table;
756
757 table = rt->rt6i_table;
758 write_lock_bh(&table->tb6_lock);
759 err = fib6_add(&table->tb6_root, rt, info);
760 write_unlock_bh(&table->tb6_lock);
761
762 return err;
763 }
764
765 int ip6_ins_rt(struct rt6_info *rt)
766 {
767 struct nl_info info = {
768 .nl_net = dev_net(rt->dst.dev),
769 };
770 return __ip6_ins_rt(rt, &info);
771 }
772
773 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
774 const struct in6_addr *daddr,
775 const struct in6_addr *saddr)
776 {
777 struct rt6_info *rt;
778
779 /*
780 * Clone the route.
781 */
782
783 rt = ip6_rt_copy(ort, daddr);
784
785 if (rt) {
786 int attempts = !in_softirq();
787
788 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
789 if (ort->rt6i_dst.plen != 128 &&
790 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
791 rt->rt6i_flags |= RTF_ANYCAST;
792 rt->rt6i_gateway = *daddr;
793 }
794
795 rt->rt6i_flags |= RTF_CACHE;
796
797 #ifdef CONFIG_IPV6_SUBTREES
798 if (rt->rt6i_src.plen && saddr) {
799 rt->rt6i_src.addr = *saddr;
800 rt->rt6i_src.plen = 128;
801 }
802 #endif
803
804 retry:
805 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
806 struct net *net = dev_net(rt->dst.dev);
807 int saved_rt_min_interval =
808 net->ipv6.sysctl.ip6_rt_gc_min_interval;
809 int saved_rt_elasticity =
810 net->ipv6.sysctl.ip6_rt_gc_elasticity;
811
812 if (attempts-- > 0) {
813 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
814 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
815
816 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
817
818 net->ipv6.sysctl.ip6_rt_gc_elasticity =
819 saved_rt_elasticity;
820 net->ipv6.sysctl.ip6_rt_gc_min_interval =
821 saved_rt_min_interval;
822 goto retry;
823 }
824
825 net_warn_ratelimited("Neighbour table overflow\n");
826 dst_free(&rt->dst);
827 return NULL;
828 }
829 }
830
831 return rt;
832 }
833
834 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
835 const struct in6_addr *daddr)
836 {
837 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
838
839 if (rt) {
840 rt->rt6i_flags |= RTF_CACHE;
841 rt->n = neigh_clone(ort->n);
842 }
843 return rt;
844 }
845
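/*
 * ip6_pol_route() is the slow path shared by the input and output lookups.
 * A rough outline of what follows: select the best route under tb6_lock;
 * if it is already a cached (RTF_CACHE) entry or the null entry, use it as
 * is.  Otherwise drop the lock and create a per-destination copy --
 * rt6_alloc_cow() when a neighbour still has to be bound, rt6_alloc_clone()
 * for non-host routes -- insert it as an RTF_CACHE entry, and retry a few
 * times if the insertion race is lost.
 */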
846 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
847 struct flowi6 *fl6, int flags)
848 {
849 struct fib6_node *fn;
850 struct rt6_info *rt, *nrt;
851 int strict = 0;
852 int attempts = 3;
853 int err;
854 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
855
856 strict |= flags & RT6_LOOKUP_F_IFACE;
857
858 relookup:
859 read_lock_bh(&table->tb6_lock);
860
861 restart_2:
862 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
863
864 restart:
865 rt = rt6_select(fn, oif, strict | reachable);
866
867 BACKTRACK(net, &fl6->saddr);
868 if (rt == net->ipv6.ip6_null_entry ||
869 rt->rt6i_flags & RTF_CACHE)
870 goto out;
871
872 dst_hold(&rt->dst);
873 read_unlock_bh(&table->tb6_lock);
874
875 if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
876 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
877 else if (!(rt->dst.flags & DST_HOST))
878 nrt = rt6_alloc_clone(rt, &fl6->daddr);
879 else
880 goto out2;
881
882 dst_release(&rt->dst);
883 rt = nrt ? : net->ipv6.ip6_null_entry;
884
885 dst_hold(&rt->dst);
886 if (nrt) {
887 err = ip6_ins_rt(nrt);
888 if (!err)
889 goto out2;
890 }
891
892 if (--attempts <= 0)
893 goto out2;
894
895 /*
896 * Race condition! In the gap while table->tb6_lock was
897 * released, someone else could have inserted this route. Relookup.
898 */
899 dst_release(&rt->dst);
900 goto relookup;
901
902 out:
903 if (reachable) {
904 reachable = 0;
905 goto restart_2;
906 }
907 dst_hold(&rt->dst);
908 read_unlock_bh(&table->tb6_lock);
909 out2:
910 rt->dst.lastuse = jiffies;
911 rt->dst.__use++;
912
913 return rt;
914 }
915
916 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
917 struct flowi6 *fl6, int flags)
918 {
919 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
920 }
921
922 static struct dst_entry *ip6_route_input_lookup(struct net *net,
923 struct net_device *dev,
924 struct flowi6 *fl6, int flags)
925 {
926 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
927 flags |= RT6_LOOKUP_F_IFACE;
928
929 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
930 }
931
932 void ip6_route_input(struct sk_buff *skb)
933 {
934 const struct ipv6hdr *iph = ipv6_hdr(skb);
935 struct net *net = dev_net(skb->dev);
936 int flags = RT6_LOOKUP_F_HAS_SADDR;
937 struct flowi6 fl6 = {
938 .flowi6_iif = skb->dev->ifindex,
939 .daddr = iph->daddr,
940 .saddr = iph->saddr,
941 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
942 .flowi6_mark = skb->mark,
943 .flowi6_proto = iph->nexthdr,
944 };
945
946 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
947 }
948
949 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
950 struct flowi6 *fl6, int flags)
951 {
952 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
953 }
954
955 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
956 struct flowi6 *fl6)
957 {
958 int flags = 0;
959
960 fl6->flowi6_iif = LOOPBACK_IFINDEX;
961
962 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
963 flags |= RT6_LOOKUP_F_IFACE;
964
965 if (!ipv6_addr_any(&fl6->saddr))
966 flags |= RT6_LOOKUP_F_HAS_SADDR;
967 else if (sk)
968 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
969
970 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
971 }
972
973 EXPORT_SYMBOL(ip6_route_output);
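/*
 * Note that ip6_route_output() never returns NULL: on failure it hands
 * back a dst whose ->error is set (ultimately one of the null/reject
 * entries), so the usual calling pattern -- as in ip6_update_pmtu() below
 * -- is:
 *
 *	dst = ip6_route_output(net, NULL, &fl6);
 *	if (!dst->error)
 *		... use dst ...
 *	dst_release(dst);
 */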
974
975 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
976 {
977 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
978 struct dst_entry *new = NULL;
979
980 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
981 if (rt) {
982 new = &rt->dst;
983
984 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
985 rt6_init_peer(rt, net->ipv6.peers);
986
987 new->__use = 1;
988 new->input = dst_discard;
989 new->output = dst_discard;
990
991 if (dst_metrics_read_only(&ort->dst))
992 new->_metrics = ort->dst._metrics;
993 else
994 dst_copy_metrics(new, &ort->dst);
995 rt->rt6i_idev = ort->rt6i_idev;
996 if (rt->rt6i_idev)
997 in6_dev_hold(rt->rt6i_idev);
998
999 rt->rt6i_gateway = ort->rt6i_gateway;
1000 rt->rt6i_flags = ort->rt6i_flags;
1001 rt6_clean_expires(rt);
1002 rt->rt6i_metric = 0;
1003
1004 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1005 #ifdef CONFIG_IPV6_SUBTREES
1006 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1007 #endif
1008
1009 dst_free(new);
1010 }
1011
1012 dst_release(dst_orig);
1013 return new ? new : ERR_PTR(-ENOMEM);
1014 }
1015
1016 /*
1017 * Destination cache support functions
1018 */
1019
1020 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1021 {
1022 struct rt6_info *rt;
1023
1024 rt = (struct rt6_info *) dst;
1025
1026 /* All IPv6 dsts are created with ->obsolete set to the value
1027 * DST_OBSOLETE_FORCE_CHK, which always forces validation calls
1028 * down into this function.
1029 */
1030 if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
1031 return NULL;
1032
1033 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
1034 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
1035 if (!rt6_has_peer(rt))
1036 rt6_bind_peer(rt, 0);
1037 rt->rt6i_peer_genid = rt6_peer_genid();
1038 }
1039 return dst;
1040 }
1041 return NULL;
1042 }
1043
1044 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1045 {
1046 struct rt6_info *rt = (struct rt6_info *) dst;
1047
1048 if (rt) {
1049 if (rt->rt6i_flags & RTF_CACHE) {
1050 if (rt6_check_expired(rt)) {
1051 ip6_del_rt(rt);
1052 dst = NULL;
1053 }
1054 } else {
1055 dst_release(dst);
1056 dst = NULL;
1057 }
1058 }
1059 return dst;
1060 }
1061
1062 static void ip6_link_failure(struct sk_buff *skb)
1063 {
1064 struct rt6_info *rt;
1065
1066 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1067
1068 rt = (struct rt6_info *) skb_dst(skb);
1069 if (rt) {
1070 if (rt->rt6i_flags & RTF_CACHE)
1071 rt6_update_expires(rt, 0);
1072 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1073 rt->rt6i_node->fn_sernum = -1;
1074 }
1075 }
1076
1077 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1078 struct sk_buff *skb, u32 mtu)
1079 {
1080 struct rt6_info *rt6 = (struct rt6_info*)dst;
1081
1082 dst_confirm(dst);
1083 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1084 struct net *net = dev_net(dst->dev);
1085
1086 rt6->rt6i_flags |= RTF_MODIFIED;
1087 if (mtu < IPV6_MIN_MTU) {
1088 u32 features = dst_metric(dst, RTAX_FEATURES);
1089 mtu = IPV6_MIN_MTU;
1090 features |= RTAX_FEATURE_ALLFRAG;
1091 dst_metric_set(dst, RTAX_FEATURES, features);
1092 }
1093 dst_metric_set(dst, RTAX_MTU, mtu);
1094 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1095 }
1096 }
1097
1098 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1099 int oif, u32 mark)
1100 {
1101 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1102 struct dst_entry *dst;
1103 struct flowi6 fl6;
1104
1105 memset(&fl6, 0, sizeof(fl6));
1106 fl6.flowi6_oif = oif;
1107 fl6.flowi6_mark = mark;
1108 fl6.flowi6_flags = 0;
1109 fl6.daddr = iph->daddr;
1110 fl6.saddr = iph->saddr;
1111 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1112
1113 dst = ip6_route_output(net, NULL, &fl6);
1114 if (!dst->error)
1115 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1116 dst_release(dst);
1117 }
1118 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1119
1120 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1121 {
1122 ip6_update_pmtu(skb, sock_net(sk), mtu,
1123 sk->sk_bound_dev_if, sk->sk_mark);
1124 }
1125 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
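/*
 * Transport protocols typically call these helpers from their ICMPv6 error
 * handlers when a Packet Too Big message arrives; an illustrative call
 * site (not from this file) would be:
 *
 *	if (type == ICMPV6_PKT_TOOBIG)
 *		ip6_sk_update_pmtu(skb, sk, info);
 *
 * where "info" carries the MTU reported by the router, in network byte
 * order as expected by the __be32 argument above.
 */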
1126
1127 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1128 {
1129 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1130 struct dst_entry *dst;
1131 struct flowi6 fl6;
1132
1133 memset(&fl6, 0, sizeof(fl6));
1134 fl6.flowi6_oif = oif;
1135 fl6.flowi6_mark = mark;
1136 fl6.flowi6_flags = 0;
1137 fl6.daddr = iph->daddr;
1138 fl6.saddr = iph->saddr;
1139 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1140
1141 dst = ip6_route_output(net, NULL, &fl6);
1142 if (!dst->error)
1143 rt6_do_redirect(dst, NULL, skb);
1144 dst_release(dst);
1145 }
1146 EXPORT_SYMBOL_GPL(ip6_redirect);
1147
1148 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1149 {
1150 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1151 }
1152 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1153
1154 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1155 {
1156 struct net_device *dev = dst->dev;
1157 unsigned int mtu = dst_mtu(dst);
1158 struct net *net = dev_net(dev);
1159
1160 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1161
1162 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1163 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1164
1165 /*
1166 * The maximal non-jumbo IPv6 payload is IPV6_MAXPLEN, and the
1167 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1168 * IPV6_MAXPLEN is also valid and means: "any MSS,
1169 * rely only on PMTU discovery"
1170 */
1171 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1172 mtu = IPV6_MAXPLEN;
1173 return mtu;
1174 }
1175
1176 static unsigned int ip6_mtu(const struct dst_entry *dst)
1177 {
1178 struct inet6_dev *idev;
1179 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1180
1181 if (mtu)
1182 return mtu;
1183
1184 mtu = IPV6_MIN_MTU;
1185
1186 rcu_read_lock();
1187 idev = __in6_dev_get(dst->dev);
1188 if (idev)
1189 mtu = idev->cnf.mtu6;
1190 rcu_read_unlock();
1191
1192 return mtu;
1193 }
1194
1195 static struct dst_entry *icmp6_dst_gc_list;
1196 static DEFINE_SPINLOCK(icmp6_dst_lock);
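/*
 * dsts allocated by icmp6_dst_alloc() are never inserted into the FIB;
 * they are chained on icmp6_dst_gc_list under icmp6_dst_lock and reaped by
 * icmp6_dst_gc() once their refcount drops to zero, or by
 * icmp6_clean_all() when the owning device goes down.
 */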
1197
1198 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1199 struct neighbour *neigh,
1200 struct flowi6 *fl6)
1201 {
1202 struct dst_entry *dst;
1203 struct rt6_info *rt;
1204 struct inet6_dev *idev = in6_dev_get(dev);
1205 struct net *net = dev_net(dev);
1206
1207 if (unlikely(!idev))
1208 return ERR_PTR(-ENODEV);
1209
1210 rt = ip6_dst_alloc(net, dev, 0, NULL);
1211 if (unlikely(!rt)) {
1212 in6_dev_put(idev);
1213 dst = ERR_PTR(-ENOMEM);
1214 goto out;
1215 }
1216
1217 if (neigh)
1218 neigh_hold(neigh);
1219 else {
1220 neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
1221 if (IS_ERR(neigh)) {
1222 in6_dev_put(idev);
1223 dst_free(&rt->dst);
1224 return ERR_CAST(neigh);
1225 }
1226 }
1227
1228 rt->dst.flags |= DST_HOST;
1229 rt->dst.output = ip6_output;
1230 rt->n = neigh;
1231 atomic_set(&rt->dst.__refcnt, 1);
1232 rt->rt6i_dst.addr = fl6->daddr;
1233 rt->rt6i_dst.plen = 128;
1234 rt->rt6i_idev = idev;
1235 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1236
1237 spin_lock_bh(&icmp6_dst_lock);
1238 rt->dst.next = icmp6_dst_gc_list;
1239 icmp6_dst_gc_list = &rt->dst;
1240 spin_unlock_bh(&icmp6_dst_lock);
1241
1242 fib6_force_start_gc(net);
1243
1244 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1245
1246 out:
1247 return dst;
1248 }
1249
1250 int icmp6_dst_gc(void)
1251 {
1252 struct dst_entry *dst, **pprev;
1253 int more = 0;
1254
1255 spin_lock_bh(&icmp6_dst_lock);
1256 pprev = &icmp6_dst_gc_list;
1257
1258 while ((dst = *pprev) != NULL) {
1259 if (!atomic_read(&dst->__refcnt)) {
1260 *pprev = dst->next;
1261 dst_free(dst);
1262 } else {
1263 pprev = &dst->next;
1264 ++more;
1265 }
1266 }
1267
1268 spin_unlock_bh(&icmp6_dst_lock);
1269
1270 return more;
1271 }
1272
1273 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1274 void *arg)
1275 {
1276 struct dst_entry *dst, **pprev;
1277
1278 spin_lock_bh(&icmp6_dst_lock);
1279 pprev = &icmp6_dst_gc_list;
1280 while ((dst = *pprev) != NULL) {
1281 struct rt6_info *rt = (struct rt6_info *) dst;
1282 if (func(rt, arg)) {
1283 *pprev = dst->next;
1284 dst_free(dst);
1285 } else {
1286 pprev = &dst->next;
1287 }
1288 }
1289 spin_unlock_bh(&icmp6_dst_lock);
1290 }
1291
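/*
 * GC pacing, as implemented below: ip6_rt_gc_expire is bumped every time
 * garbage collection actually runs and is handed to fib6_run_gc() as the
 * expiry horizon; each call then decays it by expire >> elasticity, and a
 * shrink below gc_thresh resets it to half of gc_timeout.
 */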
1292 static int ip6_dst_gc(struct dst_ops *ops)
1293 {
1294 unsigned long now = jiffies;
1295 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1296 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1297 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1298 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1299 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1300 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1301 int entries;
1302
1303 entries = dst_entries_get_fast(ops);
1304 if (time_after(rt_last_gc + rt_min_interval, now) &&
1305 entries <= rt_max_size)
1306 goto out;
1307
1308 net->ipv6.ip6_rt_gc_expire++;
1309 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1310 net->ipv6.ip6_rt_last_gc = now;
1311 entries = dst_entries_get_slow(ops);
1312 if (entries < ops->gc_thresh)
1313 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1314 out:
1315 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1316 return entries > rt_max_size;
1317 }
1318
1319 /* Clean the host part of a prefix. Not necessary in a radix tree,
1320 but it results in cleaner routing tables.
1321
1322 Remove this only when everything else works!
1323 */
1324
1325 int ip6_dst_hoplimit(struct dst_entry *dst)
1326 {
1327 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1328 if (hoplimit == 0) {
1329 struct net_device *dev = dst->dev;
1330 struct inet6_dev *idev;
1331
1332 rcu_read_lock();
1333 idev = __in6_dev_get(dev);
1334 if (idev)
1335 hoplimit = idev->cnf.hop_limit;
1336 else
1337 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1338 rcu_read_unlock();
1339 }
1340 return hoplimit;
1341 }
1342 EXPORT_SYMBOL(ip6_dst_hoplimit);
1343
1344 /*
1345 * Route addition
1346 */
1347
1348 int ip6_route_add(struct fib6_config *cfg)
1349 {
1350 int err;
1351 struct net *net = cfg->fc_nlinfo.nl_net;
1352 struct rt6_info *rt = NULL;
1353 struct net_device *dev = NULL;
1354 struct inet6_dev *idev = NULL;
1355 struct fib6_table *table;
1356 int addr_type;
1357
1358 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1359 return -EINVAL;
1360 #ifndef CONFIG_IPV6_SUBTREES
1361 if (cfg->fc_src_len)
1362 return -EINVAL;
1363 #endif
1364 if (cfg->fc_ifindex) {
1365 err = -ENODEV;
1366 dev = dev_get_by_index(net, cfg->fc_ifindex);
1367 if (!dev)
1368 goto out;
1369 idev = in6_dev_get(dev);
1370 if (!idev)
1371 goto out;
1372 }
1373
1374 if (cfg->fc_metric == 0)
1375 cfg->fc_metric = IP6_RT_PRIO_USER;
1376
1377 err = -ENOBUFS;
1378 if (cfg->fc_nlinfo.nlh &&
1379 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1380 table = fib6_get_table(net, cfg->fc_table);
1381 if (!table) {
1382 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1383 table = fib6_new_table(net, cfg->fc_table);
1384 }
1385 } else {
1386 table = fib6_new_table(net, cfg->fc_table);
1387 }
1388
1389 if (!table)
1390 goto out;
1391
1392 rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1393
1394 if (!rt) {
1395 err = -ENOMEM;
1396 goto out;
1397 }
1398
1399 if (cfg->fc_flags & RTF_EXPIRES)
1400 rt6_set_expires(rt, jiffies +
1401 clock_t_to_jiffies(cfg->fc_expires));
1402 else
1403 rt6_clean_expires(rt);
1404
1405 if (cfg->fc_protocol == RTPROT_UNSPEC)
1406 cfg->fc_protocol = RTPROT_BOOT;
1407 rt->rt6i_protocol = cfg->fc_protocol;
1408
1409 addr_type = ipv6_addr_type(&cfg->fc_dst);
1410
1411 if (addr_type & IPV6_ADDR_MULTICAST)
1412 rt->dst.input = ip6_mc_input;
1413 else if (cfg->fc_flags & RTF_LOCAL)
1414 rt->dst.input = ip6_input;
1415 else
1416 rt->dst.input = ip6_forward;
1417
1418 rt->dst.output = ip6_output;
1419
1420 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1421 rt->rt6i_dst.plen = cfg->fc_dst_len;
1422 if (rt->rt6i_dst.plen == 128)
1423 rt->dst.flags |= DST_HOST;
1424
1425 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1426 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1427 if (!metrics) {
1428 err = -ENOMEM;
1429 goto out;
1430 }
1431 dst_init_metrics(&rt->dst, metrics, 0);
1432 }
1433 #ifdef CONFIG_IPV6_SUBTREES
1434 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1435 rt->rt6i_src.plen = cfg->fc_src_len;
1436 #endif
1437
1438 rt->rt6i_metric = cfg->fc_metric;
1439
1440 /* We cannot add true routes via loopback here,
1441 they would result in kernel looping; promote them to reject routes
1442 */
1443 if ((cfg->fc_flags & RTF_REJECT) ||
1444 (dev && (dev->flags & IFF_LOOPBACK) &&
1445 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1446 !(cfg->fc_flags & RTF_LOCAL))) {
1447 /* hold loopback dev/idev if we haven't done so. */
1448 if (dev != net->loopback_dev) {
1449 if (dev) {
1450 dev_put(dev);
1451 in6_dev_put(idev);
1452 }
1453 dev = net->loopback_dev;
1454 dev_hold(dev);
1455 idev = in6_dev_get(dev);
1456 if (!idev) {
1457 err = -ENODEV;
1458 goto out;
1459 }
1460 }
1461 rt->dst.output = ip6_pkt_discard_out;
1462 rt->dst.input = ip6_pkt_discard;
1463 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1464 switch (cfg->fc_type) {
1465 case RTN_BLACKHOLE:
1466 rt->dst.error = -EINVAL;
1467 break;
1468 case RTN_PROHIBIT:
1469 rt->dst.error = -EACCES;
1470 break;
1471 case RTN_THROW:
1472 rt->dst.error = -EAGAIN;
1473 break;
1474 default:
1475 rt->dst.error = -ENETUNREACH;
1476 break;
1477 }
1478 goto install_route;
1479 }
1480
1481 if (cfg->fc_flags & RTF_GATEWAY) {
1482 const struct in6_addr *gw_addr;
1483 int gwa_type;
1484
1485 gw_addr = &cfg->fc_gateway;
1486 rt->rt6i_gateway = *gw_addr;
1487 gwa_type = ipv6_addr_type(gw_addr);
1488
1489 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1490 struct rt6_info *grt;
1491
1492 /* IPv6 strictly forbids using non-link-local
1493 addresses as nexthop addresses.
1494 Otherwise, a router would not be able to send redirects.
1495 That is a good rule, but in some (rare!) circumstances
1496 (SIT, PtP, NBMA NOARP links) it is handy to allow
1497 some exceptions. --ANK
1498 */
1499 err = -EINVAL;
1500 if (!(gwa_type & IPV6_ADDR_UNICAST))
1501 goto out;
1502
1503 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1504
1505 err = -EHOSTUNREACH;
1506 if (!grt)
1507 goto out;
1508 if (dev) {
1509 if (dev != grt->dst.dev) {
1510 dst_release(&grt->dst);
1511 goto out;
1512 }
1513 } else {
1514 dev = grt->dst.dev;
1515 idev = grt->rt6i_idev;
1516 dev_hold(dev);
1517 in6_dev_hold(grt->rt6i_idev);
1518 }
1519 if (!(grt->rt6i_flags & RTF_GATEWAY))
1520 err = 0;
1521 dst_release(&grt->dst);
1522
1523 if (err)
1524 goto out;
1525 }
1526 err = -EINVAL;
1527 if (!dev || (dev->flags & IFF_LOOPBACK))
1528 goto out;
1529 }
1530
1531 err = -ENODEV;
1532 if (!dev)
1533 goto out;
1534
1535 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1536 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1537 err = -EINVAL;
1538 goto out;
1539 }
1540 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1541 rt->rt6i_prefsrc.plen = 128;
1542 } else
1543 rt->rt6i_prefsrc.plen = 0;
1544
1545 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1546 err = rt6_bind_neighbour(rt, dev);
1547 if (err)
1548 goto out;
1549 }
1550
1551 rt->rt6i_flags = cfg->fc_flags;
1552
1553 install_route:
1554 if (cfg->fc_mx) {
1555 struct nlattr *nla;
1556 int remaining;
1557
1558 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1559 int type = nla_type(nla);
1560
1561 if (type) {
1562 if (type > RTAX_MAX) {
1563 err = -EINVAL;
1564 goto out;
1565 }
1566
1567 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1568 }
1569 }
1570 }
1571
1572 rt->dst.dev = dev;
1573 rt->rt6i_idev = idev;
1574 rt->rt6i_table = table;
1575
1576 cfg->fc_nlinfo.nl_net = dev_net(dev);
1577
1578 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1579
1580 out:
1581 if (dev)
1582 dev_put(dev);
1583 if (idev)
1584 in6_dev_put(idev);
1585 if (rt)
1586 dst_free(&rt->dst);
1587 return err;
1588 }
1589
1590 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1591 {
1592 int err;
1593 struct fib6_table *table;
1594 struct net *net = dev_net(rt->dst.dev);
1595
1596 if (rt == net->ipv6.ip6_null_entry)
1597 return -ENOENT;
1598
1599 table = rt->rt6i_table;
1600 write_lock_bh(&table->tb6_lock);
1601
1602 err = fib6_del(rt, info);
1603 dst_release(&rt->dst);
1604
1605 write_unlock_bh(&table->tb6_lock);
1606
1607 return err;
1608 }
1609
1610 int ip6_del_rt(struct rt6_info *rt)
1611 {
1612 struct nl_info info = {
1613 .nl_net = dev_net(rt->dst.dev),
1614 };
1615 return __ip6_del_rt(rt, &info);
1616 }
1617
1618 static int ip6_route_del(struct fib6_config *cfg)
1619 {
1620 struct fib6_table *table;
1621 struct fib6_node *fn;
1622 struct rt6_info *rt;
1623 int err = -ESRCH;
1624
1625 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1626 if (!table)
1627 return err;
1628
1629 read_lock_bh(&table->tb6_lock);
1630
1631 fn = fib6_locate(&table->tb6_root,
1632 &cfg->fc_dst, cfg->fc_dst_len,
1633 &cfg->fc_src, cfg->fc_src_len);
1634
1635 if (fn) {
1636 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1637 if (cfg->fc_ifindex &&
1638 (!rt->dst.dev ||
1639 rt->dst.dev->ifindex != cfg->fc_ifindex))
1640 continue;
1641 if (cfg->fc_flags & RTF_GATEWAY &&
1642 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1643 continue;
1644 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1645 continue;
1646 dst_hold(&rt->dst);
1647 read_unlock_bh(&table->tb6_lock);
1648
1649 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1650 }
1651 }
1652 read_unlock_bh(&table->tb6_lock);
1653
1654 return err;
1655 }
1656
1657 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1658 {
1659 struct net *net = dev_net(skb->dev);
1660 struct netevent_redirect netevent;
1661 struct rt6_info *rt, *nrt = NULL;
1662 const struct in6_addr *target;
1663 struct ndisc_options ndopts;
1664 const struct in6_addr *dest;
1665 struct neighbour *old_neigh;
1666 struct inet6_dev *in6_dev;
1667 struct neighbour *neigh;
1668 struct icmp6hdr *icmph;
1669 int optlen, on_link;
1670 u8 *lladdr;
1671
1672 optlen = skb->tail - skb->transport_header;
1673 optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1674
1675 if (optlen < 0) {
1676 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1677 return;
1678 }
1679
1680 icmph = icmp6_hdr(skb);
1681 target = (const struct in6_addr *) (icmph + 1);
1682 dest = target + 1;
1683
1684 if (ipv6_addr_is_multicast(dest)) {
1685 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1686 return;
1687 }
1688
1689 on_link = 0;
1690 if (ipv6_addr_equal(dest, target)) {
1691 on_link = 1;
1692 } else if (ipv6_addr_type(target) !=
1693 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1694 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1695 return;
1696 }
1697
1698 in6_dev = __in6_dev_get(skb->dev);
1699 if (!in6_dev)
1700 return;
1701 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1702 return;
1703
1704 /* RFC2461 8.1:
1705 * The IP source address of the Redirect MUST be the same as the current
1706 * first-hop router for the specified ICMP Destination Address.
1707 */
1708
1709 if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
1710 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1711 return;
1712 }
1713
1714 lladdr = NULL;
1715 if (ndopts.nd_opts_tgt_lladdr) {
1716 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1717 skb->dev);
1718 if (!lladdr) {
1719 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1720 return;
1721 }
1722 }
1723
1724 rt = (struct rt6_info *) dst;
1725 if (rt == net->ipv6.ip6_null_entry) {
1726 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1727 return;
1728 }
1729
1730 /* Redirect received -> path was valid.
1731 * Look, redirects are sent only in response to data packets,
1732 * so this nexthop is apparently reachable. --ANK
1733 */
1734 dst_confirm(&rt->dst);
1735
1736 neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
1737 if (!neigh)
1738 return;
1739
1740 /* Duplicate redirect: silently ignore. */
1741 old_neigh = rt->n;
1742 if (neigh == old_neigh)
1743 goto out;
1744
1745 /*
1746 * We have finally decided to accept it.
1747 */
1748
1749 neigh_update(neigh, lladdr, NUD_STALE,
1750 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1751 NEIGH_UPDATE_F_OVERRIDE|
1752 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1753 NEIGH_UPDATE_F_ISROUTER))
1754 );
1755
1756 nrt = ip6_rt_copy(rt, dest);
1757 if (!nrt)
1758 goto out;
1759
1760 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1761 if (on_link)
1762 nrt->rt6i_flags &= ~RTF_GATEWAY;
1763
1764 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1765 nrt->n = neigh_clone(neigh);
1766
1767 if (ip6_ins_rt(nrt))
1768 goto out;
1769
1770 netevent.old = &rt->dst;
1771 netevent.old_neigh = old_neigh;
1772 netevent.new = &nrt->dst;
1773 netevent.new_neigh = neigh;
1774 netevent.daddr = dest;
1775 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1776
1777 if (rt->rt6i_flags & RTF_CACHE) {
1778 rt = (struct rt6_info *) dst_clone(&rt->dst);
1779 ip6_del_rt(rt);
1780 }
1781
1782 out:
1783 neigh_release(neigh);
1784 }
1785
1786 /*
1787 * Misc support functions
1788 */
1789
1790 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1791 const struct in6_addr *dest)
1792 {
1793 struct net *net = dev_net(ort->dst.dev);
1794 struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1795 ort->rt6i_table);
1796
1797 if (rt) {
1798 rt->dst.input = ort->dst.input;
1799 rt->dst.output = ort->dst.output;
1800 rt->dst.flags |= DST_HOST;
1801
1802 rt->rt6i_dst.addr = *dest;
1803 rt->rt6i_dst.plen = 128;
1804 dst_copy_metrics(&rt->dst, &ort->dst);
1805 rt->dst.error = ort->dst.error;
1806 rt->rt6i_idev = ort->rt6i_idev;
1807 if (rt->rt6i_idev)
1808 in6_dev_hold(rt->rt6i_idev);
1809 rt->dst.lastuse = jiffies;
1810
1811 rt->rt6i_gateway = ort->rt6i_gateway;
1812 rt->rt6i_flags = ort->rt6i_flags;
1813 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1814 (RTF_DEFAULT | RTF_ADDRCONF))
1815 rt6_set_from(rt, ort);
1816 else
1817 rt6_clean_expires(rt);
1818 rt->rt6i_metric = 0;
1819
1820 #ifdef CONFIG_IPV6_SUBTREES
1821 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1822 #endif
1823 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1824 rt->rt6i_table = ort->rt6i_table;
1825 }
1826 return rt;
1827 }
1828
1829 #ifdef CONFIG_IPV6_ROUTE_INFO
1830 static struct rt6_info *rt6_get_route_info(struct net *net,
1831 const struct in6_addr *prefix, int prefixlen,
1832 const struct in6_addr *gwaddr, int ifindex)
1833 {
1834 struct fib6_node *fn;
1835 struct rt6_info *rt = NULL;
1836 struct fib6_table *table;
1837
1838 table = fib6_get_table(net, RT6_TABLE_INFO);
1839 if (!table)
1840 return NULL;
1841
1842 read_lock_bh(&table->tb6_lock);
1843 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
1844 if (!fn)
1845 goto out;
1846
1847 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1848 if (rt->dst.dev->ifindex != ifindex)
1849 continue;
1850 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1851 continue;
1852 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1853 continue;
1854 dst_hold(&rt->dst);
1855 break;
1856 }
1857 out:
1858 read_unlock_bh(&table->tb6_lock);
1859 return rt;
1860 }
1861
1862 static struct rt6_info *rt6_add_route_info(struct net *net,
1863 const struct in6_addr *prefix, int prefixlen,
1864 const struct in6_addr *gwaddr, int ifindex,
1865 unsigned int pref)
1866 {
1867 struct fib6_config cfg = {
1868 .fc_table = RT6_TABLE_INFO,
1869 .fc_metric = IP6_RT_PRIO_USER,
1870 .fc_ifindex = ifindex,
1871 .fc_dst_len = prefixlen,
1872 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1873 RTF_UP | RTF_PREF(pref),
1874 .fc_nlinfo.portid = 0,
1875 .fc_nlinfo.nlh = NULL,
1876 .fc_nlinfo.nl_net = net,
1877 };
1878
1879 cfg.fc_dst = *prefix;
1880 cfg.fc_gateway = *gwaddr;
1881
1882 /* We should treat it as a default route if prefix length is 0. */
1883 if (!prefixlen)
1884 cfg.fc_flags |= RTF_DEFAULT;
1885
1886 ip6_route_add(&cfg);
1887
1888 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1889 }
1890 #endif
1891
1892 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1893 {
1894 struct rt6_info *rt;
1895 struct fib6_table *table;
1896
1897 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1898 if (!table)
1899 return NULL;
1900
1901 read_lock_bh(&table->tb6_lock);
1902 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1903 if (dev == rt->dst.dev &&
1904 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1905 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1906 break;
1907 }
1908 if (rt)
1909 dst_hold(&rt->dst);
1910 read_unlock_bh(&table->tb6_lock);
1911 return rt;
1912 }
1913
1914 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1915 struct net_device *dev,
1916 unsigned int pref)
1917 {
1918 struct fib6_config cfg = {
1919 .fc_table = RT6_TABLE_DFLT,
1920 .fc_metric = IP6_RT_PRIO_USER,
1921 .fc_ifindex = dev->ifindex,
1922 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1923 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1924 .fc_nlinfo.portid = 0,
1925 .fc_nlinfo.nlh = NULL,
1926 .fc_nlinfo.nl_net = dev_net(dev),
1927 };
1928
1929 cfg.fc_gateway = *gwaddr;
1930
1931 ip6_route_add(&cfg);
1932
1933 return rt6_get_dflt_router(gwaddr, dev);
1934 }
1935
1936 void rt6_purge_dflt_routers(struct net *net)
1937 {
1938 struct rt6_info *rt;
1939 struct fib6_table *table;
1940
1941 /* NOTE: Keep consistent with rt6_get_dflt_router */
1942 table = fib6_get_table(net, RT6_TABLE_DFLT);
1943 if (!table)
1944 return;
1945
1946 restart:
1947 read_lock_bh(&table->tb6_lock);
1948 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1949 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1950 dst_hold(&rt->dst);
1951 read_unlock_bh(&table->tb6_lock);
1952 ip6_del_rt(rt);
1953 goto restart;
1954 }
1955 }
1956 read_unlock_bh(&table->tb6_lock);
1957 }
1958
1959 static void rtmsg_to_fib6_config(struct net *net,
1960 struct in6_rtmsg *rtmsg,
1961 struct fib6_config *cfg)
1962 {
1963 memset(cfg, 0, sizeof(*cfg));
1964
1965 cfg->fc_table = RT6_TABLE_MAIN;
1966 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1967 cfg->fc_metric = rtmsg->rtmsg_metric;
1968 cfg->fc_expires = rtmsg->rtmsg_info;
1969 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1970 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1971 cfg->fc_flags = rtmsg->rtmsg_flags;
1972
1973 cfg->fc_nlinfo.nl_net = net;
1974
1975 cfg->fc_dst = rtmsg->rtmsg_dst;
1976 cfg->fc_src = rtmsg->rtmsg_src;
1977 cfg->fc_gateway = rtmsg->rtmsg_gateway;
1978 }
1979
1980 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1981 {
1982 struct fib6_config cfg;
1983 struct in6_rtmsg rtmsg;
1984 int err;
1985
1986 switch (cmd) {
1987 case SIOCADDRT: /* Add a route */
1988 case SIOCDELRT: /* Delete a route */
1989 if (!capable(CAP_NET_ADMIN))
1990 return -EPERM;
1991 err = copy_from_user(&rtmsg, arg,
1992 sizeof(struct in6_rtmsg));
1993 if (err)
1994 return -EFAULT;
1995
1996 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1997
1998 rtnl_lock();
1999 switch (cmd) {
2000 case SIOCADDRT:
2001 err = ip6_route_add(&cfg);
2002 break;
2003 case SIOCDELRT:
2004 err = ip6_route_del(&cfg);
2005 break;
2006 default:
2007 err = -EINVAL;
2008 }
2009 rtnl_unlock();
2010
2011 return err;
2012 }
2013
2014 return -EINVAL;
2015 }
2016
2017 /*
2018 * Drop the packet on the floor
2019 */
2020
2021 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2022 {
2023 int type;
2024 struct dst_entry *dst = skb_dst(skb);
2025 switch (ipstats_mib_noroutes) {
2026 case IPSTATS_MIB_INNOROUTES:
2027 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2028 if (type == IPV6_ADDR_ANY) {
2029 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2030 IPSTATS_MIB_INADDRERRORS);
2031 break;
2032 }
2033 /* FALLTHROUGH */
2034 case IPSTATS_MIB_OUTNOROUTES:
2035 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2036 ipstats_mib_noroutes);
2037 break;
2038 }
2039 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2040 kfree_skb(skb);
2041 return 0;
2042 }
2043
2044 static int ip6_pkt_discard(struct sk_buff *skb)
2045 {
2046 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2047 }
2048
2049 static int ip6_pkt_discard_out(struct sk_buff *skb)
2050 {
2051 skb->dev = skb_dst(skb)->dev;
2052 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2053 }
2054
2055 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2056
2057 static int ip6_pkt_prohibit(struct sk_buff *skb)
2058 {
2059 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2060 }
2061
2062 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2063 {
2064 skb->dev = skb_dst(skb)->dev;
2065 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2066 }
2067
2068 #endif
2069
2070 /*
2071 * Allocate a dst for local (unicast / anycast) address.
2072 */
2073
2074 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2075 const struct in6_addr *addr,
2076 bool anycast)
2077 {
2078 struct net *net = dev_net(idev->dev);
2079 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2080 int err;
2081
2082 if (!rt) {
2083 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2084 return ERR_PTR(-ENOMEM);
2085 }
2086
2087 in6_dev_hold(idev);
2088
2089 rt->dst.flags |= DST_HOST;
2090 rt->dst.input = ip6_input;
2091 rt->dst.output = ip6_output;
2092 rt->rt6i_idev = idev;
2093
2094 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2095 if (anycast)
2096 rt->rt6i_flags |= RTF_ANYCAST;
2097 else
2098 rt->rt6i_flags |= RTF_LOCAL;
2099 err = rt6_bind_neighbour(rt, rt->dst.dev);
2100 if (err) {
2101 dst_free(&rt->dst);
2102 return ERR_PTR(err);
2103 }
2104
2105 rt->rt6i_dst.addr = *addr;
2106 rt->rt6i_dst.plen = 128;
2107 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2108
2109 atomic_set(&rt->dst.__refcnt, 1);
2110
2111 return rt;
2112 }
2113
2114 int ip6_route_get_saddr(struct net *net,
2115 struct rt6_info *rt,
2116 const struct in6_addr *daddr,
2117 unsigned int prefs,
2118 struct in6_addr *saddr)
2119 {
2120 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2121 int err = 0;
2122 if (rt->rt6i_prefsrc.plen)
2123 *saddr = rt->rt6i_prefsrc.addr;
2124 else
2125 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2126 daddr, prefs, saddr);
2127 return err;
2128 }
2129
2130 /* remove deleted ip from prefsrc entries */
2131 struct arg_dev_net_ip {
2132 struct net_device *dev;
2133 struct net *net;
2134 struct in6_addr *addr;
2135 };
2136
2137 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2138 {
2139 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2140 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2141 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2142
2143 if (((void *)rt->dst.dev == dev || !dev) &&
2144 rt != net->ipv6.ip6_null_entry &&
2145 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2146 /* remove prefsrc entry */
2147 rt->rt6i_prefsrc.plen = 0;
2148 }
2149 return 0;
2150 }
2151
2152 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2153 {
2154 struct net *net = dev_net(ifp->idev->dev);
2155 struct arg_dev_net_ip adni = {
2156 .dev = ifp->idev->dev,
2157 .net = net,
2158 .addr = &ifp->addr,
2159 };
2160 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2161 }
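/*
 * Called when an address is deleted: walk every FIB entry and clear any
 * prefsrc that still refers to the removed address, so stale source
 * addresses are never handed out by ip6_route_get_saddr() above.
 */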
2162
2163 struct arg_dev_net {
2164 struct net_device *dev;
2165 struct net *net;
2166 };
2167
2168 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2169 {
2170 const struct arg_dev_net *adn = arg;
2171 const struct net_device *dev = adn->dev;
2172
2173 if ((rt->dst.dev == dev || !dev) &&
2174 rt != adn->net->ipv6.ip6_null_entry)
2175 return -1;
2176
2177 return 0;
2178 }
2179
2180 void rt6_ifdown(struct net *net, struct net_device *dev)
2181 {
2182 struct arg_dev_net adn = {
2183 .dev = dev,
2184 .net = net,
2185 };
2186
2187 fib6_clean_all(net, fib6_ifdown, 0, &adn);
2188 icmp6_clean_all(fib6_ifdown, &adn);
2189 }
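/*
 * Returning -1 from fib6_ifdown() asks the FIB walker (fib6_clean_all())
 * to delete the entry, so this purges every route through the downed
 * device, or, with dev == NULL, every route that is not the null entry.
 */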
2190
2191 struct rt6_mtu_change_arg {
2192 struct net_device *dev;
2193 unsigned int mtu;
2194 };
2195
2196 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2197 {
2198 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2199 struct inet6_dev *idev;
2200
2201 /* In IPv6, PMTU discovery is not optional, so the
2202 RTAX_MTU lock cannot disable it.
2203 We still use this lock to block changes
2204 caused by addrconf/ndisc.
2205 */
2206
2207 idev = __in6_dev_get(arg->dev);
2208 if (!idev)
2209 return 0;
2210
2211 /* For an administrative MTU increase there is no way to discover an
2212 IPv6 PMTU increase, so any PMTU increase must be applied here.
2213 Since RFC 1981 does not cover administrative MTU increases,
2214 updating the PMTU on an increase is a MUST (e.g. jumbo frames).
2215 */
2216 /*
2217 If the new MTU is less than the route PMTU, the new MTU will be the
2218 lowest MTU in the path; update the route PMTU to reflect the
2219 decrease. If the new MTU is greater than the route PMTU, and the
2220 old MTU was the lowest MTU in the path, update the route PMTU to
2221 reflect the increase. In that case, if another node on the path
2222 still has the lowest MTU, its Packet Too Big message will trigger
2223 PMTU discovery again.
2224 */
2225 if (rt->dst.dev == arg->dev &&
2226 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2227 (dst_mtu(&rt->dst) >= arg->mtu ||
2228 (dst_mtu(&rt->dst) < arg->mtu &&
2229 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2230 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2231 }
2232 return 0;
2233 }
2234
2235 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2236 {
2237 struct rt6_mtu_change_arg arg = {
2238 .dev = dev,
2239 .mtu = mtu,
2240 };
2241
2242 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2243 }
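/*
 * Typically invoked from the address configuration code on a
 * NETDEV_CHANGEMTU event; every route on the device is then revisited
 * by rt6_mtu_change_route() above.
 */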
2244
2245 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2246 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
2247 [RTA_OIF] = { .type = NLA_U32 },
2248 [RTA_IIF] = { .type = NLA_U32 },
2249 [RTA_PRIORITY] = { .type = NLA_U32 },
2250 [RTA_METRICS] = { .type = NLA_NESTED },
2251 };
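/*
 * RTA_DST and RTA_SRC carry prefixes whose payload size depends on
 * rtm_dst_len/rtm_src_len, so they are not given a fixed length here;
 * their length is checked explicitly in rtm_to_fib6_config() below.
 */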
2252
2253 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2254 struct fib6_config *cfg)
2255 {
2256 struct rtmsg *rtm;
2257 struct nlattr *tb[RTA_MAX+1];
2258 int err;
2259
2260 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2261 if (err < 0)
2262 goto errout;
2263
2264 err = -EINVAL;
2265 rtm = nlmsg_data(nlh);
2266 memset(cfg, 0, sizeof(*cfg));
2267
2268 cfg->fc_table = rtm->rtm_table;
2269 cfg->fc_dst_len = rtm->rtm_dst_len;
2270 cfg->fc_src_len = rtm->rtm_src_len;
2271 cfg->fc_flags = RTF_UP;
2272 cfg->fc_protocol = rtm->rtm_protocol;
2273 cfg->fc_type = rtm->rtm_type;
2274
2275 if (rtm->rtm_type == RTN_UNREACHABLE ||
2276 rtm->rtm_type == RTN_BLACKHOLE ||
2277 rtm->rtm_type == RTN_PROHIBIT ||
2278 rtm->rtm_type == RTN_THROW)
2279 cfg->fc_flags |= RTF_REJECT;
2280
2281 if (rtm->rtm_type == RTN_LOCAL)
2282 cfg->fc_flags |= RTF_LOCAL;
2283
2284 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2285 cfg->fc_nlinfo.nlh = nlh;
2286 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2287
2288 if (tb[RTA_GATEWAY]) {
2289 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2290 cfg->fc_flags |= RTF_GATEWAY;
2291 }
2292
2293 if (tb[RTA_DST]) {
2294 int plen = (rtm->rtm_dst_len + 7) >> 3;
2295
2296 if (nla_len(tb[RTA_DST]) < plen)
2297 goto errout;
2298
2299 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2300 }
2301
2302 if (tb[RTA_SRC]) {
2303 int plen = (rtm->rtm_src_len + 7) >> 3;
2304
2305 if (nla_len(tb[RTA_SRC]) < plen)
2306 goto errout;
2307
2308 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2309 }
2310
2311 if (tb[RTA_PREFSRC])
2312 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2313
2314 if (tb[RTA_OIF])
2315 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2316
2317 if (tb[RTA_PRIORITY])
2318 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2319
2320 if (tb[RTA_METRICS]) {
2321 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2322 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2323 }
2324
2325 if (tb[RTA_TABLE])
2326 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2327
2328 err = 0;
2329 errout:
2330 return err;
2331 }
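/*
 * Minimal userspace sketch, for illustration only and not part of this
 * file, of an RTM_NEWROUTE request that the parser above would accept.
 * It assumes libmnl-style helpers purely for brevity; dst, gw and
 * ifindex are placeholders supplied by the caller:
 *
 *	struct nlmsghdr *nlh = mnl_nlmsg_put_header(buf);
 *	struct rtmsg *rtm;
 *
 *	nlh->nlmsg_type = RTM_NEWROUTE;
 *	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
 *	rtm = mnl_nlmsg_put_extra_header(nlh, sizeof(*rtm));
 *	rtm->rtm_family = AF_INET6;
 *	rtm->rtm_dst_len = 64;
 *	rtm->rtm_table = RT_TABLE_MAIN;
 *	rtm->rtm_protocol = RTPROT_STATIC;
 *	rtm->rtm_type = RTN_UNICAST;
 *	mnl_attr_put(nlh, RTA_DST, sizeof(struct in6_addr), &dst);
 *	mnl_attr_put(nlh, RTA_GATEWAY, sizeof(struct in6_addr), &gw);
 *	mnl_attr_put_u32(nlh, RTA_OIF, ifindex);
 */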
2332
2333 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2334 {
2335 struct fib6_config cfg;
2336 int err;
2337
2338 err = rtm_to_fib6_config(skb, nlh, &cfg);
2339 if (err < 0)
2340 return err;
2341
2342 return ip6_route_del(&cfg);
2343 }
2344
2345 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2346 {
2347 struct fib6_config cfg;
2348 int err;
2349
2350 err = rtm_to_fib6_config(skb, nlh, &cfg);
2351 if (err < 0)
2352 return err;
2353
2354 return ip6_route_add(&cfg);
2355 }
2356
2357 static inline size_t rt6_nlmsg_size(void)
2358 {
2359 return NLMSG_ALIGN(sizeof(struct rtmsg))
2360 + nla_total_size(16) /* RTA_SRC */
2361 + nla_total_size(16) /* RTA_DST */
2362 + nla_total_size(16) /* RTA_GATEWAY */
2363 + nla_total_size(16) /* RTA_PREFSRC */
2364 + nla_total_size(4) /* RTA_TABLE */
2365 + nla_total_size(4) /* RTA_IIF */
2366 + nla_total_size(4) /* RTA_OIF */
2367 + nla_total_size(4) /* RTA_PRIORITY */
2368 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2369 + nla_total_size(sizeof(struct rta_cacheinfo));
2370 }
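/*
 * Worst-case size of a single route notification; inet6_rt_notify()
 * below uses it to size the skb so that rt6_fill_node() cannot overflow
 * the message.
 */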
2371
2372 static int rt6_fill_node(struct net *net,
2373 struct sk_buff *skb, struct rt6_info *rt,
2374 struct in6_addr *dst, struct in6_addr *src,
2375 int iif, int type, u32 portid, u32 seq,
2376 int prefix, int nowait, unsigned int flags)
2377 {
2378 struct rtmsg *rtm;
2379 struct nlmsghdr *nlh;
2380 long expires;
2381 u32 table;
2382 struct neighbour *n;
2383
2384 if (prefix) { /* user wants prefix routes only */
2385 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2386 /* success since this is not a prefix route */
2387 return 1;
2388 }
2389 }
2390
2391 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2392 if (!nlh)
2393 return -EMSGSIZE;
2394
2395 rtm = nlmsg_data(nlh);
2396 rtm->rtm_family = AF_INET6;
2397 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2398 rtm->rtm_src_len = rt->rt6i_src.plen;
2399 rtm->rtm_tos = 0;
2400 if (rt->rt6i_table)
2401 table = rt->rt6i_table->tb6_id;
2402 else
2403 table = RT6_TABLE_UNSPEC;
2404 rtm->rtm_table = table;
2405 if (nla_put_u32(skb, RTA_TABLE, table))
2406 goto nla_put_failure;
2407 if (rt->rt6i_flags & RTF_REJECT) {
2408 switch (rt->dst.error) {
2409 case -EINVAL:
2410 rtm->rtm_type = RTN_BLACKHOLE;
2411 break;
2412 case -EACCES:
2413 rtm->rtm_type = RTN_PROHIBIT;
2414 break;
2415 case -EAGAIN:
2416 rtm->rtm_type = RTN_THROW;
2417 break;
2418 default:
2419 rtm->rtm_type = RTN_UNREACHABLE;
2420 break;
2421 }
2422 }
2423 else if (rt->rt6i_flags & RTF_LOCAL)
2424 rtm->rtm_type = RTN_LOCAL;
2425 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2426 rtm->rtm_type = RTN_LOCAL;
2427 else
2428 rtm->rtm_type = RTN_UNICAST;
2429 rtm->rtm_flags = 0;
2430 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2431 rtm->rtm_protocol = rt->rt6i_protocol;
2432 if (rt->rt6i_flags & RTF_DYNAMIC)
2433 rtm->rtm_protocol = RTPROT_REDIRECT;
2434 else if (rt->rt6i_flags & RTF_ADDRCONF) {
2435 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2436 rtm->rtm_protocol = RTPROT_RA;
2437 else
2438 rtm->rtm_protocol = RTPROT_KERNEL;
2439 }
2440
2441 if (rt->rt6i_flags & RTF_CACHE)
2442 rtm->rtm_flags |= RTM_F_CLONED;
2443
2444 if (dst) {
2445 if (nla_put(skb, RTA_DST, 16, dst))
2446 goto nla_put_failure;
2447 rtm->rtm_dst_len = 128;
2448 } else if (rtm->rtm_dst_len)
2449 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2450 goto nla_put_failure;
2451 #ifdef CONFIG_IPV6_SUBTREES
2452 if (src) {
2453 if (nla_put(skb, RTA_SRC, 16, src))
2454 goto nla_put_failure;
2455 rtm->rtm_src_len = 128;
2456 } else if (rtm->rtm_src_len &&
2457 nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2458 goto nla_put_failure;
2459 #endif
2460 if (iif) {
2461 #ifdef CONFIG_IPV6_MROUTE
2462 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2463 int err = ip6mr_get_route(net, skb, rtm, nowait);
2464 if (err <= 0) {
2465 if (!nowait) {
2466 if (err == 0)
2467 return 0;
2468 goto nla_put_failure;
2469 } else {
2470 if (err == -EMSGSIZE)
2471 goto nla_put_failure;
2472 }
2473 }
2474 } else
2475 #endif
2476 if (nla_put_u32(skb, RTA_IIF, iif))
2477 goto nla_put_failure;
2478 } else if (dst) {
2479 struct in6_addr saddr_buf;
2480 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2481 nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2482 goto nla_put_failure;
2483 }
2484
2485 if (rt->rt6i_prefsrc.plen) {
2486 struct in6_addr saddr_buf;
2487 saddr_buf = rt->rt6i_prefsrc.addr;
2488 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2489 goto nla_put_failure;
2490 }
2491
2492 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2493 goto nla_put_failure;
2494
2495 n = rt->n;
2496 if (n) {
2497 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0)
2498 goto nla_put_failure;
2499 }
2500
2501 if (rt->dst.dev &&
2502 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2503 goto nla_put_failure;
2504 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2505 goto nla_put_failure;
2506
2507 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2508
2509 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2510 goto nla_put_failure;
2511
2512 return nlmsg_end(skb, nlh);
2513
2514 nla_put_failure:
2515 nlmsg_cancel(skb, nlh);
2516 return -EMSGSIZE;
2517 }
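/*
 * Return convention: the nlmsg_end() length on success, 1 when a
 * prefix-only dump skips a non-prefix route (see rt6_dump_route()
 * below), and -EMSGSIZE when the attributes do not fit in the skb.
 */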
2518
2519 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2520 {
2521 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2522 int prefix;
2523
2524 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2525 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2526 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2527 } else
2528 prefix = 0;
2529
2530 return rt6_fill_node(arg->net,
2531 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2532 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2533 prefix, 0, NLM_F_MULTI);
2534 }
2535
2536 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2537 {
2538 struct net *net = sock_net(in_skb->sk);
2539 struct nlattr *tb[RTA_MAX+1];
2540 struct rt6_info *rt;
2541 struct sk_buff *skb;
2542 struct rtmsg *rtm;
2543 struct flowi6 fl6;
2544 int err, iif = 0, oif = 0;
2545
2546 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2547 if (err < 0)
2548 goto errout;
2549
2550 err = -EINVAL;
2551 memset(&fl6, 0, sizeof(fl6));
2552
2553 if (tb[RTA_SRC]) {
2554 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2555 goto errout;
2556
2557 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2558 }
2559
2560 if (tb[RTA_DST]) {
2561 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2562 goto errout;
2563
2564 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2565 }
2566
2567 if (tb[RTA_IIF])
2568 iif = nla_get_u32(tb[RTA_IIF]);
2569
2570 if (tb[RTA_OIF])
2571 oif = nla_get_u32(tb[RTA_OIF]);
2572
2573 if (iif) {
2574 struct net_device *dev;
2575 int flags = 0;
2576
2577 dev = __dev_get_by_index(net, iif);
2578 if (!dev) {
2579 err = -ENODEV;
2580 goto errout;
2581 }
2582
2583 fl6.flowi6_iif = iif;
2584
2585 if (!ipv6_addr_any(&fl6.saddr))
2586 flags |= RT6_LOOKUP_F_HAS_SADDR;
2587
2588 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2589 flags);
2590 } else {
2591 fl6.flowi6_oif = oif;
2592
2593 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2594 }
2595
2596 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2597 if (!skb) {
2598 dst_release(&rt->dst);
2599 err = -ENOBUFS;
2600 goto errout;
2601 }
2602
2603 /* Reserve room for dummy headers; this skb can pass
2604 through a good chunk of the routing engine.
2605 */
2606 skb_reset_mac_header(skb);
2607 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2608
2609 skb_dst_set(skb, &rt->dst);
2610
2611 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2612 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2613 nlh->nlmsg_seq, 0, 0, 0);
2614 if (err < 0) {
2615 kfree_skb(skb);
2616 goto errout;
2617 }
2618
2619 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2620 errout:
2621 return err;
2622 }
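/*
 * RTM_GETROUTE handler behind tools such as iproute2's "ip -6 route
 * get": with RTA_IIF set the lookup mimics the input path via
 * ip6_route_input_lookup(), otherwise the output path via
 * ip6_route_output(), and the result is echoed back through
 * rt6_fill_node().
 */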
2623
2624 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2625 {
2626 struct sk_buff *skb;
2627 struct net *net = info->nl_net;
2628 u32 seq;
2629 int err;
2630
2631 err = -ENOBUFS;
2632 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2633
2634 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2635 if (!skb)
2636 goto errout;
2637
2638 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2639 event, info->portid, seq, 0, 0, 0);
2640 if (err < 0) {
2641 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2642 WARN_ON(err == -EMSGSIZE);
2643 kfree_skb(skb);
2644 goto errout;
2645 }
2646 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2647 info->nlh, gfp_any());
2648 return;
2649 errout:
2650 if (err < 0)
2651 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2652 }
2653
2654 static int ip6_route_dev_notify(struct notifier_block *this,
2655 unsigned long event, void *data)
2656 {
2657 struct net_device *dev = (struct net_device *)data;
2658 struct net *net = dev_net(dev);
2659
2660 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2661 net->ipv6.ip6_null_entry->dst.dev = dev;
2662 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2663 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2664 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2665 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2666 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2667 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2668 #endif
2669 }
2670
2671 return NOTIFY_OK;
2672 }
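/*
 * The template routes are allocated before any device exists, so they
 * borrow the loopback device (and its inet6_dev) as soon as it is
 * registered; init_net is handled separately in ip6_route_init() below.
 */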
2673
2674 /*
2675 * /proc
2676 */
2677
2678 #ifdef CONFIG_PROC_FS
2679
2680 struct rt6_proc_arg
2681 {
2682 char *buffer;
2683 int offset;
2684 int length;
2685 int skip;
2686 int len;
2687 };
2688
2689 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2690 {
2691 struct seq_file *m = p_arg;
2692 struct neighbour *n;
2693
2694 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2695
2696 #ifdef CONFIG_IPV6_SUBTREES
2697 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2698 #else
2699 seq_puts(m, "00000000000000000000000000000000 00 ");
2700 #endif
2701 n = rt->n;
2702 if (n) {
2703 seq_printf(m, "%pi6", n->primary_key);
2704 } else {
2705 seq_puts(m, "00000000000000000000000000000000");
2706 }
2707 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2708 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2709 rt->dst.__use, rt->rt6i_flags,
2710 rt->dst.dev ? rt->dst.dev->name : "");
2711 return 0;
2712 }
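/*
 * Each /proc/net/ipv6_route line produced above therefore has the form
 * "dst plen src plen gateway metric refcnt use flags devname", e.g.
 * (sample values only):
 *
 *   fe800000000000000000000000000000 40 00000000000000000000000000000000 00 00000000000000000000000000000000 00000100 00000001 00000000 00000001 eth0
 */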
2713
2714 static int ipv6_route_show(struct seq_file *m, void *v)
2715 {
2716 struct net *net = (struct net *)m->private;
2717 fib6_clean_all_ro(net, rt6_info_route, 0, m);
2718 return 0;
2719 }
2720
2721 static int ipv6_route_open(struct inode *inode, struct file *file)
2722 {
2723 return single_open_net(inode, file, ipv6_route_show);
2724 }
2725
2726 static const struct file_operations ipv6_route_proc_fops = {
2727 .owner = THIS_MODULE,
2728 .open = ipv6_route_open,
2729 .read = seq_read,
2730 .llseek = seq_lseek,
2731 .release = single_release_net,
2732 };
2733
2734 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2735 {
2736 struct net *net = (struct net *)seq->private;
2737 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2738 net->ipv6.rt6_stats->fib_nodes,
2739 net->ipv6.rt6_stats->fib_route_nodes,
2740 net->ipv6.rt6_stats->fib_rt_alloc,
2741 net->ipv6.rt6_stats->fib_rt_entries,
2742 net->ipv6.rt6_stats->fib_rt_cache,
2743 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2744 net->ipv6.rt6_stats->fib_discarded_routes);
2745
2746 return 0;
2747 }
2748
2749 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2750 {
2751 return single_open_net(inode, file, rt6_stats_seq_show);
2752 }
2753
2754 static const struct file_operations rt6_stats_seq_fops = {
2755 .owner = THIS_MODULE,
2756 .open = rt6_stats_seq_open,
2757 .read = seq_read,
2758 .llseek = seq_lseek,
2759 .release = single_release_net,
2760 };
2761 #endif /* CONFIG_PROC_FS */
2762
2763 #ifdef CONFIG_SYSCTL
2764
2765 static
2766 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2767 void __user *buffer, size_t *lenp, loff_t *ppos)
2768 {
2769 struct net *net;
2770 int delay;
2771 if (!write)
2772 return -EINVAL;
2773
2774 net = (struct net *)ctl->extra1;
2775 delay = net->ipv6.sysctl.flush_delay;
2776 proc_dointvec(ctl, write, buffer, lenp, ppos);
2777 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2778 return 0;
2779 }
2780
2781 ctl_table ipv6_route_table_template[] = {
2782 {
2783 .procname = "flush",
2784 .data = &init_net.ipv6.sysctl.flush_delay,
2785 .maxlen = sizeof(int),
2786 .mode = 0200,
2787 .proc_handler = ipv6_sysctl_rtcache_flush
2788 },
2789 {
2790 .procname = "gc_thresh",
2791 .data = &ip6_dst_ops_template.gc_thresh,
2792 .maxlen = sizeof(int),
2793 .mode = 0644,
2794 .proc_handler = proc_dointvec,
2795 },
2796 {
2797 .procname = "max_size",
2798 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2799 .maxlen = sizeof(int),
2800 .mode = 0644,
2801 .proc_handler = proc_dointvec,
2802 },
2803 {
2804 .procname = "gc_min_interval",
2805 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2806 .maxlen = sizeof(int),
2807 .mode = 0644,
2808 .proc_handler = proc_dointvec_jiffies,
2809 },
2810 {
2811 .procname = "gc_timeout",
2812 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2813 .maxlen = sizeof(int),
2814 .mode = 0644,
2815 .proc_handler = proc_dointvec_jiffies,
2816 },
2817 {
2818 .procname = "gc_interval",
2819 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2820 .maxlen = sizeof(int),
2821 .mode = 0644,
2822 .proc_handler = proc_dointvec_jiffies,
2823 },
2824 {
2825 .procname = "gc_elasticity",
2826 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2827 .maxlen = sizeof(int),
2828 .mode = 0644,
2829 .proc_handler = proc_dointvec,
2830 },
2831 {
2832 .procname = "mtu_expires",
2833 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2834 .maxlen = sizeof(int),
2835 .mode = 0644,
2836 .proc_handler = proc_dointvec_jiffies,
2837 },
2838 {
2839 .procname = "min_adv_mss",
2840 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2841 .maxlen = sizeof(int),
2842 .mode = 0644,
2843 .proc_handler = proc_dointvec,
2844 },
2845 {
2846 .procname = "gc_min_interval_ms",
2847 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2848 .maxlen = sizeof(int),
2849 .mode = 0644,
2850 .proc_handler = proc_dointvec_ms_jiffies,
2851 },
2852 { }
2853 };
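/*
 * These entries appear under /proc/sys/net/ipv6/route/ (e.g.
 * /proc/sys/net/ipv6/route/max_size).  ipv6_route_sysctl_init() below
 * rewrites the .data pointers per network namespace, so the index of
 * each entry must stay in sync with this template.
 */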
2854
2855 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2856 {
2857 struct ctl_table *table;
2858
2859 table = kmemdup(ipv6_route_table_template,
2860 sizeof(ipv6_route_table_template),
2861 GFP_KERNEL);
2862
2863 if (table) {
2864 table[0].data = &net->ipv6.sysctl.flush_delay;
2865 table[0].extra1 = net;
2866 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2867 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2868 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2869 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2870 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2871 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2872 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2873 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2874 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2875 }
2876
2877 return table;
2878 }
2879 #endif
2880
2881 static int __net_init ip6_route_net_init(struct net *net)
2882 {
2883 int ret = -ENOMEM;
2884
2885 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2886 sizeof(net->ipv6.ip6_dst_ops));
2887
2888 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2889 goto out_ip6_dst_ops;
2890
2891 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2892 sizeof(*net->ipv6.ip6_null_entry),
2893 GFP_KERNEL);
2894 if (!net->ipv6.ip6_null_entry)
2895 goto out_ip6_dst_entries;
2896 net->ipv6.ip6_null_entry->dst.path =
2897 (struct dst_entry *)net->ipv6.ip6_null_entry;
2898 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2899 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2900 ip6_template_metrics, true);
2901
2902 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2903 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2904 sizeof(*net->ipv6.ip6_prohibit_entry),
2905 GFP_KERNEL);
2906 if (!net->ipv6.ip6_prohibit_entry)
2907 goto out_ip6_null_entry;
2908 net->ipv6.ip6_prohibit_entry->dst.path =
2909 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2910 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2911 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2912 ip6_template_metrics, true);
2913
2914 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2915 sizeof(*net->ipv6.ip6_blk_hole_entry),
2916 GFP_KERNEL);
2917 if (!net->ipv6.ip6_blk_hole_entry)
2918 goto out_ip6_prohibit_entry;
2919 net->ipv6.ip6_blk_hole_entry->dst.path =
2920 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2921 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2922 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2923 ip6_template_metrics, true);
2924 #endif
2925
2926 net->ipv6.sysctl.flush_delay = 0;
2927 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2928 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2929 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2930 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2931 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2932 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2933 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2934
2935 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2936
2937 ret = 0;
2938 out:
2939 return ret;
2940
2941 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2942 out_ip6_prohibit_entry:
2943 kfree(net->ipv6.ip6_prohibit_entry);
2944 out_ip6_null_entry:
2945 kfree(net->ipv6.ip6_null_entry);
2946 #endif
2947 out_ip6_dst_entries:
2948 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2949 out_ip6_dst_ops:
2950 goto out;
2951 }
2952
2953 static void __net_exit ip6_route_net_exit(struct net *net)
2954 {
2955 kfree(net->ipv6.ip6_null_entry);
2956 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2957 kfree(net->ipv6.ip6_prohibit_entry);
2958 kfree(net->ipv6.ip6_blk_hole_entry);
2959 #endif
2960 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2961 }
2962
2963 static int __net_init ip6_route_net_init_late(struct net *net)
2964 {
2965 #ifdef CONFIG_PROC_FS
2966 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2967 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2968 #endif
2969 return 0;
2970 }
2971
2972 static void __net_exit ip6_route_net_exit_late(struct net *net)
2973 {
2974 #ifdef CONFIG_PROC_FS
2975 proc_net_remove(net, "ipv6_route");
2976 proc_net_remove(net, "rt6_stats");
2977 #endif
2978 }
2979
2980 static struct pernet_operations ip6_route_net_ops = {
2981 .init = ip6_route_net_init,
2982 .exit = ip6_route_net_exit,
2983 };
2984
2985 static int __net_init ipv6_inetpeer_init(struct net *net)
2986 {
2987 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
2988
2989 if (!bp)
2990 return -ENOMEM;
2991 inet_peer_base_init(bp);
2992 net->ipv6.peers = bp;
2993 return 0;
2994 }
2995
2996 static void __net_exit ipv6_inetpeer_exit(struct net *net)
2997 {
2998 struct inet_peer_base *bp = net->ipv6.peers;
2999
3000 net->ipv6.peers = NULL;
3001 inetpeer_invalidate_tree(bp);
3002 kfree(bp);
3003 }
3004
3005 static struct pernet_operations ipv6_inetpeer_ops = {
3006 .init = ipv6_inetpeer_init,
3007 .exit = ipv6_inetpeer_exit,
3008 };
3009
3010 static struct pernet_operations ip6_route_net_late_ops = {
3011 .init = ip6_route_net_init_late,
3012 .exit = ip6_route_net_exit_late,
3013 };
3014
3015 static struct notifier_block ip6_route_dev_notifier = {
3016 .notifier_call = ip6_route_dev_notify,
3017 .priority = 0,
3018 };
3019
3020 int __init ip6_route_init(void)
3021 {
3022 int ret;
3023
3024 ret = -ENOMEM;
3025 ip6_dst_ops_template.kmem_cachep =
3026 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3027 SLAB_HWCACHE_ALIGN, NULL);
3028 if (!ip6_dst_ops_template.kmem_cachep)
3029 goto out;
3030
3031 ret = dst_entries_init(&ip6_dst_blackhole_ops);
3032 if (ret)
3033 goto out_kmem_cache;
3034
3035 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3036 if (ret)
3037 goto out_dst_entries;
3038
3039 ret = register_pernet_subsys(&ip6_route_net_ops);
3040 if (ret)
3041 goto out_register_inetpeer;
3042
3043 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3044
3045 /* The loopback device is registered before this code runs, so
3046 * the loopback reference in rt6_info has not been taken; take it
3047 * manually for init_net */
3048 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3049 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3050 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3051 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3052 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3053 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3054 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3055 #endif
3056 ret = fib6_init();
3057 if (ret)
3058 goto out_register_subsys;
3059
3060 ret = xfrm6_init();
3061 if (ret)
3062 goto out_fib6_init;
3063
3064 ret = fib6_rules_init();
3065 if (ret)
3066 goto xfrm6_init;
3067
3068 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3069 if (ret)
3070 goto fib6_rules_init;
3071
3072 ret = -ENOBUFS;
3073 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3074 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3075 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3076 goto out_register_late_subsys;
3077
3078 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3079 if (ret)
3080 goto out_register_late_subsys;
3081
3082 out:
3083 return ret;
3084
3085 out_register_late_subsys:
3086 unregister_pernet_subsys(&ip6_route_net_late_ops);
3087 fib6_rules_init:
3088 fib6_rules_cleanup();
3089 xfrm6_init:
3090 xfrm6_fini();
3091 out_fib6_init:
3092 fib6_gc_cleanup();
3093 out_register_subsys:
3094 unregister_pernet_subsys(&ip6_route_net_ops);
3095 out_register_inetpeer:
3096 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3097 out_dst_entries:
3098 dst_entries_destroy(&ip6_dst_blackhole_ops);
3099 out_kmem_cache:
3100 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3101 goto out;
3102 }
3103
3104 void ip6_route_cleanup(void)
3105 {
3106 unregister_netdevice_notifier(&ip6_route_dev_notifier);
3107 unregister_pernet_subsys(&ip6_route_net_late_ops);
3108 fib6_rules_cleanup();
3109 xfrm6_fini();
3110 fib6_gc_cleanup();
3111 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3112 unregister_pernet_subsys(&ip6_route_net_ops);
3113 dst_entries_destroy(&ip6_dst_blackhole_ops);
3114 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3115 }