Staging: Merge branch 'tidspbridge-for-2.6.39' of git://dev.omapzoom.org/pub/scm...
[deliverable/linux.git] / net / ipv6 / route.c
1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14 /* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
23 * Ville Nuorvala
24 * Fixed routing subtrees.
25 */
26
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/mroute6.h>
38 #include <linux/init.h>
39 #include <linux/if_arp.h>
40 #include <linux/proc_fs.h>
41 #include <linux/seq_file.h>
42 #include <linux/nsproxy.h>
43 #include <linux/slab.h>
44 #include <net/net_namespace.h>
45 #include <net/snmp.h>
46 #include <net/ipv6.h>
47 #include <net/ip6_fib.h>
48 #include <net/ip6_route.h>
49 #include <net/ndisc.h>
50 #include <net/addrconf.h>
51 #include <net/tcp.h>
52 #include <linux/rtnetlink.h>
53 #include <net/dst.h>
54 #include <net/xfrm.h>
55 #include <net/netevent.h>
56 #include <net/netlink.h>
57
58 #include <asm/uaccess.h>
59
60 #ifdef CONFIG_SYSCTL
61 #include <linux/sysctl.h>
62 #endif
63
64 /* Set to 3 to get tracing. */
65 #define RT6_DEBUG 2
66
/*
 * Debug helpers.  With RT6_DEBUG < 3 both macros expand to nothing,
 * so route tracing has zero cost in production builds.
 */
67 #if RT6_DEBUG >= 3
68 #define RDBG(x) printk x
69 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
70 #else
71 #define RDBG(x)
72 #define RT6_TRACE(x...) do { ; } while (0)
73 #endif
74
75 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
76 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
77 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
78 static unsigned int ip6_default_mtu(const struct dst_entry *dst);
79 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80 static void ip6_dst_destroy(struct dst_entry *);
81 static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
83 static int ip6_dst_gc(struct dst_ops *ops);
84
85 static int ip6_pkt_discard(struct sk_buff *skb);
86 static int ip6_pkt_discard_out(struct sk_buff *skb);
87 static void ip6_link_failure(struct sk_buff *skb);
88 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89
90 #ifdef CONFIG_IPV6_ROUTE_INFO
91 static struct rt6_info *rt6_add_route_info(struct net *net,
92 struct in6_addr *prefix, int prefixlen,
93 struct in6_addr *gwaddr, int ifindex,
94 unsigned pref);
95 static struct rt6_info *rt6_get_route_info(struct net *net,
96 struct in6_addr *prefix, int prefixlen,
97 struct in6_addr *gwaddr, int ifindex);
98 #endif
99
/*
 * dst_ops template for ordinary IPv6 routes; copied into each network
 * namespace (net->ipv6.ip6_dst_ops) at init time.
 */
100 static struct dst_ops ip6_dst_ops_template = {
101 .family = AF_INET6,
102 .protocol = cpu_to_be16(ETH_P_IPV6),
103 .gc = ip6_dst_gc,
104 .gc_thresh = 1024,
105 .check = ip6_dst_check,
106 .default_advmss = ip6_default_advmss,
107 .default_mtu = ip6_default_mtu,
108 .destroy = ip6_dst_destroy,
109 .ifdown = ip6_dst_ifdown,
110 .negative_advice = ip6_negative_advice,
111 .link_failure = ip6_link_failure,
112 .update_pmtu = ip6_rt_update_pmtu,
113 .local_out = __ip6_local_out,
114 };
115
/* Blackhole copies ignore PMTU updates entirely (empty callback). */
116 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
117 {
118 }
119
/* dst_ops used for the blackhole copies made by ip6_dst_blackhole(). */
120 static struct dst_ops ip6_dst_blackhole_ops = {
121 .family = AF_INET6,
122 .protocol = cpu_to_be16(ETH_P_IPV6),
123 .destroy = ip6_dst_destroy,
124 .check = ip6_dst_check,
125 .update_pmtu = ip6_rt_blackhole_update_pmtu,
126 };
127
/* Fallback "no route" entry: discards packets with -ENETUNREACH. */
128 static struct rt6_info ip6_null_entry_template = {
129 .dst = {
130 .__refcnt = ATOMIC_INIT(1),
131 .__use = 1,
132 .obsolete = -1,
133 .error = -ENETUNREACH,
134 .input = ip6_pkt_discard,
135 .output = ip6_pkt_discard_out,
136 },
137 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
138 .rt6i_protocol = RTPROT_KERNEL,
139 .rt6i_metric = ~(u32) 0,
140 .rt6i_ref = ATOMIC_INIT(1),
141 };
142
143 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
144
145 static int ip6_pkt_prohibit(struct sk_buff *skb);
146 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
147
/* Policy-routing "prohibit" entry: discards packets with -EACCES. */
148 static struct rt6_info ip6_prohibit_entry_template = {
149 .dst = {
150 .__refcnt = ATOMIC_INIT(1),
151 .__use = 1,
152 .obsolete = -1,
153 .error = -EACCES,
154 .input = ip6_pkt_prohibit,
155 .output = ip6_pkt_prohibit_out,
156 },
157 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
158 .rt6i_protocol = RTPROT_KERNEL,
159 .rt6i_metric = ~(u32) 0,
160 .rt6i_ref = ATOMIC_INIT(1),
161 };
162
/* Policy-routing "blackhole" entry: silently discards (-EINVAL). */
163 static struct rt6_info ip6_blk_hole_entry_template = {
164 .dst = {
165 .__refcnt = ATOMIC_INIT(1),
166 .__use = 1,
167 .obsolete = -1,
168 .error = -EINVAL,
169 .input = dst_discard,
170 .output = dst_discard,
171 },
172 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
173 .rt6i_protocol = RTPROT_KERNEL,
174 .rt6i_metric = ~(u32) 0,
175 .rt6i_ref = ATOMIC_INIT(1),
176 };
177
178 #endif
179
/* Allocate a fresh rt6_info backed by the given dst_ops. */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	struct rt6_info *rt = (struct rt6_info *)dst_alloc(ops);

	return rt;
}
185
/*
 * dst_ops.destroy callback: drop the references this route holds on its
 * inet6_dev and (for cached host routes) its inet_peer.
 */
186 static void ip6_dst_destroy(struct dst_entry *dst)
187 {
188 struct rt6_info *rt = (struct rt6_info *)dst;
189 struct inet6_dev *idev = rt->rt6i_idev;
190 struct inet_peer *peer = rt->rt6i_peer;
191
192 if (idev != NULL) {
193 rt->rt6i_idev = NULL;
194 in6_dev_put(idev);
195 }
196 if (peer) {
/* Only RTF_CACHE routes ever bind a peer; see rt6_bind_peer(). */
197 BUG_ON(!(rt->rt6i_flags & RTF_CACHE));
198 rt->rt6i_peer = NULL;
199 inet_putpeer(peer);
200 }
201 }
202
/*
 * Attach an inet_peer (long-lived per-destination state) to a cached
 * route.  @create selects whether a missing peer entry is allocated.
 * A concurrent binder may win the cmpxchg; the loser drops its
 * reference.  Only valid on RTF_CACHE routes.
 */
203 void rt6_bind_peer(struct rt6_info *rt, int create)
204 {
205 struct inet_peer *peer;
206
207 if (WARN_ON(!(rt->rt6i_flags & RTF_CACHE)))
208 return;
209
210 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
211 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
212 inet_putpeer(peer);
213 }
214
/*
 * dst_ops.ifdown callback: @dev is going away.  Re-home the route's
 * inet6_dev reference onto the namespace loopback device so the dst
 * can safely outlive the vanishing device.
 */
215 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
216 int how)
217 {
218 struct rt6_info *rt = (struct rt6_info *)dst;
219 struct inet6_dev *idev = rt->rt6i_idev;
220 struct net_device *loopback_dev =
221 dev_net(dev)->loopback_dev;
222
223 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
224 struct inet6_dev *loopback_idev =
225 in6_dev_get(loopback_dev);
226 if (loopback_idev != NULL) {
227 rt->rt6i_idev = loopback_idev;
228 in6_dev_put(idev);
229 }
230 }
231 }
232
233 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
234 {
235 return (rt->rt6i_flags & RTF_EXPIRES) &&
236 time_after(jiffies, rt->rt6i_expires);
237 }
238
239 static inline int rt6_need_strict(struct in6_addr *daddr)
240 {
241 return ipv6_addr_type(daddr) &
242 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
243 }
244
245 /*
246 * Route lookup. Any table->tb6_lock is implied.
247 */
248
/*
 * Walk the same-prefix route list starting at @rt and pick the entry
 * that best matches the requested output interface (@oif) or, when no
 * oif is given, the source address @saddr.  Returns @rt unchanged when
 * neither constraint applies.  Caller holds table->tb6_lock.
 */
249 static inline struct rt6_info *rt6_device_match(struct net *net,
250 struct rt6_info *rt,
251 struct in6_addr *saddr,
252 int oif,
253 int flags)
254 {
255 struct rt6_info *local = NULL;
256 struct rt6_info *sprt;
257
258 if (!oif && ipv6_addr_any(saddr))
259 goto out;
260
261 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
262 struct net_device *dev = sprt->rt6i_dev;
263
264 if (oif) {
265 if (dev->ifindex == oif)
266 return sprt;
/* Remember loopback routes as a possible fallback answer. */
267 if (dev->flags & IFF_LOOPBACK) {
268 if (sprt->rt6i_idev == NULL ||
269 sprt->rt6i_idev->dev->ifindex != oif) {
270 if (flags & RT6_LOOKUP_F_IFACE && oif)
271 continue;
272 if (local && (!oif ||
273 local->rt6i_idev->dev->ifindex == oif))
274 continue;
275 }
276 local = sprt;
277 }
278 } else {
/* No oif given: match on the source address instead. */
279 if (ipv6_chk_addr(net, saddr, dev,
280 flags & RT6_LOOKUP_F_IFACE))
281 return sprt;
282 }
283 }
284
285 if (oif) {
286 if (local)
287 return local;
288
/* Strict interface match was demanded but nothing matched. */
289 if (flags & RT6_LOOKUP_F_IFACE)
290 return net->ipv6.ip6_null_entry;
291 }
292 out:
293 return rt;
294 }
295
296 #ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Kick off a neighbour-discovery probe of @rt's next hop when its
 * reachability is unknown, rate-limited by rtr_probe_interval.
 * Tolerates rt == NULL and a missing neighbour entry.
 */
297 static void rt6_probe(struct rt6_info *rt)
298 {
299 struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
300 /*
301 * Okay, this does not seem to be appropriate
302 * for now, however, we need to check if it
303 * is really so; aka Router Reachability Probing.
304 *
305 * Router Reachability Probe MUST be rate-limited
306 * to no more than one per minute.
307 */
308 if (!neigh || (neigh->nud_state & NUD_VALID))
309 return;
310 read_lock_bh(&neigh->lock);
/* Re-check under the neighbour lock before committing to a probe. */
311 if (!(neigh->nud_state & NUD_VALID) &&
312 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
313 struct in6_addr mcaddr;
314 struct in6_addr *target;
315
316 neigh->updated = jiffies;
317 read_unlock_bh(&neigh->lock);
318
/* Solicit the router via its solicited-node multicast address. */
319 target = (struct in6_addr *)&neigh->primary_key;
320 addrconf_addr_solict_mult(target, &mcaddr);
321 ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
322 } else
323 read_unlock_bh(&neigh->lock);
324 }
325 #else
326 static inline void rt6_probe(struct rt6_info *rt)
327 {
328 }
329 #endif
330
331 /*
332 * Default Router Selection (RFC 2461 6.3.6)
333 */
334 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
335 {
336 struct net_device *dev = rt->rt6i_dev;
337 if (!oif || dev->ifindex == oif)
338 return 2;
339 if ((dev->flags & IFF_LOOPBACK) &&
340 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
341 return 1;
342 return 0;
343 }
344
/*
 * Reachability score for @rt's next hop:
 *   2 - neighbour is in a NUD_VALID state,
 *   1 - no gateway needed, or reachability unknown,
 *   0 - no neighbour entry (or NUD_FAILED with router preferences).
 */
345 static inline int rt6_check_neigh(struct rt6_info *rt)
346 {
347 struct neighbour *neigh = rt->rt6i_nexthop;
348 int m;
349 if (rt->rt6i_flags & RTF_NONEXTHOP ||
350 !(rt->rt6i_flags & RTF_GATEWAY))
351 m = 1;
352 else if (neigh) {
353 read_lock_bh(&neigh->lock);
354 if (neigh->nud_state & NUD_VALID)
355 m = 2;
356 #ifdef CONFIG_IPV6_ROUTER_PREF
357 else if (neigh->nud_state & NUD_FAILED)
358 m = 0;
359 #endif
360 else
361 m = 1;
362 read_unlock_bh(&neigh->lock);
363 } else
364 m = 0;
365 return m;
366 }
367
368 static int rt6_score_route(struct rt6_info *rt, int oif,
369 int strict)
370 {
371 int m, n;
372
373 m = rt6_check_dev(rt, oif);
374 if (!m && (strict & RT6_LOOKUP_F_IFACE))
375 return -1;
376 #ifdef CONFIG_IPV6_ROUTER_PREF
377 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
378 #endif
379 n = rt6_check_neigh(rt);
380 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
381 return -1;
382 return m;
383 }
384
385 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
386 int *mpri, struct rt6_info *match)
387 {
388 int m;
389
390 if (rt6_check_expired(rt))
391 goto out;
392
393 m = rt6_score_route(rt, oif, strict);
394 if (m < 0)
395 goto out;
396
397 if (m > *mpri) {
398 if (strict & RT6_LOOKUP_F_REACHABLE)
399 rt6_probe(match);
400 *mpri = m;
401 match = rt;
402 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
403 rt6_probe(rt);
404 }
405
406 out:
407 return match;
408 }
409
410 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
411 struct rt6_info *rr_head,
412 u32 metric, int oif, int strict)
413 {
414 struct rt6_info *rt, *match;
415 int mpri = -1;
416
417 match = NULL;
418 for (rt = rr_head; rt && rt->rt6i_metric == metric;
419 rt = rt->dst.rt6_next)
420 match = find_match(rt, oif, strict, &mpri, match);
421 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
422 rt = rt->dst.rt6_next)
423 match = find_match(rt, oif, strict, &mpri, match);
424
425 return match;
426 }
427
/*
 * Default router selection (RFC 2461 6.3.6): pick the best route among
 * the lowest-metric entries at @fn.  When nothing matched under the
 * reachability requirement, advance the node's round-robin pointer so a
 * different router is tried next time.  Caller holds table->tb6_lock.
 */
428 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
429 {
430 struct rt6_info *match, *rt0;
431 struct net *net;
432
433 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
434 __func__, fn->leaf, oif);
435
436 rt0 = fn->rr_ptr;
437 if (!rt0)
438 fn->rr_ptr = rt0 = fn->leaf;
439
440 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
441
442 if (!match &&
443 (strict & RT6_LOOKUP_F_REACHABLE)) {
444 struct rt6_info *next = rt0->dst.rt6_next;
445
446 /* no entries matched; do round-robin */
447 if (!next || next->rt6i_metric != rt0->rt6i_metric)
448 next = fn->leaf;
449
450 if (next != rt0)
451 fn->rr_ptr = next;
452 }
453
454 RT6_TRACE("%s() => %p\n",
455 __func__, match);
456
457 net = dev_net(rt0->rt6i_dev);
458 return match ? match : net->ipv6.ip6_null_entry;
459 }
460
461 #ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option received in a Router Advertisement
 * (RFC 4191): validate it, then add, refresh or withdraw the matching
 * RTF_ROUTEINFO route for @gwaddr on @dev.  Returns 0 on success or
 * -EINVAL for a malformed option.
 */
462 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
463 struct in6_addr *gwaddr)
464 {
465 struct net *net = dev_net(dev);
466 struct route_info *rinfo = (struct route_info *) opt;
467 struct in6_addr prefix_buf, *prefix;
468 unsigned int pref;
469 unsigned long lifetime;
470 struct rt6_info *rt;
471
472 if (len < sizeof(struct route_info)) {
473 return -EINVAL;
474 }
475
476 /* Sanity check for prefix_len and length */
477 if (rinfo->length > 3) {
478 return -EINVAL;
479 } else if (rinfo->prefix_len > 128) {
480 return -EINVAL;
481 } else if (rinfo->prefix_len > 64) {
482 if (rinfo->length < 2) {
483 return -EINVAL;
484 }
485 } else if (rinfo->prefix_len > 0) {
486 if (rinfo->length < 1) {
487 return -EINVAL;
488 }
489 }
490
491 pref = rinfo->route_pref;
492 if (pref == ICMPV6_ROUTER_PREF_INVALID)
493 return -EINVAL;
494
495 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
496
/* length == 3 means the full 128-bit prefix is present in the option. */
497 if (rinfo->length == 3)
498 prefix = (struct in6_addr *)rinfo->prefix;
499 else {
500 /* this function is safe */
501 ipv6_addr_prefix(&prefix_buf,
502 (struct in6_addr *)rinfo->prefix,
503 rinfo->prefix_len);
504 prefix = &prefix_buf;
505 }
506
507 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
508 dev->ifindex);
509
/* A zero lifetime withdraws an existing route. */
510 if (rt && !lifetime) {
511 ip6_del_rt(rt);
512 rt = NULL;
513 }
514
515 if (!rt && lifetime)
516 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
517 pref);
518 else if (rt)
519 rt->rt6i_flags = RTF_ROUTEINFO |
520 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
521
522 if (rt) {
523 if (!addrconf_finite_timeout(lifetime)) {
524 rt->rt6i_flags &= ~RTF_EXPIRES;
525 } else {
526 rt->rt6i_expires = jiffies + HZ * lifetime;
527 rt->rt6i_flags |= RTF_EXPIRES;
528 }
529 dst_release(&rt->dst);
530 }
531 return 0;
532 }
533 #endif
534
/*
 * After a lookup that resolved to the null entry, climb back towards
 * the tree root (descending into source-address subtrees on the way)
 * until a node carrying routes (RTN_RTINFO) is found, then jump to the
 * caller's "restart" label to retry selection there; bail out via "out"
 * at the tree root.  Requires local labels "restart"/"out" and the
 * variables fn/rt in the expansion scope.  (No comments inside the
 * macro body — the line continuations must stay intact.)
 */
535 #define BACKTRACK(__net, saddr) \
536 do { \
537 if (rt == __net->ipv6.ip6_null_entry) { \
538 struct fib6_node *pn; \
539 while (1) { \
540 if (fn->fn_flags & RTN_TL_ROOT) \
541 goto out; \
542 pn = fn->parent; \
543 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
544 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
545 else \
546 fn = pn; \
547 if (fn->fn_flags & RTN_RTINFO) \
548 goto restart; \
549 } \
550 } \
551 } while(0)
552
/*
 * Simple (non-cloning) policy lookup: find the fib node for the flow,
 * pick the route matching the device/source constraints, and backtrack
 * towards the root if only the null entry matched.  The returned route
 * has its use count and timestamp bumped via dst_use().
 */
553 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
554 struct fib6_table *table,
555 struct flowi *fl, int flags)
556 {
557 struct fib6_node *fn;
558 struct rt6_info *rt;
559
560 read_lock_bh(&table->tb6_lock);
561 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
562 restart:
563 rt = fn->leaf;
564 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
565 BACKTRACK(net, &fl->fl6_src);
566 out:
567 dst_use(&rt->dst, jiffies);
568 read_unlock_bh(&table->tb6_lock);
569 return rt;
570
571 }
572
573 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
574 const struct in6_addr *saddr, int oif, int strict)
575 {
576 struct flowi fl = {
577 .oif = oif,
578 .fl6_dst = *daddr,
579 };
580 struct dst_entry *dst;
581 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
582
583 if (saddr) {
584 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
585 flags |= RT6_LOOKUP_F_HAS_SADDR;
586 }
587
588 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
589 if (dst->error == 0)
590 return (struct rt6_info *) dst;
591
592 dst_release(dst);
593
594 return NULL;
595 }
596
597 EXPORT_SYMBOL(rt6_lookup);
598
599 /* ip6_ins_rt is called with FREE table->tb6_lock.
600 It takes new route entry, the addition fails by any reason the
601 route is freed. In any case, if caller does not hold it, it may
602 be destroyed.
603 */
604
605 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
606 {
607 int err;
608 struct fib6_table *table;
609
610 table = rt->rt6i_table;
611 write_lock_bh(&table->tb6_lock);
612 err = fib6_add(&table->tb6_root, rt, info);
613 write_unlock_bh(&table->tb6_lock);
614
615 return err;
616 }
617
618 int ip6_ins_rt(struct rt6_info *rt)
619 {
620 struct nl_info info = {
621 .nl_net = dev_net(rt->rt6i_dev),
622 };
623 return __ip6_ins_rt(rt, &info);
624 }
625
/*
 * Make a host (/128) RTF_CACHE copy of @ort for @daddr/@saddr and bind
 * a neighbour entry for its gateway.  On neighbour-table overflow one
 * forced GC pass is attempted (only when not in softirq context) before
 * giving up.  Returns the new route, or NULL on failure.
 */
626 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
627 struct in6_addr *saddr)
628 {
629 struct rt6_info *rt;
630
631 /*
632 * Clone the route.
633 */
634
635 rt = ip6_rt_copy(ort);
636
637 if (rt) {
638 struct neighbour *neigh;
/* Retry budget for neighbour allocation: one outside softirq, else none. */
639 int attempts = !in_softirq();
640
641 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
642 if (rt->rt6i_dst.plen != 128 &&
643 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
644 rt->rt6i_flags |= RTF_ANYCAST;
645 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
646 }
647
648 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
649 rt->rt6i_dst.plen = 128;
650 rt->rt6i_flags |= RTF_CACHE;
651 rt->dst.flags |= DST_HOST;
652
653 #ifdef CONFIG_IPV6_SUBTREES
654 if (rt->rt6i_src.plen && saddr) {
655 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
656 rt->rt6i_src.plen = 128;
657 }
658 #endif
659
660 retry:
661 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
662 if (IS_ERR(neigh)) {
663 struct net *net = dev_net(rt->rt6i_dev);
664 int saved_rt_min_interval =
665 net->ipv6.sysctl.ip6_rt_gc_min_interval;
666 int saved_rt_elasticity =
667 net->ipv6.sysctl.ip6_rt_gc_elasticity;
668
/* Temporarily make GC maximally aggressive, run it, then restore. */
669 if (attempts-- > 0) {
670 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
671 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
672
673 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
674
675 net->ipv6.sysctl.ip6_rt_gc_elasticity =
676 saved_rt_elasticity;
677 net->ipv6.sysctl.ip6_rt_gc_min_interval =
678 saved_rt_min_interval;
679 goto retry;
680 }
681
682 if (net_ratelimit())
683 printk(KERN_WARNING
684 "ipv6: Neighbour table overflow.\n");
685 dst_free(&rt->dst);
686 return NULL;
687 }
688 rt->rt6i_nexthop = neigh;
689
690 }
691
692 return rt;
693 }
694
695 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
696 {
697 struct rt6_info *rt = ip6_rt_copy(ort);
698 if (rt) {
699 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
700 rt->rt6i_dst.plen = 128;
701 rt->rt6i_flags |= RTF_CACHE;
702 rt->dst.flags |= DST_HOST;
703 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
704 }
705 return rt;
706 }
707
/*
 * Core routing lookup with cloning: select the best route and, unless
 * it is already a cached host entry, insert a per-destination clone
 * (rt6_alloc_cow / rt6_alloc_clone).  On non-forwarding hosts the first
 * pass demands a reachable router; if that fails, the lookup is redone
 * without the reachability requirement.  Returns a held rt6_info
 * (possibly the null entry), never NULL.
 */
708 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
709 struct flowi *fl, int flags)
710 {
711 struct fib6_node *fn;
712 struct rt6_info *rt, *nrt;
713 int strict = 0;
714 int attempts = 3;
715 int err;
716 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
717
718 strict |= flags & RT6_LOOKUP_F_IFACE;
719
720 relookup:
721 read_lock_bh(&table->tb6_lock);
722
723 restart_2:
724 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
725
726 restart:
727 rt = rt6_select(fn, oif, strict | reachable);
728
729 BACKTRACK(net, &fl->fl6_src);
730 if (rt == net->ipv6.ip6_null_entry ||
731 rt->rt6i_flags & RTF_CACHE)
732 goto out;
733
/* Drop the table lock while allocating the clone. */
734 dst_hold(&rt->dst);
735 read_unlock_bh(&table->tb6_lock);
736
737 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
738 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
739 else
740 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
741
742 dst_release(&rt->dst);
743 rt = nrt ? : net->ipv6.ip6_null_entry;
744
745 dst_hold(&rt->dst);
746 if (nrt) {
747 err = ip6_ins_rt(nrt);
748 if (!err)
749 goto out2;
750 }
751
752 if (--attempts <= 0)
753 goto out2;
754
755 /*
756 * Race condition! In the gap, when table->tb6_lock was
757 * released someone could insert this route. Relookup.
758 */
759 dst_release(&rt->dst);
760 goto relookup;
761
762 out:
763 if (reachable) {
764 reachable = 0;
765 goto restart_2;
766 }
767 dst_hold(&rt->dst);
768 read_unlock_bh(&table->tb6_lock);
769 out2:
770 rt->dst.lastuse = jiffies;
771 rt->dst.__use++;
772
773 return rt;
774 }
775
/* fib-rule lookup callback for input: route on the incoming interface. */
776 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
777 struct flowi *fl, int flags)
778 {
779 return ip6_pol_route(net, table, fl->iif, fl, flags);
780 }
781
/*
 * Attach a route (skb dst) to an incoming packet based on its IPv6
 * header and the receiving device.
 */
782 void ip6_route_input(struct sk_buff *skb)
783 {
784 struct ipv6hdr *iph = ipv6_hdr(skb);
785 struct net *net = dev_net(skb->dev);
786 int flags = RT6_LOOKUP_F_HAS_SADDR;
787 struct flowi fl = {
788 .iif = skb->dev->ifindex,
789 .fl6_dst = iph->daddr,
790 .fl6_src = iph->saddr,
791 .fl6_flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
792 .mark = skb->mark,
793 .proto = iph->nexthdr,
794 };
795
/* The PIM register pseudo-device is exempt from strict iface matching. */
796 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
797 flags |= RT6_LOOKUP_F_IFACE;
798
799 skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input));
800 }
801
/* fib-rule lookup callback for output: route on the outgoing interface. */
802 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
803 struct flowi *fl, int flags)
804 {
805 return ip6_pol_route(net, table, fl->oif, fl, flags);
806 }
807
808 struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
809 struct flowi *fl)
810 {
811 int flags = 0;
812
813 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst))
814 flags |= RT6_LOOKUP_F_IFACE;
815
816 if (!ipv6_addr_any(&fl->fl6_src))
817 flags |= RT6_LOOKUP_F_HAS_SADDR;
818 else if (sk)
819 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
820
821 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
822 }
823
824 EXPORT_SYMBOL(ip6_route_output);
825
/*
 * Replace *dstp with a "blackhole" copy of itself: same metrics, device
 * and addresses, but input/output discard packets and PMTU updates are
 * ignored (ip6_dst_blackhole_ops).  Returns 0 on success or -ENOMEM if
 * the copy could not be allocated (then *dstp is NULL).
 */
826 int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
827 {
828 struct rt6_info *ort = (struct rt6_info *) *dstp;
829 struct rt6_info *rt = (struct rt6_info *)
830 dst_alloc(&ip6_dst_blackhole_ops);
831 struct dst_entry *new = NULL;
832
833 if (rt) {
834 new = &rt->dst;
835
836 atomic_set(&new->__refcnt, 1);
837 new->__use = 1;
838 new->input = dst_discard;
839 new->output = dst_discard;
840
841 dst_copy_metrics(new, &ort->dst);
842 new->dev = ort->dst.dev;
843 if (new->dev)
844 dev_hold(new->dev);
845 rt->rt6i_idev = ort->rt6i_idev;
846 if (rt->rt6i_idev)
847 in6_dev_hold(rt->rt6i_idev);
848 rt->rt6i_expires = 0;
849
850 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
851 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
852 rt->rt6i_metric = 0;
853
854 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
855 #ifdef CONFIG_IPV6_SUBTREES
856 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
857 #endif
858
/* Hand the entry over to dst GC — it lives in no routing table. */
859 dst_free(new);
860 }
861
862 dst_release(*dstp);
863 *dstp = new;
864 return new ? 0 : -ENOMEM;
865 }
866 EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
867
868 /*
869 * Destination cache support functions
870 */
871
872 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
873 {
874 struct rt6_info *rt;
875
876 rt = (struct rt6_info *) dst;
877
878 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
879 return dst;
880
881 return NULL;
882 }
883
884 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
885 {
886 struct rt6_info *rt = (struct rt6_info *) dst;
887
888 if (rt) {
889 if (rt->rt6i_flags & RTF_CACHE) {
890 if (rt6_check_expired(rt)) {
891 ip6_del_rt(rt);
892 dst = NULL;
893 }
894 } else {
895 dst_release(dst);
896 dst = NULL;
897 }
898 }
899 return dst;
900 }
901
902 static void ip6_link_failure(struct sk_buff *skb)
903 {
904 struct rt6_info *rt;
905
906 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
907
908 rt = (struct rt6_info *) skb_dst(skb);
909 if (rt) {
910 if (rt->rt6i_flags&RTF_CACHE) {
911 dst_set_expires(&rt->dst, 0);
912 rt->rt6i_flags |= RTF_EXPIRES;
913 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
914 rt->rt6i_node->fn_sernum = -1;
915 }
916 }
917
/*
 * dst_ops.update_pmtu callback: lower the route MTU after an ICMPv6
 * Packet Too Big.  MTUs below IPV6_MIN_MTU are clamped and the route is
 * marked to fragment everything (RTAX_FEATURE_ALLFRAG).
 */
918 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
919 {
920 struct rt6_info *rt6 = (struct rt6_info*)dst;
921
/* Only host (plen == 128) entries carry a per-destination MTU. */
922 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
923 rt6->rt6i_flags |= RTF_MODIFIED;
924 if (mtu < IPV6_MIN_MTU) {
925 u32 features = dst_metric(dst, RTAX_FEATURES);
926 mtu = IPV6_MIN_MTU;
927 features |= RTAX_FEATURE_ALLFRAG;
928 dst_metric_set(dst, RTAX_FEATURES, features);
929 }
930 dst_metric_set(dst, RTAX_MTU, mtu);
931 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
932 }
933 }
934
935 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
936 {
937 struct net_device *dev = dst->dev;
938 unsigned int mtu = dst_mtu(dst);
939 struct net *net = dev_net(dev);
940
941 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
942
943 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
944 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
945
946 /*
947 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
948 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
949 * IPV6_MAXPLEN is also valid and means: "any MSS,
950 * rely only on pmtu discovery"
951 */
952 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
953 mtu = IPV6_MAXPLEN;
954 return mtu;
955 }
956
957 static unsigned int ip6_default_mtu(const struct dst_entry *dst)
958 {
959 unsigned int mtu = IPV6_MIN_MTU;
960 struct inet6_dev *idev;
961
962 rcu_read_lock();
963 idev = __in6_dev_get(dst->dev);
964 if (idev)
965 mtu = idev->cnf.mtu6;
966 rcu_read_unlock();
967
968 return mtu;
969 }
970
/* dst entries allocated for ICMPv6/NDISC live on this list (they are in
 * no fib table) and are reaped by icmp6_dst_gc(); icmp6_dst_lock
 * protects the list. */
971 static struct dst_entry *icmp6_dst_gc_list;
972 static DEFINE_SPINLOCK(icmp6_dst_lock);
973
974 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
975 struct neighbour *neigh,
976 const struct in6_addr *addr)
977 {
978 struct rt6_info *rt;
979 struct inet6_dev *idev = in6_dev_get(dev);
980 struct net *net = dev_net(dev);
981
982 if (unlikely(idev == NULL))
983 return NULL;
984
985 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
986 if (unlikely(rt == NULL)) {
987 in6_dev_put(idev);
988 goto out;
989 }
990
991 dev_hold(dev);
992 if (neigh)
993 neigh_hold(neigh);
994 else {
995 neigh = ndisc_get_neigh(dev, addr);
996 if (IS_ERR(neigh))
997 neigh = NULL;
998 }
999
1000 rt->rt6i_dev = dev;
1001 rt->rt6i_idev = idev;
1002 rt->rt6i_nexthop = neigh;
1003 atomic_set(&rt->dst.__refcnt, 1);
1004 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1005 rt->dst.output = ip6_output;
1006
1007 #if 0 /* there's no chance to use these for ndisc */
1008 rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
1009 ? DST_HOST
1010 : 0;
1011 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1012 rt->rt6i_dst.plen = 128;
1013 #endif
1014
1015 spin_lock_bh(&icmp6_dst_lock);
1016 rt->dst.next = icmp6_dst_gc_list;
1017 icmp6_dst_gc_list = &rt->dst;
1018 spin_unlock_bh(&icmp6_dst_lock);
1019
1020 fib6_force_start_gc(net);
1021
1022 out:
1023 return &rt->dst;
1024 }
1025
1026 int icmp6_dst_gc(void)
1027 {
1028 struct dst_entry *dst, *next, **pprev;
1029 int more = 0;
1030
1031 next = NULL;
1032
1033 spin_lock_bh(&icmp6_dst_lock);
1034 pprev = &icmp6_dst_gc_list;
1035
1036 while ((dst = *pprev) != NULL) {
1037 if (!atomic_read(&dst->__refcnt)) {
1038 *pprev = dst->next;
1039 dst_free(dst);
1040 } else {
1041 pprev = &dst->next;
1042 ++more;
1043 }
1044 }
1045
1046 spin_unlock_bh(&icmp6_dst_lock);
1047
1048 return more;
1049 }
1050
/*
 * Apply @func(rt, @arg) to every entry on the ICMPv6 dst list, unlinking
 * and freeing those for which it returns non-zero.  Used when devices or
 * namespaces go away.
 */
1051 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1052 void *arg)
1053 {
1054 struct dst_entry *dst, **pprev;
1055
1056 spin_lock_bh(&icmp6_dst_lock);
1057 pprev = &icmp6_dst_gc_list;
1058 while ((dst = *pprev) != NULL) {
1059 struct rt6_info *rt = (struct rt6_info *) dst;
1060 if (func(rt, arg)) {
1061 *pprev = dst->next;
1062 dst_free(dst);
1063 } else {
1064 pprev = &dst->next;
1065 }
1066 }
1067 spin_unlock_bh(&icmp6_dst_lock);
1068 }
1069
/*
 * dst_ops.gc callback: run the fib6 garbage collector when enough time
 * has passed since the last run or the table is over rt_max_size.
 * ip6_rt_gc_expire grows across invocations (more aggressive aging)
 * and decays by 1/2^elasticity at the end.  Returns non-zero while the
 * table is still over the limit.
 */
1070 static int ip6_dst_gc(struct dst_ops *ops)
1071 {
1072 unsigned long now = jiffies;
1073 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1074 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1075 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1076 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1077 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1078 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1079 int entries;
1080
1081 entries = dst_entries_get_fast(ops);
1082 if (time_after(rt_last_gc + rt_min_interval, now) &&
1083 entries <= rt_max_size)
1084 goto out;
1085
1086 net->ipv6.ip6_rt_gc_expire++;
1087 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1088 net->ipv6.ip6_rt_last_gc = now;
1089 entries = dst_entries_get_slow(ops);
1090 if (entries < ops->gc_thresh)
1091 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1092 out:
1093 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1094 return entries > rt_max_size;
1095 }
1096
1097 /* Clean host part of a prefix. Not necessary in radix tree,
1098 but results in cleaner routing tables.
1099
1100 Remove it only when all the things will work!
1101 */
1102
1103 int ip6_dst_hoplimit(struct dst_entry *dst)
1104 {
1105 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1106 if (hoplimit == 0) {
1107 struct net_device *dev = dst->dev;
1108 struct inet6_dev *idev;
1109
1110 rcu_read_lock();
1111 idev = __in6_dev_get(dev);
1112 if (idev)
1113 hoplimit = idev->cnf.hop_limit;
1114 else
1115 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1116 rcu_read_unlock();
1117 }
1118 return hoplimit;
1119 }
1120 EXPORT_SYMBOL(ip6_dst_hoplimit);
1121
1122 /*
1123 *
1124 */
1125
1126 int ip6_route_add(struct fib6_config *cfg)
1127 {
1128 int err;
1129 struct net *net = cfg->fc_nlinfo.nl_net;
1130 struct rt6_info *rt = NULL;
1131 struct net_device *dev = NULL;
1132 struct inet6_dev *idev = NULL;
1133 struct fib6_table *table;
1134 int addr_type;
1135
1136 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1137 return -EINVAL;
1138 #ifndef CONFIG_IPV6_SUBTREES
1139 if (cfg->fc_src_len)
1140 return -EINVAL;
1141 #endif
1142 if (cfg->fc_ifindex) {
1143 err = -ENODEV;
1144 dev = dev_get_by_index(net, cfg->fc_ifindex);
1145 if (!dev)
1146 goto out;
1147 idev = in6_dev_get(dev);
1148 if (!idev)
1149 goto out;
1150 }
1151
1152 if (cfg->fc_metric == 0)
1153 cfg->fc_metric = IP6_RT_PRIO_USER;
1154
1155 table = fib6_new_table(net, cfg->fc_table);
1156 if (table == NULL) {
1157 err = -ENOBUFS;
1158 goto out;
1159 }
1160
1161 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1162
1163 if (rt == NULL) {
1164 err = -ENOMEM;
1165 goto out;
1166 }
1167
1168 rt->dst.obsolete = -1;
1169 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1170 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1171 0;
1172
1173 if (cfg->fc_protocol == RTPROT_UNSPEC)
1174 cfg->fc_protocol = RTPROT_BOOT;
1175 rt->rt6i_protocol = cfg->fc_protocol;
1176
1177 addr_type = ipv6_addr_type(&cfg->fc_dst);
1178
1179 if (addr_type & IPV6_ADDR_MULTICAST)
1180 rt->dst.input = ip6_mc_input;
1181 else if (cfg->fc_flags & RTF_LOCAL)
1182 rt->dst.input = ip6_input;
1183 else
1184 rt->dst.input = ip6_forward;
1185
1186 rt->dst.output = ip6_output;
1187
1188 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1189 rt->rt6i_dst.plen = cfg->fc_dst_len;
1190 if (rt->rt6i_dst.plen == 128)
1191 rt->dst.flags = DST_HOST;
1192
1193 #ifdef CONFIG_IPV6_SUBTREES
1194 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1195 rt->rt6i_src.plen = cfg->fc_src_len;
1196 #endif
1197
1198 rt->rt6i_metric = cfg->fc_metric;
1199
1200 /* We cannot add true routes via loopback here,
1201 they would result in kernel looping; promote them to reject routes
1202 */
1203 if ((cfg->fc_flags & RTF_REJECT) ||
1204 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1205 && !(cfg->fc_flags&RTF_LOCAL))) {
1206 /* hold loopback dev/idev if we haven't done so. */
1207 if (dev != net->loopback_dev) {
1208 if (dev) {
1209 dev_put(dev);
1210 in6_dev_put(idev);
1211 }
1212 dev = net->loopback_dev;
1213 dev_hold(dev);
1214 idev = in6_dev_get(dev);
1215 if (!idev) {
1216 err = -ENODEV;
1217 goto out;
1218 }
1219 }
1220 rt->dst.output = ip6_pkt_discard_out;
1221 rt->dst.input = ip6_pkt_discard;
1222 rt->dst.error = -ENETUNREACH;
1223 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1224 goto install_route;
1225 }
1226
1227 if (cfg->fc_flags & RTF_GATEWAY) {
1228 struct in6_addr *gw_addr;
1229 int gwa_type;
1230
1231 gw_addr = &cfg->fc_gateway;
1232 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1233 gwa_type = ipv6_addr_type(gw_addr);
1234
1235 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1236 struct rt6_info *grt;
1237
1238 /* IPv6 strictly inhibits using not link-local
1239 addresses as nexthop address.
1240 Otherwise, router will not able to send redirects.
1241 It is very good, but in some (rare!) circumstances
1242 (SIT, PtP, NBMA NOARP links) it is handy to allow
1243 some exceptions. --ANK
1244 */
1245 err = -EINVAL;
1246 if (!(gwa_type&IPV6_ADDR_UNICAST))
1247 goto out;
1248
1249 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1250
1251 err = -EHOSTUNREACH;
1252 if (grt == NULL)
1253 goto out;
1254 if (dev) {
1255 if (dev != grt->rt6i_dev) {
1256 dst_release(&grt->dst);
1257 goto out;
1258 }
1259 } else {
1260 dev = grt->rt6i_dev;
1261 idev = grt->rt6i_idev;
1262 dev_hold(dev);
1263 in6_dev_hold(grt->rt6i_idev);
1264 }
1265 if (!(grt->rt6i_flags&RTF_GATEWAY))
1266 err = 0;
1267 dst_release(&grt->dst);
1268
1269 if (err)
1270 goto out;
1271 }
1272 err = -EINVAL;
1273 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1274 goto out;
1275 }
1276
1277 err = -ENODEV;
1278 if (dev == NULL)
1279 goto out;
1280
1281 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1282 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1283 if (IS_ERR(rt->rt6i_nexthop)) {
1284 err = PTR_ERR(rt->rt6i_nexthop);
1285 rt->rt6i_nexthop = NULL;
1286 goto out;
1287 }
1288 }
1289
1290 rt->rt6i_flags = cfg->fc_flags;
1291
1292 install_route:
1293 if (cfg->fc_mx) {
1294 struct nlattr *nla;
1295 int remaining;
1296
1297 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1298 int type = nla_type(nla);
1299
1300 if (type) {
1301 if (type > RTAX_MAX) {
1302 err = -EINVAL;
1303 goto out;
1304 }
1305
1306 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1307 }
1308 }
1309 }
1310
1311 rt->dst.dev = dev;
1312 rt->rt6i_idev = idev;
1313 rt->rt6i_table = table;
1314
1315 cfg->fc_nlinfo.nl_net = dev_net(dev);
1316
1317 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1318
1319 out:
1320 if (dev)
1321 dev_put(dev);
1322 if (idev)
1323 in6_dev_put(idev);
1324 if (rt)
1325 dst_free(&rt->dst);
1326 return err;
1327 }
1328
/*
 * Unlink @rt from its FIB table.
 *
 * The shared null entry is refused (-ENOENT): it is a sentinel and must
 * never be removed.  Otherwise returns the result of fib6_del().
 *
 * The caller holds a reference on @rt (e.g. via dst_hold()); it is
 * dropped here while tb6_lock is still held.
 */
static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
{
	int err;
	struct fib6_table *table;
	struct net *net = dev_net(rt->rt6i_dev);

	if (rt == net->ipv6.ip6_null_entry)
		return -ENOENT;

	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);

	err = fib6_del(rt, info);
	/* Drop the caller's reference under the table lock. */
	dst_release(&rt->dst);

	write_unlock_bh(&table->tb6_lock);

	return err;
}
1348
1349 int ip6_del_rt(struct rt6_info *rt)
1350 {
1351 struct nl_info info = {
1352 .nl_net = dev_net(rt->rt6i_dev),
1353 };
1354 return __ip6_del_rt(rt, &info);
1355 }
1356
/*
 * Delete the route described by @cfg (RTM_DELROUTE / SIOCDELRT path).
 *
 * Matching is by destination/source prefix (fib6_locate), and then,
 * when given, by interface index, gateway address and metric.  Only the
 * first matching route is removed.  Returns -ESRCH when no route
 * matches.
 */
static int ip6_route_del(struct fib6_config *cfg)
{
	struct fib6_table *table;
	struct fib6_node *fn;
	struct rt6_info *rt;
	int err = -ESRCH;

	/* Unknown table id: nothing to delete. */
	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
	if (table == NULL)
		return err;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_locate(&table->tb6_root,
			 &cfg->fc_dst, cfg->fc_dst_len,
			 &cfg->fc_src, cfg->fc_src_len);

	if (fn) {
		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
			if (cfg->fc_ifindex &&
			    (rt->rt6i_dev == NULL ||
			     rt->rt6i_dev->ifindex != cfg->fc_ifindex))
				continue;
			if (cfg->fc_flags & RTF_GATEWAY &&
			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
				continue;
			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
				continue;
			/* Take a reference and drop the read lock before
			 * __ip6_del_rt(), which takes the write lock and
			 * releases the reference. */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);

			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
		}
	}
	read_unlock_bh(&table->tb6_lock);

	return err;
}
1395
/*
 * Handle redirects
 */

/* Flow key extended with the redirecting router's address, so that
 * __ip6_route_redirect() can check the redirect really came from the
 * current next hop (RFC 2461 requirement). */
struct ip6rd_flowi {
	struct flowi fl;
	struct in6_addr gateway;
};
1403
/*
 * Per-table lookup used by ip6_route_redirect(): find the route
 * currently used for the redirected destination and verify that the
 * redirect came from that route's gateway.  Returns the matching route
 * (with a reference held) or the namespace null entry when the redirect
 * must be ignored.
 */
static struct rt6_info *__ip6_route_redirect(struct net *net,
					     struct fib6_table *table,
					     struct flowi *fl,
					     int flags)
{
	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
	struct rt6_info *rt;
	struct fib6_node *fn;

	/*
	 * Get the "current" route for this destination and
	 * check if the redirect has come from approriate router.
	 *
	 * RFC 2461 specifies that redirects should only be
	 * accepted if they come from the nexthop to the target.
	 * Due to the way the routes are chosen, this notion
	 * is a bit fuzzy and one might need to check all possible
	 * routes.
	 */

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
restart:
	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		/*
		 * Current route is on-link; redirect is always invalid.
		 *
		 * Seems, previous statement is not true. It could
		 * be node, which looks for us as on-link (f.e. proxy ndisc)
		 * But then router serving it might decide, that we should
		 * know truth 8)8) --ANK (980726).
		 */
		if (rt6_check_expired(rt))
			continue;
		if (!(rt->rt6i_flags & RTF_GATEWAY))
			continue;
		if (fl->oif != rt->rt6i_dev->ifindex)
			continue;
		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
			continue;
		break;
	}

	/* No acceptable route: fall back to the null entry.  BACKTRACK
	 * may jump back to "restart" in a less specific subtree, or to
	 * "out" when the search is exhausted. */
	if (!rt)
		rt = net->ipv6.ip6_null_entry;
	BACKTRACK(net, &fl->fl6_src);
out:
	dst_hold(&rt->dst);

	read_unlock_bh(&table->tb6_lock);

	return rt;
};
1457
1458 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1459 struct in6_addr *src,
1460 struct in6_addr *gateway,
1461 struct net_device *dev)
1462 {
1463 int flags = RT6_LOOKUP_F_HAS_SADDR;
1464 struct net *net = dev_net(dev);
1465 struct ip6rd_flowi rdfl = {
1466 .fl = {
1467 .oif = dev->ifindex,
1468 .fl6_dst = *dest,
1469 .fl6_src = *src,
1470 },
1471 };
1472
1473 ipv6_addr_copy(&rdfl.gateway, gateway);
1474
1475 if (rt6_need_strict(dest))
1476 flags |= RT6_LOOKUP_F_IFACE;
1477
1478 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
1479 flags, __ip6_route_redirect);
1480 }
1481
/*
 * Accept a validated ICMPv6 redirect: refresh the neighbour cache from
 * @lladdr and install a cached /128 route to @dest via the new next hop
 * @neigh (or directly on-link when @on_link is set).
 */
void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
		  struct in6_addr *saddr,
		  struct neighbour *neigh, u8 *lladdr, int on_link)
{
	struct rt6_info *rt, *nrt = NULL;
	struct netevent_redirect netevent;
	struct net *net = dev_net(neigh->dev);

	/* Returns the null entry unless the redirect came from the
	 * router currently used for @dest. */
	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);

	if (rt == net->ipv6.ip6_null_entry) {
		if (net_ratelimit())
			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
			       "for redirect target\n");
		goto out;
	}

	/*
	 * We have finally decided to accept it.
	 */

	neigh_update(neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER))
		     );

	/*
	 * Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);

	/* Duplicate redirect: silently ignore. */
	if (neigh == rt->dst.neighbour)
		goto out;

	/* Clone the current route into a cached host route through the
	 * new gateway. */
	nrt = ip6_rt_copy(rt);
	if (nrt == NULL)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
	nrt->rt6i_dst.plen = 128;
	nrt->dst.flags |= DST_HOST;

	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
	nrt->rt6i_nexthop = neigh_clone(neigh);

	if (ip6_ins_rt(nrt))
		goto out;

	netevent.old = &rt->dst;
	netevent.new = &nrt->dst;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

	/* Remove the superseded cached entry; ip6_del_rt() consumes the
	 * reference taken by the lookup, so return directly. */
	if (rt->rt6i_flags&RTF_CACHE) {
		ip6_del_rt(rt);
		return;
	}

out:
	dst_release(&rt->dst);
}
1551
1552 /*
1553 * Handle ICMP "packet too big" messages
1554 * i.e. Path MTU discovery
1555 */
1556
/*
 * Apply a Packet Too Big report for @daddr: lower the path MTU on the
 * route that would carry such traffic (restricted to @ifindex when
 * non-zero).  A cached host route is updated in place; otherwise a
 * host clone/COW copy carrying the reduced MTU is installed with an
 * expiry so the PMTU can later recover (RFC 1981).
 */
static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
			     struct net *net, u32 pmtu, int ifindex)
{
	struct rt6_info *rt, *nrt;
	int allfrag = 0;
again:
	rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
	if (rt == NULL)
		return;

	/* Purge a stale entry and retry so we act on a live route. */
	if (rt6_check_expired(rt)) {
		ip6_del_rt(rt);
		goto again;
	}

	/* Only MTU decreases are accepted from the network. */
	if (pmtu >= dst_mtu(&rt->dst))
		goto out;

	if (pmtu < IPV6_MIN_MTU) {
		/*
		 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
		 * MTU (1280) and a fragment header should always be included
		 * after a node receiving Too Big message reporting PMTU is
		 * less than the IPv6 Minimum Link MTU.
		 */
		pmtu = IPV6_MIN_MTU;
		allfrag = 1;
	}

	/* New mtu received -> path was valid.
	   They are sent only in response to data packets,
	   so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);

	/* Host route. If it is static, it would be better
	   not to override it, but add new one, so that
	   when cache entry will expire old pmtu
	   would return automatically.
	 */
	if (rt->rt6i_flags & RTF_CACHE) {
		dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
		if (allfrag) {
			u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
			features |= RTAX_FEATURE_ALLFRAG;
			dst_metric_set(&rt->dst, RTAX_FEATURES, features);
		}
		dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
		goto out;
	}

	/* Network route.
	   Two cases are possible:
	   1. It is connected route. Action: COW
	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
	 */
	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
		nrt = rt6_alloc_cow(rt, daddr, saddr);
	else
		nrt = rt6_alloc_clone(rt, daddr);

	if (nrt) {
		dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
		if (allfrag) {
			u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
			features |= RTAX_FEATURE_ALLFRAG;
			dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
		}

		/* According to RFC 1981, detecting PMTU increase shouldn't be
		 * happened within 5 mins, the recommended timer is 10 mins.
		 * Here this route expiration time is set to ip6_rt_mtu_expires
		 * which is 10 mins. After 10 mins the decreased pmtu is expired
		 * and detecting PMTU increase will be automatically happened.
		 */
		dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;

		ip6_ins_rt(nrt);
	}
out:
	dst_release(&rt->dst);
}
1641
1642 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1643 struct net_device *dev, u32 pmtu)
1644 {
1645 struct net *net = dev_net(dev);
1646
1647 /*
1648 * RFC 1981 states that a node "MUST reduce the size of the packets it
1649 * is sending along the path" that caused the Packet Too Big message.
1650 * Since it's not possible in the general case to determine which
1651 * interface was used to send the original packet, we update the MTU
1652 * on the interface that will be used to send future packets. We also
1653 * update the MTU on the interface that received the Packet Too Big in
1654 * case the original packet was forced out that interface with
1655 * SO_BINDTODEVICE or similar. This is the next best thing to the
1656 * correct behaviour, which would be to update the MTU on all
1657 * interfaces.
1658 */
1659 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1660 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1661 }
1662
1663 /*
1664 * Misc support functions
1665 */
1666
1667 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1668 {
1669 struct net *net = dev_net(ort->rt6i_dev);
1670 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1671
1672 if (rt) {
1673 rt->dst.input = ort->dst.input;
1674 rt->dst.output = ort->dst.output;
1675
1676 dst_copy_metrics(&rt->dst, &ort->dst);
1677 rt->dst.error = ort->dst.error;
1678 rt->dst.dev = ort->dst.dev;
1679 if (rt->dst.dev)
1680 dev_hold(rt->dst.dev);
1681 rt->rt6i_idev = ort->rt6i_idev;
1682 if (rt->rt6i_idev)
1683 in6_dev_hold(rt->rt6i_idev);
1684 rt->dst.lastuse = jiffies;
1685 rt->rt6i_expires = 0;
1686
1687 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1688 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1689 rt->rt6i_metric = 0;
1690
1691 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1692 #ifdef CONFIG_IPV6_SUBTREES
1693 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1694 #endif
1695 rt->rt6i_table = ort->rt6i_table;
1696 }
1697 return rt;
1698 }
1699
1700 #ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Find the RTF_ROUTEINFO route for @prefix/@prefixlen learned from
 * router @gwaddr on interface @ifindex in the RT6_TABLE_INFO table.
 * Returns the route with a reference held, or NULL if none exists.
 */
static struct rt6_info *rt6_get_route_info(struct net *net,
					   struct in6_addr *prefix, int prefixlen,
					   struct in6_addr *gwaddr, int ifindex)
{
	struct fib6_node *fn;
	struct rt6_info *rt = NULL;
	struct fib6_table *table;

	table = fib6_get_table(net, RT6_TABLE_INFO);
	if (table == NULL)
		return NULL;

	write_lock_bh(&table->tb6_lock);
	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
	if (!fn)
		goto out;

	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_dev->ifindex != ifindex)
			continue;
		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
			continue;
		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
			continue;
		/* Found: take a reference before leaving the lock. */
		dst_hold(&rt->dst);
		break;
	}
out:
	write_unlock_bh(&table->tb6_lock);
	return rt;
}
1732
1733 static struct rt6_info *rt6_add_route_info(struct net *net,
1734 struct in6_addr *prefix, int prefixlen,
1735 struct in6_addr *gwaddr, int ifindex,
1736 unsigned pref)
1737 {
1738 struct fib6_config cfg = {
1739 .fc_table = RT6_TABLE_INFO,
1740 .fc_metric = IP6_RT_PRIO_USER,
1741 .fc_ifindex = ifindex,
1742 .fc_dst_len = prefixlen,
1743 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1744 RTF_UP | RTF_PREF(pref),
1745 .fc_nlinfo.pid = 0,
1746 .fc_nlinfo.nlh = NULL,
1747 .fc_nlinfo.nl_net = net,
1748 };
1749
1750 ipv6_addr_copy(&cfg.fc_dst, prefix);
1751 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1752
1753 /* We should treat it as a default route if prefix length is 0. */
1754 if (!prefixlen)
1755 cfg.fc_flags |= RTF_DEFAULT;
1756
1757 ip6_route_add(&cfg);
1758
1759 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1760 }
1761 #endif
1762
/*
 * Find the RA-learned default route via gateway @addr on @dev in the
 * RT6_TABLE_DFLT table.  Returns it with a reference held, or NULL.
 */
struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
{
	struct rt6_info *rt;
	struct fib6_table *table;

	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
	if (table == NULL)
		return NULL;

	write_lock_bh(&table->tb6_lock);
	/* Default routes all hang off the root node's leaf chain. */
	for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
		if (dev == rt->rt6i_dev &&
		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
			break;
	}
	if (rt)
		dst_hold(&rt->dst);
	write_unlock_bh(&table->tb6_lock);
	return rt;
}
1784
1785 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1786 struct net_device *dev,
1787 unsigned int pref)
1788 {
1789 struct fib6_config cfg = {
1790 .fc_table = RT6_TABLE_DFLT,
1791 .fc_metric = IP6_RT_PRIO_USER,
1792 .fc_ifindex = dev->ifindex,
1793 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1794 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1795 .fc_nlinfo.pid = 0,
1796 .fc_nlinfo.nlh = NULL,
1797 .fc_nlinfo.nl_net = dev_net(dev),
1798 };
1799
1800 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1801
1802 ip6_route_add(&cfg);
1803
1804 return rt6_get_dflt_router(gwaddr, dev);
1805 }
1806
/*
 * Remove every router-advertised (RTF_DEFAULT/RTF_ADDRCONF) route from
 * the default-route table of @net.
 */
void rt6_purge_dflt_routers(struct net *net)
{
	struct rt6_info *rt;
	struct fib6_table *table;

	/* NOTE: Keep consistent with rt6_get_dflt_router */
	table = fib6_get_table(net, RT6_TABLE_DFLT);
	if (table == NULL)
		return;

restart:
	read_lock_bh(&table->tb6_lock);
	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
			/* ip6_del_rt() needs the write lock, so take a
			 * reference, drop the read lock, delete, and
			 * rescan from the top. */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			ip6_del_rt(rt);
			goto restart;
		}
	}
	read_unlock_bh(&table->tb6_lock);
}
1829
1830 static void rtmsg_to_fib6_config(struct net *net,
1831 struct in6_rtmsg *rtmsg,
1832 struct fib6_config *cfg)
1833 {
1834 memset(cfg, 0, sizeof(*cfg));
1835
1836 cfg->fc_table = RT6_TABLE_MAIN;
1837 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1838 cfg->fc_metric = rtmsg->rtmsg_metric;
1839 cfg->fc_expires = rtmsg->rtmsg_info;
1840 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1841 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1842 cfg->fc_flags = rtmsg->rtmsg_flags;
1843
1844 cfg->fc_nlinfo.nl_net = net;
1845
1846 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1847 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1848 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1849 }
1850
1851 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1852 {
1853 struct fib6_config cfg;
1854 struct in6_rtmsg rtmsg;
1855 int err;
1856
1857 switch(cmd) {
1858 case SIOCADDRT: /* Add a route */
1859 case SIOCDELRT: /* Delete a route */
1860 if (!capable(CAP_NET_ADMIN))
1861 return -EPERM;
1862 err = copy_from_user(&rtmsg, arg,
1863 sizeof(struct in6_rtmsg));
1864 if (err)
1865 return -EFAULT;
1866
1867 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1868
1869 rtnl_lock();
1870 switch (cmd) {
1871 case SIOCADDRT:
1872 err = ip6_route_add(&cfg);
1873 break;
1874 case SIOCDELRT:
1875 err = ip6_route_del(&cfg);
1876 break;
1877 default:
1878 err = -EINVAL;
1879 }
1880 rtnl_unlock();
1881
1882 return err;
1883 }
1884
1885 return -EINVAL;
1886 }
1887
1888 /*
1889 * Drop the packet on the floor
1890 */
1891
1892 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1893 {
1894 int type;
1895 struct dst_entry *dst = skb_dst(skb);
1896 switch (ipstats_mib_noroutes) {
1897 case IPSTATS_MIB_INNOROUTES:
1898 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1899 if (type == IPV6_ADDR_ANY) {
1900 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1901 IPSTATS_MIB_INADDRERRORS);
1902 break;
1903 }
1904 /* FALLTHROUGH */
1905 case IPSTATS_MIB_OUTNOROUTES:
1906 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1907 ipstats_mib_noroutes);
1908 break;
1909 }
1910 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1911 kfree_skb(skb);
1912 return 0;
1913 }
1914
/* dst input handler for reject routes: drop with "no route". */
static int ip6_pkt_discard(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}
1919
/* dst output handler for reject routes: set skb->dev to the route's
 * device before dropping (used downstream for stats/ICMP — NOTE:
 * confirm exact consumer) and drop with "no route". */
static int ip6_pkt_discard_out(struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
}
1925
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/* dst input handler for prohibit routes: drop with
 * "administratively prohibited". */
static int ip6_pkt_prohibit(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}

/* Output-path counterpart of ip6_pkt_prohibit(). */
static int ip6_pkt_prohibit_out(struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
}

#endif
1940
1941 /*
1942 * Allocate a dst for local (unicast / anycast) address.
1943 */
1944
struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
				    const struct in6_addr *addr,
				    int anycast)
{
	struct net *net = dev_net(idev->dev);
	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
	struct neighbour *neigh;

	if (rt == NULL) {
		if (net_ratelimit())
			pr_warning("IPv6: Maximum number of routes reached,"
				   " consider increasing route/max_size.\n");
		return ERR_PTR(-ENOMEM);
	}

	/* The route is bound to the loopback device but accounts against
	 * @idev; hold both before publishing them in the route. */
	dev_hold(net->loopback_dev);
	in6_dev_hold(idev);

	rt->dst.flags = DST_HOST;
	rt->dst.input = ip6_input;
	rt->dst.output = ip6_output;
	rt->rt6i_dev = net->loopback_dev;
	rt->rt6i_idev = idev;
	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1);
	rt->dst.obsolete = -1;

	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
	if (anycast)
		rt->rt6i_flags |= RTF_ANYCAST;
	else
		rt->rt6i_flags |= RTF_LOCAL;
	neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
	if (IS_ERR(neigh)) {
		dst_free(&rt->dst);

		/* We are casting this because that is the return
		 * value type.  But an errno encoded pointer is the
		 * same regardless of the underlying pointer type,
		 * and that's what we are returning.  So this is OK.
		 */
		return (struct rt6_info *) neigh;
	}
	rt->rt6i_nexthop = neigh;

	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
	rt->rt6i_dst.plen = 128;
	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);

	/* Return with one reference held for the caller. */
	atomic_set(&rt->dst.__refcnt, 1);

	return rt;
}
1997
/* Argument bundle for fib6_ifdown(): the device being torn down (NULL
 * means "all devices") and the namespace whose null entry must be
 * preserved. */
struct arg_dev_net {
	struct net_device *dev;
	struct net *net;
};
2002
2003 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2004 {
2005 const struct arg_dev_net *adn = arg;
2006 const struct net_device *dev = adn->dev;
2007
2008 if ((rt->rt6i_dev == dev || dev == NULL) &&
2009 rt != adn->net->ipv6.ip6_null_entry) {
2010 RT6_TRACE("deleted by ifdown %p\n", rt);
2011 return -1;
2012 }
2013 return 0;
2014 }
2015
2016 void rt6_ifdown(struct net *net, struct net_device *dev)
2017 {
2018 struct arg_dev_net adn = {
2019 .dev = dev,
2020 .net = net,
2021 };
2022
2023 fib6_clean_all(net, fib6_ifdown, 0, &adn);
2024 icmp6_clean_all(fib6_ifdown, &adn);
2025 }
2026
/* Argument bundle for rt6_mtu_change_route(): the device whose MTU
 * changed and its new value. */
struct rt6_mtu_change_arg
{
	struct net_device *dev;
	unsigned mtu;
};
2032
/*
 * fib6_clean_all() callback for an administrative device MTU change:
 * propagate the new MTU into matching routes' RTAX_MTU metric.
 * Always returns 0 (never deletes routes).
 */
static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
	struct inet6_dev *idev;

	/* In IPv6 pmtu discovery is not optional,
	   so that RTAX_MTU lock cannot disable it.
	   We still use this lock to block changes
	   caused by addrconf/ndisc.
	*/

	idev = __in6_dev_get(arg->dev);
	if (idev == NULL)
		return 0;

	/* For administrative MTU increase, there is no way to discover
	   IPv6 PMTU increase, so PMTU increase should be updated here.
	   Since RFC 1981 doesn't include administrative MTU increase
	   update PMTU increase is a MUST. (i.e. jumbo frame)
	 */
	/*
	   If new MTU is less than route PMTU, this new MTU will be the
	   lowest MTU in the path, update the route PMTU to reflect PMTU
	   decreases; if new MTU is greater than route PMTU, and the
	   old MTU is the lowest MTU in the path, update the route PMTU
	   to reflect the increase. In this case if the other nodes' MTU
	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
	   PMTU discouvery.
	 */
	if (rt->rt6i_dev == arg->dev &&
	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
	    (dst_mtu(&rt->dst) >= arg->mtu ||
	     (dst_mtu(&rt->dst) < arg->mtu &&
	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
		dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
	}
	return 0;
}
2071
2072 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2073 {
2074 struct rt6_mtu_change_arg arg = {
2075 .dev = dev,
2076 .mtu = mtu,
2077 };
2078
2079 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2080 }
2081
/* Netlink attribute validation policy for IPv6 route requests.
 * RTA_DST/RTA_SRC are deliberately absent here: their length depends on
 * the prefix length and is checked in rtm_to_fib6_config(). */
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
	[RTA_OIF]               = { .type = NLA_U32 },
	[RTA_IIF]               = { .type = NLA_U32 },
	[RTA_PRIORITY]          = { .type = NLA_U32 },
	[RTA_METRICS]           = { .type = NLA_NESTED },
};
2089
/*
 * Parse an RTM_NEWROUTE/RTM_DELROUTE netlink request into @cfg.
 * Returns 0 on success or a negative errno (-EINVAL for malformed
 * attributes).  Note: fc_mx points into the request skb, so @cfg must
 * be consumed before the message is freed.
 */
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
			      struct fib6_config *cfg)
{
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	int err;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	rtm = nlmsg_data(nlh);
	memset(cfg, 0, sizeof(*cfg));

	cfg->fc_table = rtm->rtm_table;
	cfg->fc_dst_len = rtm->rtm_dst_len;
	cfg->fc_src_len = rtm->rtm_src_len;
	cfg->fc_flags = RTF_UP;
	cfg->fc_protocol = rtm->rtm_protocol;

	if (rtm->rtm_type == RTN_UNREACHABLE)
		cfg->fc_flags |= RTF_REJECT;

	if (rtm->rtm_type == RTN_LOCAL)
		cfg->fc_flags |= RTF_LOCAL;

	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
	cfg->fc_nlinfo.nlh = nlh;
	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);

	if (tb[RTA_GATEWAY]) {
		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
		cfg->fc_flags |= RTF_GATEWAY;
	}

	/* DST/SRC carry only the bytes covered by the prefix length;
	 * validate the attribute is long enough before copying. */
	if (tb[RTA_DST]) {
		int plen = (rtm->rtm_dst_len + 7) >> 3;

		if (nla_len(tb[RTA_DST]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
	}

	if (tb[RTA_SRC]) {
		int plen = (rtm->rtm_src_len + 7) >> 3;

		if (nla_len(tb[RTA_SRC]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
	}

	if (tb[RTA_OIF])
		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_PRIORITY])
		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);

	if (tb[RTA_METRICS]) {
		/* Borrowed pointer into the request skb (see above). */
		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
	}

	/* An explicit RTA_TABLE overrides the header's rtm_table. */
	if (tb[RTA_TABLE])
		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);

	err = 0;
errout:
	return err;
}
2162
2163 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2164 {
2165 struct fib6_config cfg;
2166 int err;
2167
2168 err = rtm_to_fib6_config(skb, nlh, &cfg);
2169 if (err < 0)
2170 return err;
2171
2172 return ip6_route_del(&cfg);
2173 }
2174
2175 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2176 {
2177 struct fib6_config cfg;
2178 int err;
2179
2180 err = rtm_to_fib6_config(skb, nlh, &cfg);
2181 if (err < 0)
2182 return err;
2183
2184 return ip6_route_add(&cfg);
2185 }
2186
/* Worst-case netlink message size for a single route entry built by
 * rt6_fill_node(); used to size the skb in inet6_rt_notify(). */
static inline size_t rt6_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct rtmsg))
	       + nla_total_size(16) /* RTA_SRC */
	       + nla_total_size(16) /* RTA_DST */
	       + nla_total_size(16) /* RTA_GATEWAY */
	       + nla_total_size(16) /* RTA_PREFSRC */
	       + nla_total_size(4) /* RTA_TABLE */
	       + nla_total_size(4) /* RTA_IIF */
	       + nla_total_size(4) /* RTA_OIF */
	       + nla_total_size(4) /* RTA_PRIORITY */
	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
	       + nla_total_size(sizeof(struct rta_cacheinfo));
}
2201
/*
 * Build one netlink route message for @rt into @skb.
 *
 * @dst/@src: when non-NULL (single-route queries), report the specific
 *            looked-up address as a /128 instead of the route's prefix.
 * @iif:      input interface index for RTM_GETROUTE replies (0 if none).
 * @prefix:   caller wants only RTF_PREFIX_RT routes; others are skipped.
 * @nowait:   forwarded to ip6mr_get_route() for multicast resolution.
 *
 * Returns the message length on success, 1 when the route was skipped
 * on purpose, or -EMSGSIZE when @skb ran out of room (the partial
 * message is canceled).  The NLA_PUT* macros jump to nla_put_failure
 * on lack of space.
 */
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 pid, u32 seq,
			 int prefix, int nowait, unsigned int flags)
{
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires;
	u32 table;

	if (prefix) {	/* user wants prefix routes only */
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	if (rt->rt6i_table)
		table = rt->rt6i_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	rtm->rtm_table = table;
	NLA_PUT_U32(skb, RTA_TABLE, table);
	/* Map internal route flags onto the user-visible route type. */
	if (rt->rt6i_flags&RTF_REJECT)
		rtm->rtm_type = RTN_UNREACHABLE;
	else if (rt->rt6i_flags&RTF_LOCAL)
		rtm->rtm_type = RTN_LOCAL;
	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = rt->rt6i_protocol;
	if (rt->rt6i_flags&RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & RTF_ADDRCONF)
		rtm->rtm_protocol = RTPROT_KERNEL;
	else if (rt->rt6i_flags&RTF_DEFAULT)
		rtm->rtm_protocol = RTPROT_RA;

	if (rt->rt6i_flags&RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	if (dst) {
		NLA_PUT(skb, RTA_DST, 16, dst);
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		NLA_PUT(skb, RTA_SRC, 16, src);
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len)
		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			/* Multicast destination: delegate to ip6mr,
			 * which may defer resolution unless @nowait. */
			int err = ip6mr_get_route(net, skb, rtm, nowait);
			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					if (err == -EMSGSIZE)
						goto nla_put_failure;
				}
			}
		} else
#endif
			NLA_PUT_U32(skb, RTA_IIF, iif);
	} else if (dst) {
		/* Output query: also report the preferred source
		 * address that would be selected for @dst. */
		struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
		struct in6_addr saddr_buf;
		if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
				       dst, 0, &saddr_buf) == 0)
			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
	}

	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
		goto nla_put_failure;

	if (rt->dst.neighbour)
		NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);

	if (rt->dst.dev)
		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);

	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);

	/* Clamp the remaining lifetime for the cacheinfo field. */
	if (!(rt->rt6i_flags & RTF_EXPIRES))
		expires = 0;
	else if (rt->rt6i_expires - jiffies < INT_MAX)
		expires = rt->rt6i_expires - jiffies;
	else
		expires = INT_MAX;

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
			       expires, rt->dst.error) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2321
2322 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2323 {
2324 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2325 int prefix;
2326
2327 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2328 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2329 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2330 } else
2331 prefix = 0;
2332
2333 return rt6_fill_node(arg->net,
2334 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2335 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2336 prefix, 0, NLM_F_MULTI);
2337 }
2338
/*
 * RTM_GETROUTE handler: resolve a single route query and unicast the
 * answer back to the requesting netlink socket.
 */
static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[RTA_MAX+1];
	struct rt6_info *rt;
	struct sk_buff *skb;
	struct rtmsg *rtm;
	struct flowi fl;
	int err, iif = 0;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	memset(&fl, 0, sizeof(fl));

	/* Full 128-bit addresses are required for a lookup query
	 * (unlike add/delete, which take prefix-length-sized copies). */
	if (tb[RTA_SRC]) {
		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
			goto errout;

		ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
	}

	if (tb[RTA_DST]) {
		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
			goto errout;

		ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
	}

	if (tb[RTA_IIF])
		iif = nla_get_u32(tb[RTA_IIF]);

	if (tb[RTA_OIF])
		fl.oif = nla_get_u32(tb[RTA_OIF]);

	if (iif) {
		/* Only validates that the input interface exists; the
		 * lookup below is still an output-route lookup. */
		struct net_device *dev;
		dev = __dev_get_by_index(net, iif);
		if (!dev) {
			err = -ENODEV;
			goto errout;
		}
	}

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (skb == NULL) {
		err = -ENOBUFS;
		goto errout;
	}

	/* Reserve room for dummy headers, this skb can pass
	   through good chunk of routing engine.
	 */
	skb_reset_mac_header(skb);
	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));

	/* NOTE(review): the result is dereferenced unchecked; this
	 * presumes ip6_route_output() falls back to the null entry
	 * rather than returning NULL — confirm. */
	rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
	skb_dst_set(skb, &rt->dst);

	err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
			    nlh->nlmsg_seq, 0, 0, 0);
	if (err < 0) {
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
errout:
	return err;
}
2412
2413 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2414 {
2415 struct sk_buff *skb;
2416 struct net *net = info->nl_net;
2417 u32 seq;
2418 int err;
2419
2420 err = -ENOBUFS;
2421 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2422
2423 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2424 if (skb == NULL)
2425 goto errout;
2426
2427 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2428 event, info->pid, seq, 0, 0, 0);
2429 if (err < 0) {
2430 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2431 WARN_ON(err == -EMSGSIZE);
2432 kfree_skb(skb);
2433 goto errout;
2434 }
2435 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2436 info->nlh, gfp_any());
2437 return;
2438 errout:
2439 if (err < 0)
2440 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2441 }
2442
2443 static int ip6_route_dev_notify(struct notifier_block *this,
2444 unsigned long event, void *data)
2445 {
2446 struct net_device *dev = (struct net_device *)data;
2447 struct net *net = dev_net(dev);
2448
2449 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2450 net->ipv6.ip6_null_entry->dst.dev = dev;
2451 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2452 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2453 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2454 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2455 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2456 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2457 #endif
2458 }
2459
2460 return NOTIFY_OK;
2461 }
2462
2463 /*
2464 * /proc
2465 */
2466
2467 #ifdef CONFIG_PROC_FS
2468
/*
 * Argument bundle in the shape of the old read_proc interface.
 * NOTE(review): nothing in the visible code references this struct —
 * /proc output goes through seq_file (rt6_info_route) — so it looks
 * like dead legacy; confirm against the rest of the file before removal.
 */
struct rt6_proc_arg
{
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2477
2478 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2479 {
2480 struct seq_file *m = p_arg;
2481
2482 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2483
2484 #ifdef CONFIG_IPV6_SUBTREES
2485 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2486 #else
2487 seq_puts(m, "00000000000000000000000000000000 00 ");
2488 #endif
2489
2490 if (rt->rt6i_nexthop) {
2491 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
2492 } else {
2493 seq_puts(m, "00000000000000000000000000000000");
2494 }
2495 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2496 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2497 rt->dst.__use, rt->rt6i_flags,
2498 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2499 return 0;
2500 }
2501
2502 static int ipv6_route_show(struct seq_file *m, void *v)
2503 {
2504 struct net *net = (struct net *)m->private;
2505 fib6_clean_all(net, rt6_info_route, 0, m);
2506 return 0;
2507 }
2508
/* open() for /proc/net/ipv6_route: per-net single_open wrapper. */
static int ipv6_route_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, ipv6_route_show);
}
2513
/* File operations for /proc/net/ipv6_route (rendered by rt6_info_route). */
static const struct file_operations ipv6_route_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= ipv6_route_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release_net,
};
2521
2522 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2523 {
2524 struct net *net = (struct net *)seq->private;
2525 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2526 net->ipv6.rt6_stats->fib_nodes,
2527 net->ipv6.rt6_stats->fib_route_nodes,
2528 net->ipv6.rt6_stats->fib_rt_alloc,
2529 net->ipv6.rt6_stats->fib_rt_entries,
2530 net->ipv6.rt6_stats->fib_rt_cache,
2531 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2532 net->ipv6.rt6_stats->fib_discarded_routes);
2533
2534 return 0;
2535 }
2536
/* open() for /proc/net/rt6_stats: per-net single_open wrapper. */
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, rt6_stats_seq_show);
}
2541
/* File operations for /proc/net/rt6_stats (rendered by rt6_stats_seq_show). */
static const struct file_operations rt6_stats_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release_net,
};
2549 #endif /* CONFIG_PROC_FS */
2550
2551 #ifdef CONFIG_SYSCTL
2552
2553 static
2554 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2555 void __user *buffer, size_t *lenp, loff_t *ppos)
2556 {
2557 struct net *net = current->nsproxy->net_ns;
2558 int delay = net->ipv6.sysctl.flush_delay;
2559 if (write) {
2560 proc_dointvec(ctl, write, buffer, lenp, ppos);
2561 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2562 return 0;
2563 } else
2564 return -EINVAL;
2565 }
2566
/*
 * Template for the per-namespace /proc/sys/net/ipv6/route/ table.
 * ipv6_route_sysctl_init() kmemdup()s this and rewrites every ->data
 * pointer to the per-net variable BY POSITIONAL INDEX — keep the entry
 * order here and the table[N] assignments there in sync.
 * "flush" is write-only (mode 0200) and triggers a GC run through
 * ipv6_sysctl_rtcache_flush().
 */
ctl_table ipv6_route_table_template[] = {
	{
		.procname	=	"flush",
		.data		=	&init_net.ipv6.sysctl.flush_delay,
		.maxlen		=	sizeof(int),
		.mode		=	0200,
		.proc_handler	=	ipv6_sysctl_rtcache_flush
	},
	{
		.procname	=	"gc_thresh",
		.data		=	&ip6_dst_ops_template.gc_thresh,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"max_size",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"gc_min_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_timeout",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_elasticity",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"mtu_expires",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"min_adv_mss",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		/* same variable as gc_min_interval, exposed in milliseconds */
		.procname	=	"gc_min_interval_ms",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_ms_jiffies,
	},
	{ }
};
2640
/*
 * Clone the sysctl template for a new namespace and repoint each
 * entry's ->data at the per-net variable.  The table[N] indices are
 * positionally coupled to ipv6_route_table_template — update both
 * together when entries change.  Returns the kmemdup()'d table
 * (ownership passes to the caller) or NULL on allocation failure.
 */
struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(ipv6_route_table_template,
			sizeof(ipv6_route_table_template),
			GFP_KERNEL);

	if (table) {
		table[0].data = &net->ipv6.sysctl.flush_delay;
		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
		/* entries 3 and 9 share ip6_rt_gc_min_interval (s vs ms view) */
		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
	}

	return table;
}
2664 #endif
2665
/*
 * Per-namespace setup for IPv6 routing: clone the dst_ops template,
 * allocate the per-net "special" route entries from their templates
 * (null entry; with CONFIG_IPV6_MULTIPLE_TABLES also prohibit and
 * blackhole), seed sysctl defaults and create the proc files.
 * Uses the standard goto-unwind ladder: each label undoes the steps
 * that succeeded before the failing one.  Returns 0 or -ENOMEM.
 */
static int __net_init ip6_route_net_init(struct net *net)
{
	int ret = -ENOMEM;

	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
	       sizeof(net->ipv6.ip6_dst_ops));

	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
		goto out_ip6_dst_ops;

	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
					   sizeof(*net->ipv6.ip6_null_entry),
					   GFP_KERNEL);
	if (!net->ipv6.ip6_null_entry)
		goto out_ip6_dst_entries;
	/* each special entry is its own dst.path and uses the per-net ops */
	net->ipv6.ip6_null_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_null_entry;
	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_metric_set(&net->ipv6.ip6_null_entry->dst, RTAX_HOPLIMIT, 255);

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
					       sizeof(*net->ipv6.ip6_prohibit_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_prohibit_entry)
		goto out_ip6_null_entry;
	net->ipv6.ip6_prohibit_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_metric_set(&net->ipv6.ip6_prohibit_entry->dst, RTAX_HOPLIMIT, 255);

	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
					       sizeof(*net->ipv6.ip6_blk_hole_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_blk_hole_entry)
		goto out_ip6_prohibit_entry;
	net->ipv6.ip6_blk_hole_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_metric_set(&net->ipv6.ip6_blk_hole_entry->dst, RTAX_HOPLIMIT, 255);
#endif

	/* per-net sysctl defaults (exposed via ipv6_route_table_template) */
	net->ipv6.sysctl.flush_delay = 0;
	net->ipv6.sysctl.ip6_rt_max_size = 4096;
	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;

#ifdef CONFIG_PROC_FS
	proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
	proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
#endif
	net->ipv6.ip6_rt_gc_expire = 30*HZ;

	ret = 0;
out:
	return ret;

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
out_ip6_prohibit_entry:
	kfree(net->ipv6.ip6_prohibit_entry);
out_ip6_null_entry:
	kfree(net->ipv6.ip6_null_entry);
#endif
out_ip6_dst_entries:
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
out_ip6_dst_ops:
	goto out;
}
2738
/*
 * Per-namespace teardown: remove the proc files, free the special
 * route entries allocated by ip6_route_net_init() and release the
 * dst entry counters.
 */
static void __net_exit ip6_route_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ipv6_route");
	proc_net_remove(net, "rt6_stats");
#endif
	kfree(net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	kfree(net->ipv6.ip6_prohibit_entry);
	kfree(net->ipv6.ip6_blk_hole_entry);
#endif
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
}
2752
/* Per-network-namespace lifecycle hooks for the IPv6 routing state. */
static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};
2757
/* Netdevice notifier binding special routes to each netns loopback dev. */
static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = 0,
};
2762
/*
 * Boot-time initialisation of IPv6 routing: dst kmem cache, blackhole
 * dst counters, the pernet subsystem, fib6 core, xfrm6, fib6 policy
 * rules, the rtnetlink handlers and the netdevice notifier — unwound
 * in reverse on failure.  NOTE(review): the last two unwind labels
 * (fib6_rules_init, xfrm6_init) are named after the step that
 * *succeeded* rather than with the usual out_ prefix — confusing but
 * functionally correct.
 */
int __init ip6_route_init(void)
{
	int ret;

	ret = -ENOMEM;
	ip6_dst_ops_template.kmem_cachep =
		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!ip6_dst_ops_template.kmem_cachep)
		goto out;

	ret = dst_entries_init(&ip6_dst_blackhole_ops);
	if (ret)
		goto out_kmem_cache;

	ret = register_pernet_subsys(&ip6_route_net_ops);
	if (ret)
		goto out_dst_entries;

	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;

	/* Registering of the loopback is done before this portion of code,
	 * the loopback reference in rt6_info will not be taken, do it
	 * manually for init_net */
	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
#endif
	ret = fib6_init();
	if (ret)
		goto out_register_subsys;

	ret = xfrm6_init();
	if (ret)
		goto out_fib6_init;

	ret = fib6_rules_init();
	if (ret)
		goto xfrm6_init;

	/* register RTM_{NEW,DEL,GET}ROUTE handlers for PF_INET6 */
	ret = -ENOBUFS;
	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
		goto fib6_rules_init;

	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
	if (ret)
		goto fib6_rules_init;

out:
	return ret;

	/* unwind ladder: each label undoes the step named by it */
fib6_rules_init:
	fib6_rules_cleanup();
xfrm6_init:
	xfrm6_fini();
out_fib6_init:
	fib6_gc_cleanup();
out_register_subsys:
	unregister_pernet_subsys(&ip6_route_net_ops);
out_dst_entries:
	dst_entries_destroy(&ip6_dst_blackhole_ops);
out_kmem_cache:
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
	goto out;
}
2834
/*
 * Module teardown: undo ip6_route_init() in strict reverse order
 * (notifier, fib6 rules, xfrm6, fib6 GC, pernet subsys, dst counters,
 * dst kmem cache).
 */
void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}
This page took 0.387244 seconds and 5 git commands to generate.