[deliverable/linux.git] / net / ipv6 / route.c
1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16 /* Changes:
17 *
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
25 * Ville Nuorvala
26 * Fixed routing subtrees.
27 */
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/times.h>
33 #include <linux/socket.h>
34 #include <linux/sockios.h>
35 #include <linux/net.h>
36 #include <linux/route.h>
37 #include <linux/netdevice.h>
38 #include <linux/in6.h>
39 #include <linux/init.h>
40 #include <linux/if_arp.h>
41
42 #ifdef CONFIG_PROC_FS
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #endif
46
47 #include <net/snmp.h>
48 #include <net/ipv6.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #include <net/ndisc.h>
52 #include <net/addrconf.h>
53 #include <net/tcp.h>
54 #include <linux/rtnetlink.h>
55 #include <net/dst.h>
56 #include <net/xfrm.h>
57 #include <net/netevent.h>
58 #include <net/netlink.h>
59
60 #include <asm/uaccess.h>
61
62 #ifdef CONFIG_SYSCTL
63 #include <linux/sysctl.h>
64 #endif
65
66 /* Set to 3 to get tracing. */
67 #define RT6_DEBUG 2
68
69 #if RT6_DEBUG >= 3
70 #define RDBG(x) printk x
71 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
72 #else
73 #define RDBG(x)
74 #define RT6_TRACE(x...) do { ; } while (0)
75 #endif
76
77 #define CLONE_OFFLINK_ROUTE 0
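/*
 * When CLONE_OFFLINK_ROUTE is 1, routes that already have a nexthop
 * (gatewayed or RTF_NONEXTHOP routes) are also cloned into RTF_CACHE
 * entries by ip6_pol_route_input()/ip6_pol_route_output(); with the
 * default of 0 the original network route is used directly (see the
 * "#if CLONE_OFFLINK_ROUTE" blocks below).
 */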
78
79 static int ip6_rt_max_size = 4096;
80 static int ip6_rt_gc_min_interval = HZ / 2;
81 static int ip6_rt_gc_timeout = 60*HZ;
82 int ip6_rt_gc_interval = 30*HZ;
83 static int ip6_rt_gc_elasticity = 9;
84 static int ip6_rt_mtu_expires = 10*60*HZ;
85 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
86
87 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
88 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
89 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
90 static void ip6_dst_destroy(struct dst_entry *);
91 static void ip6_dst_ifdown(struct dst_entry *,
92 struct net_device *dev, int how);
93 static int ip6_dst_gc(void);
94
95 static int ip6_pkt_discard(struct sk_buff *skb);
96 static int ip6_pkt_discard_out(struct sk_buff *skb);
97 static void ip6_link_failure(struct sk_buff *skb);
98 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
99
100 #ifdef CONFIG_IPV6_ROUTE_INFO
101 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
102 struct in6_addr *gwaddr, int ifindex,
103 unsigned pref);
104 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
105 struct in6_addr *gwaddr, int ifindex);
106 #endif
107
108 static struct dst_ops ip6_dst_ops = {
109 .family = AF_INET6,
110 .protocol = __constant_htons(ETH_P_IPV6),
111 .gc = ip6_dst_gc,
112 .gc_thresh = 1024,
113 .check = ip6_dst_check,
114 .destroy = ip6_dst_destroy,
115 .ifdown = ip6_dst_ifdown,
116 .negative_advice = ip6_negative_advice,
117 .link_failure = ip6_link_failure,
118 .update_pmtu = ip6_rt_update_pmtu,
119 .entry_size = sizeof(struct rt6_info),
120 };
121
122 struct rt6_info ip6_null_entry = {
123 .u = {
124 .dst = {
125 .__refcnt = ATOMIC_INIT(1),
126 .__use = 1,
127 .dev = &loopback_dev,
128 .obsolete = -1,
129 .error = -ENETUNREACH,
130 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
131 .input = ip6_pkt_discard,
132 .output = ip6_pkt_discard_out,
133 .ops = &ip6_dst_ops,
134 .path = (struct dst_entry*)&ip6_null_entry,
135 }
136 },
137 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
138 .rt6i_metric = ~(u32) 0,
139 .rt6i_ref = ATOMIC_INIT(1),
140 };
141
142 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
143
144 static int ip6_pkt_prohibit(struct sk_buff *skb);
145 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
146 static int ip6_pkt_blk_hole(struct sk_buff *skb);
147
148 struct rt6_info ip6_prohibit_entry = {
149 .u = {
150 .dst = {
151 .__refcnt = ATOMIC_INIT(1),
152 .__use = 1,
153 .dev = &loopback_dev,
154 .obsolete = -1,
155 .error = -EACCES,
156 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
157 .input = ip6_pkt_prohibit,
158 .output = ip6_pkt_prohibit_out,
159 .ops = &ip6_dst_ops,
160 .path = (struct dst_entry*)&ip6_prohibit_entry,
161 }
162 },
163 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
164 .rt6i_metric = ~(u32) 0,
165 .rt6i_ref = ATOMIC_INIT(1),
166 };
167
168 struct rt6_info ip6_blk_hole_entry = {
169 .u = {
170 .dst = {
171 .__refcnt = ATOMIC_INIT(1),
172 .__use = 1,
173 .dev = &loopback_dev,
174 .obsolete = -1,
175 .error = -EINVAL,
176 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
177 .input = ip6_pkt_blk_hole,
178 .output = ip6_pkt_blk_hole,
179 .ops = &ip6_dst_ops,
180 .path = (struct dst_entry*)&ip6_blk_hole_entry,
181 }
182 },
183 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
184 .rt6i_metric = ~(u32) 0,
185 .rt6i_ref = ATOMIC_INIT(1),
186 };
187
188 #endif
189
190 /* allocate dst with ip6_dst_ops */
191 static __inline__ struct rt6_info *ip6_dst_alloc(void)
192 {
193 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
194 }
195
196 static void ip6_dst_destroy(struct dst_entry *dst)
197 {
198 struct rt6_info *rt = (struct rt6_info *)dst;
199 struct inet6_dev *idev = rt->rt6i_idev;
200
201 if (idev != NULL) {
202 rt->rt6i_idev = NULL;
203 in6_dev_put(idev);
204 }
205 }
206
207 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
208 int how)
209 {
210 struct rt6_info *rt = (struct rt6_info *)dst;
211 struct inet6_dev *idev = rt->rt6i_idev;
212
213 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
214 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
215 if (loopback_idev != NULL) {
216 rt->rt6i_idev = loopback_idev;
217 in6_dev_put(idev);
218 }
219 }
220 }
221
222 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
223 {
224 return (rt->rt6i_flags & RTF_EXPIRES &&
225 time_after(jiffies, rt->rt6i_expires));
226 }
227
228 static inline int rt6_need_strict(struct in6_addr *daddr)
229 {
230 return (ipv6_addr_type(daddr) &
231 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
232 }
233
234 /*
235 * Route lookup. Any table->tb6_lock is implied.
236 */
237
238 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
239 int oif,
240 int strict)
241 {
242 struct rt6_info *local = NULL;
243 struct rt6_info *sprt;
244
245 if (oif) {
246 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
247 struct net_device *dev = sprt->rt6i_dev;
248 if (dev->ifindex == oif)
249 return sprt;
250 if (dev->flags & IFF_LOOPBACK) {
251 if (sprt->rt6i_idev == NULL ||
252 sprt->rt6i_idev->dev->ifindex != oif) {
253 if (strict && oif)
254 continue;
255 if (local && (!oif ||
256 local->rt6i_idev->dev->ifindex == oif))
257 continue;
258 }
259 local = sprt;
260 }
261 }
262
263 if (local)
264 return local;
265
266 if (strict)
267 return &ip6_null_entry;
268 }
269 return rt;
270 }
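/*
 * rt6_device_match() above: with an oif given, an exact rt6i_dev ifindex
 * match wins immediately; a loopback route whose inet6_dev maps back to
 * oif is remembered as a fallback ("local"); if nothing matches and
 * 'strict' is set, ip6_null_entry is returned instead of the original head.
 */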
271
272 #ifdef CONFIG_IPV6_ROUTER_PREF
273 static void rt6_probe(struct rt6_info *rt)
274 {
275 struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
276 /*
277 * Okay, this does not seem to be appropriate
278 * for now; however, we need to check whether it
279 * really is -- aka Router Reachability Probing.
280 *
281 * Router Reachability Probe MUST be rate-limited
282 * to no more than one per minute.
283 */
284 if (!neigh || (neigh->nud_state & NUD_VALID))
285 return;
286 read_lock_bh(&neigh->lock);
287 if (!(neigh->nud_state & NUD_VALID) &&
288 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
289 struct in6_addr mcaddr;
290 struct in6_addr *target;
291
292 neigh->updated = jiffies;
293 read_unlock_bh(&neigh->lock);
294
295 target = (struct in6_addr *)&neigh->primary_key;
296 addrconf_addr_solict_mult(target, &mcaddr);
297 ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
298 } else
299 read_unlock_bh(&neigh->lock);
300 }
301 #else
302 static inline void rt6_probe(struct rt6_info *rt)
303 {
304 return;
305 }
306 #endif
307
308 /*
309 * Default Router Selection (RFC 2461 6.3.6)
310 */
311 static int inline rt6_check_dev(struct rt6_info *rt, int oif)
312 {
313 struct net_device *dev = rt->rt6i_dev;
314 int ret = 0;
315
316 if (!oif)
317 return 2;
318 if (dev->flags & IFF_LOOPBACK) {
319 if (!WARN_ON(rt->rt6i_idev == NULL) &&
320 rt->rt6i_idev->dev->ifindex == oif)
321 ret = 1;
322 else
323 return 0;
324 }
325 if (dev->ifindex == oif)
326 return 2;
327
328 return ret;
329 }
330
331 static int inline rt6_check_neigh(struct rt6_info *rt)
332 {
333 struct neighbour *neigh = rt->rt6i_nexthop;
334 int m = 0;
335 if (rt->rt6i_flags & RTF_NONEXTHOP ||
336 !(rt->rt6i_flags & RTF_GATEWAY))
337 m = 1;
338 else if (neigh) {
339 read_lock_bh(&neigh->lock);
340 if (neigh->nud_state & NUD_VALID)
341 m = 2;
342 else if (!(neigh->nud_state & NUD_FAILED))
343 m = 1;
344 read_unlock_bh(&neigh->lock);
345 }
346 return m;
347 }
348
349 static int rt6_score_route(struct rt6_info *rt, int oif,
350 int strict)
351 {
352 int m, n;
353
354 m = rt6_check_dev(rt, oif);
355 if (!m && (strict & RT6_LOOKUP_F_IFACE))
356 return -1;
357 #ifdef CONFIG_IPV6_ROUTER_PREF
358 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
359 #endif
360 n = rt6_check_neigh(rt);
361 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
362 return -1;
363 return m;
364 }
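/*
 * A worked example of the score above (assuming IPV6_DECODE_PREF() maps
 * low/medium/high router preference to 1/2/3): a route whose device
 * matches the requested oif gets m = 2 from rt6_check_dev(); with
 * CONFIG_IPV6_ROUTER_PREF and a HIGH preference this becomes
 * m = 2 | (3 << 2) = 14.  rt6_check_neigh() only gates on
 * RT6_LOOKUP_F_REACHABLE; its result is not folded into the score.
 */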
365
366 static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
367 int strict)
368 {
369 struct rt6_info *match = NULL, *last = NULL;
370 struct rt6_info *rt, *rt0 = *head;
371 u32 metric;
372 int mpri = -1;
373
374 RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
375 __FUNCTION__, head, head ? *head : NULL, oif);
376
377 for (rt = rt0, metric = rt0->rt6i_metric;
378 rt && rt->rt6i_metric == metric && (!last || rt != rt0);
379 rt = rt->u.dst.rt6_next) {
380 int m;
381
382 if (rt6_check_expired(rt))
383 continue;
384
385 last = rt;
386
387 m = rt6_score_route(rt, oif, strict);
388 if (m < 0)
389 continue;
390
391 if (m > mpri) {
392 if (strict & RT6_LOOKUP_F_REACHABLE)
393 rt6_probe(match);
394 match = rt;
395 mpri = m;
396 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
397 rt6_probe(rt);
398 }
399 }
400
401 if (!match &&
402 (strict & RT6_LOOKUP_F_REACHABLE) &&
403 last && last != rt0) {
404 /* no entries matched; do round-robin */
405 static DEFINE_SPINLOCK(lock);
406 spin_lock(&lock);
407 *head = rt0->u.dst.rt6_next;
408 rt0->u.dst.rt6_next = last->u.dst.rt6_next;
409 last->u.dst.rt6_next = rt0;
410 spin_unlock(&lock);
411 }
412
413 RT6_TRACE("%s() => %p, score=%d\n",
414 __FUNCTION__, match, mpri);
415
416 return (match ? match : &ip6_null_entry);
417 }
418
419 #ifdef CONFIG_IPV6_ROUTE_INFO
420 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
421 struct in6_addr *gwaddr)
422 {
423 struct route_info *rinfo = (struct route_info *) opt;
424 struct in6_addr prefix_buf, *prefix;
425 unsigned int pref;
426 u32 lifetime;
427 struct rt6_info *rt;
428
429 if (len < sizeof(struct route_info)) {
430 return -EINVAL;
431 }
432
433 /* Sanity check for prefix_len and length */
434 if (rinfo->length > 3) {
435 return -EINVAL;
436 } else if (rinfo->prefix_len > 128) {
437 return -EINVAL;
438 } else if (rinfo->prefix_len > 64) {
439 if (rinfo->length < 2) {
440 return -EINVAL;
441 }
442 } else if (rinfo->prefix_len > 0) {
443 if (rinfo->length < 1) {
444 return -EINVAL;
445 }
446 }
447
448 pref = rinfo->route_pref;
449 if (pref == ICMPV6_ROUTER_PREF_INVALID)
450 pref = ICMPV6_ROUTER_PREF_MEDIUM;
451
452 lifetime = ntohl(rinfo->lifetime);
453 if (lifetime == 0xffffffff) {
454 /* infinity */
455 } else if (lifetime > 0x7fffffff/HZ) {
456 /* Avoid arithmetic overflow */
457 lifetime = 0x7fffffff/HZ - 1;
458 }
459
460 if (rinfo->length == 3)
461 prefix = (struct in6_addr *)rinfo->prefix;
462 else {
463 /* this function is safe */
464 ipv6_addr_prefix(&prefix_buf,
465 (struct in6_addr *)rinfo->prefix,
466 rinfo->prefix_len);
467 prefix = &prefix_buf;
468 }
469
470 rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);
471
472 if (rt && !lifetime) {
473 ip6_del_rt(rt);
474 rt = NULL;
475 }
476
477 if (!rt && lifetime)
478 rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
479 pref);
480 else if (rt)
481 rt->rt6i_flags = RTF_ROUTEINFO |
482 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
483
484 if (rt) {
485 if (lifetime == 0xffffffff) {
486 rt->rt6i_flags &= ~RTF_EXPIRES;
487 } else {
488 rt->rt6i_expires = jiffies + HZ * lifetime;
489 rt->rt6i_flags |= RTF_EXPIRES;
490 }
491 dst_release(&rt->u.dst);
492 }
493 return 0;
494 }
495 #endif
496
497 #define BACKTRACK(saddr) \
498 do { \
499 if (rt == &ip6_null_entry) { \
500 struct fib6_node *pn; \
501 while (1) { \
502 if (fn->fn_flags & RTN_TL_ROOT) \
503 goto out; \
504 pn = fn->parent; \
505 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
506 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
507 else \
508 fn = pn; \
509 if (fn->fn_flags & RTN_RTINFO) \
510 goto restart; \
511 } \
512 } \
513 } while(0)
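/*
 * BACKTRACK() is shared by the lookup paths below: when the chosen route
 * is ip6_null_entry it walks back toward the tree root; if a parent node
 * carries a source-address subtree (FIB6_SUBTREE(), CONFIG_IPV6_SUBTREES)
 * the lookup restarts inside that subtree, otherwise it continues at the
 * parent, until a node with RTN_RTINFO is found or the root is reached.
 */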
514
515 static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
516 struct flowi *fl, int flags)
517 {
518 struct fib6_node *fn;
519 struct rt6_info *rt;
520
521 read_lock_bh(&table->tb6_lock);
522 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
523 restart:
524 rt = fn->leaf;
525 rt = rt6_device_match(rt, fl->oif, flags);
526 BACKTRACK(&fl->fl6_src);
527 out:
528 dst_hold(&rt->u.dst);
529 read_unlock_bh(&table->tb6_lock);
530
531 rt->u.dst.lastuse = jiffies;
532 rt->u.dst.__use++;
533
534 return rt;
535
536 }
537
538 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
539 int oif, int strict)
540 {
541 struct flowi fl = {
542 .oif = oif,
543 .nl_u = {
544 .ip6_u = {
545 .daddr = *daddr,
546 },
547 },
548 };
549 struct dst_entry *dst;
550 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
551
552 if (saddr) {
553 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
554 flags |= RT6_LOOKUP_F_HAS_SADDR;
555 }
556
557 dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
558 if (dst->error == 0)
559 return (struct rt6_info *) dst;
560
561 dst_release(dst);
562
563 return NULL;
564 }
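#if 0	/* Illustrative sketch only (compiled out): how a caller might use
	 * rt6_lookup().  The helper name is hypothetical. */
static void rt6_lookup_example(struct in6_addr *daddr, struct net_device *dev)
{
	/* non-strict lookup; a match is returned with its refcount held */
	struct rt6_info *rt = rt6_lookup(daddr, NULL, dev->ifindex, 0);

	if (rt != NULL)
		dst_release(&rt->u.dst);	/* drop the lookup's reference */
}
#endif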
565
566 /* ip6_ins_rt is called without table->tb6_lock held.
567 It takes the new route entry; if the addition fails for any reason,
568 the route is freed. In any case, if the caller does not hold a
569 reference, it may be destroyed.
570 */
571
572 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
573 {
574 int err;
575 struct fib6_table *table;
576
577 table = rt->rt6i_table;
578 write_lock_bh(&table->tb6_lock);
579 err = fib6_add(&table->tb6_root, rt, info);
580 write_unlock_bh(&table->tb6_lock);
581
582 return err;
583 }
584
585 int ip6_ins_rt(struct rt6_info *rt)
586 {
587 return __ip6_ins_rt(rt, NULL);
588 }
589
590 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
591 struct in6_addr *saddr)
592 {
593 struct rt6_info *rt;
594
595 /*
596 * Clone the route.
597 */
598
599 rt = ip6_rt_copy(ort);
600
601 if (rt) {
602 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
603 if (rt->rt6i_dst.plen != 128 &&
604 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
605 rt->rt6i_flags |= RTF_ANYCAST;
606 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
607 }
608
609 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
610 rt->rt6i_dst.plen = 128;
611 rt->rt6i_flags |= RTF_CACHE;
612 rt->u.dst.flags |= DST_HOST;
613
614 #ifdef CONFIG_IPV6_SUBTREES
615 if (rt->rt6i_src.plen && saddr) {
616 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
617 rt->rt6i_src.plen = 128;
618 }
619 #endif
620
621 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
622
623 }
624
625 return rt;
626 }
627
628 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
629 {
630 struct rt6_info *rt = ip6_rt_copy(ort);
631 if (rt) {
632 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
633 rt->rt6i_dst.plen = 128;
634 rt->rt6i_flags |= RTF_CACHE;
635 rt->u.dst.flags |= DST_HOST;
636 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
637 }
638 return rt;
639 }
640
641 static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
642 struct flowi *fl, int flags)
643 {
644 struct fib6_node *fn;
645 struct rt6_info *rt, *nrt;
646 int strict = 0;
647 int attempts = 3;
648 int err;
649 int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
650
651 strict |= flags & RT6_LOOKUP_F_IFACE;
652
653 relookup:
654 read_lock_bh(&table->tb6_lock);
655
656 restart_2:
657 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
658
659 restart:
660 rt = rt6_select(&fn->leaf, fl->iif, strict | reachable);
661 BACKTRACK(&fl->fl6_src);
662 if (rt == &ip6_null_entry ||
663 rt->rt6i_flags & RTF_CACHE)
664 goto out;
665
666 dst_hold(&rt->u.dst);
667 read_unlock_bh(&table->tb6_lock);
668
669 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
670 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
671 else {
672 #if CLONE_OFFLINK_ROUTE
673 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
674 #else
675 goto out2;
676 #endif
677 }
678
679 dst_release(&rt->u.dst);
680 rt = nrt ? : &ip6_null_entry;
681
682 dst_hold(&rt->u.dst);
683 if (nrt) {
684 err = ip6_ins_rt(nrt);
685 if (!err)
686 goto out2;
687 }
688
689 if (--attempts <= 0)
690 goto out2;
691
692 /*
693 * Race condition! In the gap while table->tb6_lock was
694 * released, someone could have inserted this route. Relookup.
695 */
696 dst_release(&rt->u.dst);
697 goto relookup;
698
699 out:
700 if (reachable) {
701 reachable = 0;
702 goto restart_2;
703 }
704 dst_hold(&rt->u.dst);
705 read_unlock_bh(&table->tb6_lock);
706 out2:
707 rt->u.dst.lastuse = jiffies;
708 rt->u.dst.__use++;
709
710 return rt;
711 }
712
713 void ip6_route_input(struct sk_buff *skb)
714 {
715 struct ipv6hdr *iph = skb->nh.ipv6h;
716 int flags = RT6_LOOKUP_F_HAS_SADDR;
717 struct flowi fl = {
718 .iif = skb->dev->ifindex,
719 .nl_u = {
720 .ip6_u = {
721 .daddr = iph->daddr,
722 .saddr = iph->saddr,
723 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
724 },
725 },
726 .mark = skb->mark,
727 .proto = iph->nexthdr,
728 };
729
730 if (rt6_need_strict(&iph->daddr))
731 flags |= RT6_LOOKUP_F_IFACE;
732
733 skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
734 }
735
736 static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
737 struct flowi *fl, int flags)
738 {
739 struct fib6_node *fn;
740 struct rt6_info *rt, *nrt;
741 int strict = 0;
742 int attempts = 3;
743 int err;
744 int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
745
746 strict |= flags & RT6_LOOKUP_F_IFACE;
747
748 relookup:
749 read_lock_bh(&table->tb6_lock);
750
751 restart_2:
752 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
753
754 restart:
755 rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
756 BACKTRACK(&fl->fl6_src);
757 if (rt == &ip6_null_entry ||
758 rt->rt6i_flags & RTF_CACHE)
759 goto out;
760
761 dst_hold(&rt->u.dst);
762 read_unlock_bh(&table->tb6_lock);
763
764 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
765 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
766 else {
767 #if CLONE_OFFLINK_ROUTE
768 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
769 #else
770 goto out2;
771 #endif
772 }
773
774 dst_release(&rt->u.dst);
775 rt = nrt ? : &ip6_null_entry;
776
777 dst_hold(&rt->u.dst);
778 if (nrt) {
779 err = ip6_ins_rt(nrt);
780 if (!err)
781 goto out2;
782 }
783
784 if (--attempts <= 0)
785 goto out2;
786
787 /*
788 * Race condition! In the gap while table->tb6_lock was
789 * released, someone could have inserted this route. Relookup.
790 */
791 dst_release(&rt->u.dst);
792 goto relookup;
793
794 out:
795 if (reachable) {
796 reachable = 0;
797 goto restart_2;
798 }
799 dst_hold(&rt->u.dst);
800 read_unlock_bh(&table->tb6_lock);
801 out2:
802 rt->u.dst.lastuse = jiffies;
803 rt->u.dst.__use++;
804 return rt;
805 }
806
807 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
808 {
809 int flags = 0;
810
811 if (rt6_need_strict(&fl->fl6_dst))
812 flags |= RT6_LOOKUP_F_IFACE;
813
814 if (!ipv6_addr_any(&fl->fl6_src))
815 flags |= RT6_LOOKUP_F_HAS_SADDR;
816
817 return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
818 }
819
820
821 /*
822 * Destination cache support functions
823 */
824
825 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
826 {
827 struct rt6_info *rt;
828
829 rt = (struct rt6_info *) dst;
830
831 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
832 return dst;
833
834 return NULL;
835 }
836
837 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
838 {
839 struct rt6_info *rt = (struct rt6_info *) dst;
840
841 if (rt) {
842 if (rt->rt6i_flags & RTF_CACHE)
843 ip6_del_rt(rt);
844 else
845 dst_release(dst);
846 }
847 return NULL;
848 }
849
850 static void ip6_link_failure(struct sk_buff *skb)
851 {
852 struct rt6_info *rt;
853
854 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
855
856 rt = (struct rt6_info *) skb->dst;
857 if (rt) {
858 if (rt->rt6i_flags&RTF_CACHE) {
859 dst_set_expires(&rt->u.dst, 0);
860 rt->rt6i_flags |= RTF_EXPIRES;
861 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
862 rt->rt6i_node->fn_sernum = -1;
863 }
864 }
865
866 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
867 {
868 struct rt6_info *rt6 = (struct rt6_info*)dst;
869
870 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
871 rt6->rt6i_flags |= RTF_MODIFIED;
872 if (mtu < IPV6_MIN_MTU) {
873 mtu = IPV6_MIN_MTU;
874 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
875 }
876 dst->metrics[RTAX_MTU-1] = mtu;
877 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
878 }
879 }
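/*
 * Only /128 host routes have their PMTU lowered here; a reported MTU
 * below IPV6_MIN_MTU is clamped to 1280 and RTAX_FEATURE_ALLFRAG is set
 * so that a Fragment header is always added (cf. RFC 2460).
 */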
880
881 static int ipv6_get_mtu(struct net_device *dev);
882
883 static inline unsigned int ipv6_advmss(unsigned int mtu)
884 {
885 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
886
887 if (mtu < ip6_rt_min_advmss)
888 mtu = ip6_rt_min_advmss;
889
890 /*
891 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
892 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
893 * IPV6_MAXPLEN is also valid and means: "any MSS,
894 * rely only on pmtu discovery"
895 */
896 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
897 mtu = IPV6_MAXPLEN;
898 return mtu;
899 }
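/*
 * Example: with mtu6 = 1500 the advertised MSS is
 * 1500 - sizeof(struct ipv6hdr) (40) - sizeof(struct tcphdr) (20) = 1440,
 * which is above ip6_rt_min_advmss (1280 - 20 - 40 = 1220) and well below
 * the IPV6_MAXPLEN clamp, so it is used unchanged.
 */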
900
901 static struct dst_entry *ndisc_dst_gc_list;
902 static DEFINE_SPINLOCK(ndisc_lock);
903
904 struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
905 struct neighbour *neigh,
906 struct in6_addr *addr,
907 int (*output)(struct sk_buff *))
908 {
909 struct rt6_info *rt;
910 struct inet6_dev *idev = in6_dev_get(dev);
911
912 if (unlikely(idev == NULL))
913 return NULL;
914
915 rt = ip6_dst_alloc();
916 if (unlikely(rt == NULL)) {
917 in6_dev_put(idev);
918 goto out;
919 }
920
921 dev_hold(dev);
922 if (neigh)
923 neigh_hold(neigh);
924 else
925 neigh = ndisc_get_neigh(dev, addr);
926
927 rt->rt6i_dev = dev;
928 rt->rt6i_idev = idev;
929 rt->rt6i_nexthop = neigh;
930 atomic_set(&rt->u.dst.__refcnt, 1);
931 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
932 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
933 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
934 rt->u.dst.output = output;
935
936 #if 0 /* there's no chance to use these for ndisc */
937 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
938 ? DST_HOST
939 : 0;
940 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
941 rt->rt6i_dst.plen = 128;
942 #endif
943
944 spin_lock_bh(&ndisc_lock);
945 rt->u.dst.next = ndisc_dst_gc_list;
946 ndisc_dst_gc_list = &rt->u.dst;
947 spin_unlock_bh(&ndisc_lock);
948
949 fib6_force_start_gc();
950
951 out:
952 return &rt->u.dst;
953 }
954
955 int ndisc_dst_gc(int *more)
956 {
957 struct dst_entry *dst, *next, **pprev;
958 int freed;
959
960 next = NULL;
961 freed = 0;
962
963 spin_lock_bh(&ndisc_lock);
964 pprev = &ndisc_dst_gc_list;
965
966 while ((dst = *pprev) != NULL) {
967 if (!atomic_read(&dst->__refcnt)) {
968 *pprev = dst->next;
969 dst_free(dst);
970 freed++;
971 } else {
972 pprev = &dst->next;
973 (*more)++;
974 }
975 }
976
977 spin_unlock_bh(&ndisc_lock);
978
979 return freed;
980 }
981
982 static int ip6_dst_gc(void)
983 {
984 static unsigned expire = 30*HZ;
985 static unsigned long last_gc;
986 unsigned long now = jiffies;
987
988 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
989 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
990 goto out;
991
992 expire++;
993 fib6_run_gc(expire);
994 last_gc = now;
995 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
996 expire = ip6_rt_gc_timeout>>1;
997
998 out:
999 expire -= expire>>ip6_rt_gc_elasticity;
1000 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
1001 }
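/*
 * The static 'expire' above adapts GC pressure: it starts at 30*HZ, is
 * bumped by one on every run that actually calls fib6_run_gc(), drops to
 * ip6_rt_gc_timeout/2 once the entry count falls below gc_thresh, and
 * decays by expire >> ip6_rt_gc_elasticity (1/512 with the default of 9)
 * on every invocation.
 */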
1002
1003 /* Clean the host part of a prefix. Not strictly necessary with the radix
1004 tree, but it results in cleaner routing tables.
1005 
1006 Remove it only when everything else is known to work!
1007 */
1008
1009 static int ipv6_get_mtu(struct net_device *dev)
1010 {
1011 int mtu = IPV6_MIN_MTU;
1012 struct inet6_dev *idev;
1013
1014 idev = in6_dev_get(dev);
1015 if (idev) {
1016 mtu = idev->cnf.mtu6;
1017 in6_dev_put(idev);
1018 }
1019 return mtu;
1020 }
1021
1022 int ipv6_get_hoplimit(struct net_device *dev)
1023 {
1024 int hoplimit = ipv6_devconf.hop_limit;
1025 struct inet6_dev *idev;
1026
1027 idev = in6_dev_get(dev);
1028 if (idev) {
1029 hoplimit = idev->cnf.hop_limit;
1030 in6_dev_put(idev);
1031 }
1032 return hoplimit;
1033 }
1034
1035 /*
1036 *
1037 */
1038
1039 int ip6_route_add(struct fib6_config *cfg)
1040 {
1041 int err;
1042 struct rt6_info *rt = NULL;
1043 struct net_device *dev = NULL;
1044 struct inet6_dev *idev = NULL;
1045 struct fib6_table *table;
1046 int addr_type;
1047
1048 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1049 return -EINVAL;
1050 #ifndef CONFIG_IPV6_SUBTREES
1051 if (cfg->fc_src_len)
1052 return -EINVAL;
1053 #endif
1054 if (cfg->fc_ifindex) {
1055 err = -ENODEV;
1056 dev = dev_get_by_index(cfg->fc_ifindex);
1057 if (!dev)
1058 goto out;
1059 idev = in6_dev_get(dev);
1060 if (!idev)
1061 goto out;
1062 }
1063
1064 if (cfg->fc_metric == 0)
1065 cfg->fc_metric = IP6_RT_PRIO_USER;
1066
1067 table = fib6_new_table(cfg->fc_table);
1068 if (table == NULL) {
1069 err = -ENOBUFS;
1070 goto out;
1071 }
1072
1073 rt = ip6_dst_alloc();
1074
1075 if (rt == NULL) {
1076 err = -ENOMEM;
1077 goto out;
1078 }
1079
1080 rt->u.dst.obsolete = -1;
1081 rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
1082
1083 if (cfg->fc_protocol == RTPROT_UNSPEC)
1084 cfg->fc_protocol = RTPROT_BOOT;
1085 rt->rt6i_protocol = cfg->fc_protocol;
1086
1087 addr_type = ipv6_addr_type(&cfg->fc_dst);
1088
1089 if (addr_type & IPV6_ADDR_MULTICAST)
1090 rt->u.dst.input = ip6_mc_input;
1091 else
1092 rt->u.dst.input = ip6_forward;
1093
1094 rt->u.dst.output = ip6_output;
1095
1096 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1097 rt->rt6i_dst.plen = cfg->fc_dst_len;
1098 if (rt->rt6i_dst.plen == 128)
1099 rt->u.dst.flags = DST_HOST;
1100
1101 #ifdef CONFIG_IPV6_SUBTREES
1102 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1103 rt->rt6i_src.plen = cfg->fc_src_len;
1104 #endif
1105
1106 rt->rt6i_metric = cfg->fc_metric;
1107
1108 /* We cannot add true routes via loopback here;
1109 they would result in kernel looping, so promote them to reject routes.
1110 */
1111 if ((cfg->fc_flags & RTF_REJECT) ||
1112 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1113 /* hold loopback dev/idev if we haven't done so. */
1114 if (dev != &loopback_dev) {
1115 if (dev) {
1116 dev_put(dev);
1117 in6_dev_put(idev);
1118 }
1119 dev = &loopback_dev;
1120 dev_hold(dev);
1121 idev = in6_dev_get(dev);
1122 if (!idev) {
1123 err = -ENODEV;
1124 goto out;
1125 }
1126 }
1127 rt->u.dst.output = ip6_pkt_discard_out;
1128 rt->u.dst.input = ip6_pkt_discard;
1129 rt->u.dst.error = -ENETUNREACH;
1130 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1131 goto install_route;
1132 }
1133
1134 if (cfg->fc_flags & RTF_GATEWAY) {
1135 struct in6_addr *gw_addr;
1136 int gwa_type;
1137
1138 gw_addr = &cfg->fc_gateway;
1139 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1140 gwa_type = ipv6_addr_type(gw_addr);
1141
1142 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1143 struct rt6_info *grt;
1144
1145 /* IPv6 strictly forbids using non-link-local
1146 addresses as nexthop addresses.
1147 Otherwise, a router will not be able to send redirects.
1148 That is very good, but in some (rare!) circumstances
1149 (SIT, PtP, NBMA NOARP links) it is handy to allow
1150 some exceptions. --ANK
1151 */
1152 err = -EINVAL;
1153 if (!(gwa_type&IPV6_ADDR_UNICAST))
1154 goto out;
1155
1156 grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
1157
1158 err = -EHOSTUNREACH;
1159 if (grt == NULL)
1160 goto out;
1161 if (dev) {
1162 if (dev != grt->rt6i_dev) {
1163 dst_release(&grt->u.dst);
1164 goto out;
1165 }
1166 } else {
1167 dev = grt->rt6i_dev;
1168 idev = grt->rt6i_idev;
1169 dev_hold(dev);
1170 in6_dev_hold(grt->rt6i_idev);
1171 }
1172 if (!(grt->rt6i_flags&RTF_GATEWAY))
1173 err = 0;
1174 dst_release(&grt->u.dst);
1175
1176 if (err)
1177 goto out;
1178 }
1179 err = -EINVAL;
1180 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1181 goto out;
1182 }
1183
1184 err = -ENODEV;
1185 if (dev == NULL)
1186 goto out;
1187
1188 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1189 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1190 if (IS_ERR(rt->rt6i_nexthop)) {
1191 err = PTR_ERR(rt->rt6i_nexthop);
1192 rt->rt6i_nexthop = NULL;
1193 goto out;
1194 }
1195 }
1196
1197 rt->rt6i_flags = cfg->fc_flags;
1198
1199 install_route:
1200 if (cfg->fc_mx) {
1201 struct nlattr *nla;
1202 int remaining;
1203
1204 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1205 int type = nla->nla_type;
1206
1207 if (type) {
1208 if (type > RTAX_MAX) {
1209 err = -EINVAL;
1210 goto out;
1211 }
1212
1213 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1214 }
1215 }
1216 }
1217
1218 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1219 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1220 if (!rt->u.dst.metrics[RTAX_MTU-1])
1221 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1222 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1223 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1224 rt->u.dst.dev = dev;
1225 rt->rt6i_idev = idev;
1226 rt->rt6i_table = table;
1227 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1228
1229 out:
1230 if (dev)
1231 dev_put(dev);
1232 if (idev)
1233 in6_dev_put(idev);
1234 if (rt)
1235 dst_free(&rt->u.dst);
1236 return err;
1237 }
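#if 0	/* Illustrative sketch only (compiled out): a minimal fib6_config for
	 * adding a gateway route via ip6_route_add(), modelled on
	 * rt6_add_route_info() further down.  The helper name and values are
	 * hypothetical. */
static int ip6_route_add_example(struct in6_addr *prefix, int plen,
				 struct in6_addr *gwaddr, int ifindex)
{
	struct fib6_config cfg = {
		.fc_table	= RT6_TABLE_MAIN,
		.fc_metric	= IP6_RT_PRIO_USER,
		.fc_ifindex	= ifindex,
		.fc_dst_len	= plen,
		.fc_flags	= RTF_UP | RTF_GATEWAY,
	};

	ipv6_addr_copy(&cfg.fc_dst, prefix);
	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);

	return ip6_route_add(&cfg);
}
#endif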
1238
1239 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1240 {
1241 int err;
1242 struct fib6_table *table;
1243
1244 if (rt == &ip6_null_entry)
1245 return -ENOENT;
1246
1247 table = rt->rt6i_table;
1248 write_lock_bh(&table->tb6_lock);
1249
1250 err = fib6_del(rt, info);
1251 dst_release(&rt->u.dst);
1252
1253 write_unlock_bh(&table->tb6_lock);
1254
1255 return err;
1256 }
1257
1258 int ip6_del_rt(struct rt6_info *rt)
1259 {
1260 return __ip6_del_rt(rt, NULL);
1261 }
1262
1263 static int ip6_route_del(struct fib6_config *cfg)
1264 {
1265 struct fib6_table *table;
1266 struct fib6_node *fn;
1267 struct rt6_info *rt;
1268 int err = -ESRCH;
1269
1270 table = fib6_get_table(cfg->fc_table);
1271 if (table == NULL)
1272 return err;
1273
1274 read_lock_bh(&table->tb6_lock);
1275
1276 fn = fib6_locate(&table->tb6_root,
1277 &cfg->fc_dst, cfg->fc_dst_len,
1278 &cfg->fc_src, cfg->fc_src_len);
1279
1280 if (fn) {
1281 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1282 if (cfg->fc_ifindex &&
1283 (rt->rt6i_dev == NULL ||
1284 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1285 continue;
1286 if (cfg->fc_flags & RTF_GATEWAY &&
1287 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1288 continue;
1289 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1290 continue;
1291 dst_hold(&rt->u.dst);
1292 read_unlock_bh(&table->tb6_lock);
1293
1294 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1295 }
1296 }
1297 read_unlock_bh(&table->tb6_lock);
1298
1299 return err;
1300 }
1301
1302 /*
1303 * Handle redirects
1304 */
1305 struct ip6rd_flowi {
1306 struct flowi fl;
1307 struct in6_addr gateway;
1308 };
1309
1310 static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1311 struct flowi *fl,
1312 int flags)
1313 {
1314 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1315 struct rt6_info *rt;
1316 struct fib6_node *fn;
1317
1318 /*
1319 * Get the "current" route for this destination and
1320 * check if the redirect has come from the appropriate router.
1321 *
1322 * RFC 2461 specifies that redirects should only be
1323 * accepted if they come from the nexthop to the target.
1324 * Due to the way the routes are chosen, this notion
1325 * is a bit fuzzy and one might need to check all possible
1326 * routes.
1327 */
1328
1329 read_lock_bh(&table->tb6_lock);
1330 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1331 restart:
1332 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1333 /*
1334 * Current route is on-link; redirect is always invalid.
1335 *
1336 * It seems the previous statement is not true. It could
1337 * be a node which regards us as on-link (e.g. proxy ndisc),
1338 * but then the router serving it might decide that we should
1339 * know the truth 8)8) --ANK (980726).
1340 */
1341 if (rt6_check_expired(rt))
1342 continue;
1343 if (!(rt->rt6i_flags & RTF_GATEWAY))
1344 continue;
1345 if (fl->oif != rt->rt6i_dev->ifindex)
1346 continue;
1347 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1348 continue;
1349 break;
1350 }
1351
1352 if (!rt)
1353 rt = &ip6_null_entry;
1354 BACKTRACK(&fl->fl6_src);
1355 out:
1356 dst_hold(&rt->u.dst);
1357
1358 read_unlock_bh(&table->tb6_lock);
1359
1360 return rt;
1361 };
1362
1363 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1364 struct in6_addr *src,
1365 struct in6_addr *gateway,
1366 struct net_device *dev)
1367 {
1368 int flags = RT6_LOOKUP_F_HAS_SADDR;
1369 struct ip6rd_flowi rdfl = {
1370 .fl = {
1371 .oif = dev->ifindex,
1372 .nl_u = {
1373 .ip6_u = {
1374 .daddr = *dest,
1375 .saddr = *src,
1376 },
1377 },
1378 },
1379 .gateway = *gateway,
1380 };
1381
1382 if (rt6_need_strict(dest))
1383 flags |= RT6_LOOKUP_F_IFACE;
1384
1385 return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
1386 }
1387
1388 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1389 struct in6_addr *saddr,
1390 struct neighbour *neigh, u8 *lladdr, int on_link)
1391 {
1392 struct rt6_info *rt, *nrt = NULL;
1393 struct netevent_redirect netevent;
1394
1395 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1396
1397 if (rt == &ip6_null_entry) {
1398 if (net_ratelimit())
1399 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1400 "for redirect target\n");
1401 goto out;
1402 }
1403
1404 /*
1405 * We have finally decided to accept it.
1406 */
1407
1408 neigh_update(neigh, lladdr, NUD_STALE,
1409 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1410 NEIGH_UPDATE_F_OVERRIDE|
1411 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1412 NEIGH_UPDATE_F_ISROUTER))
1413 );
1414
1415 /*
1416 * Redirect received -> path was valid.
1417 * Look, redirects are sent only in response to data packets,
1418 * so this nexthop is apparently reachable. --ANK
1419 */
1420 dst_confirm(&rt->u.dst);
1421
1422 /* Duplicate redirect: silently ignore. */
1423 if (neigh == rt->u.dst.neighbour)
1424 goto out;
1425
1426 nrt = ip6_rt_copy(rt);
1427 if (nrt == NULL)
1428 goto out;
1429
1430 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1431 if (on_link)
1432 nrt->rt6i_flags &= ~RTF_GATEWAY;
1433
1434 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1435 nrt->rt6i_dst.plen = 128;
1436 nrt->u.dst.flags |= DST_HOST;
1437
1438 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1439 nrt->rt6i_nexthop = neigh_clone(neigh);
1440 /* Reset pmtu, it may be better */
1441 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1442 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1443
1444 if (ip6_ins_rt(nrt))
1445 goto out;
1446
1447 netevent.old = &rt->u.dst;
1448 netevent.new = &nrt->u.dst;
1449 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1450
1451 if (rt->rt6i_flags&RTF_CACHE) {
1452 ip6_del_rt(rt);
1453 return;
1454 }
1455
1456 out:
1457 dst_release(&rt->u.dst);
1458 return;
1459 }
1460
1461 /*
1462 * Handle ICMP "packet too big" messages
1463 * i.e. Path MTU discovery
1464 */
1465
1466 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1467 struct net_device *dev, u32 pmtu)
1468 {
1469 struct rt6_info *rt, *nrt;
1470 int allfrag = 0;
1471
1472 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1473 if (rt == NULL)
1474 return;
1475
1476 if (pmtu >= dst_mtu(&rt->u.dst))
1477 goto out;
1478
1479 if (pmtu < IPV6_MIN_MTU) {
1480 /*
1481 * According to RFC 2460, once a node receives a Packet Too Big
1482 * message reporting a PMTU below the IPv6 minimum link MTU (1280),
1483 * the PMTU is set to that minimum and a Fragment header should
1484 * always be included from then on.
1485 */
1486 pmtu = IPV6_MIN_MTU;
1487 allfrag = 1;
1488 }
1489
1490 /* New mtu received -> path was valid.
1491 Packet Too Big messages are sent only in response to data packets,
1492 so this nexthop is apparently reachable. --ANK
1493 */
1494 dst_confirm(&rt->u.dst);
1495
1496 /* Host route. If it is static, it would be better
1497 not to override it but to add a new one, so that
1498 when the cache entry expires the old PMTU
1499 is restored automatically.
1500 */
1501 if (rt->rt6i_flags & RTF_CACHE) {
1502 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1503 if (allfrag)
1504 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1505 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1506 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1507 goto out;
1508 }
1509
1510 /* Network route.
1511 Two cases are possible:
1512 1. It is a connected route. Action: COW.
1513 2. It is a gatewayed or NONEXTHOP route. Action: clone it.
1514 */
1515 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1516 nrt = rt6_alloc_cow(rt, daddr, saddr);
1517 else
1518 nrt = rt6_alloc_clone(rt, daddr);
1519
1520 if (nrt) {
1521 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1522 if (allfrag)
1523 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1524
1525 /* According to RFC 1981, a PMTU increase should not be probed for
1526 * within 5 minutes; the recommended timer is 10 minutes.
1527 * Here the route expiration time is set to ip6_rt_mtu_expires,
1528 * which is 10 minutes. After 10 minutes the decreased PMTU expires
1529 * and detection of a PMTU increase happens automatically.
1530 */
1531 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1532 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1533
1534 ip6_ins_rt(nrt);
1535 }
1536 out:
1537 dst_release(&rt->u.dst);
1538 }
1539
1540 /*
1541 * Misc support functions
1542 */
1543
1544 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1545 {
1546 struct rt6_info *rt = ip6_dst_alloc();
1547
1548 if (rt) {
1549 rt->u.dst.input = ort->u.dst.input;
1550 rt->u.dst.output = ort->u.dst.output;
1551
1552 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1553 rt->u.dst.error = ort->u.dst.error;
1554 rt->u.dst.dev = ort->u.dst.dev;
1555 if (rt->u.dst.dev)
1556 dev_hold(rt->u.dst.dev);
1557 rt->rt6i_idev = ort->rt6i_idev;
1558 if (rt->rt6i_idev)
1559 in6_dev_hold(rt->rt6i_idev);
1560 rt->u.dst.lastuse = jiffies;
1561 rt->rt6i_expires = 0;
1562
1563 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1564 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1565 rt->rt6i_metric = 0;
1566
1567 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1568 #ifdef CONFIG_IPV6_SUBTREES
1569 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1570 #endif
1571 rt->rt6i_table = ort->rt6i_table;
1572 }
1573 return rt;
1574 }
1575
1576 #ifdef CONFIG_IPV6_ROUTE_INFO
1577 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1578 struct in6_addr *gwaddr, int ifindex)
1579 {
1580 struct fib6_node *fn;
1581 struct rt6_info *rt = NULL;
1582 struct fib6_table *table;
1583
1584 table = fib6_get_table(RT6_TABLE_INFO);
1585 if (table == NULL)
1586 return NULL;
1587
1588 write_lock_bh(&table->tb6_lock);
1589 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1590 if (!fn)
1591 goto out;
1592
1593 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1594 if (rt->rt6i_dev->ifindex != ifindex)
1595 continue;
1596 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1597 continue;
1598 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1599 continue;
1600 dst_hold(&rt->u.dst);
1601 break;
1602 }
1603 out:
1604 write_unlock_bh(&table->tb6_lock);
1605 return rt;
1606 }
1607
1608 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1609 struct in6_addr *gwaddr, int ifindex,
1610 unsigned pref)
1611 {
1612 struct fib6_config cfg = {
1613 .fc_table = RT6_TABLE_INFO,
1614 .fc_metric = 1024,
1615 .fc_ifindex = ifindex,
1616 .fc_dst_len = prefixlen,
1617 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1618 RTF_UP | RTF_PREF(pref),
1619 };
1620
1621 ipv6_addr_copy(&cfg.fc_dst, prefix);
1622 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1623
1624 /* We should treat it as a default route if prefix length is 0. */
1625 if (!prefixlen)
1626 cfg.fc_flags |= RTF_DEFAULT;
1627
1628 ip6_route_add(&cfg);
1629
1630 return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1631 }
1632 #endif
1633
1634 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1635 {
1636 struct rt6_info *rt;
1637 struct fib6_table *table;
1638
1639 table = fib6_get_table(RT6_TABLE_DFLT);
1640 if (table == NULL)
1641 return NULL;
1642
1643 write_lock_bh(&table->tb6_lock);
1644 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
1645 if (dev == rt->rt6i_dev &&
1646 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1647 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1648 break;
1649 }
1650 if (rt)
1651 dst_hold(&rt->u.dst);
1652 write_unlock_bh(&table->tb6_lock);
1653 return rt;
1654 }
1655
1656 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1657 struct net_device *dev,
1658 unsigned int pref)
1659 {
1660 struct fib6_config cfg = {
1661 .fc_table = RT6_TABLE_DFLT,
1662 .fc_metric = 1024,
1663 .fc_ifindex = dev->ifindex,
1664 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1665 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1666 };
1667
1668 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1669
1670 ip6_route_add(&cfg);
1671
1672 return rt6_get_dflt_router(gwaddr, dev);
1673 }
1674
1675 void rt6_purge_dflt_routers(void)
1676 {
1677 struct rt6_info *rt;
1678 struct fib6_table *table;
1679
1680 /* NOTE: Keep consistent with rt6_get_dflt_router */
1681 table = fib6_get_table(RT6_TABLE_DFLT);
1682 if (table == NULL)
1683 return;
1684
1685 restart:
1686 read_lock_bh(&table->tb6_lock);
1687 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
1688 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1689 dst_hold(&rt->u.dst);
1690 read_unlock_bh(&table->tb6_lock);
1691 ip6_del_rt(rt);
1692 goto restart;
1693 }
1694 }
1695 read_unlock_bh(&table->tb6_lock);
1696 }
1697
1698 static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1699 struct fib6_config *cfg)
1700 {
1701 memset(cfg, 0, sizeof(*cfg));
1702
1703 cfg->fc_table = RT6_TABLE_MAIN;
1704 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1705 cfg->fc_metric = rtmsg->rtmsg_metric;
1706 cfg->fc_expires = rtmsg->rtmsg_info;
1707 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1708 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1709 cfg->fc_flags = rtmsg->rtmsg_flags;
1710
1711 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1712 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1713 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1714 }
1715
1716 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1717 {
1718 struct fib6_config cfg;
1719 struct in6_rtmsg rtmsg;
1720 int err;
1721
1722 switch(cmd) {
1723 case SIOCADDRT: /* Add a route */
1724 case SIOCDELRT: /* Delete a route */
1725 if (!capable(CAP_NET_ADMIN))
1726 return -EPERM;
1727 err = copy_from_user(&rtmsg, arg,
1728 sizeof(struct in6_rtmsg));
1729 if (err)
1730 return -EFAULT;
1731
1732 rtmsg_to_fib6_config(&rtmsg, &cfg);
1733
1734 rtnl_lock();
1735 switch (cmd) {
1736 case SIOCADDRT:
1737 err = ip6_route_add(&cfg);
1738 break;
1739 case SIOCDELRT:
1740 err = ip6_route_del(&cfg);
1741 break;
1742 default:
1743 err = -EINVAL;
1744 }
1745 rtnl_unlock();
1746
1747 return err;
1748 };
1749
1750 return -EINVAL;
1751 }
1752
1753 /*
1754 * Drop the packet on the floor
1755 */
1756
1757 static inline int ip6_pkt_drop(struct sk_buff *skb, int code)
1758 {
1759 int type = ipv6_addr_type(&skb->nh.ipv6h->daddr);
1760 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED)
1761 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1762
1763 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_OUTNOROUTES);
1764 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
1765 kfree_skb(skb);
1766 return 0;
1767 }
1768
1769 static int ip6_pkt_discard(struct sk_buff *skb)
1770 {
1771 return ip6_pkt_drop(skb, ICMPV6_NOROUTE);
1772 }
1773
1774 static int ip6_pkt_discard_out(struct sk_buff *skb)
1775 {
1776 skb->dev = skb->dst->dev;
1777 return ip6_pkt_discard(skb);
1778 }
1779
1780 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
1781
1782 static int ip6_pkt_prohibit(struct sk_buff *skb)
1783 {
1784 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED);
1785 }
1786
1787 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1788 {
1789 skb->dev = skb->dst->dev;
1790 return ip6_pkt_prohibit(skb);
1791 }
1792
1793 static int ip6_pkt_blk_hole(struct sk_buff *skb)
1794 {
1795 kfree_skb(skb);
1796 return 0;
1797 }
1798
1799 #endif
1800
1801 /*
1802 * Allocate a dst for local (unicast / anycast) address.
1803 */
1804
1805 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1806 const struct in6_addr *addr,
1807 int anycast)
1808 {
1809 struct rt6_info *rt = ip6_dst_alloc();
1810
1811 if (rt == NULL)
1812 return ERR_PTR(-ENOMEM);
1813
1814 dev_hold(&loopback_dev);
1815 in6_dev_hold(idev);
1816
1817 rt->u.dst.flags = DST_HOST;
1818 rt->u.dst.input = ip6_input;
1819 rt->u.dst.output = ip6_output;
1820 rt->rt6i_dev = &loopback_dev;
1821 rt->rt6i_idev = idev;
1822 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1823 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1824 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1825 rt->u.dst.obsolete = -1;
1826
1827 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1828 if (anycast)
1829 rt->rt6i_flags |= RTF_ANYCAST;
1830 else
1831 rt->rt6i_flags |= RTF_LOCAL;
1832 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1833 if (rt->rt6i_nexthop == NULL) {
1834 dst_free(&rt->u.dst);
1835 return ERR_PTR(-ENOMEM);
1836 }
1837
1838 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1839 rt->rt6i_dst.plen = 128;
1840 rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
1841
1842 atomic_set(&rt->u.dst.__refcnt, 1);
1843
1844 return rt;
1845 }
1846
1847 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1848 {
1849 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1850 rt != &ip6_null_entry) {
1851 RT6_TRACE("deleted by ifdown %p\n", rt);
1852 return -1;
1853 }
1854 return 0;
1855 }
1856
1857 void rt6_ifdown(struct net_device *dev)
1858 {
1859 fib6_clean_all(fib6_ifdown, 0, dev);
1860 }
1861
1862 struct rt6_mtu_change_arg
1863 {
1864 struct net_device *dev;
1865 unsigned mtu;
1866 };
1867
1868 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1869 {
1870 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1871 struct inet6_dev *idev;
1872
1873 /* In IPv6, PMTU discovery is not optional,
1874 so the RTAX_MTU lock cannot disable it.
1875 We still use this lock to block changes
1876 caused by addrconf/ndisc.
1877 */
1878
1879 idev = __in6_dev_get(arg->dev);
1880 if (idev == NULL)
1881 return 0;
1882
1883 /* For an administrative MTU increase there is no way to discover
1884 an IPv6 PMTU increase, so the PMTU must be updated here.
1885 Since RFC 1981 does not cover administrative MTU increases,
1886 updating the PMTU on such an increase is a MUST (e.g. jumbo frames).
1887 */
1888 /*
1889 If the new MTU is less than the route PMTU, the new MTU will be the
1890 lowest MTU in the path; update the route PMTU to reflect the
1891 decrease. If the new MTU is greater than the route PMTU, and the
1892 old MTU was the lowest MTU in the path, update the route PMTU
1893 to reflect the increase. In that case, if another node's MTU
1894 is now the lowest in the path, a Packet Too Big message will lead to
1895 PMTU discovery.
1896 */
1897 if (rt->rt6i_dev == arg->dev &&
1898 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1899 (dst_mtu(&rt->u.dst) > arg->mtu ||
1900 (dst_mtu(&rt->u.dst) < arg->mtu &&
1901 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1902 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1903 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1904 return 0;
1905 }
1906
1907 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1908 {
1909 struct rt6_mtu_change_arg arg = {
1910 .dev = dev,
1911 .mtu = mtu,
1912 };
1913
1914 fib6_clean_all(rt6_mtu_change_route, 0, &arg);
1915 }
1916
1917 static struct nla_policy rtm_ipv6_policy[RTA_MAX+1] __read_mostly = {
1918 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
1919 [RTA_OIF] = { .type = NLA_U32 },
1920 [RTA_IIF] = { .type = NLA_U32 },
1921 [RTA_PRIORITY] = { .type = NLA_U32 },
1922 [RTA_METRICS] = { .type = NLA_NESTED },
1923 };
1924
1925 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1926 struct fib6_config *cfg)
1927 {
1928 struct rtmsg *rtm;
1929 struct nlattr *tb[RTA_MAX+1];
1930 int err;
1931
1932 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1933 if (err < 0)
1934 goto errout;
1935
1936 err = -EINVAL;
1937 rtm = nlmsg_data(nlh);
1938 memset(cfg, 0, sizeof(*cfg));
1939
1940 cfg->fc_table = rtm->rtm_table;
1941 cfg->fc_dst_len = rtm->rtm_dst_len;
1942 cfg->fc_src_len = rtm->rtm_src_len;
1943 cfg->fc_flags = RTF_UP;
1944 cfg->fc_protocol = rtm->rtm_protocol;
1945
1946 if (rtm->rtm_type == RTN_UNREACHABLE)
1947 cfg->fc_flags |= RTF_REJECT;
1948
1949 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
1950 cfg->fc_nlinfo.nlh = nlh;
1951
1952 if (tb[RTA_GATEWAY]) {
1953 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
1954 cfg->fc_flags |= RTF_GATEWAY;
1955 }
1956
1957 if (tb[RTA_DST]) {
1958 int plen = (rtm->rtm_dst_len + 7) >> 3;
1959
1960 if (nla_len(tb[RTA_DST]) < plen)
1961 goto errout;
1962
1963 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1964 }
1965
1966 if (tb[RTA_SRC]) {
1967 int plen = (rtm->rtm_src_len + 7) >> 3;
1968
1969 if (nla_len(tb[RTA_SRC]) < plen)
1970 goto errout;
1971
1972 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1973 }
1974
1975 if (tb[RTA_OIF])
1976 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
1977
1978 if (tb[RTA_PRIORITY])
1979 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
1980
1981 if (tb[RTA_METRICS]) {
1982 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
1983 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1984 }
1985
1986 if (tb[RTA_TABLE])
1987 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
1988
1989 err = 0;
1990 errout:
1991 return err;
1992 }
1993
1994 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1995 {
1996 struct fib6_config cfg;
1997 int err;
1998
1999 err = rtm_to_fib6_config(skb, nlh, &cfg);
2000 if (err < 0)
2001 return err;
2002
2003 return ip6_route_del(&cfg);
2004 }
2005
2006 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2007 {
2008 struct fib6_config cfg;
2009 int err;
2010
2011 err = rtm_to_fib6_config(skb, nlh, &cfg);
2012 if (err < 0)
2013 return err;
2014
2015 return ip6_route_add(&cfg);
2016 }
2017
2018 static inline size_t rt6_nlmsg_size(void)
2019 {
2020 return NLMSG_ALIGN(sizeof(struct rtmsg))
2021 + nla_total_size(16) /* RTA_SRC */
2022 + nla_total_size(16) /* RTA_DST */
2023 + nla_total_size(16) /* RTA_GATEWAY */
2024 + nla_total_size(16) /* RTA_PREFSRC */
2025 + nla_total_size(4) /* RTA_TABLE */
2026 + nla_total_size(4) /* RTA_IIF */
2027 + nla_total_size(4) /* RTA_OIF */
2028 + nla_total_size(4) /* RTA_PRIORITY */
2029 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2030 + nla_total_size(sizeof(struct rta_cacheinfo));
2031 }
2032
2033 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
2034 struct in6_addr *dst, struct in6_addr *src,
2035 int iif, int type, u32 pid, u32 seq,
2036 int prefix, unsigned int flags)
2037 {
2038 struct rtmsg *rtm;
2039 struct nlmsghdr *nlh;
2040 long expires;
2041 u32 table;
2042
2043 if (prefix) { /* user wants prefix routes only */
2044 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2045 /* success since this is not a prefix route */
2046 return 1;
2047 }
2048 }
2049
2050 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2051 if (nlh == NULL)
2052 return -EMSGSIZE;
2053
2054 rtm = nlmsg_data(nlh);
2055 rtm->rtm_family = AF_INET6;
2056 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2057 rtm->rtm_src_len = rt->rt6i_src.plen;
2058 rtm->rtm_tos = 0;
2059 if (rt->rt6i_table)
2060 table = rt->rt6i_table->tb6_id;
2061 else
2062 table = RT6_TABLE_UNSPEC;
2063 rtm->rtm_table = table;
2064 NLA_PUT_U32(skb, RTA_TABLE, table);
2065 if (rt->rt6i_flags&RTF_REJECT)
2066 rtm->rtm_type = RTN_UNREACHABLE;
2067 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2068 rtm->rtm_type = RTN_LOCAL;
2069 else
2070 rtm->rtm_type = RTN_UNICAST;
2071 rtm->rtm_flags = 0;
2072 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2073 rtm->rtm_protocol = rt->rt6i_protocol;
2074 if (rt->rt6i_flags&RTF_DYNAMIC)
2075 rtm->rtm_protocol = RTPROT_REDIRECT;
2076 else if (rt->rt6i_flags & RTF_ADDRCONF)
2077 rtm->rtm_protocol = RTPROT_KERNEL;
2078 else if (rt->rt6i_flags&RTF_DEFAULT)
2079 rtm->rtm_protocol = RTPROT_RA;
2080
2081 if (rt->rt6i_flags&RTF_CACHE)
2082 rtm->rtm_flags |= RTM_F_CLONED;
2083
2084 if (dst) {
2085 NLA_PUT(skb, RTA_DST, 16, dst);
2086 rtm->rtm_dst_len = 128;
2087 } else if (rtm->rtm_dst_len)
2088 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2089 #ifdef CONFIG_IPV6_SUBTREES
2090 if (src) {
2091 NLA_PUT(skb, RTA_SRC, 16, src);
2092 rtm->rtm_src_len = 128;
2093 } else if (rtm->rtm_src_len)
2094 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2095 #endif
2096 if (iif)
2097 NLA_PUT_U32(skb, RTA_IIF, iif);
2098 else if (dst) {
2099 struct in6_addr saddr_buf;
2100 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
2101 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2102 }
2103
2104 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2105 goto nla_put_failure;
2106
2107 if (rt->u.dst.neighbour)
2108 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2109
2110 if (rt->u.dst.dev)
2111 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2112
2113 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2114
2115 expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
2116 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2117 expires, rt->u.dst.error) < 0)
2118 goto nla_put_failure;
2119
2120 return nlmsg_end(skb, nlh);
2121
2122 nla_put_failure:
2123 nlmsg_cancel(skb, nlh);
2124 return -EMSGSIZE;
2125 }
2126
2127 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2128 {
2129 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2130 int prefix;
2131
2132 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2133 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2134 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2135 } else
2136 prefix = 0;
2137
2138 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2139 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2140 prefix, NLM_F_MULTI);
2141 }
2142
2143 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2144 {
2145 struct nlattr *tb[RTA_MAX+1];
2146 struct rt6_info *rt;
2147 struct sk_buff *skb;
2148 struct rtmsg *rtm;
2149 struct flowi fl;
2150 int err, iif = 0;
2151
2152 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2153 if (err < 0)
2154 goto errout;
2155
2156 err = -EINVAL;
2157 memset(&fl, 0, sizeof(fl));
2158
2159 if (tb[RTA_SRC]) {
2160 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2161 goto errout;
2162
2163 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2164 }
2165
2166 if (tb[RTA_DST]) {
2167 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2168 goto errout;
2169
2170 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2171 }
2172
2173 if (tb[RTA_IIF])
2174 iif = nla_get_u32(tb[RTA_IIF]);
2175
2176 if (tb[RTA_OIF])
2177 fl.oif = nla_get_u32(tb[RTA_OIF]);
2178
2179 if (iif) {
2180 struct net_device *dev;
2181 dev = __dev_get_by_index(iif);
2182 if (!dev) {
2183 err = -ENODEV;
2184 goto errout;
2185 }
2186 }
2187
2188 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2189 if (skb == NULL) {
2190 err = -ENOBUFS;
2191 goto errout;
2192 }
2193
2194 /* Reserve room for dummy headers; this skb can pass
2195 through a good chunk of the routing engine.
2196 */
2197 skb->mac.raw = skb->data;
2198 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2199
2200 rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
2201 skb->dst = &rt->u.dst;
2202
2203 err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2204 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2205 nlh->nlmsg_seq, 0, 0);
2206 if (err < 0) {
2207 kfree_skb(skb);
2208 goto errout;
2209 }
2210
2211 err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
2212 errout:
2213 return err;
2214 }
2215
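/*
 * Broadcast a routing change (RTM_NEWROUTE/RTM_DELROUTE) to listeners
 * on the RTNLGRP_IPV6_ROUTE netlink group.  Allocation or fill failures
 * are reported through rtnl_set_sk_err().
 */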
2216 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2217 {
2218 struct sk_buff *skb;
2219 u32 pid = 0, seq = 0;
2220 struct nlmsghdr *nlh = NULL;
2221 int err = -ENOBUFS;
2222
2223 if (info) {
2224 pid = info->pid;
2225 nlh = info->nlh;
2226 if (nlh)
2227 seq = nlh->nlmsg_seq;
2228 }
2229
2230 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2231 if (skb == NULL)
2232 goto errout;
2233
2234 err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
2235 if (err < 0) {
2236 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2237 WARN_ON(err == -EMSGSIZE);
2238 kfree_skb(skb);
2239 goto errout;
2240 }
2241 err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
2242 errout:
2243 if (err < 0)
2244 rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
2245 }
2246
2247 /*
2248 * /proc
2249 */
2250
2251 #ifdef CONFIG_PROC_FS
2252
2253 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2254
2255 struct rt6_proc_arg
2256 {
2257 char *buffer;
2258 int offset;
2259 int length;
2260 int skip;
2261 int len;
2262 };
2263
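/*
 * Format one routing table entry for /proc/net/ipv6_route: destination
 * and source prefixes, next hop, metric, refcount, use count, flags and
 * device name.  Records skipped for the caller's offset are counted in
 * arg->skip, RT6_INFO_LEN bytes at a time.
 */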
2264 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2265 {
2266 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
2267
2268 if (arg->skip < arg->offset / RT6_INFO_LEN) {
2269 arg->skip++;
2270 return 0;
2271 }
2272
2273 if (arg->len >= arg->length)
2274 return 0;
2275
2276 arg->len += sprintf(arg->buffer + arg->len,
2277 NIP6_SEQFMT " %02x ",
2278 NIP6(rt->rt6i_dst.addr),
2279 rt->rt6i_dst.plen);
2280
2281 #ifdef CONFIG_IPV6_SUBTREES
2282 arg->len += sprintf(arg->buffer + arg->len,
2283 NIP6_SEQFMT " %02x ",
2284 NIP6(rt->rt6i_src.addr),
2285 rt->rt6i_src.plen);
2286 #else
2287 arg->len += sprintf(arg->buffer + arg->len,
2288 "00000000000000000000000000000000 00 ");
2289 #endif
2290
2291 if (rt->rt6i_nexthop) {
2292 arg->len += sprintf(arg->buffer + arg->len,
2293 NIP6_SEQFMT,
2294 NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
2295 } else {
2296 arg->len += sprintf(arg->buffer + arg->len,
2297 "00000000000000000000000000000000");
2298 }
2299 arg->len += sprintf(arg->buffer + arg->len,
2300 " %08x %08x %08x %08x %8s\n",
2301 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2302 rt->u.dst.__use, rt->rt6i_flags,
2303 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2304 return 0;
2305 }
2306
2307 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2308 {
2309 struct rt6_proc_arg arg = {
2310 .buffer = buffer,
2311 .offset = offset,
2312 .length = length,
2313 };
2314
2315 fib6_clean_all(rt6_info_route, 0, &arg);
2316
2317 *start = buffer;
2318 if (offset)
2319 *start += offset % RT6_INFO_LEN;
2320
2321 arg.len -= offset % RT6_INFO_LEN;
2322
2323 if (arg.len > length)
2324 arg.len = length;
2325 if (arg.len < 0)
2326 arg.len = 0;
2327
2328 return arg.len;
2329 }
2330
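/*
 * Emit the global rt6_stats counters (plus the current number of dst
 * entries) as hex words for /proc/net/rt6_stats.
 */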
2331 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2332 {
2333 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2334 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2335 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2336 rt6_stats.fib_rt_cache,
2337 atomic_read(&ip6_dst_ops.entries),
2338 rt6_stats.fib_discarded_routes);
2339
2340 return 0;
2341 }
2342
2343 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2344 {
2345 return single_open(file, rt6_stats_seq_show, NULL);
2346 }
2347
2348 static const struct file_operations rt6_stats_seq_fops = {
2349 .owner = THIS_MODULE,
2350 .open = rt6_stats_seq_open,
2351 .read = seq_read,
2352 .llseek = seq_lseek,
2353 .release = single_release,
2354 };
2355 #endif /* CONFIG_PROC_FS */
2356
2357 #ifdef CONFIG_SYSCTL
2358
2359 static int flush_delay;
2360
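/*
 * Writing to net.ipv6.route.flush runs the FIB garbage collector; the
 * entry is write-only (mode 0200), so reads return -EINVAL.
 */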
2361 static
2362 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2363 void __user *buffer, size_t *lenp, loff_t *ppos)
2364 {
2365 if (write) {
2366 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2367 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2368 return 0;
2369 } else
2370 return -EINVAL;
2371 }
2372
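/* Tunables exported under /proc/sys/net/ipv6/route/. */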
2373 ctl_table ipv6_route_table[] = {
2374 {
2375 .ctl_name = NET_IPV6_ROUTE_FLUSH,
2376 .procname = "flush",
2377 .data = &flush_delay,
2378 .maxlen = sizeof(int),
2379 .mode = 0200,
2380 .proc_handler = &ipv6_sysctl_rtcache_flush
2381 },
2382 {
2383 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2384 .procname = "gc_thresh",
2385 .data = &ip6_dst_ops.gc_thresh,
2386 .maxlen = sizeof(int),
2387 .mode = 0644,
2388 .proc_handler = &proc_dointvec,
2389 },
2390 {
2391 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2392 .procname = "max_size",
2393 .data = &ip6_rt_max_size,
2394 .maxlen = sizeof(int),
2395 .mode = 0644,
2396 .proc_handler = &proc_dointvec,
2397 },
2398 {
2399 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2400 .procname = "gc_min_interval",
2401 .data = &ip6_rt_gc_min_interval,
2402 .maxlen = sizeof(int),
2403 .mode = 0644,
2404 .proc_handler = &proc_dointvec_jiffies,
2405 .strategy = &sysctl_jiffies,
2406 },
2407 {
2408 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2409 .procname = "gc_timeout",
2410 .data = &ip6_rt_gc_timeout,
2411 .maxlen = sizeof(int),
2412 .mode = 0644,
2413 .proc_handler = &proc_dointvec_jiffies,
2414 .strategy = &sysctl_jiffies,
2415 },
2416 {
2417 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2418 .procname = "gc_interval",
2419 .data = &ip6_rt_gc_interval,
2420 .maxlen = sizeof(int),
2421 .mode = 0644,
2422 .proc_handler = &proc_dointvec_jiffies,
2423 .strategy = &sysctl_jiffies,
2424 },
2425 {
2426 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2427 .procname = "gc_elasticity",
2428 .data = &ip6_rt_gc_elasticity,
2429 .maxlen = sizeof(int),
2430 .mode = 0644,
2431 .proc_handler = &proc_dointvec,
2432 /* plain integer ratio, not a jiffies value: no jiffies conversion */
2433 },
2434 {
2435 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2436 .procname = "mtu_expires",
2437 .data = &ip6_rt_mtu_expires,
2438 .maxlen = sizeof(int),
2439 .mode = 0644,
2440 .proc_handler = &proc_dointvec_jiffies,
2441 .strategy = &sysctl_jiffies,
2442 },
2443 {
2444 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2445 .procname = "min_adv_mss",
2446 .data = &ip6_rt_min_advmss,
2447 .maxlen = sizeof(int),
2448 .mode = 0644,
2449 .proc_handler = &proc_dointvec,
2450 /* value is in bytes, not jiffies: no jiffies conversion */
2451 },
2452 {
2453 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2454 .procname = "gc_min_interval_ms",
2455 .data = &ip6_rt_gc_min_interval,
2456 .maxlen = sizeof(int),
2457 .mode = 0644,
2458 .proc_handler = &proc_dointvec_ms_jiffies,
2459 .strategy = &sysctl_ms_jiffies,
2460 },
2461 { .ctl_name = 0 }
2462 };
2463
2464 #endif
2465
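/*
 * Subsystem init: create the ip6_dst_cache slab, initialise the FIB,
 * register the /proc entries and, where configured, the xfrm6 and
 * multiple-table rule hooks.
 */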
2466 void __init ip6_route_init(void)
2467 {
2468 struct proc_dir_entry *p;
2469
2470 ip6_dst_ops.kmem_cachep =
2471 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2472 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
2473 fib6_init();
2474 #ifdef CONFIG_PROC_FS
2475 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2476 if (p)
2477 p->owner = THIS_MODULE;
2478
2479 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2480 #endif
2481 #ifdef CONFIG_XFRM
2482 xfrm6_init();
2483 #endif
2484 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2485 fib6_rules_init();
2486 #endif
2487 }
2488
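/*
 * Undo ip6_route_init(): drop the policy rules and /proc entries, shut
 * down xfrm6, purge every remaining route and free the dst cache.
 */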
2489 void ip6_route_cleanup(void)
2490 {
2491 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2492 fib6_rules_cleanup();
2493 #endif
2494 #ifdef CONFIG_PROC_FS
2495 proc_net_remove("ipv6_route");
2496 proc_net_remove("rt6_stats");
2497 #endif
2498 #ifdef CONFIG_XFRM
2499 xfrm6_fini();
2500 #endif
2501 rt6_ifdown(NULL);
2502 fib6_gc_cleanup();
2503 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2504 }