ipv6: Don't change dst->flags using assignments.
[deliverable/linux.git] / net / ipv6 / route.c
1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14 /* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
23 * Ville Nuorvala
24 * Fixed routing subtrees.
25 */
26
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/mroute6.h>
38 #include <linux/init.h>
39 #include <linux/if_arp.h>
40 #include <linux/proc_fs.h>
41 #include <linux/seq_file.h>
42 #include <linux/nsproxy.h>
43 #include <linux/slab.h>
44 #include <net/net_namespace.h>
45 #include <net/snmp.h>
46 #include <net/ipv6.h>
47 #include <net/ip6_fib.h>
48 #include <net/ip6_route.h>
49 #include <net/ndisc.h>
50 #include <net/addrconf.h>
51 #include <net/tcp.h>
52 #include <linux/rtnetlink.h>
53 #include <net/dst.h>
54 #include <net/xfrm.h>
55 #include <net/netevent.h>
56 #include <net/netlink.h>
57
58 #include <asm/uaccess.h>
59
60 #ifdef CONFIG_SYSCTL
61 #include <linux/sysctl.h>
62 #endif
63
/*
 * Debug verbosity for this file.  With RT6_DEBUG at 3 or above, RDBG()
 * and RT6_TRACE() expand to printk calls; below that they expand to
 * nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG >= 3
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#endif
74
75 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
76 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
77 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
78 static unsigned int ip6_default_mtu(const struct dst_entry *dst);
79 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80 static void ip6_dst_destroy(struct dst_entry *);
81 static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
83 static int ip6_dst_gc(struct dst_ops *ops);
84
85 static int ip6_pkt_discard(struct sk_buff *skb);
86 static int ip6_pkt_discard_out(struct sk_buff *skb);
87 static void ip6_link_failure(struct sk_buff *skb);
88 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89
90 #ifdef CONFIG_IPV6_ROUTE_INFO
91 static struct rt6_info *rt6_add_route_info(struct net *net,
92 const struct in6_addr *prefix, int prefixlen,
93 const struct in6_addr *gwaddr, int ifindex,
94 unsigned pref);
95 static struct rt6_info *rt6_get_route_info(struct net *net,
96 const struct in6_addr *prefix, int prefixlen,
97 const struct in6_addr *gwaddr, int ifindex);
98 #endif
99
100 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
101 {
102 struct rt6_info *rt = (struct rt6_info *) dst;
103 struct inet_peer *peer;
104 u32 *p = NULL;
105
106 if (!rt->rt6i_peer)
107 rt6_bind_peer(rt, 1);
108
109 peer = rt->rt6i_peer;
110 if (peer) {
111 u32 *old_p = __DST_METRICS_PTR(old);
112 unsigned long prev, new;
113
114 p = peer->metrics;
115 if (inet_metrics_new(peer))
116 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
117
118 new = (unsigned long) p;
119 prev = cmpxchg(&dst->_metrics, old, new);
120
121 if (prev != old) {
122 p = __DST_METRICS_PTR(prev);
123 if (prev & DST_METRICS_READ_ONLY)
124 p = NULL;
125 }
126 }
127 return p;
128 }
129
130 static struct dst_ops ip6_dst_ops_template = {
131 .family = AF_INET6,
132 .protocol = cpu_to_be16(ETH_P_IPV6),
133 .gc = ip6_dst_gc,
134 .gc_thresh = 1024,
135 .check = ip6_dst_check,
136 .default_advmss = ip6_default_advmss,
137 .default_mtu = ip6_default_mtu,
138 .cow_metrics = ipv6_cow_metrics,
139 .destroy = ip6_dst_destroy,
140 .ifdown = ip6_dst_ifdown,
141 .negative_advice = ip6_negative_advice,
142 .link_failure = ip6_link_failure,
143 .update_pmtu = ip6_rt_update_pmtu,
144 .local_out = __ip6_local_out,
145 };
146
/* .default_mtu callback for blackhole entries: always reports 0. */
static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
{
	return 0;
}
151
152 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
153 {
154 }
155
156 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
157 unsigned long old)
158 {
159 return NULL;
160 }
161
162 static struct dst_ops ip6_dst_blackhole_ops = {
163 .family = AF_INET6,
164 .protocol = cpu_to_be16(ETH_P_IPV6),
165 .destroy = ip6_dst_destroy,
166 .check = ip6_dst_check,
167 .default_mtu = ip6_blackhole_default_mtu,
168 .default_advmss = ip6_default_advmss,
169 .update_pmtu = ip6_rt_blackhole_update_pmtu,
170 .cow_metrics = ip6_rt_blackhole_cow_metrics,
171 };
172
173 static const u32 ip6_template_metrics[RTAX_MAX] = {
174 [RTAX_HOPLIMIT - 1] = 255,
175 };
176
177 static struct rt6_info ip6_null_entry_template = {
178 .dst = {
179 .__refcnt = ATOMIC_INIT(1),
180 .__use = 1,
181 .obsolete = -1,
182 .error = -ENETUNREACH,
183 .input = ip6_pkt_discard,
184 .output = ip6_pkt_discard_out,
185 },
186 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
187 .rt6i_protocol = RTPROT_KERNEL,
188 .rt6i_metric = ~(u32) 0,
189 .rt6i_ref = ATOMIC_INIT(1),
190 };
191
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/* Template for the "prohibit" reject entry; its error is -EACCES. */
static struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Template for the "blackhole" reject entry: packets go to dst_discard
 * in both directions and the error is -EINVAL.
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
228
229 /* allocate dst with ip6_dst_ops */
230 static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
231 struct net_device *dev)
232 {
233 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, 0);
234
235 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
236
237 return rt;
238 }
239
240 static void ip6_dst_destroy(struct dst_entry *dst)
241 {
242 struct rt6_info *rt = (struct rt6_info *)dst;
243 struct inet6_dev *idev = rt->rt6i_idev;
244 struct inet_peer *peer = rt->rt6i_peer;
245
246 if (idev != NULL) {
247 rt->rt6i_idev = NULL;
248 in6_dev_put(idev);
249 }
250 if (peer) {
251 rt->rt6i_peer = NULL;
252 inet_putpeer(peer);
253 }
254 }
255
256 static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
257
258 static u32 rt6_peer_genid(void)
259 {
260 return atomic_read(&__rt6_peer_genid);
261 }
262
263 void rt6_bind_peer(struct rt6_info *rt, int create)
264 {
265 struct inet_peer *peer;
266
267 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
268 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
269 inet_putpeer(peer);
270 else
271 rt->rt6i_peer_genid = rt6_peer_genid();
272 }
273
274 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
275 int how)
276 {
277 struct rt6_info *rt = (struct rt6_info *)dst;
278 struct inet6_dev *idev = rt->rt6i_idev;
279 struct net_device *loopback_dev =
280 dev_net(dev)->loopback_dev;
281
282 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
283 struct inet6_dev *loopback_idev =
284 in6_dev_get(loopback_dev);
285 if (loopback_idev != NULL) {
286 rt->rt6i_idev = loopback_idev;
287 in6_dev_put(idev);
288 }
289 }
290 }
291
292 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
293 {
294 return (rt->rt6i_flags & RTF_EXPIRES) &&
295 time_after(jiffies, rt->rt6i_expires);
296 }
297
298 static inline int rt6_need_strict(const struct in6_addr *daddr)
299 {
300 return ipv6_addr_type(daddr) &
301 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
302 }
303
304 /*
305 * Route lookup. Any table->tb6_lock is implied.
306 */
307
308 static inline struct rt6_info *rt6_device_match(struct net *net,
309 struct rt6_info *rt,
310 const struct in6_addr *saddr,
311 int oif,
312 int flags)
313 {
314 struct rt6_info *local = NULL;
315 struct rt6_info *sprt;
316
317 if (!oif && ipv6_addr_any(saddr))
318 goto out;
319
320 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
321 struct net_device *dev = sprt->rt6i_dev;
322
323 if (oif) {
324 if (dev->ifindex == oif)
325 return sprt;
326 if (dev->flags & IFF_LOOPBACK) {
327 if (sprt->rt6i_idev == NULL ||
328 sprt->rt6i_idev->dev->ifindex != oif) {
329 if (flags & RT6_LOOKUP_F_IFACE && oif)
330 continue;
331 if (local && (!oif ||
332 local->rt6i_idev->dev->ifindex == oif))
333 continue;
334 }
335 local = sprt;
336 }
337 } else {
338 if (ipv6_chk_addr(net, saddr, dev,
339 flags & RT6_LOOKUP_F_IFACE))
340 return sprt;
341 }
342 }
343
344 if (oif) {
345 if (local)
346 return local;
347
348 if (flags & RT6_LOOKUP_F_IFACE)
349 return net->ipv6.ip6_null_entry;
350 }
351 out:
352 return rt;
353 }
354
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if the route's next-hop neighbour is not
 * in a NUD_VALID state and more than rtr_probe_interval has passed since
 * the neighbour entry was last updated, send a neighbour solicitation to
 * its solicited-node multicast address.
 *
 * The original comment notes that probing MUST be rate-limited to no
 * more than one per minute; the rate limit applied here is the
 * per-device rtr_probe_interval setting.
 *
 * NOTE(review): neigh->updated is written while only the read side of
 * neigh->lock is held -- behaviour kept as found.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;
	int probe_due;

	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	read_lock_bh(&neigh->lock);
	probe_due = !(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (probe_due)
		neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	if (!probe_due)
		return;

	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF probing is compiled out. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
389
390 /*
391 * Default Router Selection (RFC 2461 6.3.6)
392 */
393 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
394 {
395 struct net_device *dev = rt->rt6i_dev;
396 if (!oif || dev->ifindex == oif)
397 return 2;
398 if ((dev->flags & IFF_LOOPBACK) &&
399 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
400 return 1;
401 return 0;
402 }
403
404 static inline int rt6_check_neigh(struct rt6_info *rt)
405 {
406 struct neighbour *neigh = rt->rt6i_nexthop;
407 int m;
408 if (rt->rt6i_flags & RTF_NONEXTHOP ||
409 !(rt->rt6i_flags & RTF_GATEWAY))
410 m = 1;
411 else if (neigh) {
412 read_lock_bh(&neigh->lock);
413 if (neigh->nud_state & NUD_VALID)
414 m = 2;
415 #ifdef CONFIG_IPV6_ROUTER_PREF
416 else if (neigh->nud_state & NUD_FAILED)
417 m = 0;
418 #endif
419 else
420 m = 1;
421 read_unlock_bh(&neigh->lock);
422 } else
423 m = 0;
424 return m;
425 }
426
427 static int rt6_score_route(struct rt6_info *rt, int oif,
428 int strict)
429 {
430 int m, n;
431
432 m = rt6_check_dev(rt, oif);
433 if (!m && (strict & RT6_LOOKUP_F_IFACE))
434 return -1;
435 #ifdef CONFIG_IPV6_ROUTER_PREF
436 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
437 #endif
438 n = rt6_check_neigh(rt);
439 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
440 return -1;
441 return m;
442 }
443
444 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
445 int *mpri, struct rt6_info *match)
446 {
447 int m;
448
449 if (rt6_check_expired(rt))
450 goto out;
451
452 m = rt6_score_route(rt, oif, strict);
453 if (m < 0)
454 goto out;
455
456 if (m > *mpri) {
457 if (strict & RT6_LOOKUP_F_REACHABLE)
458 rt6_probe(match);
459 *mpri = m;
460 match = rt;
461 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
462 rt6_probe(rt);
463 }
464
465 out:
466 return match;
467 }
468
469 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
470 struct rt6_info *rr_head,
471 u32 metric, int oif, int strict)
472 {
473 struct rt6_info *rt, *match;
474 int mpri = -1;
475
476 match = NULL;
477 for (rt = rr_head; rt && rt->rt6i_metric == metric;
478 rt = rt->dst.rt6_next)
479 match = find_match(rt, oif, strict, &mpri, match);
480 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
481 rt = rt->dst.rt6_next)
482 match = find_match(rt, oif, strict, &mpri, match);
483
484 return match;
485 }
486
487 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
488 {
489 struct rt6_info *match, *rt0;
490 struct net *net;
491
492 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
493 __func__, fn->leaf, oif);
494
495 rt0 = fn->rr_ptr;
496 if (!rt0)
497 fn->rr_ptr = rt0 = fn->leaf;
498
499 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
500
501 if (!match &&
502 (strict & RT6_LOOKUP_F_REACHABLE)) {
503 struct rt6_info *next = rt0->dst.rt6_next;
504
505 /* no entries matched; do round-robin */
506 if (!next || next->rt6i_metric != rt0->rt6i_metric)
507 next = fn->leaf;
508
509 if (next != rt0)
510 fn->rr_ptr = next;
511 }
512
513 RT6_TRACE("%s() => %p\n",
514 __func__, match);
515
516 net = dev_net(rt0->rt6i_dev);
517 return match ? match : net->ipv6.ip6_null_entry;
518 }
519
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a route information option (@opt, @len bytes) that was
 * received on @dev with @gwaddr as gateway.
 *
 * After validating the option, the matching route is looked up.  A zero
 * lifetime deletes an existing route; a non-zero lifetime adds the route
 * if it is missing, or refreshes the preference bits of an existing one.
 * The expiry is then set (or cleared for an infinite lifetime).
 *
 * Returns 0 on success or -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3)
		return -EINVAL;
	if (rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2)
			return -EINVAL;
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1)
			return -EINVAL;
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* A zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (addrconf_finite_timeout(lifetime)) {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		} else {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		}
		dst_release(&rt->dst);
	}
	return 0;
}
#endif
593
/*
 * BACKTRACK - climb the fib6 tree when the lookup ended at the null entry.
 *
 * Expects the locals `rt` and `fn` and the labels `out` and `restart` in
 * the enclosing function.  While `rt` is the null entry of @__net, move
 * `fn` to its parent, or into the parent's source subtree (looked up
 * with @saddr) when there is one and `fn` is not that subtree, until a
 * node with RTN_RTINFO is found (goto restart) or the top-level root is
 * reached (goto out).
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == __net->ipv6.ip6_null_entry) {				\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while(0)
611
612 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
613 struct fib6_table *table,
614 struct flowi6 *fl6, int flags)
615 {
616 struct fib6_node *fn;
617 struct rt6_info *rt;
618
619 read_lock_bh(&table->tb6_lock);
620 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
621 restart:
622 rt = fn->leaf;
623 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
624 BACKTRACK(net, &fl6->saddr);
625 out:
626 dst_use(&rt->dst, jiffies);
627 read_unlock_bh(&table->tb6_lock);
628 return rt;
629
630 }
631
632 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
633 const struct in6_addr *saddr, int oif, int strict)
634 {
635 struct flowi6 fl6 = {
636 .flowi6_oif = oif,
637 .daddr = *daddr,
638 };
639 struct dst_entry *dst;
640 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
641
642 if (saddr) {
643 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
644 flags |= RT6_LOOKUP_F_HAS_SADDR;
645 }
646
647 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
648 if (dst->error == 0)
649 return (struct rt6_info *) dst;
650
651 dst_release(dst);
652
653 return NULL;
654 }
655
656 EXPORT_SYMBOL(rt6_lookup);
657
658 /* ip6_ins_rt is called with FREE table->tb6_lock.
659 It takes new route entry, the addition fails by any reason the
660 route is freed. In any case, if caller does not hold it, it may
661 be destroyed.
662 */
663
664 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
665 {
666 int err;
667 struct fib6_table *table;
668
669 table = rt->rt6i_table;
670 write_lock_bh(&table->tb6_lock);
671 err = fib6_add(&table->tb6_root, rt, info);
672 write_unlock_bh(&table->tb6_lock);
673
674 return err;
675 }
676
677 int ip6_ins_rt(struct rt6_info *rt)
678 {
679 struct nl_info info = {
680 .nl_net = dev_net(rt->rt6i_dev),
681 };
682 return __ip6_ins_rt(rt, &info);
683 }
684
685 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, const struct in6_addr *daddr,
686 const struct in6_addr *saddr)
687 {
688 struct rt6_info *rt;
689
690 /*
691 * Clone the route.
692 */
693
694 rt = ip6_rt_copy(ort);
695
696 if (rt) {
697 struct neighbour *neigh;
698 int attempts = !in_softirq();
699
700 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
701 if (rt->rt6i_dst.plen != 128 &&
702 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
703 rt->rt6i_flags |= RTF_ANYCAST;
704 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
705 }
706
707 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
708 rt->rt6i_dst.plen = 128;
709 rt->rt6i_flags |= RTF_CACHE;
710 rt->dst.flags |= DST_HOST;
711
712 #ifdef CONFIG_IPV6_SUBTREES
713 if (rt->rt6i_src.plen && saddr) {
714 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
715 rt->rt6i_src.plen = 128;
716 }
717 #endif
718
719 retry:
720 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
721 if (IS_ERR(neigh)) {
722 struct net *net = dev_net(rt->rt6i_dev);
723 int saved_rt_min_interval =
724 net->ipv6.sysctl.ip6_rt_gc_min_interval;
725 int saved_rt_elasticity =
726 net->ipv6.sysctl.ip6_rt_gc_elasticity;
727
728 if (attempts-- > 0) {
729 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
730 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
731
732 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
733
734 net->ipv6.sysctl.ip6_rt_gc_elasticity =
735 saved_rt_elasticity;
736 net->ipv6.sysctl.ip6_rt_gc_min_interval =
737 saved_rt_min_interval;
738 goto retry;
739 }
740
741 if (net_ratelimit())
742 printk(KERN_WARNING
743 "ipv6: Neighbour table overflow.\n");
744 dst_free(&rt->dst);
745 return NULL;
746 }
747 rt->rt6i_nexthop = neigh;
748
749 }
750
751 return rt;
752 }
753
754 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, const struct in6_addr *daddr)
755 {
756 struct rt6_info *rt = ip6_rt_copy(ort);
757 if (rt) {
758 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
759 rt->rt6i_dst.plen = 128;
760 rt->rt6i_flags |= RTF_CACHE;
761 rt->dst.flags |= DST_HOST;
762 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
763 }
764 return rt;
765 }
766
767 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
768 struct flowi6 *fl6, int flags)
769 {
770 struct fib6_node *fn;
771 struct rt6_info *rt, *nrt;
772 int strict = 0;
773 int attempts = 3;
774 int err;
775 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
776
777 strict |= flags & RT6_LOOKUP_F_IFACE;
778
779 relookup:
780 read_lock_bh(&table->tb6_lock);
781
782 restart_2:
783 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
784
785 restart:
786 rt = rt6_select(fn, oif, strict | reachable);
787
788 BACKTRACK(net, &fl6->saddr);
789 if (rt == net->ipv6.ip6_null_entry ||
790 rt->rt6i_flags & RTF_CACHE)
791 goto out;
792
793 dst_hold(&rt->dst);
794 read_unlock_bh(&table->tb6_lock);
795
796 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
797 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
798 else if (!(rt->dst.flags & DST_HOST))
799 nrt = rt6_alloc_clone(rt, &fl6->daddr);
800 else
801 goto out2;
802
803 dst_release(&rt->dst);
804 rt = nrt ? : net->ipv6.ip6_null_entry;
805
806 dst_hold(&rt->dst);
807 if (nrt) {
808 err = ip6_ins_rt(nrt);
809 if (!err)
810 goto out2;
811 }
812
813 if (--attempts <= 0)
814 goto out2;
815
816 /*
817 * Race condition! In the gap, when table->tb6_lock was
818 * released someone could insert this route. Relookup.
819 */
820 dst_release(&rt->dst);
821 goto relookup;
822
823 out:
824 if (reachable) {
825 reachable = 0;
826 goto restart_2;
827 }
828 dst_hold(&rt->dst);
829 read_unlock_bh(&table->tb6_lock);
830 out2:
831 rt->dst.lastuse = jiffies;
832 rt->dst.__use++;
833
834 return rt;
835 }
836
837 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
838 struct flowi6 *fl6, int flags)
839 {
840 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
841 }
842
843 void ip6_route_input(struct sk_buff *skb)
844 {
845 const struct ipv6hdr *iph = ipv6_hdr(skb);
846 struct net *net = dev_net(skb->dev);
847 int flags = RT6_LOOKUP_F_HAS_SADDR;
848 struct flowi6 fl6 = {
849 .flowi6_iif = skb->dev->ifindex,
850 .daddr = iph->daddr,
851 .saddr = iph->saddr,
852 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
853 .flowi6_mark = skb->mark,
854 .flowi6_proto = iph->nexthdr,
855 };
856
857 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
858 flags |= RT6_LOOKUP_F_IFACE;
859
860 skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
861 }
862
863 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
864 struct flowi6 *fl6, int flags)
865 {
866 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
867 }
868
869 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
870 struct flowi6 *fl6)
871 {
872 int flags = 0;
873
874 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
875 flags |= RT6_LOOKUP_F_IFACE;
876
877 if (!ipv6_addr_any(&fl6->saddr))
878 flags |= RT6_LOOKUP_F_HAS_SADDR;
879 else if (sk)
880 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
881
882 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
883 }
884
885 EXPORT_SYMBOL(ip6_route_output);
886
887 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
888 {
889 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
890 struct dst_entry *new = NULL;
891
892 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
893 if (rt) {
894 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
895
896 new = &rt->dst;
897
898 new->__use = 1;
899 new->input = dst_discard;
900 new->output = dst_discard;
901
902 dst_copy_metrics(new, &ort->dst);
903 rt->rt6i_idev = ort->rt6i_idev;
904 if (rt->rt6i_idev)
905 in6_dev_hold(rt->rt6i_idev);
906 rt->rt6i_expires = 0;
907
908 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
909 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
910 rt->rt6i_metric = 0;
911
912 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
913 #ifdef CONFIG_IPV6_SUBTREES
914 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
915 #endif
916
917 dst_free(new);
918 }
919
920 dst_release(dst_orig);
921 return new ? new : ERR_PTR(-ENOMEM);
922 }
923
924 /*
925 * Destination cache support functions
926 */
927
928 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
929 {
930 struct rt6_info *rt;
931
932 rt = (struct rt6_info *) dst;
933
934 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
935 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
936 if (!rt->rt6i_peer)
937 rt6_bind_peer(rt, 0);
938 rt->rt6i_peer_genid = rt6_peer_genid();
939 }
940 return dst;
941 }
942 return NULL;
943 }
944
945 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
946 {
947 struct rt6_info *rt = (struct rt6_info *) dst;
948
949 if (rt) {
950 if (rt->rt6i_flags & RTF_CACHE) {
951 if (rt6_check_expired(rt)) {
952 ip6_del_rt(rt);
953 dst = NULL;
954 }
955 } else {
956 dst_release(dst);
957 dst = NULL;
958 }
959 }
960 return dst;
961 }
962
963 static void ip6_link_failure(struct sk_buff *skb)
964 {
965 struct rt6_info *rt;
966
967 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
968
969 rt = (struct rt6_info *) skb_dst(skb);
970 if (rt) {
971 if (rt->rt6i_flags&RTF_CACHE) {
972 dst_set_expires(&rt->dst, 0);
973 rt->rt6i_flags |= RTF_EXPIRES;
974 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
975 rt->rt6i_node->fn_sernum = -1;
976 }
977 }
978
979 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
980 {
981 struct rt6_info *rt6 = (struct rt6_info*)dst;
982
983 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
984 rt6->rt6i_flags |= RTF_MODIFIED;
985 if (mtu < IPV6_MIN_MTU) {
986 u32 features = dst_metric(dst, RTAX_FEATURES);
987 mtu = IPV6_MIN_MTU;
988 features |= RTAX_FEATURE_ALLFRAG;
989 dst_metric_set(dst, RTAX_FEATURES, features);
990 }
991 dst_metric_set(dst, RTAX_MTU, mtu);
992 }
993 }
994
995 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
996 {
997 struct net_device *dev = dst->dev;
998 unsigned int mtu = dst_mtu(dst);
999 struct net *net = dev_net(dev);
1000
1001 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1002
1003 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1004 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1005
1006 /*
1007 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1008 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1009 * IPV6_MAXPLEN is also valid and means: "any MSS,
1010 * rely only on pmtu discovery"
1011 */
1012 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1013 mtu = IPV6_MAXPLEN;
1014 return mtu;
1015 }
1016
1017 static unsigned int ip6_default_mtu(const struct dst_entry *dst)
1018 {
1019 unsigned int mtu = IPV6_MIN_MTU;
1020 struct inet6_dev *idev;
1021
1022 rcu_read_lock();
1023 idev = __in6_dev_get(dst->dev);
1024 if (idev)
1025 mtu = idev->cnf.mtu6;
1026 rcu_read_unlock();
1027
1028 return mtu;
1029 }
1030
/* List of entries created by icmp6_dst_alloc(), linked through dst->next. */
static struct dst_entry *icmp6_dst_gc_list;
/* Protects icmp6_dst_gc_list. */
static DEFINE_SPINLOCK(icmp6_dst_lock);
1033
1034 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1035 struct neighbour *neigh,
1036 const struct in6_addr *addr)
1037 {
1038 struct rt6_info *rt;
1039 struct inet6_dev *idev = in6_dev_get(dev);
1040 struct net *net = dev_net(dev);
1041
1042 if (unlikely(idev == NULL))
1043 return NULL;
1044
1045 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev);
1046 if (unlikely(rt == NULL)) {
1047 in6_dev_put(idev);
1048 goto out;
1049 }
1050
1051 if (neigh)
1052 neigh_hold(neigh);
1053 else {
1054 neigh = ndisc_get_neigh(dev, addr);
1055 if (IS_ERR(neigh))
1056 neigh = NULL;
1057 }
1058
1059 rt->rt6i_idev = idev;
1060 rt->rt6i_nexthop = neigh;
1061 atomic_set(&rt->dst.__refcnt, 1);
1062 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1063 rt->dst.output = ip6_output;
1064
1065 spin_lock_bh(&icmp6_dst_lock);
1066 rt->dst.next = icmp6_dst_gc_list;
1067 icmp6_dst_gc_list = &rt->dst;
1068 spin_unlock_bh(&icmp6_dst_lock);
1069
1070 fib6_force_start_gc(net);
1071
1072 out:
1073 return &rt->dst;
1074 }
1075
1076 int icmp6_dst_gc(void)
1077 {
1078 struct dst_entry *dst, **pprev;
1079 int more = 0;
1080
1081 spin_lock_bh(&icmp6_dst_lock);
1082 pprev = &icmp6_dst_gc_list;
1083
1084 while ((dst = *pprev) != NULL) {
1085 if (!atomic_read(&dst->__refcnt)) {
1086 *pprev = dst->next;
1087 dst_free(dst);
1088 } else {
1089 pprev = &dst->next;
1090 ++more;
1091 }
1092 }
1093
1094 spin_unlock_bh(&icmp6_dst_lock);
1095
1096 return more;
1097 }
1098
1099 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1100 void *arg)
1101 {
1102 struct dst_entry *dst, **pprev;
1103
1104 spin_lock_bh(&icmp6_dst_lock);
1105 pprev = &icmp6_dst_gc_list;
1106 while ((dst = *pprev) != NULL) {
1107 struct rt6_info *rt = (struct rt6_info *) dst;
1108 if (func(rt, arg)) {
1109 *pprev = dst->next;
1110 dst_free(dst);
1111 } else {
1112 pprev = &dst->next;
1113 }
1114 }
1115 spin_unlock_bh(&icmp6_dst_lock);
1116 }
1117
1118 static int ip6_dst_gc(struct dst_ops *ops)
1119 {
1120 unsigned long now = jiffies;
1121 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1122 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1123 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1124 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1125 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1126 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1127 int entries;
1128
1129 entries = dst_entries_get_fast(ops);
1130 if (time_after(rt_last_gc + rt_min_interval, now) &&
1131 entries <= rt_max_size)
1132 goto out;
1133
1134 net->ipv6.ip6_rt_gc_expire++;
1135 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1136 net->ipv6.ip6_rt_last_gc = now;
1137 entries = dst_entries_get_slow(ops);
1138 if (entries < ops->gc_thresh)
1139 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1140 out:
1141 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1142 return entries > rt_max_size;
1143 }
1144
1145 /* Clean host part of a prefix. Not necessary in radix tree,
1146 but results in cleaner routing tables.
1147
1148 Remove it only when all the things will work!
1149 */
1150
1151 int ip6_dst_hoplimit(struct dst_entry *dst)
1152 {
1153 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1154 if (hoplimit == 0) {
1155 struct net_device *dev = dst->dev;
1156 struct inet6_dev *idev;
1157
1158 rcu_read_lock();
1159 idev = __in6_dev_get(dev);
1160 if (idev)
1161 hoplimit = idev->cnf.hop_limit;
1162 else
1163 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1164 rcu_read_unlock();
1165 }
1166 return hoplimit;
1167 }
1168 EXPORT_SYMBOL(ip6_dst_hoplimit);
1169
1170 /*
1171 *
1172 */
1173
1174 int ip6_route_add(struct fib6_config *cfg)
1175 {
1176 int err;
1177 struct net *net = cfg->fc_nlinfo.nl_net;
1178 struct rt6_info *rt = NULL;
1179 struct net_device *dev = NULL;
1180 struct inet6_dev *idev = NULL;
1181 struct fib6_table *table;
1182 int addr_type;
1183
1184 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1185 return -EINVAL;
1186 #ifndef CONFIG_IPV6_SUBTREES
1187 if (cfg->fc_src_len)
1188 return -EINVAL;
1189 #endif
1190 if (cfg->fc_ifindex) {
1191 err = -ENODEV;
1192 dev = dev_get_by_index(net, cfg->fc_ifindex);
1193 if (!dev)
1194 goto out;
1195 idev = in6_dev_get(dev);
1196 if (!idev)
1197 goto out;
1198 }
1199
1200 if (cfg->fc_metric == 0)
1201 cfg->fc_metric = IP6_RT_PRIO_USER;
1202
1203 table = fib6_new_table(net, cfg->fc_table);
1204 if (table == NULL) {
1205 err = -ENOBUFS;
1206 goto out;
1207 }
1208
1209 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL);
1210
1211 if (rt == NULL) {
1212 err = -ENOMEM;
1213 goto out;
1214 }
1215
1216 rt->dst.obsolete = -1;
1217 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1218 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1219 0;
1220
1221 if (cfg->fc_protocol == RTPROT_UNSPEC)
1222 cfg->fc_protocol = RTPROT_BOOT;
1223 rt->rt6i_protocol = cfg->fc_protocol;
1224
1225 addr_type = ipv6_addr_type(&cfg->fc_dst);
1226
1227 if (addr_type & IPV6_ADDR_MULTICAST)
1228 rt->dst.input = ip6_mc_input;
1229 else if (cfg->fc_flags & RTF_LOCAL)
1230 rt->dst.input = ip6_input;
1231 else
1232 rt->dst.input = ip6_forward;
1233
1234 rt->dst.output = ip6_output;
1235
1236 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1237 rt->rt6i_dst.plen = cfg->fc_dst_len;
1238 if (rt->rt6i_dst.plen == 128)
1239 rt->dst.flags |= DST_HOST;
1240
1241 #ifdef CONFIG_IPV6_SUBTREES
1242 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1243 rt->rt6i_src.plen = cfg->fc_src_len;
1244 #endif
1245
1246 rt->rt6i_metric = cfg->fc_metric;
1247
1248 /* We cannot add true routes via loopback here,
1249 they would result in kernel looping; promote them to reject routes
1250 */
1251 if ((cfg->fc_flags & RTF_REJECT) ||
1252 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1253 && !(cfg->fc_flags&RTF_LOCAL))) {
1254 /* hold loopback dev/idev if we haven't done so. */
1255 if (dev != net->loopback_dev) {
1256 if (dev) {
1257 dev_put(dev);
1258 in6_dev_put(idev);
1259 }
1260 dev = net->loopback_dev;
1261 dev_hold(dev);
1262 idev = in6_dev_get(dev);
1263 if (!idev) {
1264 err = -ENODEV;
1265 goto out;
1266 }
1267 }
1268 rt->dst.output = ip6_pkt_discard_out;
1269 rt->dst.input = ip6_pkt_discard;
1270 rt->dst.error = -ENETUNREACH;
1271 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1272 goto install_route;
1273 }
1274
1275 if (cfg->fc_flags & RTF_GATEWAY) {
1276 const struct in6_addr *gw_addr;
1277 int gwa_type;
1278
1279 gw_addr = &cfg->fc_gateway;
1280 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1281 gwa_type = ipv6_addr_type(gw_addr);
1282
1283 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1284 struct rt6_info *grt;
1285
1286 /* IPv6 strictly inhibits using not link-local
1287 addresses as nexthop address.
1288 Otherwise, router will not able to send redirects.
1289 It is very good, but in some (rare!) circumstances
1290 (SIT, PtP, NBMA NOARP links) it is handy to allow
1291 some exceptions. --ANK
1292 */
1293 err = -EINVAL;
1294 if (!(gwa_type&IPV6_ADDR_UNICAST))
1295 goto out;
1296
1297 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1298
1299 err = -EHOSTUNREACH;
1300 if (grt == NULL)
1301 goto out;
1302 if (dev) {
1303 if (dev != grt->rt6i_dev) {
1304 dst_release(&grt->dst);
1305 goto out;
1306 }
1307 } else {
1308 dev = grt->rt6i_dev;
1309 idev = grt->rt6i_idev;
1310 dev_hold(dev);
1311 in6_dev_hold(grt->rt6i_idev);
1312 }
1313 if (!(grt->rt6i_flags&RTF_GATEWAY))
1314 err = 0;
1315 dst_release(&grt->dst);
1316
1317 if (err)
1318 goto out;
1319 }
1320 err = -EINVAL;
1321 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1322 goto out;
1323 }
1324
1325 err = -ENODEV;
1326 if (dev == NULL)
1327 goto out;
1328
1329 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1330 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1331 err = -EINVAL;
1332 goto out;
1333 }
1334 ipv6_addr_copy(&rt->rt6i_prefsrc.addr, &cfg->fc_prefsrc);
1335 rt->rt6i_prefsrc.plen = 128;
1336 } else
1337 rt->rt6i_prefsrc.plen = 0;
1338
1339 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1340 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1341 if (IS_ERR(rt->rt6i_nexthop)) {
1342 err = PTR_ERR(rt->rt6i_nexthop);
1343 rt->rt6i_nexthop = NULL;
1344 goto out;
1345 }
1346 }
1347
1348 rt->rt6i_flags = cfg->fc_flags;
1349
1350 install_route:
1351 if (cfg->fc_mx) {
1352 struct nlattr *nla;
1353 int remaining;
1354
1355 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1356 int type = nla_type(nla);
1357
1358 if (type) {
1359 if (type > RTAX_MAX) {
1360 err = -EINVAL;
1361 goto out;
1362 }
1363
1364 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1365 }
1366 }
1367 }
1368
1369 rt->dst.dev = dev;
1370 rt->rt6i_idev = idev;
1371 rt->rt6i_table = table;
1372
1373 cfg->fc_nlinfo.nl_net = dev_net(dev);
1374
1375 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1376
1377 out:
1378 if (dev)
1379 dev_put(dev);
1380 if (idev)
1381 in6_dev_put(idev);
1382 if (rt)
1383 dst_free(&rt->dst);
1384 return err;
1385 }
1386
1387 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1388 {
1389 int err;
1390 struct fib6_table *table;
1391 struct net *net = dev_net(rt->rt6i_dev);
1392
1393 if (rt == net->ipv6.ip6_null_entry)
1394 return -ENOENT;
1395
1396 table = rt->rt6i_table;
1397 write_lock_bh(&table->tb6_lock);
1398
1399 err = fib6_del(rt, info);
1400 dst_release(&rt->dst);
1401
1402 write_unlock_bh(&table->tb6_lock);
1403
1404 return err;
1405 }
1406
1407 int ip6_del_rt(struct rt6_info *rt)
1408 {
1409 struct nl_info info = {
1410 .nl_net = dev_net(rt->rt6i_dev),
1411 };
1412 return __ip6_del_rt(rt, &info);
1413 }
1414
1415 static int ip6_route_del(struct fib6_config *cfg)
1416 {
1417 struct fib6_table *table;
1418 struct fib6_node *fn;
1419 struct rt6_info *rt;
1420 int err = -ESRCH;
1421
1422 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1423 if (table == NULL)
1424 return err;
1425
1426 read_lock_bh(&table->tb6_lock);
1427
1428 fn = fib6_locate(&table->tb6_root,
1429 &cfg->fc_dst, cfg->fc_dst_len,
1430 &cfg->fc_src, cfg->fc_src_len);
1431
1432 if (fn) {
1433 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1434 if (cfg->fc_ifindex &&
1435 (rt->rt6i_dev == NULL ||
1436 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1437 continue;
1438 if (cfg->fc_flags & RTF_GATEWAY &&
1439 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1440 continue;
1441 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1442 continue;
1443 dst_hold(&rt->dst);
1444 read_unlock_bh(&table->tb6_lock);
1445
1446 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1447 }
1448 }
1449 read_unlock_bh(&table->tb6_lock);
1450
1451 return err;
1452 }
1453
1454 /*
1455 * Handle redirects
1456 */
1457 struct ip6rd_flowi {
1458 struct flowi6 fl6;
1459 struct in6_addr gateway;
1460 };
1461
1462 static struct rt6_info *__ip6_route_redirect(struct net *net,
1463 struct fib6_table *table,
1464 struct flowi6 *fl6,
1465 int flags)
1466 {
1467 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1468 struct rt6_info *rt;
1469 struct fib6_node *fn;
1470
1471 /*
1472 * Get the "current" route for this destination and
1473 * check if the redirect has come from approriate router.
1474 *
1475 * RFC 2461 specifies that redirects should only be
1476 * accepted if they come from the nexthop to the target.
1477 * Due to the way the routes are chosen, this notion
1478 * is a bit fuzzy and one might need to check all possible
1479 * routes.
1480 */
1481
1482 read_lock_bh(&table->tb6_lock);
1483 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1484 restart:
1485 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1486 /*
1487 * Current route is on-link; redirect is always invalid.
1488 *
1489 * Seems, previous statement is not true. It could
1490 * be node, which looks for us as on-link (f.e. proxy ndisc)
1491 * But then router serving it might decide, that we should
1492 * know truth 8)8) --ANK (980726).
1493 */
1494 if (rt6_check_expired(rt))
1495 continue;
1496 if (!(rt->rt6i_flags & RTF_GATEWAY))
1497 continue;
1498 if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
1499 continue;
1500 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1501 continue;
1502 break;
1503 }
1504
1505 if (!rt)
1506 rt = net->ipv6.ip6_null_entry;
1507 BACKTRACK(net, &fl6->saddr);
1508 out:
1509 dst_hold(&rt->dst);
1510
1511 read_unlock_bh(&table->tb6_lock);
1512
1513 return rt;
1514 };
1515
1516 static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1517 const struct in6_addr *src,
1518 const struct in6_addr *gateway,
1519 struct net_device *dev)
1520 {
1521 int flags = RT6_LOOKUP_F_HAS_SADDR;
1522 struct net *net = dev_net(dev);
1523 struct ip6rd_flowi rdfl = {
1524 .fl6 = {
1525 .flowi6_oif = dev->ifindex,
1526 .daddr = *dest,
1527 .saddr = *src,
1528 },
1529 };
1530
1531 ipv6_addr_copy(&rdfl.gateway, gateway);
1532
1533 if (rt6_need_strict(dest))
1534 flags |= RT6_LOOKUP_F_IFACE;
1535
1536 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
1537 flags, __ip6_route_redirect);
1538 }
1539
1540 void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1541 const struct in6_addr *saddr,
1542 struct neighbour *neigh, u8 *lladdr, int on_link)
1543 {
1544 struct rt6_info *rt, *nrt = NULL;
1545 struct netevent_redirect netevent;
1546 struct net *net = dev_net(neigh->dev);
1547
1548 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1549
1550 if (rt == net->ipv6.ip6_null_entry) {
1551 if (net_ratelimit())
1552 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1553 "for redirect target\n");
1554 goto out;
1555 }
1556
1557 /*
1558 * We have finally decided to accept it.
1559 */
1560
1561 neigh_update(neigh, lladdr, NUD_STALE,
1562 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1563 NEIGH_UPDATE_F_OVERRIDE|
1564 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1565 NEIGH_UPDATE_F_ISROUTER))
1566 );
1567
1568 /*
1569 * Redirect received -> path was valid.
1570 * Look, redirects are sent only in response to data packets,
1571 * so that this nexthop apparently is reachable. --ANK
1572 */
1573 dst_confirm(&rt->dst);
1574
1575 /* Duplicate redirect: silently ignore. */
1576 if (neigh == rt->dst.neighbour)
1577 goto out;
1578
1579 nrt = ip6_rt_copy(rt);
1580 if (nrt == NULL)
1581 goto out;
1582
1583 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1584 if (on_link)
1585 nrt->rt6i_flags &= ~RTF_GATEWAY;
1586
1587 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1588 nrt->rt6i_dst.plen = 128;
1589 nrt->dst.flags |= DST_HOST;
1590
1591 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1592 nrt->rt6i_nexthop = neigh_clone(neigh);
1593
1594 if (ip6_ins_rt(nrt))
1595 goto out;
1596
1597 netevent.old = &rt->dst;
1598 netevent.new = &nrt->dst;
1599 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1600
1601 if (rt->rt6i_flags&RTF_CACHE) {
1602 ip6_del_rt(rt);
1603 return;
1604 }
1605
1606 out:
1607 dst_release(&rt->dst);
1608 }
1609
1610 /*
1611 * Handle ICMP "packet too big" messages
1612 * i.e. Path MTU discovery
1613 */
1614
1615 static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
1616 struct net *net, u32 pmtu, int ifindex)
1617 {
1618 struct rt6_info *rt, *nrt;
1619 int allfrag = 0;
1620 again:
1621 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1622 if (rt == NULL)
1623 return;
1624
1625 if (rt6_check_expired(rt)) {
1626 ip6_del_rt(rt);
1627 goto again;
1628 }
1629
1630 if (pmtu >= dst_mtu(&rt->dst))
1631 goto out;
1632
1633 if (pmtu < IPV6_MIN_MTU) {
1634 /*
1635 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1636 * MTU (1280) and a fragment header should always be included
1637 * after a node receiving Too Big message reporting PMTU is
1638 * less than the IPv6 Minimum Link MTU.
1639 */
1640 pmtu = IPV6_MIN_MTU;
1641 allfrag = 1;
1642 }
1643
1644 /* New mtu received -> path was valid.
1645 They are sent only in response to data packets,
1646 so that this nexthop apparently is reachable. --ANK
1647 */
1648 dst_confirm(&rt->dst);
1649
1650 /* Host route. If it is static, it would be better
1651 not to override it, but add new one, so that
1652 when cache entry will expire old pmtu
1653 would return automatically.
1654 */
1655 if (rt->rt6i_flags & RTF_CACHE) {
1656 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1657 if (allfrag) {
1658 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1659 features |= RTAX_FEATURE_ALLFRAG;
1660 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1661 }
1662 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1663 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1664 goto out;
1665 }
1666
1667 /* Network route.
1668 Two cases are possible:
1669 1. It is connected route. Action: COW
1670 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1671 */
1672 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1673 nrt = rt6_alloc_cow(rt, daddr, saddr);
1674 else
1675 nrt = rt6_alloc_clone(rt, daddr);
1676
1677 if (nrt) {
1678 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1679 if (allfrag) {
1680 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1681 features |= RTAX_FEATURE_ALLFRAG;
1682 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1683 }
1684
1685 /* According to RFC 1981, detecting PMTU increase shouldn't be
1686 * happened within 5 mins, the recommended timer is 10 mins.
1687 * Here this route expiration time is set to ip6_rt_mtu_expires
1688 * which is 10 mins. After 10 mins the decreased pmtu is expired
1689 * and detecting PMTU increase will be automatically happened.
1690 */
1691 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1692 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1693
1694 ip6_ins_rt(nrt);
1695 }
1696 out:
1697 dst_release(&rt->dst);
1698 }
1699
1700 void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
1701 struct net_device *dev, u32 pmtu)
1702 {
1703 struct net *net = dev_net(dev);
1704
1705 /*
1706 * RFC 1981 states that a node "MUST reduce the size of the packets it
1707 * is sending along the path" that caused the Packet Too Big message.
1708 * Since it's not possible in the general case to determine which
1709 * interface was used to send the original packet, we update the MTU
1710 * on the interface that will be used to send future packets. We also
1711 * update the MTU on the interface that received the Packet Too Big in
1712 * case the original packet was forced out that interface with
1713 * SO_BINDTODEVICE or similar. This is the next best thing to the
1714 * correct behaviour, which would be to update the MTU on all
1715 * interfaces.
1716 */
1717 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1718 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1719 }
1720
1721 /*
1722 * Misc support functions
1723 */
1724
1725 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1726 {
1727 struct net *net = dev_net(ort->rt6i_dev);
1728 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
1729 ort->dst.dev);
1730
1731 if (rt) {
1732 rt->dst.input = ort->dst.input;
1733 rt->dst.output = ort->dst.output;
1734
1735 dst_copy_metrics(&rt->dst, &ort->dst);
1736 rt->dst.error = ort->dst.error;
1737 rt->rt6i_idev = ort->rt6i_idev;
1738 if (rt->rt6i_idev)
1739 in6_dev_hold(rt->rt6i_idev);
1740 rt->dst.lastuse = jiffies;
1741 rt->rt6i_expires = 0;
1742
1743 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1744 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1745 rt->rt6i_metric = 0;
1746
1747 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1748 #ifdef CONFIG_IPV6_SUBTREES
1749 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1750 #endif
1751 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1752 rt->rt6i_table = ort->rt6i_table;
1753 }
1754 return rt;
1755 }
1756
1757 #ifdef CONFIG_IPV6_ROUTE_INFO
1758 static struct rt6_info *rt6_get_route_info(struct net *net,
1759 const struct in6_addr *prefix, int prefixlen,
1760 const struct in6_addr *gwaddr, int ifindex)
1761 {
1762 struct fib6_node *fn;
1763 struct rt6_info *rt = NULL;
1764 struct fib6_table *table;
1765
1766 table = fib6_get_table(net, RT6_TABLE_INFO);
1767 if (table == NULL)
1768 return NULL;
1769
1770 write_lock_bh(&table->tb6_lock);
1771 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1772 if (!fn)
1773 goto out;
1774
1775 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1776 if (rt->rt6i_dev->ifindex != ifindex)
1777 continue;
1778 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1779 continue;
1780 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1781 continue;
1782 dst_hold(&rt->dst);
1783 break;
1784 }
1785 out:
1786 write_unlock_bh(&table->tb6_lock);
1787 return rt;
1788 }
1789
1790 static struct rt6_info *rt6_add_route_info(struct net *net,
1791 const struct in6_addr *prefix, int prefixlen,
1792 const struct in6_addr *gwaddr, int ifindex,
1793 unsigned pref)
1794 {
1795 struct fib6_config cfg = {
1796 .fc_table = RT6_TABLE_INFO,
1797 .fc_metric = IP6_RT_PRIO_USER,
1798 .fc_ifindex = ifindex,
1799 .fc_dst_len = prefixlen,
1800 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1801 RTF_UP | RTF_PREF(pref),
1802 .fc_nlinfo.pid = 0,
1803 .fc_nlinfo.nlh = NULL,
1804 .fc_nlinfo.nl_net = net,
1805 };
1806
1807 ipv6_addr_copy(&cfg.fc_dst, prefix);
1808 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1809
1810 /* We should treat it as a default route if prefix length is 0. */
1811 if (!prefixlen)
1812 cfg.fc_flags |= RTF_DEFAULT;
1813
1814 ip6_route_add(&cfg);
1815
1816 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1817 }
1818 #endif
1819
1820 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1821 {
1822 struct rt6_info *rt;
1823 struct fib6_table *table;
1824
1825 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1826 if (table == NULL)
1827 return NULL;
1828
1829 write_lock_bh(&table->tb6_lock);
1830 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1831 if (dev == rt->rt6i_dev &&
1832 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1833 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1834 break;
1835 }
1836 if (rt)
1837 dst_hold(&rt->dst);
1838 write_unlock_bh(&table->tb6_lock);
1839 return rt;
1840 }
1841
1842 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1843 struct net_device *dev,
1844 unsigned int pref)
1845 {
1846 struct fib6_config cfg = {
1847 .fc_table = RT6_TABLE_DFLT,
1848 .fc_metric = IP6_RT_PRIO_USER,
1849 .fc_ifindex = dev->ifindex,
1850 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1851 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1852 .fc_nlinfo.pid = 0,
1853 .fc_nlinfo.nlh = NULL,
1854 .fc_nlinfo.nl_net = dev_net(dev),
1855 };
1856
1857 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1858
1859 ip6_route_add(&cfg);
1860
1861 return rt6_get_dflt_router(gwaddr, dev);
1862 }
1863
1864 void rt6_purge_dflt_routers(struct net *net)
1865 {
1866 struct rt6_info *rt;
1867 struct fib6_table *table;
1868
1869 /* NOTE: Keep consistent with rt6_get_dflt_router */
1870 table = fib6_get_table(net, RT6_TABLE_DFLT);
1871 if (table == NULL)
1872 return;
1873
1874 restart:
1875 read_lock_bh(&table->tb6_lock);
1876 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1877 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1878 dst_hold(&rt->dst);
1879 read_unlock_bh(&table->tb6_lock);
1880 ip6_del_rt(rt);
1881 goto restart;
1882 }
1883 }
1884 read_unlock_bh(&table->tb6_lock);
1885 }
1886
1887 static void rtmsg_to_fib6_config(struct net *net,
1888 struct in6_rtmsg *rtmsg,
1889 struct fib6_config *cfg)
1890 {
1891 memset(cfg, 0, sizeof(*cfg));
1892
1893 cfg->fc_table = RT6_TABLE_MAIN;
1894 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1895 cfg->fc_metric = rtmsg->rtmsg_metric;
1896 cfg->fc_expires = rtmsg->rtmsg_info;
1897 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1898 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1899 cfg->fc_flags = rtmsg->rtmsg_flags;
1900
1901 cfg->fc_nlinfo.nl_net = net;
1902
1903 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1904 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1905 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1906 }
1907
1908 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1909 {
1910 struct fib6_config cfg;
1911 struct in6_rtmsg rtmsg;
1912 int err;
1913
1914 switch(cmd) {
1915 case SIOCADDRT: /* Add a route */
1916 case SIOCDELRT: /* Delete a route */
1917 if (!capable(CAP_NET_ADMIN))
1918 return -EPERM;
1919 err = copy_from_user(&rtmsg, arg,
1920 sizeof(struct in6_rtmsg));
1921 if (err)
1922 return -EFAULT;
1923
1924 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1925
1926 rtnl_lock();
1927 switch (cmd) {
1928 case SIOCADDRT:
1929 err = ip6_route_add(&cfg);
1930 break;
1931 case SIOCDELRT:
1932 err = ip6_route_del(&cfg);
1933 break;
1934 default:
1935 err = -EINVAL;
1936 }
1937 rtnl_unlock();
1938
1939 return err;
1940 }
1941
1942 return -EINVAL;
1943 }
1944
1945 /*
1946 * Drop the packet on the floor
1947 */
1948
1949 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1950 {
1951 int type;
1952 struct dst_entry *dst = skb_dst(skb);
1953 switch (ipstats_mib_noroutes) {
1954 case IPSTATS_MIB_INNOROUTES:
1955 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1956 if (type == IPV6_ADDR_ANY) {
1957 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1958 IPSTATS_MIB_INADDRERRORS);
1959 break;
1960 }
1961 /* FALLTHROUGH */
1962 case IPSTATS_MIB_OUTNOROUTES:
1963 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1964 ipstats_mib_noroutes);
1965 break;
1966 }
1967 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1968 kfree_skb(skb);
1969 return 0;
1970 }
1971
1972 static int ip6_pkt_discard(struct sk_buff *skb)
1973 {
1974 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
1975 }
1976
1977 static int ip6_pkt_discard_out(struct sk_buff *skb)
1978 {
1979 skb->dev = skb_dst(skb)->dev;
1980 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1981 }
1982
1983 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
1984
1985 static int ip6_pkt_prohibit(struct sk_buff *skb)
1986 {
1987 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
1988 }
1989
1990 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1991 {
1992 skb->dev = skb_dst(skb)->dev;
1993 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
1994 }
1995
1996 #endif
1997
1998 /*
1999 * Allocate a dst for local (unicast / anycast) address.
2000 */
2001
2002 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2003 const struct in6_addr *addr,
2004 int anycast)
2005 {
2006 struct net *net = dev_net(idev->dev);
2007 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
2008 net->loopback_dev);
2009 struct neighbour *neigh;
2010
2011 if (rt == NULL) {
2012 if (net_ratelimit())
2013 pr_warning("IPv6: Maximum number of routes reached,"
2014 " consider increasing route/max_size.\n");
2015 return ERR_PTR(-ENOMEM);
2016 }
2017
2018 in6_dev_hold(idev);
2019
2020 rt->dst.flags |= DST_HOST;
2021 rt->dst.input = ip6_input;
2022 rt->dst.output = ip6_output;
2023 rt->rt6i_idev = idev;
2024 rt->dst.obsolete = -1;
2025
2026 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2027 if (anycast)
2028 rt->rt6i_flags |= RTF_ANYCAST;
2029 else
2030 rt->rt6i_flags |= RTF_LOCAL;
2031 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
2032 if (IS_ERR(neigh)) {
2033 dst_free(&rt->dst);
2034
2035 return ERR_CAST(neigh);
2036 }
2037 rt->rt6i_nexthop = neigh;
2038
2039 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
2040 rt->rt6i_dst.plen = 128;
2041 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2042
2043 atomic_set(&rt->dst.__refcnt, 1);
2044
2045 return rt;
2046 }
2047
2048 int ip6_route_get_saddr(struct net *net,
2049 struct rt6_info *rt,
2050 const struct in6_addr *daddr,
2051 unsigned int prefs,
2052 struct in6_addr *saddr)
2053 {
2054 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2055 int err = 0;
2056 if (rt->rt6i_prefsrc.plen)
2057 ipv6_addr_copy(saddr, &rt->rt6i_prefsrc.addr);
2058 else
2059 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2060 daddr, prefs, saddr);
2061 return err;
2062 }
2063
2064 /* remove deleted ip from prefsrc entries */
/* Argument bundle handed to fib6_remove_prefsrc() through fib6_clean_all(). */
struct arg_dev_net_ip {
	struct net_device	*dev;	/* device filter; NULL matches any */
	struct net		*net;	/* namespace being walked */
	struct in6_addr		*addr;	/* address being removed */
};
2070
2071 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2072 {
2073 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2074 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2075 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2076
2077 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
2078 rt != net->ipv6.ip6_null_entry &&
2079 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2080 /* remove prefsrc entry */
2081 rt->rt6i_prefsrc.plen = 0;
2082 }
2083 return 0;
2084 }
2085
2086 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2087 {
2088 struct net *net = dev_net(ifp->idev->dev);
2089 struct arg_dev_net_ip adni = {
2090 .dev = ifp->idev->dev,
2091 .net = net,
2092 .addr = &ifp->addr,
2093 };
2094 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2095 }
2096
/* Argument bundle handed to fib6_ifdown(). */
struct arg_dev_net {
	struct net_device	*dev;	/* device going down; NULL matches any */
	struct net		*net;	/* namespace being cleaned */
};
2101
2102 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2103 {
2104 const struct arg_dev_net *adn = arg;
2105 const struct net_device *dev = adn->dev;
2106
2107 if ((rt->rt6i_dev == dev || dev == NULL) &&
2108 rt != adn->net->ipv6.ip6_null_entry) {
2109 RT6_TRACE("deleted by ifdown %p\n", rt);
2110 return -1;
2111 }
2112 return 0;
2113 }
2114
2115 void rt6_ifdown(struct net *net, struct net_device *dev)
2116 {
2117 struct arg_dev_net adn = {
2118 .dev = dev,
2119 .net = net,
2120 };
2121
2122 fib6_clean_all(net, fib6_ifdown, 0, &adn);
2123 icmp6_clean_all(fib6_ifdown, &adn);
2124 }
2125
/* Argument bundle handed to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned		mtu;	/* the new MTU */
};
2131
2132 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2133 {
2134 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2135 struct inet6_dev *idev;
2136
2137 /* In IPv6 pmtu discovery is not optional,
2138 so that RTAX_MTU lock cannot disable it.
2139 We still use this lock to block changes
2140 caused by addrconf/ndisc.
2141 */
2142
2143 idev = __in6_dev_get(arg->dev);
2144 if (idev == NULL)
2145 return 0;
2146
2147 /* For administrative MTU increase, there is no way to discover
2148 IPv6 PMTU increase, so PMTU increase should be updated here.
2149 Since RFC 1981 doesn't include administrative MTU increase
2150 update PMTU increase is a MUST. (i.e. jumbo frame)
2151 */
2152 /*
2153 If new MTU is less than route PMTU, this new MTU will be the
2154 lowest MTU in the path, update the route PMTU to reflect PMTU
2155 decreases; if new MTU is greater than route PMTU, and the
2156 old MTU is the lowest MTU in the path, update the route PMTU
2157 to reflect the increase. In this case if the other nodes' MTU
2158 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2159 PMTU discouvery.
2160 */
2161 if (rt->rt6i_dev == arg->dev &&
2162 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2163 (dst_mtu(&rt->dst) >= arg->mtu ||
2164 (dst_mtu(&rt->dst) < arg->mtu &&
2165 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2166 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2167 }
2168 return 0;
2169 }
2170
2171 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2172 {
2173 struct rt6_mtu_change_arg arg = {
2174 .dev = dev,
2175 .mtu = mtu,
2176 };
2177
2178 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2179 }
2180
2181 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2182 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
2183 [RTA_OIF] = { .type = NLA_U32 },
2184 [RTA_IIF] = { .type = NLA_U32 },
2185 [RTA_PRIORITY] = { .type = NLA_U32 },
2186 [RTA_METRICS] = { .type = NLA_NESTED },
2187 };
2188
2189 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2190 struct fib6_config *cfg)
2191 {
2192 struct rtmsg *rtm;
2193 struct nlattr *tb[RTA_MAX+1];
2194 int err;
2195
2196 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2197 if (err < 0)
2198 goto errout;
2199
2200 err = -EINVAL;
2201 rtm = nlmsg_data(nlh);
2202 memset(cfg, 0, sizeof(*cfg));
2203
2204 cfg->fc_table = rtm->rtm_table;
2205 cfg->fc_dst_len = rtm->rtm_dst_len;
2206 cfg->fc_src_len = rtm->rtm_src_len;
2207 cfg->fc_flags = RTF_UP;
2208 cfg->fc_protocol = rtm->rtm_protocol;
2209
2210 if (rtm->rtm_type == RTN_UNREACHABLE)
2211 cfg->fc_flags |= RTF_REJECT;
2212
2213 if (rtm->rtm_type == RTN_LOCAL)
2214 cfg->fc_flags |= RTF_LOCAL;
2215
2216 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2217 cfg->fc_nlinfo.nlh = nlh;
2218 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2219
2220 if (tb[RTA_GATEWAY]) {
2221 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2222 cfg->fc_flags |= RTF_GATEWAY;
2223 }
2224
2225 if (tb[RTA_DST]) {
2226 int plen = (rtm->rtm_dst_len + 7) >> 3;
2227
2228 if (nla_len(tb[RTA_DST]) < plen)
2229 goto errout;
2230
2231 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2232 }
2233
2234 if (tb[RTA_SRC]) {
2235 int plen = (rtm->rtm_src_len + 7) >> 3;
2236
2237 if (nla_len(tb[RTA_SRC]) < plen)
2238 goto errout;
2239
2240 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2241 }
2242
2243 if (tb[RTA_PREFSRC])
2244 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2245
2246 if (tb[RTA_OIF])
2247 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2248
2249 if (tb[RTA_PRIORITY])
2250 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2251
2252 if (tb[RTA_METRICS]) {
2253 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2254 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2255 }
2256
2257 if (tb[RTA_TABLE])
2258 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2259
2260 err = 0;
2261 errout:
2262 return err;
2263 }
2264
2265 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2266 {
2267 struct fib6_config cfg;
2268 int err;
2269
2270 err = rtm_to_fib6_config(skb, nlh, &cfg);
2271 if (err < 0)
2272 return err;
2273
2274 return ip6_route_del(&cfg);
2275 }
2276
2277 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2278 {
2279 struct fib6_config cfg;
2280 int err;
2281
2282 err = rtm_to_fib6_config(skb, nlh, &cfg);
2283 if (err < 0)
2284 return err;
2285
2286 return ip6_route_add(&cfg);
2287 }
2288
2289 static inline size_t rt6_nlmsg_size(void)
2290 {
2291 return NLMSG_ALIGN(sizeof(struct rtmsg))
2292 + nla_total_size(16) /* RTA_SRC */
2293 + nla_total_size(16) /* RTA_DST */
2294 + nla_total_size(16) /* RTA_GATEWAY */
2295 + nla_total_size(16) /* RTA_PREFSRC */
2296 + nla_total_size(4) /* RTA_TABLE */
2297 + nla_total_size(4) /* RTA_IIF */
2298 + nla_total_size(4) /* RTA_OIF */
2299 + nla_total_size(4) /* RTA_PRIORITY */
2300 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2301 + nla_total_size(sizeof(struct rta_cacheinfo));
2302 }
2303
2304 static int rt6_fill_node(struct net *net,
2305 struct sk_buff *skb, struct rt6_info *rt,
2306 struct in6_addr *dst, struct in6_addr *src,
2307 int iif, int type, u32 pid, u32 seq,
2308 int prefix, int nowait, unsigned int flags)
2309 {
2310 struct rtmsg *rtm;
2311 struct nlmsghdr *nlh;
2312 long expires;
2313 u32 table;
2314
2315 if (prefix) { /* user wants prefix routes only */
2316 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2317 /* success since this is not a prefix route */
2318 return 1;
2319 }
2320 }
2321
2322 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2323 if (nlh == NULL)
2324 return -EMSGSIZE;
2325
2326 rtm = nlmsg_data(nlh);
2327 rtm->rtm_family = AF_INET6;
2328 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2329 rtm->rtm_src_len = rt->rt6i_src.plen;
2330 rtm->rtm_tos = 0;
2331 if (rt->rt6i_table)
2332 table = rt->rt6i_table->tb6_id;
2333 else
2334 table = RT6_TABLE_UNSPEC;
2335 rtm->rtm_table = table;
2336 NLA_PUT_U32(skb, RTA_TABLE, table);
2337 if (rt->rt6i_flags&RTF_REJECT)
2338 rtm->rtm_type = RTN_UNREACHABLE;
2339 else if (rt->rt6i_flags&RTF_LOCAL)
2340 rtm->rtm_type = RTN_LOCAL;
2341 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2342 rtm->rtm_type = RTN_LOCAL;
2343 else
2344 rtm->rtm_type = RTN_UNICAST;
2345 rtm->rtm_flags = 0;
2346 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2347 rtm->rtm_protocol = rt->rt6i_protocol;
2348 if (rt->rt6i_flags&RTF_DYNAMIC)
2349 rtm->rtm_protocol = RTPROT_REDIRECT;
2350 else if (rt->rt6i_flags & RTF_ADDRCONF)
2351 rtm->rtm_protocol = RTPROT_KERNEL;
2352 else if (rt->rt6i_flags&RTF_DEFAULT)
2353 rtm->rtm_protocol = RTPROT_RA;
2354
2355 if (rt->rt6i_flags&RTF_CACHE)
2356 rtm->rtm_flags |= RTM_F_CLONED;
2357
2358 if (dst) {
2359 NLA_PUT(skb, RTA_DST, 16, dst);
2360 rtm->rtm_dst_len = 128;
2361 } else if (rtm->rtm_dst_len)
2362 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2363 #ifdef CONFIG_IPV6_SUBTREES
2364 if (src) {
2365 NLA_PUT(skb, RTA_SRC, 16, src);
2366 rtm->rtm_src_len = 128;
2367 } else if (rtm->rtm_src_len)
2368 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2369 #endif
2370 if (iif) {
2371 #ifdef CONFIG_IPV6_MROUTE
2372 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2373 int err = ip6mr_get_route(net, skb, rtm, nowait);
2374 if (err <= 0) {
2375 if (!nowait) {
2376 if (err == 0)
2377 return 0;
2378 goto nla_put_failure;
2379 } else {
2380 if (err == -EMSGSIZE)
2381 goto nla_put_failure;
2382 }
2383 }
2384 } else
2385 #endif
2386 NLA_PUT_U32(skb, RTA_IIF, iif);
2387 } else if (dst) {
2388 struct in6_addr saddr_buf;
2389 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
2390 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2391 }
2392
2393 if (rt->rt6i_prefsrc.plen) {
2394 struct in6_addr saddr_buf;
2395 ipv6_addr_copy(&saddr_buf, &rt->rt6i_prefsrc.addr);
2396 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2397 }
2398
2399 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2400 goto nla_put_failure;
2401
2402 if (rt->dst.neighbour)
2403 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);
2404
2405 if (rt->dst.dev)
2406 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2407
2408 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2409
2410 if (!(rt->rt6i_flags & RTF_EXPIRES))
2411 expires = 0;
2412 else if (rt->rt6i_expires - jiffies < INT_MAX)
2413 expires = rt->rt6i_expires - jiffies;
2414 else
2415 expires = INT_MAX;
2416
2417 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2418 expires, rt->dst.error) < 0)
2419 goto nla_put_failure;
2420
2421 return nlmsg_end(skb, nlh);
2422
2423 nla_put_failure:
2424 nlmsg_cancel(skb, nlh);
2425 return -EMSGSIZE;
2426 }
2427
2428 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2429 {
2430 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2431 int prefix;
2432
2433 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2434 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2435 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2436 } else
2437 prefix = 0;
2438
2439 return rt6_fill_node(arg->net,
2440 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2441 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2442 prefix, 0, NLM_F_MULTI);
2443 }
2444
2445 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2446 {
2447 struct net *net = sock_net(in_skb->sk);
2448 struct nlattr *tb[RTA_MAX+1];
2449 struct rt6_info *rt;
2450 struct sk_buff *skb;
2451 struct rtmsg *rtm;
2452 struct flowi6 fl6;
2453 int err, iif = 0;
2454
2455 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2456 if (err < 0)
2457 goto errout;
2458
2459 err = -EINVAL;
2460 memset(&fl6, 0, sizeof(fl6));
2461
2462 if (tb[RTA_SRC]) {
2463 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2464 goto errout;
2465
2466 ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC]));
2467 }
2468
2469 if (tb[RTA_DST]) {
2470 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2471 goto errout;
2472
2473 ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST]));
2474 }
2475
2476 if (tb[RTA_IIF])
2477 iif = nla_get_u32(tb[RTA_IIF]);
2478
2479 if (tb[RTA_OIF])
2480 fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
2481
2482 if (iif) {
2483 struct net_device *dev;
2484 dev = __dev_get_by_index(net, iif);
2485 if (!dev) {
2486 err = -ENODEV;
2487 goto errout;
2488 }
2489 }
2490
2491 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2492 if (skb == NULL) {
2493 err = -ENOBUFS;
2494 goto errout;
2495 }
2496
2497 /* Reserve room for dummy headers, this skb can pass
2498 through good chunk of routing engine.
2499 */
2500 skb_reset_mac_header(skb);
2501 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2502
2503 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
2504 skb_dst_set(skb, &rt->dst);
2505
2506 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2507 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2508 nlh->nlmsg_seq, 0, 0, 0);
2509 if (err < 0) {
2510 kfree_skb(skb);
2511 goto errout;
2512 }
2513
2514 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2515 errout:
2516 return err;
2517 }
2518
2519 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2520 {
2521 struct sk_buff *skb;
2522 struct net *net = info->nl_net;
2523 u32 seq;
2524 int err;
2525
2526 err = -ENOBUFS;
2527 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2528
2529 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2530 if (skb == NULL)
2531 goto errout;
2532
2533 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2534 event, info->pid, seq, 0, 0, 0);
2535 if (err < 0) {
2536 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2537 WARN_ON(err == -EMSGSIZE);
2538 kfree_skb(skb);
2539 goto errout;
2540 }
2541 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2542 info->nlh, gfp_any());
2543 return;
2544 errout:
2545 if (err < 0)
2546 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2547 }
2548
2549 static int ip6_route_dev_notify(struct notifier_block *this,
2550 unsigned long event, void *data)
2551 {
2552 struct net_device *dev = (struct net_device *)data;
2553 struct net *net = dev_net(dev);
2554
2555 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2556 net->ipv6.ip6_null_entry->dst.dev = dev;
2557 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2558 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2559 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2560 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2561 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2562 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2563 #endif
2564 }
2565
2566 return NOTIFY_OK;
2567 }
2568
2569 /*
2570 * /proc
2571 */
2572
2573 #ifdef CONFIG_PROC_FS
2574
/*
 * Bookkeeping for a buffer-based /proc read.
 * NOTE(review): nothing in the visible /proc code uses this structure;
 * field meanings are presumed from their names - confirm before relying
 * on them.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2583
2584 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2585 {
2586 struct seq_file *m = p_arg;
2587
2588 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2589
2590 #ifdef CONFIG_IPV6_SUBTREES
2591 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2592 #else
2593 seq_puts(m, "00000000000000000000000000000000 00 ");
2594 #endif
2595
2596 if (rt->rt6i_nexthop) {
2597 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
2598 } else {
2599 seq_puts(m, "00000000000000000000000000000000");
2600 }
2601 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2602 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2603 rt->dst.__use, rt->rt6i_flags,
2604 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2605 return 0;
2606 }
2607
2608 static int ipv6_route_show(struct seq_file *m, void *v)
2609 {
2610 struct net *net = (struct net *)m->private;
2611 fib6_clean_all(net, rt6_info_route, 0, m);
2612 return 0;
2613 }
2614
2615 static int ipv6_route_open(struct inode *inode, struct file *file)
2616 {
2617 return single_open_net(inode, file, ipv6_route_show);
2618 }
2619
2620 static const struct file_operations ipv6_route_proc_fops = {
2621 .owner = THIS_MODULE,
2622 .open = ipv6_route_open,
2623 .read = seq_read,
2624 .llseek = seq_lseek,
2625 .release = single_release_net,
2626 };
2627
2628 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2629 {
2630 struct net *net = (struct net *)seq->private;
2631 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2632 net->ipv6.rt6_stats->fib_nodes,
2633 net->ipv6.rt6_stats->fib_route_nodes,
2634 net->ipv6.rt6_stats->fib_rt_alloc,
2635 net->ipv6.rt6_stats->fib_rt_entries,
2636 net->ipv6.rt6_stats->fib_rt_cache,
2637 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2638 net->ipv6.rt6_stats->fib_discarded_routes);
2639
2640 return 0;
2641 }
2642
2643 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2644 {
2645 return single_open_net(inode, file, rt6_stats_seq_show);
2646 }
2647
2648 static const struct file_operations rt6_stats_seq_fops = {
2649 .owner = THIS_MODULE,
2650 .open = rt6_stats_seq_open,
2651 .read = seq_read,
2652 .llseek = seq_lseek,
2653 .release = single_release_net,
2654 };
2655 #endif /* CONFIG_PROC_FS */
2656
2657 #ifdef CONFIG_SYSCTL
2658
2659 static
2660 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2661 void __user *buffer, size_t *lenp, loff_t *ppos)
2662 {
2663 struct net *net;
2664 int delay;
2665 if (!write)
2666 return -EINVAL;
2667
2668 net = (struct net *)ctl->extra1;
2669 delay = net->ipv6.sysctl.flush_delay;
2670 proc_dointvec(ctl, write, buffer, lenp, ppos);
2671 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2672 return 0;
2673 }
2674
2675 ctl_table ipv6_route_table_template[] = {
2676 {
2677 .procname = "flush",
2678 .data = &init_net.ipv6.sysctl.flush_delay,
2679 .maxlen = sizeof(int),
2680 .mode = 0200,
2681 .proc_handler = ipv6_sysctl_rtcache_flush
2682 },
2683 {
2684 .procname = "gc_thresh",
2685 .data = &ip6_dst_ops_template.gc_thresh,
2686 .maxlen = sizeof(int),
2687 .mode = 0644,
2688 .proc_handler = proc_dointvec,
2689 },
2690 {
2691 .procname = "max_size",
2692 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2693 .maxlen = sizeof(int),
2694 .mode = 0644,
2695 .proc_handler = proc_dointvec,
2696 },
2697 {
2698 .procname = "gc_min_interval",
2699 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2700 .maxlen = sizeof(int),
2701 .mode = 0644,
2702 .proc_handler = proc_dointvec_jiffies,
2703 },
2704 {
2705 .procname = "gc_timeout",
2706 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2707 .maxlen = sizeof(int),
2708 .mode = 0644,
2709 .proc_handler = proc_dointvec_jiffies,
2710 },
2711 {
2712 .procname = "gc_interval",
2713 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2714 .maxlen = sizeof(int),
2715 .mode = 0644,
2716 .proc_handler = proc_dointvec_jiffies,
2717 },
2718 {
2719 .procname = "gc_elasticity",
2720 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2721 .maxlen = sizeof(int),
2722 .mode = 0644,
2723 .proc_handler = proc_dointvec,
2724 },
2725 {
2726 .procname = "mtu_expires",
2727 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2728 .maxlen = sizeof(int),
2729 .mode = 0644,
2730 .proc_handler = proc_dointvec_jiffies,
2731 },
2732 {
2733 .procname = "min_adv_mss",
2734 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2735 .maxlen = sizeof(int),
2736 .mode = 0644,
2737 .proc_handler = proc_dointvec,
2738 },
2739 {
2740 .procname = "gc_min_interval_ms",
2741 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2742 .maxlen = sizeof(int),
2743 .mode = 0644,
2744 .proc_handler = proc_dointvec_ms_jiffies,
2745 },
2746 { }
2747 };
2748
2749 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2750 {
2751 struct ctl_table *table;
2752
2753 table = kmemdup(ipv6_route_table_template,
2754 sizeof(ipv6_route_table_template),
2755 GFP_KERNEL);
2756
2757 if (table) {
2758 table[0].data = &net->ipv6.sysctl.flush_delay;
2759 table[0].extra1 = net;
2760 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2761 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2762 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2763 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2764 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2765 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2766 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2767 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2768 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2769 }
2770
2771 return table;
2772 }
2773 #endif
2774
2775 static int __net_init ip6_route_net_init(struct net *net)
2776 {
2777 int ret = -ENOMEM;
2778
2779 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2780 sizeof(net->ipv6.ip6_dst_ops));
2781
2782 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2783 goto out_ip6_dst_ops;
2784
2785 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2786 sizeof(*net->ipv6.ip6_null_entry),
2787 GFP_KERNEL);
2788 if (!net->ipv6.ip6_null_entry)
2789 goto out_ip6_dst_entries;
2790 net->ipv6.ip6_null_entry->dst.path =
2791 (struct dst_entry *)net->ipv6.ip6_null_entry;
2792 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2793 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2794 ip6_template_metrics, true);
2795
2796 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2797 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2798 sizeof(*net->ipv6.ip6_prohibit_entry),
2799 GFP_KERNEL);
2800 if (!net->ipv6.ip6_prohibit_entry)
2801 goto out_ip6_null_entry;
2802 net->ipv6.ip6_prohibit_entry->dst.path =
2803 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2804 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2805 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2806 ip6_template_metrics, true);
2807
2808 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2809 sizeof(*net->ipv6.ip6_blk_hole_entry),
2810 GFP_KERNEL);
2811 if (!net->ipv6.ip6_blk_hole_entry)
2812 goto out_ip6_prohibit_entry;
2813 net->ipv6.ip6_blk_hole_entry->dst.path =
2814 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2815 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2816 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2817 ip6_template_metrics, true);
2818 #endif
2819
2820 net->ipv6.sysctl.flush_delay = 0;
2821 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2822 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2823 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2824 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2825 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2826 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2827 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2828
2829 #ifdef CONFIG_PROC_FS
2830 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2831 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2832 #endif
2833 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2834
2835 ret = 0;
2836 out:
2837 return ret;
2838
2839 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2840 out_ip6_prohibit_entry:
2841 kfree(net->ipv6.ip6_prohibit_entry);
2842 out_ip6_null_entry:
2843 kfree(net->ipv6.ip6_null_entry);
2844 #endif
2845 out_ip6_dst_entries:
2846 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2847 out_ip6_dst_ops:
2848 goto out;
2849 }
2850
2851 static void __net_exit ip6_route_net_exit(struct net *net)
2852 {
2853 #ifdef CONFIG_PROC_FS
2854 proc_net_remove(net, "ipv6_route");
2855 proc_net_remove(net, "rt6_stats");
2856 #endif
2857 kfree(net->ipv6.ip6_null_entry);
2858 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2859 kfree(net->ipv6.ip6_prohibit_entry);
2860 kfree(net->ipv6.ip6_blk_hole_entry);
2861 #endif
2862 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2863 }
2864
2865 static struct pernet_operations ip6_route_net_ops = {
2866 .init = ip6_route_net_init,
2867 .exit = ip6_route_net_exit,
2868 };
2869
2870 static struct notifier_block ip6_route_dev_notifier = {
2871 .notifier_call = ip6_route_dev_notify,
2872 .priority = 0,
2873 };
2874
2875 int __init ip6_route_init(void)
2876 {
2877 int ret;
2878
2879 ret = -ENOMEM;
2880 ip6_dst_ops_template.kmem_cachep =
2881 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2882 SLAB_HWCACHE_ALIGN, NULL);
2883 if (!ip6_dst_ops_template.kmem_cachep)
2884 goto out;
2885
2886 ret = dst_entries_init(&ip6_dst_blackhole_ops);
2887 if (ret)
2888 goto out_kmem_cache;
2889
2890 ret = register_pernet_subsys(&ip6_route_net_ops);
2891 if (ret)
2892 goto out_dst_entries;
2893
2894 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2895
2896 /* Registering of the loopback is done before this portion of code,
2897 * the loopback reference in rt6_info will not be taken, do it
2898 * manually for init_net */
2899 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
2900 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2901 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2902 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
2903 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2904 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
2905 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2906 #endif
2907 ret = fib6_init();
2908 if (ret)
2909 goto out_register_subsys;
2910
2911 ret = xfrm6_init();
2912 if (ret)
2913 goto out_fib6_init;
2914
2915 ret = fib6_rules_init();
2916 if (ret)
2917 goto xfrm6_init;
2918
2919 ret = -ENOBUFS;
2920 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2921 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2922 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2923 goto fib6_rules_init;
2924
2925 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
2926 if (ret)
2927 goto fib6_rules_init;
2928
2929 out:
2930 return ret;
2931
2932 fib6_rules_init:
2933 fib6_rules_cleanup();
2934 xfrm6_init:
2935 xfrm6_fini();
2936 out_fib6_init:
2937 fib6_gc_cleanup();
2938 out_register_subsys:
2939 unregister_pernet_subsys(&ip6_route_net_ops);
2940 out_dst_entries:
2941 dst_entries_destroy(&ip6_dst_blackhole_ops);
2942 out_kmem_cache:
2943 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2944 goto out;
2945 }
2946
2947 void ip6_route_cleanup(void)
2948 {
2949 unregister_netdevice_notifier(&ip6_route_dev_notifier);
2950 fib6_rules_cleanup();
2951 xfrm6_fini();
2952 fib6_gc_cleanup();
2953 unregister_pernet_subsys(&ip6_route_net_ops);
2954 dst_entries_destroy(&ip6_dst_blackhole_ops);
2955 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2956 }
This page took 0.095632 seconds and 5 git commands to generate.