inet: Kill FLOWI_FLAG_PRECOW_METRICS.
net/ipv6/route.c
1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14 /* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
23 * Ville Nuorvala
24 * Fixed routing subtrees.
25 */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60
61 #include <asm/uaccess.h>
62
63 #ifdef CONFIG_SYSCTL
64 #include <linux/sysctl.h>
65 #endif
66
67 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
68 const struct in6_addr *dest);
69 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
70 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
71 static unsigned int ip6_mtu(const struct dst_entry *dst);
72 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
73 static void ip6_dst_destroy(struct dst_entry *);
74 static void ip6_dst_ifdown(struct dst_entry *,
75 struct net_device *dev, int how);
76 static int ip6_dst_gc(struct dst_ops *ops);
77
78 static int ip6_pkt_discard(struct sk_buff *skb);
79 static int ip6_pkt_discard_out(struct sk_buff *skb);
80 static void ip6_link_failure(struct sk_buff *skb);
81 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
82
83 #ifdef CONFIG_IPV6_ROUTE_INFO
84 static struct rt6_info *rt6_add_route_info(struct net *net,
85 const struct in6_addr *prefix, int prefixlen,
86 const struct in6_addr *gwaddr, int ifindex,
87 unsigned int pref);
88 static struct rt6_info *rt6_get_route_info(struct net *net,
89 const struct in6_addr *prefix, int prefixlen,
90 const struct in6_addr *gwaddr, int ifindex);
91 #endif
92
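/*
 * Copy-on-write the metrics of a host route into its inet_peer so they
 * become writable; a cmpxchg() on dst->_metrics resolves races with
 * concurrent writers, and NULL is returned if the winner is read-only.
 */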
93 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
94 {
95 struct rt6_info *rt = (struct rt6_info *) dst;
96 struct inet_peer *peer;
97 u32 *p = NULL;
98
99 if (!(rt->dst.flags & DST_HOST))
100 return NULL;
101
102 peer = rt6_get_peer_create(rt);
103 if (peer) {
104 u32 *old_p = __DST_METRICS_PTR(old);
105 unsigned long prev, new;
106
107 p = peer->metrics;
108 if (inet_metrics_new(peer))
109 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
110
111 new = (unsigned long) p;
112 prev = cmpxchg(&dst->_metrics, old, new);
113
114 if (prev != old) {
115 p = __DST_METRICS_PTR(prev);
116 if (prev & DST_METRICS_READ_ONLY)
117 p = NULL;
118 }
119 }
120 return p;
121 }
122
123 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
124 struct sk_buff *skb,
125 const void *daddr)
126 {
127 struct in6_addr *p = &rt->rt6i_gateway;
128
129 if (!ipv6_addr_any(p))
130 return (const void *) p;
131 else if (skb)
132 return &ipv6_hdr(skb)->daddr;
133 return daddr;
134 }
135
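/*
 * dst_ops->neigh_lookup hook: find (or create) the nd_tbl neighbour
 * entry for the next hop picked by choose_neigh_daddr().
 */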
136 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
137 struct sk_buff *skb,
138 const void *daddr)
139 {
140 struct rt6_info *rt = (struct rt6_info *) dst;
141 struct neighbour *n;
142
143 daddr = choose_neigh_daddr(rt, skb, daddr);
144 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
145 if (n)
146 return n;
147 return neigh_create(&nd_tbl, daddr, dst->dev);
148 }
149
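/*
 * Resolve the neighbour entry for rt->rt6i_gateway on @dev and cache it
 * in rt->n; returns 0 on success or the neigh_create() error.
 */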
150 static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
151 {
152 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
153 if (!n) {
154 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
155 if (IS_ERR(n))
156 return PTR_ERR(n);
157 }
158 rt->n = n;
159
160 return 0;
161 }
162
163 static struct dst_ops ip6_dst_ops_template = {
164 .family = AF_INET6,
165 .protocol = cpu_to_be16(ETH_P_IPV6),
166 .gc = ip6_dst_gc,
167 .gc_thresh = 1024,
168 .check = ip6_dst_check,
169 .default_advmss = ip6_default_advmss,
170 .mtu = ip6_mtu,
171 .cow_metrics = ipv6_cow_metrics,
172 .destroy = ip6_dst_destroy,
173 .ifdown = ip6_dst_ifdown,
174 .negative_advice = ip6_negative_advice,
175 .link_failure = ip6_link_failure,
176 .update_pmtu = ip6_rt_update_pmtu,
177 .local_out = __ip6_local_out,
178 .neigh_lookup = ip6_neigh_lookup,
179 };
180
181 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
182 {
183 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
184
185 return mtu ? : dst->dev->mtu;
186 }
187
188 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
189 {
190 }
191
192 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
193 unsigned long old)
194 {
195 return NULL;
196 }
197
198 static struct dst_ops ip6_dst_blackhole_ops = {
199 .family = AF_INET6,
200 .protocol = cpu_to_be16(ETH_P_IPV6),
201 .destroy = ip6_dst_destroy,
202 .check = ip6_dst_check,
203 .mtu = ip6_blackhole_mtu,
204 .default_advmss = ip6_default_advmss,
205 .update_pmtu = ip6_rt_blackhole_update_pmtu,
206 .cow_metrics = ip6_rt_blackhole_cow_metrics,
207 .neigh_lookup = ip6_neigh_lookup,
208 };
209
210 static const u32 ip6_template_metrics[RTAX_MAX] = {
211 [RTAX_HOPLIMIT - 1] = 255,
212 };
213
214 static struct rt6_info ip6_null_entry_template = {
215 .dst = {
216 .__refcnt = ATOMIC_INIT(1),
217 .__use = 1,
218 .obsolete = -1,
219 .error = -ENETUNREACH,
220 .input = ip6_pkt_discard,
221 .output = ip6_pkt_discard_out,
222 },
223 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
224 .rt6i_protocol = RTPROT_KERNEL,
225 .rt6i_metric = ~(u32) 0,
226 .rt6i_ref = ATOMIC_INIT(1),
227 };
228
229 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
230
231 static int ip6_pkt_prohibit(struct sk_buff *skb);
232 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
233
234 static struct rt6_info ip6_prohibit_entry_template = {
235 .dst = {
236 .__refcnt = ATOMIC_INIT(1),
237 .__use = 1,
238 .obsolete = -1,
239 .error = -EACCES,
240 .input = ip6_pkt_prohibit,
241 .output = ip6_pkt_prohibit_out,
242 },
243 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
244 .rt6i_protocol = RTPROT_KERNEL,
245 .rt6i_metric = ~(u32) 0,
246 .rt6i_ref = ATOMIC_INIT(1),
247 };
248
249 static struct rt6_info ip6_blk_hole_entry_template = {
250 .dst = {
251 .__refcnt = ATOMIC_INIT(1),
252 .__use = 1,
253 .obsolete = -1,
254 .error = -EINVAL,
255 .input = dst_discard,
256 .output = dst_discard,
257 },
258 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
259 .rt6i_protocol = RTPROT_KERNEL,
260 .rt6i_metric = ~(u32) 0,
261 .rt6i_ref = ATOMIC_INIT(1),
262 };
263
264 #endif
265
266 /* allocate dst with ip6_dst_ops */
267 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
268 struct net_device *dev,
269 int flags,
270 struct fib6_table *table)
271 {
272 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
273 0, 0, flags);
274
275 if (rt) {
276 memset(&rt->n, 0,
277 sizeof(*rt) - sizeof(struct dst_entry));
278 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
279 }
280 return rt;
281 }
282
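/*
 * Final teardown of a route: drop its neighbour, any non-host metrics,
 * the inet6_dev, the dst->from reference and the attached inet_peer.
 */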
283 static void ip6_dst_destroy(struct dst_entry *dst)
284 {
285 struct rt6_info *rt = (struct rt6_info *)dst;
286 struct inet6_dev *idev = rt->rt6i_idev;
287
288 if (rt->n)
289 neigh_release(rt->n);
290
291 if (!(rt->dst.flags & DST_HOST))
292 dst_destroy_metrics_generic(dst);
293
294 if (idev) {
295 rt->rt6i_idev = NULL;
296 in6_dev_put(idev);
297 }
298
299 if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
300 dst_release(dst->from);
301
302 if (rt6_has_peer(rt)) {
303 struct inet_peer *peer = rt6_peer_ptr(rt);
304 inet_putpeer(peer);
305 }
306 }
307
308 static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
309
310 static u32 rt6_peer_genid(void)
311 {
312 return atomic_read(&__rt6_peer_genid);
313 }
314
315 void rt6_bind_peer(struct rt6_info *rt, int create)
316 {
317 struct inet_peer_base *base;
318 struct inet_peer *peer;
319
320 base = inetpeer_base_ptr(rt->_rt6i_peer);
321 if (!base)
322 return;
323
324 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
325 if (peer) {
326 if (!rt6_set_peer(rt, peer))
327 inet_putpeer(peer);
328 else
329 rt->rt6i_peer_genid = rt6_peer_genid();
330 }
331 }
332
333 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
334 int how)
335 {
336 struct rt6_info *rt = (struct rt6_info *)dst;
337 struct inet6_dev *idev = rt->rt6i_idev;
338 struct net_device *loopback_dev =
339 dev_net(dev)->loopback_dev;
340
341 if (dev != loopback_dev) {
342 if (idev && idev->dev == dev) {
343 struct inet6_dev *loopback_idev =
344 in6_dev_get(loopback_dev);
345 if (loopback_idev) {
346 rt->rt6i_idev = loopback_idev;
347 in6_dev_put(idev);
348 }
349 }
350 if (rt->n && rt->n->dev == dev) {
351 rt->n->dev = loopback_dev;
352 dev_hold(loopback_dev);
353 dev_put(dev);
354 }
355 }
356 }
357
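/*
 * A route has expired when its own RTF_EXPIRES timer has passed or,
 * for a clone, when the route it was copied from (dst.from) expired.
 */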
358 static bool rt6_check_expired(const struct rt6_info *rt)
359 {
360 struct rt6_info *ort = NULL;
361
362 if (rt->rt6i_flags & RTF_EXPIRES) {
363 if (time_after(jiffies, rt->dst.expires))
364 return true;
365 } else if (rt->dst.from) {
366 ort = (struct rt6_info *) rt->dst.from;
367 return (ort->rt6i_flags & RTF_EXPIRES) &&
368 time_after(jiffies, ort->dst.expires);
369 }
370 return false;
371 }
372
373 static bool rt6_need_strict(const struct in6_addr *daddr)
374 {
375 return ipv6_addr_type(daddr) &
376 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
377 }
378
379 /*
380 * Route lookup. Any table->tb6_lock is implied.
381 */
382
383 static inline struct rt6_info *rt6_device_match(struct net *net,
384 struct rt6_info *rt,
385 const struct in6_addr *saddr,
386 int oif,
387 int flags)
388 {
389 struct rt6_info *local = NULL;
390 struct rt6_info *sprt;
391
392 if (!oif && ipv6_addr_any(saddr))
393 goto out;
394
395 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
396 struct net_device *dev = sprt->dst.dev;
397
398 if (oif) {
399 if (dev->ifindex == oif)
400 return sprt;
401 if (dev->flags & IFF_LOOPBACK) {
402 if (!sprt->rt6i_idev ||
403 sprt->rt6i_idev->dev->ifindex != oif) {
404 if (flags & RT6_LOOKUP_F_IFACE && oif)
405 continue;
406 if (local && (!oif ||
407 local->rt6i_idev->dev->ifindex == oif))
408 continue;
409 }
410 local = sprt;
411 }
412 } else {
413 if (ipv6_chk_addr(net, saddr, dev,
414 flags & RT6_LOOKUP_F_IFACE))
415 return sprt;
416 }
417 }
418
419 if (oif) {
420 if (local)
421 return local;
422
423 if (flags & RT6_LOOKUP_F_IFACE)
424 return net->ipv6.ip6_null_entry;
425 }
426 out:
427 return rt;
428 }
429
430 #ifdef CONFIG_IPV6_ROUTER_PREF
431 static void rt6_probe(struct rt6_info *rt)
432 {
433 struct neighbour *neigh;
434 /*
435 * Okay, this does not seem to be appropriate
436 * for now, however, we need to check if it
437 * is really so; aka Router Reachability Probing.
438 *
439 * Router Reachability Probe MUST be rate-limited
440 * to no more than one per minute.
441 */
442 rcu_read_lock();
443 neigh = rt ? rt->n : NULL;
444 if (!neigh || (neigh->nud_state & NUD_VALID))
445 goto out;
446 read_lock_bh(&neigh->lock);
447 if (!(neigh->nud_state & NUD_VALID) &&
448 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
449 struct in6_addr mcaddr;
450 struct in6_addr *target;
451
452 neigh->updated = jiffies;
453 read_unlock_bh(&neigh->lock);
454
455 target = (struct in6_addr *)&neigh->primary_key;
456 addrconf_addr_solict_mult(target, &mcaddr);
457 ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
458 } else {
459 read_unlock_bh(&neigh->lock);
460 }
461 out:
462 rcu_read_unlock();
463 }
464 #else
465 static inline void rt6_probe(struct rt6_info *rt)
466 {
467 }
468 #endif
469
470 /*
471 * Default Router Selection (RFC 2461 6.3.6)
472 */
473 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
474 {
475 struct net_device *dev = rt->dst.dev;
476 if (!oif || dev->ifindex == oif)
477 return 2;
478 if ((dev->flags & IFF_LOOPBACK) &&
479 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
480 return 1;
481 return 0;
482 }
483
484 static inline int rt6_check_neigh(struct rt6_info *rt)
485 {
486 struct neighbour *neigh;
487 int m;
488
489 rcu_read_lock();
490 neigh = rt->n;
491 if (rt->rt6i_flags & RTF_NONEXTHOP ||
492 !(rt->rt6i_flags & RTF_GATEWAY))
493 m = 1;
494 else if (neigh) {
495 read_lock_bh(&neigh->lock);
496 if (neigh->nud_state & NUD_VALID)
497 m = 2;
498 #ifdef CONFIG_IPV6_ROUTER_PREF
499 else if (neigh->nud_state & NUD_FAILED)
500 m = 0;
501 #endif
502 else
503 m = 1;
504 read_unlock_bh(&neigh->lock);
505 } else
506 m = 0;
507 rcu_read_unlock();
508 return m;
509 }
510
511 static int rt6_score_route(struct rt6_info *rt, int oif,
512 int strict)
513 {
514 int m, n;
515
516 m = rt6_check_dev(rt, oif);
517 if (!m && (strict & RT6_LOOKUP_F_IFACE))
518 return -1;
519 #ifdef CONFIG_IPV6_ROUTER_PREF
520 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
521 #endif
522 n = rt6_check_neigh(rt);
523 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
524 return -1;
525 return m;
526 }
527
528 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
529 int *mpri, struct rt6_info *match)
530 {
531 int m;
532
533 if (rt6_check_expired(rt))
534 goto out;
535
536 m = rt6_score_route(rt, oif, strict);
537 if (m < 0)
538 goto out;
539
540 if (m > *mpri) {
541 if (strict & RT6_LOOKUP_F_REACHABLE)
542 rt6_probe(match);
543 *mpri = m;
544 match = rt;
545 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
546 rt6_probe(rt);
547 }
548
549 out:
550 return match;
551 }
552
553 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
554 struct rt6_info *rr_head,
555 u32 metric, int oif, int strict)
556 {
557 struct rt6_info *rt, *match;
558 int mpri = -1;
559
560 match = NULL;
561 for (rt = rr_head; rt && rt->rt6i_metric == metric;
562 rt = rt->dst.rt6_next)
563 match = find_match(rt, oif, strict, &mpri, match);
564 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
565 rt = rt->dst.rt6_next)
566 match = find_match(rt, oif, strict, &mpri, match);
567
568 return match;
569 }
570
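/*
 * Pick the highest-scoring route at this fib6 node; when nothing is
 * reachable, advance fn->rr_ptr so equal-metric routes are tried
 * round-robin on subsequent lookups.
 */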
571 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
572 {
573 struct rt6_info *match, *rt0;
574 struct net *net;
575
576 rt0 = fn->rr_ptr;
577 if (!rt0)
578 fn->rr_ptr = rt0 = fn->leaf;
579
580 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
581
582 if (!match &&
583 (strict & RT6_LOOKUP_F_REACHABLE)) {
584 struct rt6_info *next = rt0->dst.rt6_next;
585
586 /* no entries matched; do round-robin */
587 if (!next || next->rt6i_metric != rt0->rt6i_metric)
588 next = fn->leaf;
589
590 if (next != rt0)
591 fn->rr_ptr = next;
592 }
593
594 net = dev_net(rt0->dst.dev);
595 return match ? match : net->ipv6.ip6_null_entry;
596 }
597
598 #ifdef CONFIG_IPV6_ROUTE_INFO
599 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
600 const struct in6_addr *gwaddr)
601 {
602 struct net *net = dev_net(dev);
603 struct route_info *rinfo = (struct route_info *) opt;
604 struct in6_addr prefix_buf, *prefix;
605 unsigned int pref;
606 unsigned long lifetime;
607 struct rt6_info *rt;
608
609 if (len < sizeof(struct route_info)) {
610 return -EINVAL;
611 }
612
613 /* Sanity check for prefix_len and length */
614 if (rinfo->length > 3) {
615 return -EINVAL;
616 } else if (rinfo->prefix_len > 128) {
617 return -EINVAL;
618 } else if (rinfo->prefix_len > 64) {
619 if (rinfo->length < 2) {
620 return -EINVAL;
621 }
622 } else if (rinfo->prefix_len > 0) {
623 if (rinfo->length < 1) {
624 return -EINVAL;
625 }
626 }
627
628 pref = rinfo->route_pref;
629 if (pref == ICMPV6_ROUTER_PREF_INVALID)
630 return -EINVAL;
631
632 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
633
634 if (rinfo->length == 3)
635 prefix = (struct in6_addr *)rinfo->prefix;
636 else {
637 /* this function is safe */
638 ipv6_addr_prefix(&prefix_buf,
639 (struct in6_addr *)rinfo->prefix,
640 rinfo->prefix_len);
641 prefix = &prefix_buf;
642 }
643
644 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
645 dev->ifindex);
646
647 if (rt && !lifetime) {
648 ip6_del_rt(rt);
649 rt = NULL;
650 }
651
652 if (!rt && lifetime)
653 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
654 pref);
655 else if (rt)
656 rt->rt6i_flags = RTF_ROUTEINFO |
657 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
658
659 if (rt) {
660 if (!addrconf_finite_timeout(lifetime))
661 rt6_clean_expires(rt);
662 else
663 rt6_set_expires(rt, jiffies + HZ * lifetime);
664
665 dst_release(&rt->dst);
666 }
667 return 0;
668 }
669 #endif
670
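/*
 * If the lookup ended up on the null entry, walk back towards the tree
 * root (descending into FIB6_SUBTREE()s where present) until a node
 * carrying route info is found, then jump to the restart label.
 */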
671 #define BACKTRACK(__net, saddr) \
672 do { \
673 if (rt == __net->ipv6.ip6_null_entry) { \
674 struct fib6_node *pn; \
675 while (1) { \
676 if (fn->fn_flags & RTN_TL_ROOT) \
677 goto out; \
678 pn = fn->parent; \
679 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
680 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
681 else \
682 fn = pn; \
683 if (fn->fn_flags & RTN_RTINFO) \
684 goto restart; \
685 } \
686 } \
687 } while (0)
688
689 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
690 struct fib6_table *table,
691 struct flowi6 *fl6, int flags)
692 {
693 struct fib6_node *fn;
694 struct rt6_info *rt;
695
696 read_lock_bh(&table->tb6_lock);
697 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
698 restart:
699 rt = fn->leaf;
700 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
701 BACKTRACK(net, &fl6->saddr);
702 out:
703 dst_use(&rt->dst, jiffies);
704 read_unlock_bh(&table->tb6_lock);
705 return rt;
706
707 }
708
709 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
710 int flags)
711 {
712 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
713 }
714 EXPORT_SYMBOL_GPL(ip6_route_lookup);
715
716 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
717 const struct in6_addr *saddr, int oif, int strict)
718 {
719 struct flowi6 fl6 = {
720 .flowi6_oif = oif,
721 .daddr = *daddr,
722 };
723 struct dst_entry *dst;
724 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
725
726 if (saddr) {
727 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
728 flags |= RT6_LOOKUP_F_HAS_SADDR;
729 }
730
731 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
732 if (dst->error == 0)
733 return (struct rt6_info *) dst;
734
735 dst_release(dst);
736
737 return NULL;
738 }
739
740 EXPORT_SYMBOL(rt6_lookup);
741
742 /* ip6_ins_rt is called with FREE table->tb6_lock.
743 It takes a new route entry; if the addition fails for any reason
744 the route is freed. In any case, if the caller does not hold a
745 reference, it may be destroyed.
746 */
747
748 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
749 {
750 int err;
751 struct fib6_table *table;
752
753 table = rt->rt6i_table;
754 write_lock_bh(&table->tb6_lock);
755 err = fib6_add(&table->tb6_root, rt, info);
756 write_unlock_bh(&table->tb6_lock);
757
758 return err;
759 }
760
761 int ip6_ins_rt(struct rt6_info *rt)
762 {
763 struct nl_info info = {
764 .nl_net = dev_net(rt->dst.dev),
765 };
766 return __ip6_ins_rt(rt, &info);
767 }
768
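/*
 * Create an RTF_CACHE clone of @ort bound to @daddr (and @saddr with
 * subtrees) and bind its neighbour.  On neighbour table overflow the
 * allocation is retried once (outside softirq) after forcing a dst GC
 * pass with temporarily relaxed GC sysctls.
 */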
769 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
770 const struct in6_addr *daddr,
771 const struct in6_addr *saddr)
772 {
773 struct rt6_info *rt;
774
775 /*
776 * Clone the route.
777 */
778
779 rt = ip6_rt_copy(ort, daddr);
780
781 if (rt) {
782 int attempts = !in_softirq();
783
784 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
785 if (ort->rt6i_dst.plen != 128 &&
786 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
787 rt->rt6i_flags |= RTF_ANYCAST;
788 rt->rt6i_gateway = *daddr;
789 }
790
791 rt->rt6i_flags |= RTF_CACHE;
792
793 #ifdef CONFIG_IPV6_SUBTREES
794 if (rt->rt6i_src.plen && saddr) {
795 rt->rt6i_src.addr = *saddr;
796 rt->rt6i_src.plen = 128;
797 }
798 #endif
799
800 retry:
801 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
802 struct net *net = dev_net(rt->dst.dev);
803 int saved_rt_min_interval =
804 net->ipv6.sysctl.ip6_rt_gc_min_interval;
805 int saved_rt_elasticity =
806 net->ipv6.sysctl.ip6_rt_gc_elasticity;
807
808 if (attempts-- > 0) {
809 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
810 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
811
812 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
813
814 net->ipv6.sysctl.ip6_rt_gc_elasticity =
815 saved_rt_elasticity;
816 net->ipv6.sysctl.ip6_rt_gc_min_interval =
817 saved_rt_min_interval;
818 goto retry;
819 }
820
821 net_warn_ratelimited("Neighbour table overflow\n");
822 dst_free(&rt->dst);
823 return NULL;
824 }
825 }
826
827 return rt;
828 }
829
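/*
 * Lighter clone for routes that already carry a next hop: mark the
 * copy RTF_CACHE and take a reference on the parent's neighbour.
 */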
830 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
831 const struct in6_addr *daddr)
832 {
833 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
834
835 if (rt) {
836 rt->rt6i_flags |= RTF_CACHE;
837 rt->n = neigh_clone(ort->n);
838 }
839 return rt;
840 }
841
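/*
 * Core routing lookup: select the best route under tb6_lock and, unless
 * it is already an RTF_CACHE or null entry, create a per-destination
 * clone, insert it into the table and return it.  Insertion races are
 * handled by relooking up a limited number of times.
 */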
842 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
843 struct flowi6 *fl6, int flags)
844 {
845 struct fib6_node *fn;
846 struct rt6_info *rt, *nrt;
847 int strict = 0;
848 int attempts = 3;
849 int err;
850 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
851
852 strict |= flags & RT6_LOOKUP_F_IFACE;
853
854 relookup:
855 read_lock_bh(&table->tb6_lock);
856
857 restart_2:
858 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
859
860 restart:
861 rt = rt6_select(fn, oif, strict | reachable);
862
863 BACKTRACK(net, &fl6->saddr);
864 if (rt == net->ipv6.ip6_null_entry ||
865 rt->rt6i_flags & RTF_CACHE)
866 goto out;
867
868 dst_hold(&rt->dst);
869 read_unlock_bh(&table->tb6_lock);
870
871 if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
872 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
873 else if (!(rt->dst.flags & DST_HOST))
874 nrt = rt6_alloc_clone(rt, &fl6->daddr);
875 else
876 goto out2;
877
878 dst_release(&rt->dst);
879 rt = nrt ? : net->ipv6.ip6_null_entry;
880
881 dst_hold(&rt->dst);
882 if (nrt) {
883 err = ip6_ins_rt(nrt);
884 if (!err)
885 goto out2;
886 }
887
888 if (--attempts <= 0)
889 goto out2;
890
891 /*
892 * Race condition! In the gap, when table->tb6_lock was
893 * released someone could insert this route. Relookup.
894 */
895 dst_release(&rt->dst);
896 goto relookup;
897
898 out:
899 if (reachable) {
900 reachable = 0;
901 goto restart_2;
902 }
903 dst_hold(&rt->dst);
904 read_unlock_bh(&table->tb6_lock);
905 out2:
906 rt->dst.lastuse = jiffies;
907 rt->dst.__use++;
908
909 return rt;
910 }
911
912 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
913 struct flowi6 *fl6, int flags)
914 {
915 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
916 }
917
918 static struct dst_entry *ip6_route_input_lookup(struct net *net,
919 struct net_device *dev,
920 struct flowi6 *fl6, int flags)
921 {
922 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
923 flags |= RT6_LOOKUP_F_IFACE;
924
925 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
926 }
927
928 void ip6_route_input(struct sk_buff *skb)
929 {
930 const struct ipv6hdr *iph = ipv6_hdr(skb);
931 struct net *net = dev_net(skb->dev);
932 int flags = RT6_LOOKUP_F_HAS_SADDR;
933 struct flowi6 fl6 = {
934 .flowi6_iif = skb->dev->ifindex,
935 .daddr = iph->daddr,
936 .saddr = iph->saddr,
937 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
938 .flowi6_mark = skb->mark,
939 .flowi6_proto = iph->nexthdr,
940 };
941
942 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
943 }
944
945 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
946 struct flowi6 *fl6, int flags)
947 {
948 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
949 }
950
951 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
952 struct flowi6 *fl6)
953 {
954 int flags = 0;
955
956 fl6->flowi6_iif = net->loopback_dev->ifindex;
957
958 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
959 flags |= RT6_LOOKUP_F_IFACE;
960
961 if (!ipv6_addr_any(&fl6->saddr))
962 flags |= RT6_LOOKUP_F_HAS_SADDR;
963 else if (sk)
964 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
965
966 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
967 }
968
969 EXPORT_SYMBOL(ip6_route_output);
970
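/*
 * Convert dst_orig into a blackhole dst: allocate from
 * ip6_dst_blackhole_ops, copy the addressing and metrics of the
 * original route, and discard anything sent through it.
 */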
971 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
972 {
973 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
974 struct dst_entry *new = NULL;
975
976 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
977 if (rt) {
978 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
979 rt6_init_peer(rt, net->ipv6.peers);
980
981 new = &rt->dst;
982
983 new->__use = 1;
984 new->input = dst_discard;
985 new->output = dst_discard;
986
987 if (dst_metrics_read_only(&ort->dst))
988 new->_metrics = ort->dst._metrics;
989 else
990 dst_copy_metrics(new, &ort->dst);
991 rt->rt6i_idev = ort->rt6i_idev;
992 if (rt->rt6i_idev)
993 in6_dev_hold(rt->rt6i_idev);
994
995 rt->rt6i_gateway = ort->rt6i_gateway;
996 rt->rt6i_flags = ort->rt6i_flags;
997 rt6_clean_expires(rt);
998 rt->rt6i_metric = 0;
999
1000 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1001 #ifdef CONFIG_IPV6_SUBTREES
1002 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1003 #endif
1004
1005 dst_free(new);
1006 }
1007
1008 dst_release(dst_orig);
1009 return new ? new : ERR_PTR(-ENOMEM);
1010 }
1011
1012 /*
1013 * Destination cache support functions
1014 */
1015
1016 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1017 {
1018 struct rt6_info *rt;
1019
1020 rt = (struct rt6_info *) dst;
1021
1022 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
1023 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
1024 if (!rt6_has_peer(rt))
1025 rt6_bind_peer(rt, 0);
1026 rt->rt6i_peer_genid = rt6_peer_genid();
1027 }
1028 return dst;
1029 }
1030 return NULL;
1031 }
1032
1033 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1034 {
1035 struct rt6_info *rt = (struct rt6_info *) dst;
1036
1037 if (rt) {
1038 if (rt->rt6i_flags & RTF_CACHE) {
1039 if (rt6_check_expired(rt)) {
1040 ip6_del_rt(rt);
1041 dst = NULL;
1042 }
1043 } else {
1044 dst_release(dst);
1045 dst = NULL;
1046 }
1047 }
1048 return dst;
1049 }
1050
1051 static void ip6_link_failure(struct sk_buff *skb)
1052 {
1053 struct rt6_info *rt;
1054
1055 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1056
1057 rt = (struct rt6_info *) skb_dst(skb);
1058 if (rt) {
1059 if (rt->rt6i_flags & RTF_CACHE)
1060 rt6_update_expires(rt, 0);
1061 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1062 rt->rt6i_node->fn_sernum = -1;
1063 }
1064 }
1065
1066 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1067 {
1068 struct rt6_info *rt6 = (struct rt6_info*)dst;
1069
1070 dst_confirm(dst);
1071 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1072 struct net *net = dev_net(dst->dev);
1073
1074 rt6->rt6i_flags |= RTF_MODIFIED;
1075 if (mtu < IPV6_MIN_MTU) {
1076 u32 features = dst_metric(dst, RTAX_FEATURES);
1077 mtu = IPV6_MIN_MTU;
1078 features |= RTAX_FEATURE_ALLFRAG;
1079 dst_metric_set(dst, RTAX_FEATURES, features);
1080 }
1081 dst_metric_set(dst, RTAX_MTU, mtu);
1082 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1083 }
1084 }
1085
1086 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1087 int oif, u32 mark)
1088 {
1089 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1090 struct dst_entry *dst;
1091 struct flowi6 fl6;
1092
1093 memset(&fl6, 0, sizeof(fl6));
1094 fl6.flowi6_oif = oif;
1095 fl6.flowi6_mark = mark;
1096 fl6.flowi6_flags = 0;
1097 fl6.daddr = iph->daddr;
1098 fl6.saddr = iph->saddr;
1099 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1100
1101 dst = ip6_route_output(net, NULL, &fl6);
1102 if (!dst->error)
1103 ip6_rt_update_pmtu(dst, ntohl(mtu));
1104 dst_release(dst);
1105 }
1106 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1107
1108 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1109 {
1110 ip6_update_pmtu(skb, sock_net(sk), mtu,
1111 sk->sk_bound_dev_if, sk->sk_mark);
1112 }
1113 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1114
1115 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1116 {
1117 struct net_device *dev = dst->dev;
1118 unsigned int mtu = dst_mtu(dst);
1119 struct net *net = dev_net(dev);
1120
1121 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1122
1123 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1124 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1125
1126 /*
1127 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1128 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1129 * IPV6_MAXPLEN is also valid and means: "any MSS,
1130 * rely only on pmtu discovery"
1131 */
1132 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1133 mtu = IPV6_MAXPLEN;
1134 return mtu;
1135 }
1136
1137 static unsigned int ip6_mtu(const struct dst_entry *dst)
1138 {
1139 struct inet6_dev *idev;
1140 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1141
1142 if (mtu)
1143 return mtu;
1144
1145 mtu = IPV6_MIN_MTU;
1146
1147 rcu_read_lock();
1148 idev = __in6_dev_get(dst->dev);
1149 if (idev)
1150 mtu = idev->cnf.mtu6;
1151 rcu_read_unlock();
1152
1153 return mtu;
1154 }
1155
1156 static struct dst_entry *icmp6_dst_gc_list;
1157 static DEFINE_SPINLOCK(icmp6_dst_lock);
1158
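/*
 * Allocate a standalone host route for an outgoing ICMPv6 packet.  It
 * is never inserted into the FIB; instead it is chained on
 * icmp6_dst_gc_list and reclaimed by icmp6_dst_gc().
 */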
1159 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1160 struct neighbour *neigh,
1161 struct flowi6 *fl6)
1162 {
1163 struct dst_entry *dst;
1164 struct rt6_info *rt;
1165 struct inet6_dev *idev = in6_dev_get(dev);
1166 struct net *net = dev_net(dev);
1167
1168 if (unlikely(!idev))
1169 return ERR_PTR(-ENODEV);
1170
1171 rt = ip6_dst_alloc(net, dev, 0, NULL);
1172 if (unlikely(!rt)) {
1173 in6_dev_put(idev);
1174 dst = ERR_PTR(-ENOMEM);
1175 goto out;
1176 }
1177
1178 if (neigh)
1179 neigh_hold(neigh);
1180 else {
1181 neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
1182 if (IS_ERR(neigh)) {
1183 in6_dev_put(idev);
1184 dst_free(&rt->dst);
1185 return ERR_CAST(neigh);
1186 }
1187 }
1188
1189 rt->dst.flags |= DST_HOST;
1190 rt->dst.output = ip6_output;
1191 rt->n = neigh;
1192 atomic_set(&rt->dst.__refcnt, 1);
1193 rt->rt6i_dst.addr = fl6->daddr;
1194 rt->rt6i_dst.plen = 128;
1195 rt->rt6i_idev = idev;
1196 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1197
1198 spin_lock_bh(&icmp6_dst_lock);
1199 rt->dst.next = icmp6_dst_gc_list;
1200 icmp6_dst_gc_list = &rt->dst;
1201 spin_unlock_bh(&icmp6_dst_lock);
1202
1203 fib6_force_start_gc(net);
1204
1205 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1206
1207 out:
1208 return dst;
1209 }
1210
1211 int icmp6_dst_gc(void)
1212 {
1213 struct dst_entry *dst, **pprev;
1214 int more = 0;
1215
1216 spin_lock_bh(&icmp6_dst_lock);
1217 pprev = &icmp6_dst_gc_list;
1218
1219 while ((dst = *pprev) != NULL) {
1220 if (!atomic_read(&dst->__refcnt)) {
1221 *pprev = dst->next;
1222 dst_free(dst);
1223 } else {
1224 pprev = &dst->next;
1225 ++more;
1226 }
1227 }
1228
1229 spin_unlock_bh(&icmp6_dst_lock);
1230
1231 return more;
1232 }
1233
1234 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1235 void *arg)
1236 {
1237 struct dst_entry *dst, **pprev;
1238
1239 spin_lock_bh(&icmp6_dst_lock);
1240 pprev = &icmp6_dst_gc_list;
1241 while ((dst = *pprev) != NULL) {
1242 struct rt6_info *rt = (struct rt6_info *) dst;
1243 if (func(rt, arg)) {
1244 *pprev = dst->next;
1245 dst_free(dst);
1246 } else {
1247 pprev = &dst->next;
1248 }
1249 }
1250 spin_unlock_bh(&icmp6_dst_lock);
1251 }
1252
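/*
 * dst_ops garbage collector: rate-limited by ip6_rt_gc_min_interval,
 * it runs fib6_run_gc() with an expire value that grows under pressure
 * and decays according to ip6_rt_gc_elasticity.
 */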
1253 static int ip6_dst_gc(struct dst_ops *ops)
1254 {
1255 unsigned long now = jiffies;
1256 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1257 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1258 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1259 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1260 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1261 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1262 int entries;
1263
1264 entries = dst_entries_get_fast(ops);
1265 if (time_after(rt_last_gc + rt_min_interval, now) &&
1266 entries <= rt_max_size)
1267 goto out;
1268
1269 net->ipv6.ip6_rt_gc_expire++;
1270 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1271 net->ipv6.ip6_rt_last_gc = now;
1272 entries = dst_entries_get_slow(ops);
1273 if (entries < ops->gc_thresh)
1274 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1275 out:
1276 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1277 return entries > rt_max_size;
1278 }
1279
1280 /* Clean host part of a prefix. Not necessary in radix tree,
1281 but results in cleaner routing tables.
1282
1283 Remove it only when everything is known to work!
1284 */
1285
1286 int ip6_dst_hoplimit(struct dst_entry *dst)
1287 {
1288 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1289 if (hoplimit == 0) {
1290 struct net_device *dev = dst->dev;
1291 struct inet6_dev *idev;
1292
1293 rcu_read_lock();
1294 idev = __in6_dev_get(dev);
1295 if (idev)
1296 hoplimit = idev->cnf.hop_limit;
1297 else
1298 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1299 rcu_read_unlock();
1300 }
1301 return hoplimit;
1302 }
1303 EXPORT_SYMBOL(ip6_dst_hoplimit);
1304
1305 /*
1306 * Add a route to the kernel FIB from a fib6_config.
1307 */
1308
1309 int ip6_route_add(struct fib6_config *cfg)
1310 {
1311 int err;
1312 struct net *net = cfg->fc_nlinfo.nl_net;
1313 struct rt6_info *rt = NULL;
1314 struct net_device *dev = NULL;
1315 struct inet6_dev *idev = NULL;
1316 struct fib6_table *table;
1317 int addr_type;
1318
1319 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1320 return -EINVAL;
1321 #ifndef CONFIG_IPV6_SUBTREES
1322 if (cfg->fc_src_len)
1323 return -EINVAL;
1324 #endif
1325 if (cfg->fc_ifindex) {
1326 err = -ENODEV;
1327 dev = dev_get_by_index(net, cfg->fc_ifindex);
1328 if (!dev)
1329 goto out;
1330 idev = in6_dev_get(dev);
1331 if (!idev)
1332 goto out;
1333 }
1334
1335 if (cfg->fc_metric == 0)
1336 cfg->fc_metric = IP6_RT_PRIO_USER;
1337
1338 err = -ENOBUFS;
1339 if (cfg->fc_nlinfo.nlh &&
1340 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1341 table = fib6_get_table(net, cfg->fc_table);
1342 if (!table) {
1343 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1344 table = fib6_new_table(net, cfg->fc_table);
1345 }
1346 } else {
1347 table = fib6_new_table(net, cfg->fc_table);
1348 }
1349
1350 if (!table)
1351 goto out;
1352
1353 rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1354
1355 if (!rt) {
1356 err = -ENOMEM;
1357 goto out;
1358 }
1359
1360 rt->dst.obsolete = -1;
1361
1362 if (cfg->fc_flags & RTF_EXPIRES)
1363 rt6_set_expires(rt, jiffies +
1364 clock_t_to_jiffies(cfg->fc_expires));
1365 else
1366 rt6_clean_expires(rt);
1367
1368 if (cfg->fc_protocol == RTPROT_UNSPEC)
1369 cfg->fc_protocol = RTPROT_BOOT;
1370 rt->rt6i_protocol = cfg->fc_protocol;
1371
1372 addr_type = ipv6_addr_type(&cfg->fc_dst);
1373
1374 if (addr_type & IPV6_ADDR_MULTICAST)
1375 rt->dst.input = ip6_mc_input;
1376 else if (cfg->fc_flags & RTF_LOCAL)
1377 rt->dst.input = ip6_input;
1378 else
1379 rt->dst.input = ip6_forward;
1380
1381 rt->dst.output = ip6_output;
1382
1383 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1384 rt->rt6i_dst.plen = cfg->fc_dst_len;
1385 if (rt->rt6i_dst.plen == 128)
1386 rt->dst.flags |= DST_HOST;
1387
1388 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1389 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1390 if (!metrics) {
1391 err = -ENOMEM;
1392 goto out;
1393 }
1394 dst_init_metrics(&rt->dst, metrics, 0);
1395 }
1396 #ifdef CONFIG_IPV6_SUBTREES
1397 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1398 rt->rt6i_src.plen = cfg->fc_src_len;
1399 #endif
1400
1401 rt->rt6i_metric = cfg->fc_metric;
1402
1403 /* We cannot add true routes via loopback here,
1404 they would result in kernel looping; promote them to reject routes
1405 */
1406 if ((cfg->fc_flags & RTF_REJECT) ||
1407 (dev && (dev->flags & IFF_LOOPBACK) &&
1408 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1409 !(cfg->fc_flags & RTF_LOCAL))) {
1410 /* hold loopback dev/idev if we haven't done so. */
1411 if (dev != net->loopback_dev) {
1412 if (dev) {
1413 dev_put(dev);
1414 in6_dev_put(idev);
1415 }
1416 dev = net->loopback_dev;
1417 dev_hold(dev);
1418 idev = in6_dev_get(dev);
1419 if (!idev) {
1420 err = -ENODEV;
1421 goto out;
1422 }
1423 }
1424 rt->dst.output = ip6_pkt_discard_out;
1425 rt->dst.input = ip6_pkt_discard;
1426 rt->dst.error = -ENETUNREACH;
1427 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1428 goto install_route;
1429 }
1430
1431 if (cfg->fc_flags & RTF_GATEWAY) {
1432 const struct in6_addr *gw_addr;
1433 int gwa_type;
1434
1435 gw_addr = &cfg->fc_gateway;
1436 rt->rt6i_gateway = *gw_addr;
1437 gwa_type = ipv6_addr_type(gw_addr);
1438
1439 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1440 struct rt6_info *grt;
1441
1442 /* IPv6 strictly inhibits using non-link-local
1443 addresses as the nexthop address.
1444 Otherwise, the router will not be able to send redirects.
1445 It is very good, but in some (rare!) circumstances
1446 (SIT, PtP, NBMA NOARP links) it is handy to allow
1447 some exceptions. --ANK
1448 */
1449 err = -EINVAL;
1450 if (!(gwa_type & IPV6_ADDR_UNICAST))
1451 goto out;
1452
1453 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1454
1455 err = -EHOSTUNREACH;
1456 if (!grt)
1457 goto out;
1458 if (dev) {
1459 if (dev != grt->dst.dev) {
1460 dst_release(&grt->dst);
1461 goto out;
1462 }
1463 } else {
1464 dev = grt->dst.dev;
1465 idev = grt->rt6i_idev;
1466 dev_hold(dev);
1467 in6_dev_hold(grt->rt6i_idev);
1468 }
1469 if (!(grt->rt6i_flags & RTF_GATEWAY))
1470 err = 0;
1471 dst_release(&grt->dst);
1472
1473 if (err)
1474 goto out;
1475 }
1476 err = -EINVAL;
1477 if (!dev || (dev->flags & IFF_LOOPBACK))
1478 goto out;
1479 }
1480
1481 err = -ENODEV;
1482 if (!dev)
1483 goto out;
1484
1485 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1486 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1487 err = -EINVAL;
1488 goto out;
1489 }
1490 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1491 rt->rt6i_prefsrc.plen = 128;
1492 } else
1493 rt->rt6i_prefsrc.plen = 0;
1494
1495 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1496 err = rt6_bind_neighbour(rt, dev);
1497 if (err)
1498 goto out;
1499 }
1500
1501 rt->rt6i_flags = cfg->fc_flags;
1502
1503 install_route:
1504 if (cfg->fc_mx) {
1505 struct nlattr *nla;
1506 int remaining;
1507
1508 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1509 int type = nla_type(nla);
1510
1511 if (type) {
1512 if (type > RTAX_MAX) {
1513 err = -EINVAL;
1514 goto out;
1515 }
1516
1517 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1518 }
1519 }
1520 }
1521
1522 rt->dst.dev = dev;
1523 rt->rt6i_idev = idev;
1524 rt->rt6i_table = table;
1525
1526 cfg->fc_nlinfo.nl_net = dev_net(dev);
1527
1528 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1529
1530 out:
1531 if (dev)
1532 dev_put(dev);
1533 if (idev)
1534 in6_dev_put(idev);
1535 if (rt)
1536 dst_free(&rt->dst);
1537 return err;
1538 }
1539
1540 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1541 {
1542 int err;
1543 struct fib6_table *table;
1544 struct net *net = dev_net(rt->dst.dev);
1545
1546 if (rt == net->ipv6.ip6_null_entry)
1547 return -ENOENT;
1548
1549 table = rt->rt6i_table;
1550 write_lock_bh(&table->tb6_lock);
1551
1552 err = fib6_del(rt, info);
1553 dst_release(&rt->dst);
1554
1555 write_unlock_bh(&table->tb6_lock);
1556
1557 return err;
1558 }
1559
1560 int ip6_del_rt(struct rt6_info *rt)
1561 {
1562 struct nl_info info = {
1563 .nl_net = dev_net(rt->dst.dev),
1564 };
1565 return __ip6_del_rt(rt, &info);
1566 }
1567
1568 static int ip6_route_del(struct fib6_config *cfg)
1569 {
1570 struct fib6_table *table;
1571 struct fib6_node *fn;
1572 struct rt6_info *rt;
1573 int err = -ESRCH;
1574
1575 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1576 if (!table)
1577 return err;
1578
1579 read_lock_bh(&table->tb6_lock);
1580
1581 fn = fib6_locate(&table->tb6_root,
1582 &cfg->fc_dst, cfg->fc_dst_len,
1583 &cfg->fc_src, cfg->fc_src_len);
1584
1585 if (fn) {
1586 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1587 if (cfg->fc_ifindex &&
1588 (!rt->dst.dev ||
1589 rt->dst.dev->ifindex != cfg->fc_ifindex))
1590 continue;
1591 if (cfg->fc_flags & RTF_GATEWAY &&
1592 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1593 continue;
1594 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1595 continue;
1596 dst_hold(&rt->dst);
1597 read_unlock_bh(&table->tb6_lock);
1598
1599 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1600 }
1601 }
1602 read_unlock_bh(&table->tb6_lock);
1603
1604 return err;
1605 }
1606
1607 /*
1608 * Handle redirects
1609 */
1610 struct ip6rd_flowi {
1611 struct flowi6 fl6;
1612 struct in6_addr gateway;
1613 };
1614
1615 static struct rt6_info *__ip6_route_redirect(struct net *net,
1616 struct fib6_table *table,
1617 struct flowi6 *fl6,
1618 int flags)
1619 {
1620 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1621 struct rt6_info *rt;
1622 struct fib6_node *fn;
1623
1624 /*
1625 * Get the "current" route for this destination and
1626 * check if the redirect has come from an appropriate router.
1627 *
1628 * RFC 2461 specifies that redirects should only be
1629 * accepted if they come from the nexthop to the target.
1630 * Due to the way the routes are chosen, this notion
1631 * is a bit fuzzy and one might need to check all possible
1632 * routes.
1633 */
1634
1635 read_lock_bh(&table->tb6_lock);
1636 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1637 restart:
1638 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1639 /*
1640 * Current route is on-link; redirect is always invalid.
1641 *
1642 * It seems the previous statement is not true: it could
1643 * be a node which sees us as on-link (e.g. proxy ndisc).
1644 * But then the router serving it might decide that we should
1645 * know the truth 8)8) --ANK (980726).
1646 */
1647 if (rt6_check_expired(rt))
1648 continue;
1649 if (!(rt->rt6i_flags & RTF_GATEWAY))
1650 continue;
1651 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1652 continue;
1653 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1654 continue;
1655 break;
1656 }
1657
1658 if (!rt)
1659 rt = net->ipv6.ip6_null_entry;
1660 BACKTRACK(net, &fl6->saddr);
1661 out:
1662 dst_hold(&rt->dst);
1663
1664 read_unlock_bh(&table->tb6_lock);
1665
1666 return rt;
1667 };
1668
1669 static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1670 const struct in6_addr *src,
1671 const struct in6_addr *gateway,
1672 struct net_device *dev)
1673 {
1674 int flags = RT6_LOOKUP_F_HAS_SADDR;
1675 struct net *net = dev_net(dev);
1676 struct ip6rd_flowi rdfl = {
1677 .fl6 = {
1678 .flowi6_oif = dev->ifindex,
1679 .daddr = *dest,
1680 .saddr = *src,
1681 },
1682 };
1683
1684 rdfl.gateway = *gateway;
1685
1686 if (rt6_need_strict(dest))
1687 flags |= RT6_LOOKUP_F_IFACE;
1688
1689 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
1690 flags, __ip6_route_redirect);
1691 }
1692
1693 void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1694 const struct in6_addr *saddr,
1695 struct neighbour *neigh, u8 *lladdr, int on_link)
1696 {
1697 struct rt6_info *rt, *nrt = NULL;
1698 struct netevent_redirect netevent;
1699 struct net *net = dev_net(neigh->dev);
1700 struct neighbour *old_neigh;
1701
1702 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1703
1704 if (rt == net->ipv6.ip6_null_entry) {
1705 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1706 goto out;
1707 }
1708
1709 /*
1710 * We have finally decided to accept it.
1711 */
1712
1713 neigh_update(neigh, lladdr, NUD_STALE,
1714 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1715 NEIGH_UPDATE_F_OVERRIDE|
1716 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1717 NEIGH_UPDATE_F_ISROUTER))
1718 );
1719
1720 /*
1721 * Redirect received -> path was valid.
1722 * Look, redirects are sent only in response to data packets,
1723 * so that this nexthop apparently is reachable. --ANK
1724 */
1725 dst_confirm(&rt->dst);
1726
1727 /* Duplicate redirect: silently ignore. */
1728 old_neigh = rt->n;
1729 if (neigh == old_neigh)
1730 goto out;
1731
1732 nrt = ip6_rt_copy(rt, dest);
1733 if (!nrt)
1734 goto out;
1735
1736 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1737 if (on_link)
1738 nrt->rt6i_flags &= ~RTF_GATEWAY;
1739
1740 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1741 nrt->n = neigh_clone(neigh);
1742
1743 if (ip6_ins_rt(nrt))
1744 goto out;
1745
1746 netevent.old = &rt->dst;
1747 netevent.old_neigh = old_neigh;
1748 netevent.new = &nrt->dst;
1749 netevent.new_neigh = neigh;
1750 netevent.daddr = dest;
1751 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1752
1753 if (rt->rt6i_flags & RTF_CACHE) {
1754 ip6_del_rt(rt);
1755 return;
1756 }
1757
1758 out:
1759 dst_release(&rt->dst);
1760 }
1761
1762 /*
1763 * Misc support functions
1764 */
1765
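/*
 * Duplicate @ort as a host route to @dest, copying its metrics, device
 * and flags; expiry is inherited (via rt6_set_from) only from addrconf
 * default routes.
 */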
1766 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1767 const struct in6_addr *dest)
1768 {
1769 struct net *net = dev_net(ort->dst.dev);
1770 struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1771 ort->rt6i_table);
1772
1773 if (rt) {
1774 rt->dst.input = ort->dst.input;
1775 rt->dst.output = ort->dst.output;
1776 rt->dst.flags |= DST_HOST;
1777
1778 rt->rt6i_dst.addr = *dest;
1779 rt->rt6i_dst.plen = 128;
1780 dst_copy_metrics(&rt->dst, &ort->dst);
1781 rt->dst.error = ort->dst.error;
1782 rt->rt6i_idev = ort->rt6i_idev;
1783 if (rt->rt6i_idev)
1784 in6_dev_hold(rt->rt6i_idev);
1785 rt->dst.lastuse = jiffies;
1786
1787 rt->rt6i_gateway = ort->rt6i_gateway;
1788 rt->rt6i_flags = ort->rt6i_flags;
1789 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1790 (RTF_DEFAULT | RTF_ADDRCONF))
1791 rt6_set_from(rt, ort);
1792 else
1793 rt6_clean_expires(rt);
1794 rt->rt6i_metric = 0;
1795
1796 #ifdef CONFIG_IPV6_SUBTREES
1797 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1798 #endif
1799 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1800 rt->rt6i_table = ort->rt6i_table;
1801 }
1802 return rt;
1803 }
1804
1805 #ifdef CONFIG_IPV6_ROUTE_INFO
1806 static struct rt6_info *rt6_get_route_info(struct net *net,
1807 const struct in6_addr *prefix, int prefixlen,
1808 const struct in6_addr *gwaddr, int ifindex)
1809 {
1810 struct fib6_node *fn;
1811 struct rt6_info *rt = NULL;
1812 struct fib6_table *table;
1813
1814 table = fib6_get_table(net, RT6_TABLE_INFO);
1815 if (!table)
1816 return NULL;
1817
1818 write_lock_bh(&table->tb6_lock);
1819 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1820 if (!fn)
1821 goto out;
1822
1823 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1824 if (rt->dst.dev->ifindex != ifindex)
1825 continue;
1826 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1827 continue;
1828 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1829 continue;
1830 dst_hold(&rt->dst);
1831 break;
1832 }
1833 out:
1834 write_unlock_bh(&table->tb6_lock);
1835 return rt;
1836 }
1837
1838 static struct rt6_info *rt6_add_route_info(struct net *net,
1839 const struct in6_addr *prefix, int prefixlen,
1840 const struct in6_addr *gwaddr, int ifindex,
1841 unsigned int pref)
1842 {
1843 struct fib6_config cfg = {
1844 .fc_table = RT6_TABLE_INFO,
1845 .fc_metric = IP6_RT_PRIO_USER,
1846 .fc_ifindex = ifindex,
1847 .fc_dst_len = prefixlen,
1848 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1849 RTF_UP | RTF_PREF(pref),
1850 .fc_nlinfo.pid = 0,
1851 .fc_nlinfo.nlh = NULL,
1852 .fc_nlinfo.nl_net = net,
1853 };
1854
1855 cfg.fc_dst = *prefix;
1856 cfg.fc_gateway = *gwaddr;
1857
1858 /* We should treat it as a default route if prefix length is 0. */
1859 if (!prefixlen)
1860 cfg.fc_flags |= RTF_DEFAULT;
1861
1862 ip6_route_add(&cfg);
1863
1864 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1865 }
1866 #endif
1867
1868 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1869 {
1870 struct rt6_info *rt;
1871 struct fib6_table *table;
1872
1873 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1874 if (!table)
1875 return NULL;
1876
1877 write_lock_bh(&table->tb6_lock);
1878 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1879 if (dev == rt->dst.dev &&
1880 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1881 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1882 break;
1883 }
1884 if (rt)
1885 dst_hold(&rt->dst);
1886 write_unlock_bh(&table->tb6_lock);
1887 return rt;
1888 }
1889
1890 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1891 struct net_device *dev,
1892 unsigned int pref)
1893 {
1894 struct fib6_config cfg = {
1895 .fc_table = RT6_TABLE_DFLT,
1896 .fc_metric = IP6_RT_PRIO_USER,
1897 .fc_ifindex = dev->ifindex,
1898 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1899 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1900 .fc_nlinfo.pid = 0,
1901 .fc_nlinfo.nlh = NULL,
1902 .fc_nlinfo.nl_net = dev_net(dev),
1903 };
1904
1905 cfg.fc_gateway = *gwaddr;
1906
1907 ip6_route_add(&cfg);
1908
1909 return rt6_get_dflt_router(gwaddr, dev);
1910 }
1911
1912 void rt6_purge_dflt_routers(struct net *net)
1913 {
1914 struct rt6_info *rt;
1915 struct fib6_table *table;
1916
1917 /* NOTE: Keep consistent with rt6_get_dflt_router */
1918 table = fib6_get_table(net, RT6_TABLE_DFLT);
1919 if (!table)
1920 return;
1921
1922 restart:
1923 read_lock_bh(&table->tb6_lock);
1924 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1925 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1926 dst_hold(&rt->dst);
1927 read_unlock_bh(&table->tb6_lock);
1928 ip6_del_rt(rt);
1929 goto restart;
1930 }
1931 }
1932 read_unlock_bh(&table->tb6_lock);
1933 }
1934
1935 static void rtmsg_to_fib6_config(struct net *net,
1936 struct in6_rtmsg *rtmsg,
1937 struct fib6_config *cfg)
1938 {
1939 memset(cfg, 0, sizeof(*cfg));
1940
1941 cfg->fc_table = RT6_TABLE_MAIN;
1942 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1943 cfg->fc_metric = rtmsg->rtmsg_metric;
1944 cfg->fc_expires = rtmsg->rtmsg_info;
1945 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1946 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1947 cfg->fc_flags = rtmsg->rtmsg_flags;
1948
1949 cfg->fc_nlinfo.nl_net = net;
1950
1951 cfg->fc_dst = rtmsg->rtmsg_dst;
1952 cfg->fc_src = rtmsg->rtmsg_src;
1953 cfg->fc_gateway = rtmsg->rtmsg_gateway;
1954 }
1955
1956 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1957 {
1958 struct fib6_config cfg;
1959 struct in6_rtmsg rtmsg;
1960 int err;
1961
1962 switch(cmd) {
1963 case SIOCADDRT: /* Add a route */
1964 case SIOCDELRT: /* Delete a route */
1965 if (!capable(CAP_NET_ADMIN))
1966 return -EPERM;
1967 err = copy_from_user(&rtmsg, arg,
1968 sizeof(struct in6_rtmsg));
1969 if (err)
1970 return -EFAULT;
1971
1972 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1973
1974 rtnl_lock();
1975 switch (cmd) {
1976 case SIOCADDRT:
1977 err = ip6_route_add(&cfg);
1978 break;
1979 case SIOCDELRT:
1980 err = ip6_route_del(&cfg);
1981 break;
1982 default:
1983 err = -EINVAL;
1984 }
1985 rtnl_unlock();
1986
1987 return err;
1988 }
1989
1990 return -EINVAL;
1991 }
1992
1993 /*
1994 * Drop the packet on the floor
1995 */
1996
1997 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1998 {
1999 int type;
2000 struct dst_entry *dst = skb_dst(skb);
2001 switch (ipstats_mib_noroutes) {
2002 case IPSTATS_MIB_INNOROUTES:
2003 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2004 if (type == IPV6_ADDR_ANY) {
2005 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2006 IPSTATS_MIB_INADDRERRORS);
2007 break;
2008 }
2009 /* FALLTHROUGH */
2010 case IPSTATS_MIB_OUTNOROUTES:
2011 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2012 ipstats_mib_noroutes);
2013 break;
2014 }
2015 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2016 kfree_skb(skb);
2017 return 0;
2018 }
2019
2020 static int ip6_pkt_discard(struct sk_buff *skb)
2021 {
2022 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2023 }
2024
2025 static int ip6_pkt_discard_out(struct sk_buff *skb)
2026 {
2027 skb->dev = skb_dst(skb)->dev;
2028 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2029 }
2030
2031 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2032
2033 static int ip6_pkt_prohibit(struct sk_buff *skb)
2034 {
2035 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2036 }
2037
2038 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2039 {
2040 skb->dev = skb_dst(skb)->dev;
2041 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2042 }
2043
2044 #endif
2045
2046 /*
2047 * Allocate a dst for local (unicast / anycast) address.
2048 */
2049
2050 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2051 const struct in6_addr *addr,
2052 bool anycast)
2053 {
2054 struct net *net = dev_net(idev->dev);
2055 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2056 int err;
2057
2058 if (!rt) {
2059 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2060 return ERR_PTR(-ENOMEM);
2061 }
2062
2063 in6_dev_hold(idev);
2064
2065 rt->dst.flags |= DST_HOST;
2066 rt->dst.input = ip6_input;
2067 rt->dst.output = ip6_output;
2068 rt->rt6i_idev = idev;
2069 rt->dst.obsolete = -1;
2070
2071 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2072 if (anycast)
2073 rt->rt6i_flags |= RTF_ANYCAST;
2074 else
2075 rt->rt6i_flags |= RTF_LOCAL;
2076 err = rt6_bind_neighbour(rt, rt->dst.dev);
2077 if (err) {
2078 dst_free(&rt->dst);
2079 return ERR_PTR(err);
2080 }
2081
2082 rt->rt6i_dst.addr = *addr;
2083 rt->rt6i_dst.plen = 128;
2084 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2085
2086 atomic_set(&rt->dst.__refcnt, 1);
2087
2088 return rt;
2089 }
2090
2091 int ip6_route_get_saddr(struct net *net,
2092 struct rt6_info *rt,
2093 const struct in6_addr *daddr,
2094 unsigned int prefs,
2095 struct in6_addr *saddr)
2096 {
2097 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2098 int err = 0;
2099 if (rt->rt6i_prefsrc.plen)
2100 *saddr = rt->rt6i_prefsrc.addr;
2101 else
2102 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2103 daddr, prefs, saddr);
2104 return err;
2105 }
2106
2107 /* remove deleted ip from prefsrc entries */
2108 struct arg_dev_net_ip {
2109 struct net_device *dev;
2110 struct net *net;
2111 struct in6_addr *addr;
2112 };
2113
2114 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2115 {
2116 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2117 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2118 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2119
2120 if (((void *)rt->dst.dev == dev || !dev) &&
2121 rt != net->ipv6.ip6_null_entry &&
2122 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2123 /* remove prefsrc entry */
2124 rt->rt6i_prefsrc.plen = 0;
2125 }
2126 return 0;
2127 }
2128
2129 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2130 {
2131 struct net *net = dev_net(ifp->idev->dev);
2132 struct arg_dev_net_ip adni = {
2133 .dev = ifp->idev->dev,
2134 .net = net,
2135 .addr = &ifp->addr,
2136 };
2137 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2138 }
2139
2140 struct arg_dev_net {
2141 struct net_device *dev;
2142 struct net *net;
2143 };
2144
2145 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2146 {
2147 const struct arg_dev_net *adn = arg;
2148 const struct net_device *dev = adn->dev;
2149
2150 if ((rt->dst.dev == dev || !dev) &&
2151 rt != adn->net->ipv6.ip6_null_entry)
2152 return -1;
2153
2154 return 0;
2155 }
2156
2157 void rt6_ifdown(struct net *net, struct net_device *dev)
2158 {
2159 struct arg_dev_net adn = {
2160 .dev = dev,
2161 .net = net,
2162 };
2163
2164 fib6_clean_all(net, fib6_ifdown, 0, &adn);
2165 icmp6_clean_all(fib6_ifdown, &adn);
2166 }
2167
2168 struct rt6_mtu_change_arg {
2169 struct net_device *dev;
2170 unsigned int mtu;
2171 };
2172
2173 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2174 {
2175 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2176 struct inet6_dev *idev;
2177
2178 /* In IPv6, PMTU discovery is not optional,
2179 so the RTAX_MTU lock cannot disable it.
2180 We still use this lock to block changes
2181 caused by addrconf/ndisc.
2182 */
2183
2184 idev = __in6_dev_get(arg->dev);
2185 if (!idev)
2186 return 0;
2187
2188 /* For an administrative MTU increase there is no way to discover
2189 an IPv6 PMTU increase, so the PMTU should be updated here.
2190 Since RFC 1981 doesn't cover administrative MTU increases,
2191 updating the PMTU on such an increase is a MUST (e.g. jumbo frames).
2192 */
2193 /*
2194 If the new MTU is less than the route PMTU, the new MTU will be the
2195 lowest MTU in the path; update the route PMTU to reflect the
2196 decrease. If the new MTU is greater than the route PMTU, and the
2197 old MTU was the lowest MTU in the path, update the route PMTU
2198 to reflect the increase. In that case, if another node's MTU is
2199 now the lowest in the path, a Packet Too Big message will trigger
2200 PMTU discovery again.
2201 */
2202 if (rt->dst.dev == arg->dev &&
2203 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2204 (dst_mtu(&rt->dst) >= arg->mtu ||
2205 (dst_mtu(&rt->dst) < arg->mtu &&
2206 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2207 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2208 }
2209 return 0;
2210 }
2211
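/* Propagate a device MTU change to the PMTU of all routes using dev. */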
2212 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2213 {
2214 struct rt6_mtu_change_arg arg = {
2215 .dev = dev,
2216 .mtu = mtu,
2217 };
2218
2219 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2220 }
2221
2222 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2223 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
2224 [RTA_OIF] = { .type = NLA_U32 },
2225 [RTA_IIF] = { .type = NLA_U32 },
2226 [RTA_PRIORITY] = { .type = NLA_U32 },
2227 [RTA_METRICS] = { .type = NLA_NESTED },
2228 };
2229
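/* Translate an RTM_NEWROUTE/RTM_DELROUTE netlink request into a
 * struct fib6_config for ip6_route_add()/ip6_route_del().
 */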
2230 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2231 struct fib6_config *cfg)
2232 {
2233 struct rtmsg *rtm;
2234 struct nlattr *tb[RTA_MAX+1];
2235 int err;
2236
2237 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2238 if (err < 0)
2239 goto errout;
2240
2241 err = -EINVAL;
2242 rtm = nlmsg_data(nlh);
2243 memset(cfg, 0, sizeof(*cfg));
2244
2245 cfg->fc_table = rtm->rtm_table;
2246 cfg->fc_dst_len = rtm->rtm_dst_len;
2247 cfg->fc_src_len = rtm->rtm_src_len;
2248 cfg->fc_flags = RTF_UP;
2249 cfg->fc_protocol = rtm->rtm_protocol;
2250
2251 if (rtm->rtm_type == RTN_UNREACHABLE)
2252 cfg->fc_flags |= RTF_REJECT;
2253
2254 if (rtm->rtm_type == RTN_LOCAL)
2255 cfg->fc_flags |= RTF_LOCAL;
2256
2257 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2258 cfg->fc_nlinfo.nlh = nlh;
2259 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2260
2261 if (tb[RTA_GATEWAY]) {
2262 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2263 cfg->fc_flags |= RTF_GATEWAY;
2264 }
2265
2266 if (tb[RTA_DST]) {
2267 int plen = (rtm->rtm_dst_len + 7) >> 3;
2268
2269 if (nla_len(tb[RTA_DST]) < plen)
2270 goto errout;
2271
2272 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2273 }
2274
2275 if (tb[RTA_SRC]) {
2276 int plen = (rtm->rtm_src_len + 7) >> 3;
2277
2278 if (nla_len(tb[RTA_SRC]) < plen)
2279 goto errout;
2280
2281 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2282 }
2283
2284 if (tb[RTA_PREFSRC])
2285 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2286
2287 if (tb[RTA_OIF])
2288 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2289
2290 if (tb[RTA_PRIORITY])
2291 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2292
2293 if (tb[RTA_METRICS]) {
2294 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2295 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2296 }
2297
2298 if (tb[RTA_TABLE])
2299 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2300
2301 err = 0;
2302 errout:
2303 return err;
2304 }
2305
2306 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2307 {
2308 struct fib6_config cfg;
2309 int err;
2310
2311 err = rtm_to_fib6_config(skb, nlh, &cfg);
2312 if (err < 0)
2313 return err;
2314
2315 return ip6_route_del(&cfg);
2316 }
2317
2318 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2319 {
2320 struct fib6_config cfg;
2321 int err;
2322
2323 err = rtm_to_fib6_config(skb, nlh, &cfg);
2324 if (err < 0)
2325 return err;
2326
2327 return ip6_route_add(&cfg);
2328 }
2329
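/* Worst-case netlink message size for one route; must stay in sync
 * with the attributes emitted by rt6_fill_node().
 */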
2330 static inline size_t rt6_nlmsg_size(void)
2331 {
2332 return NLMSG_ALIGN(sizeof(struct rtmsg))
2333 + nla_total_size(16) /* RTA_SRC */
2334 + nla_total_size(16) /* RTA_DST */
2335 + nla_total_size(16) /* RTA_GATEWAY */
2336 + nla_total_size(16) /* RTA_PREFSRC */
2337 + nla_total_size(4) /* RTA_TABLE */
2338 + nla_total_size(4) /* RTA_IIF */
2339 + nla_total_size(4) /* RTA_OIF */
2340 + nla_total_size(4) /* RTA_PRIORITY */
2341 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2342 + nla_total_size(sizeof(struct rta_cacheinfo));
2343 }
2344
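/* Build one RTM_NEWROUTE message describing rt in skb.  When prefix is
 * set, non-prefix routes are skipped with a return value of 1; if the
 * message does not fit, -EMSGSIZE is returned.
 */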
2345 static int rt6_fill_node(struct net *net,
2346 struct sk_buff *skb, struct rt6_info *rt,
2347 struct in6_addr *dst, struct in6_addr *src,
2348 int iif, int type, u32 pid, u32 seq,
2349 int prefix, int nowait, unsigned int flags)
2350 {
2351 struct rtmsg *rtm;
2352 struct nlmsghdr *nlh;
2353 long expires;
2354 u32 table;
2355 struct neighbour *n;
2356
2357 if (prefix) { /* user wants prefix routes only */
2358 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2359 /* success since this is not a prefix route */
2360 return 1;
2361 }
2362 }
2363
2364 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2365 if (!nlh)
2366 return -EMSGSIZE;
2367
2368 rtm = nlmsg_data(nlh);
2369 rtm->rtm_family = AF_INET6;
2370 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2371 rtm->rtm_src_len = rt->rt6i_src.plen;
2372 rtm->rtm_tos = 0;
2373 if (rt->rt6i_table)
2374 table = rt->rt6i_table->tb6_id;
2375 else
2376 table = RT6_TABLE_UNSPEC;
2377 rtm->rtm_table = table;
2378 if (nla_put_u32(skb, RTA_TABLE, table))
2379 goto nla_put_failure;
2380 if (rt->rt6i_flags & RTF_REJECT)
2381 rtm->rtm_type = RTN_UNREACHABLE;
2382 else if (rt->rt6i_flags & RTF_LOCAL)
2383 rtm->rtm_type = RTN_LOCAL;
2384 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2385 rtm->rtm_type = RTN_LOCAL;
2386 else
2387 rtm->rtm_type = RTN_UNICAST;
2388 rtm->rtm_flags = 0;
2389 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2390 rtm->rtm_protocol = rt->rt6i_protocol;
2391 if (rt->rt6i_flags & RTF_DYNAMIC)
2392 rtm->rtm_protocol = RTPROT_REDIRECT;
2393 else if (rt->rt6i_flags & RTF_ADDRCONF)
2394 rtm->rtm_protocol = RTPROT_KERNEL;
2395 else if (rt->rt6i_flags & RTF_DEFAULT)
2396 rtm->rtm_protocol = RTPROT_RA;
2397
2398 if (rt->rt6i_flags & RTF_CACHE)
2399 rtm->rtm_flags |= RTM_F_CLONED;
2400
2401 if (dst) {
2402 if (nla_put(skb, RTA_DST, 16, dst))
2403 goto nla_put_failure;
2404 rtm->rtm_dst_len = 128;
2405 } else if (rtm->rtm_dst_len)
2406 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2407 goto nla_put_failure;
2408 #ifdef CONFIG_IPV6_SUBTREES
2409 if (src) {
2410 if (nla_put(skb, RTA_SRC, 16, src))
2411 goto nla_put_failure;
2412 rtm->rtm_src_len = 128;
2413 } else if (rtm->rtm_src_len &&
2414 nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2415 goto nla_put_failure;
2416 #endif
2417 if (iif) {
2418 #ifdef CONFIG_IPV6_MROUTE
2419 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2420 int err = ip6mr_get_route(net, skb, rtm, nowait);
2421 if (err <= 0) {
2422 if (!nowait) {
2423 if (err == 0)
2424 return 0;
2425 goto nla_put_failure;
2426 } else {
2427 if (err == -EMSGSIZE)
2428 goto nla_put_failure;
2429 }
2430 }
2431 } else
2432 #endif
2433 if (nla_put_u32(skb, RTA_IIF, iif))
2434 goto nla_put_failure;
2435 } else if (dst) {
2436 struct in6_addr saddr_buf;
2437 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2438 nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2439 goto nla_put_failure;
2440 }
2441
2442 if (rt->rt6i_prefsrc.plen) {
2443 struct in6_addr saddr_buf;
2444 saddr_buf = rt->rt6i_prefsrc.addr;
2445 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2446 goto nla_put_failure;
2447 }
2448
2449 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2450 goto nla_put_failure;
2451
2452 rcu_read_lock();
2453 n = rt->n;
2454 if (n) {
2455 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
2456 rcu_read_unlock();
2457 goto nla_put_failure;
2458 }
2459 }
2460 rcu_read_unlock();
2461
2462 if (rt->dst.dev &&
2463 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2464 goto nla_put_failure;
2465 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2466 goto nla_put_failure;
2467 if (!(rt->rt6i_flags & RTF_EXPIRES))
2468 expires = 0;
2469 else if (rt->dst.expires - jiffies < INT_MAX)
2470 expires = rt->dst.expires - jiffies;
2471 else
2472 expires = INT_MAX;
2473
2474 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2475 expires, rt->dst.error) < 0)
2476 goto nla_put_failure;
2477
2478 return nlmsg_end(skb, nlh);
2479
2480 nla_put_failure:
2481 nlmsg_cancel(skb, nlh);
2482 return -EMSGSIZE;
2483 }
2484
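/* fib6 dump callback: emit one route per call, honouring the
 * RTM_F_PREFIX filter from the request header when one is present.
 */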
2485 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2486 {
2487 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2488 int prefix;
2489
2490 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2491 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2492 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2493 } else
2494 prefix = 0;
2495
2496 return rt6_fill_node(arg->net,
2497 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2498 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2499 prefix, 0, NLM_F_MULTI);
2500 }
2501
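/* RTM_GETROUTE handler: resolve the route for the requested source,
 * destination and interface, then report it back to the sender.
 */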
2502 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2503 {
2504 struct net *net = sock_net(in_skb->sk);
2505 struct nlattr *tb[RTA_MAX+1];
2506 struct rt6_info *rt;
2507 struct sk_buff *skb;
2508 struct rtmsg *rtm;
2509 struct flowi6 fl6;
2510 int err, iif = 0, oif = 0;
2511
2512 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2513 if (err < 0)
2514 goto errout;
2515
2516 err = -EINVAL;
2517 memset(&fl6, 0, sizeof(fl6));
2518
2519 if (tb[RTA_SRC]) {
2520 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2521 goto errout;
2522
2523 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2524 }
2525
2526 if (tb[RTA_DST]) {
2527 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2528 goto errout;
2529
2530 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2531 }
2532
2533 if (tb[RTA_IIF])
2534 iif = nla_get_u32(tb[RTA_IIF]);
2535
2536 if (tb[RTA_OIF])
2537 oif = nla_get_u32(tb[RTA_OIF]);
2538
2539 if (iif) {
2540 struct net_device *dev;
2541 int flags = 0;
2542
2543 dev = __dev_get_by_index(net, iif);
2544 if (!dev) {
2545 err = -ENODEV;
2546 goto errout;
2547 }
2548
2549 fl6.flowi6_iif = iif;
2550
2551 if (!ipv6_addr_any(&fl6.saddr))
2552 flags |= RT6_LOOKUP_F_HAS_SADDR;
2553
2554 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2555 flags);
2556 } else {
2557 fl6.flowi6_oif = oif;
2558
2559 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2560 }
2561
2562 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2563 if (!skb) {
2564 dst_release(&rt->dst);
2565 err = -ENOBUFS;
2566 goto errout;
2567 }
2568
2569 /* Reserve room for dummy headers; this skb can pass
2570 through a good chunk of the routing engine.
2571 */
2572 skb_reset_mac_header(skb);
2573 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2574
2575 skb_dst_set(skb, &rt->dst);
2576
2577 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2578 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2579 nlh->nlmsg_seq, 0, 0, 0);
2580 if (err < 0) {
2581 kfree_skb(skb);
2582 goto errout;
2583 }
2584
2585 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2586 errout:
2587 return err;
2588 }
2589
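/* Notify RTNLGRP_IPV6_ROUTE listeners that a route was added or deleted. */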
2590 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2591 {
2592 struct sk_buff *skb;
2593 struct net *net = info->nl_net;
2594 u32 seq;
2595 int err;
2596
2597 err = -ENOBUFS;
2598 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2599
2600 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2601 if (!skb)
2602 goto errout;
2603
2604 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2605 event, info->pid, seq, 0, 0, 0);
2606 if (err < 0) {
2607 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2608 WARN_ON(err == -EMSGSIZE);
2609 kfree_skb(skb);
2610 goto errout;
2611 }
2612 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2613 info->nlh, gfp_any());
2614 return;
2615 errout:
2616 if (err < 0)
2617 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2618 }
2619
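/* Netdevice notifier: once the loopback device registers, point the
 * namespace's null (and prohibit/blackhole) template routes at it.
 */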
2620 static int ip6_route_dev_notify(struct notifier_block *this,
2621 unsigned long event, void *data)
2622 {
2623 struct net_device *dev = (struct net_device *)data;
2624 struct net *net = dev_net(dev);
2625
2626 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2627 net->ipv6.ip6_null_entry->dst.dev = dev;
2628 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2629 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2630 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2631 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2632 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2633 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2634 #endif
2635 }
2636
2637 return NOTIFY_OK;
2638 }
2639
2640 /*
2641 * /proc
2642 */
2643
2644 #ifdef CONFIG_PROC_FS
2645
2646 struct rt6_proc_arg
2647 {
2648 char *buffer;
2649 int offset;
2650 int length;
2651 int skip;
2652 int len;
2653 };
2654
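/* Emit one /proc/net/ipv6_route line for rt: destination, source,
 * gateway, metric, refcount, use count, flags and device name.
 */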
2655 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2656 {
2657 struct seq_file *m = p_arg;
2658 struct neighbour *n;
2659
2660 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2661
2662 #ifdef CONFIG_IPV6_SUBTREES
2663 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2664 #else
2665 seq_puts(m, "00000000000000000000000000000000 00 ");
2666 #endif
2667 rcu_read_lock();
2668 n = rt->n;
2669 if (n) {
2670 seq_printf(m, "%pi6", n->primary_key);
2671 } else {
2672 seq_puts(m, "00000000000000000000000000000000");
2673 }
2674 rcu_read_unlock();
2675 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2676 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2677 rt->dst.__use, rt->rt6i_flags,
2678 rt->dst.dev ? rt->dst.dev->name : "");
2679 return 0;
2680 }
2681
2682 static int ipv6_route_show(struct seq_file *m, void *v)
2683 {
2684 struct net *net = (struct net *)m->private;
2685 fib6_clean_all_ro(net, rt6_info_route, 0, m);
2686 return 0;
2687 }
2688
2689 static int ipv6_route_open(struct inode *inode, struct file *file)
2690 {
2691 return single_open_net(inode, file, ipv6_route_show);
2692 }
2693
2694 static const struct file_operations ipv6_route_proc_fops = {
2695 .owner = THIS_MODULE,
2696 .open = ipv6_route_open,
2697 .read = seq_read,
2698 .llseek = seq_lseek,
2699 .release = single_release_net,
2700 };
2701
2702 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2703 {
2704 struct net *net = (struct net *)seq->private;
2705 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2706 net->ipv6.rt6_stats->fib_nodes,
2707 net->ipv6.rt6_stats->fib_route_nodes,
2708 net->ipv6.rt6_stats->fib_rt_alloc,
2709 net->ipv6.rt6_stats->fib_rt_entries,
2710 net->ipv6.rt6_stats->fib_rt_cache,
2711 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2712 net->ipv6.rt6_stats->fib_discarded_routes);
2713
2714 return 0;
2715 }
2716
2717 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2718 {
2719 return single_open_net(inode, file, rt6_stats_seq_show);
2720 }
2721
2722 static const struct file_operations rt6_stats_seq_fops = {
2723 .owner = THIS_MODULE,
2724 .open = rt6_stats_seq_open,
2725 .read = seq_read,
2726 .llseek = seq_lseek,
2727 .release = single_release_net,
2728 };
2729 #endif /* CONFIG_PROC_FS */
2730
2731 #ifdef CONFIG_SYSCTL
2732
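/* Handler for the write-only "flush" route sysctl: writing a value
 * kicks off a garbage collection pass over the FIB via fib6_run_gc().
 */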
2733 static
2734 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2735 void __user *buffer, size_t *lenp, loff_t *ppos)
2736 {
2737 struct net *net;
2738 int delay;
2739 if (!write)
2740 return -EINVAL;
2741
2742 net = (struct net *)ctl->extra1;
2743 delay = net->ipv6.sysctl.flush_delay;
2744 proc_dointvec(ctl, write, buffer, lenp, ppos);
2745 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2746 return 0;
2747 }
2748
2749 ctl_table ipv6_route_table_template[] = {
2750 {
2751 .procname = "flush",
2752 .data = &init_net.ipv6.sysctl.flush_delay,
2753 .maxlen = sizeof(int),
2754 .mode = 0200,
2755 .proc_handler = ipv6_sysctl_rtcache_flush
2756 },
2757 {
2758 .procname = "gc_thresh",
2759 .data = &ip6_dst_ops_template.gc_thresh,
2760 .maxlen = sizeof(int),
2761 .mode = 0644,
2762 .proc_handler = proc_dointvec,
2763 },
2764 {
2765 .procname = "max_size",
2766 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2767 .maxlen = sizeof(int),
2768 .mode = 0644,
2769 .proc_handler = proc_dointvec,
2770 },
2771 {
2772 .procname = "gc_min_interval",
2773 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2774 .maxlen = sizeof(int),
2775 .mode = 0644,
2776 .proc_handler = proc_dointvec_jiffies,
2777 },
2778 {
2779 .procname = "gc_timeout",
2780 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2781 .maxlen = sizeof(int),
2782 .mode = 0644,
2783 .proc_handler = proc_dointvec_jiffies,
2784 },
2785 {
2786 .procname = "gc_interval",
2787 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2788 .maxlen = sizeof(int),
2789 .mode = 0644,
2790 .proc_handler = proc_dointvec_jiffies,
2791 },
2792 {
2793 .procname = "gc_elasticity",
2794 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2795 .maxlen = sizeof(int),
2796 .mode = 0644,
2797 .proc_handler = proc_dointvec,
2798 },
2799 {
2800 .procname = "mtu_expires",
2801 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2802 .maxlen = sizeof(int),
2803 .mode = 0644,
2804 .proc_handler = proc_dointvec_jiffies,
2805 },
2806 {
2807 .procname = "min_adv_mss",
2808 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2809 .maxlen = sizeof(int),
2810 .mode = 0644,
2811 .proc_handler = proc_dointvec,
2812 },
2813 {
2814 .procname = "gc_min_interval_ms",
2815 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2816 .maxlen = sizeof(int),
2817 .mode = 0644,
2818 .proc_handler = proc_dointvec_ms_jiffies,
2819 },
2820 { }
2821 };
2822
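/* Clone the sysctl template for a new namespace and point each entry's
 * data at the per-net copy of the corresponding value.
 */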
2823 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2824 {
2825 struct ctl_table *table;
2826
2827 table = kmemdup(ipv6_route_table_template,
2828 sizeof(ipv6_route_table_template),
2829 GFP_KERNEL);
2830
2831 if (table) {
2832 table[0].data = &net->ipv6.sysctl.flush_delay;
2833 table[0].extra1 = net;
2834 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2835 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2836 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2837 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2838 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2839 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2840 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2841 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2842 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2843 }
2844
2845 return table;
2846 }
2847 #endif
2848
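/* Per-namespace init: copy the dst ops template, set up dst entry
 * counters, allocate the null (and, with multiple tables, prohibit and
 * blackhole) template routes and install the default sysctl values.
 */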
2849 static int __net_init ip6_route_net_init(struct net *net)
2850 {
2851 int ret = -ENOMEM;
2852
2853 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2854 sizeof(net->ipv6.ip6_dst_ops));
2855
2856 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2857 goto out_ip6_dst_ops;
2858
2859 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2860 sizeof(*net->ipv6.ip6_null_entry),
2861 GFP_KERNEL);
2862 if (!net->ipv6.ip6_null_entry)
2863 goto out_ip6_dst_entries;
2864 net->ipv6.ip6_null_entry->dst.path =
2865 (struct dst_entry *)net->ipv6.ip6_null_entry;
2866 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2867 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2868 ip6_template_metrics, true);
2869
2870 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2871 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2872 sizeof(*net->ipv6.ip6_prohibit_entry),
2873 GFP_KERNEL);
2874 if (!net->ipv6.ip6_prohibit_entry)
2875 goto out_ip6_null_entry;
2876 net->ipv6.ip6_prohibit_entry->dst.path =
2877 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2878 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2879 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2880 ip6_template_metrics, true);
2881
2882 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2883 sizeof(*net->ipv6.ip6_blk_hole_entry),
2884 GFP_KERNEL);
2885 if (!net->ipv6.ip6_blk_hole_entry)
2886 goto out_ip6_prohibit_entry;
2887 net->ipv6.ip6_blk_hole_entry->dst.path =
2888 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2889 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2890 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2891 ip6_template_metrics, true);
2892 #endif
2893
2894 net->ipv6.sysctl.flush_delay = 0;
2895 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2896 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2897 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2898 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2899 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2900 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2901 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2902
2903 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2904
2905 ret = 0;
2906 out:
2907 return ret;
2908
2909 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2910 out_ip6_prohibit_entry:
2911 kfree(net->ipv6.ip6_prohibit_entry);
2912 out_ip6_null_entry:
2913 kfree(net->ipv6.ip6_null_entry);
2914 #endif
2915 out_ip6_dst_entries:
2916 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2917 out_ip6_dst_ops:
2918 goto out;
2919 }
2920
2921 static void __net_exit ip6_route_net_exit(struct net *net)
2922 {
2923 kfree(net->ipv6.ip6_null_entry);
2924 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2925 kfree(net->ipv6.ip6_prohibit_entry);
2926 kfree(net->ipv6.ip6_blk_hole_entry);
2927 #endif
2928 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2929 }
2930
2931 static int __net_init ip6_route_net_init_late(struct net *net)
2932 {
2933 #ifdef CONFIG_PROC_FS
2934 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2935 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2936 #endif
2937 return 0;
2938 }
2939
2940 static void __net_exit ip6_route_net_exit_late(struct net *net)
2941 {
2942 #ifdef CONFIG_PROC_FS
2943 proc_net_remove(net, "ipv6_route");
2944 proc_net_remove(net, "rt6_stats");
2945 #endif
2946 }
2947
2948 static struct pernet_operations ip6_route_net_ops = {
2949 .init = ip6_route_net_init,
2950 .exit = ip6_route_net_exit,
2951 };
2952
2953 static int __net_init ipv6_inetpeer_init(struct net *net)
2954 {
2955 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
2956
2957 if (!bp)
2958 return -ENOMEM;
2959 inet_peer_base_init(bp);
2960 net->ipv6.peers = bp;
2961 return 0;
2962 }
2963
2964 static void __net_exit ipv6_inetpeer_exit(struct net *net)
2965 {
2966 struct inet_peer_base *bp = net->ipv6.peers;
2967
2968 net->ipv6.peers = NULL;
2969 inetpeer_invalidate_tree(bp);
2970 kfree(bp);
2971 }
2972
2973 static struct pernet_operations ipv6_inetpeer_ops = {
2974 .init = ipv6_inetpeer_init,
2975 .exit = ipv6_inetpeer_exit,
2976 };
2977
2978 static struct pernet_operations ip6_route_net_late_ops = {
2979 .init = ip6_route_net_init_late,
2980 .exit = ip6_route_net_exit_late,
2981 };
2982
2983 static struct notifier_block ip6_route_dev_notifier = {
2984 .notifier_call = ip6_route_dev_notify,
2985 .priority = 0,
2986 };
2987
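/* Subsystem init: create the rt6_info slab cache, register the per-net
 * operations, the FIB, xfrm6 and policy rules, the RTM_*ROUTE handlers
 * and the netdevice notifier, unwinding everything on failure.
 */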
2988 int __init ip6_route_init(void)
2989 {
2990 int ret;
2991
2992 ret = -ENOMEM;
2993 ip6_dst_ops_template.kmem_cachep =
2994 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2995 SLAB_HWCACHE_ALIGN, NULL);
2996 if (!ip6_dst_ops_template.kmem_cachep)
2997 goto out;
2998
2999 ret = dst_entries_init(&ip6_dst_blackhole_ops);
3000 if (ret)
3001 goto out_kmem_cache;
3002
3003 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3004 if (ret)
3005 goto out_dst_entries;
3006
3007 ret = register_pernet_subsys(&ip6_route_net_ops);
3008 if (ret)
3009 goto out_register_inetpeer;
3010
3011 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3012
3013 /* The loopback device is registered before this portion of code runs,
3014 * so the loopback reference in rt6_info is not taken there; do it
3015 * manually for init_net */
3016 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3017 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3018 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3019 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3020 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3021 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3022 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3023 #endif
3024 ret = fib6_init();
3025 if (ret)
3026 goto out_register_subsys;
3027
3028 ret = xfrm6_init();
3029 if (ret)
3030 goto out_fib6_init;
3031
3032 ret = fib6_rules_init();
3033 if (ret)
3034 goto xfrm6_init;
3035
3036 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3037 if (ret)
3038 goto fib6_rules_init;
3039
3040 ret = -ENOBUFS;
3041 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3042 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3043 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3044 goto out_register_late_subsys;
3045
3046 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3047 if (ret)
3048 goto out_register_late_subsys;
3049
3050 out:
3051 return ret;
3052
3053 out_register_late_subsys:
3054 unregister_pernet_subsys(&ip6_route_net_late_ops);
3055 fib6_rules_init:
3056 fib6_rules_cleanup();
3057 xfrm6_init:
3058 xfrm6_fini();
3059 out_fib6_init:
3060 fib6_gc_cleanup();
3061 out_register_subsys:
3062 unregister_pernet_subsys(&ip6_route_net_ops);
3063 out_register_inetpeer:
3064 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3065 out_dst_entries:
3066 dst_entries_destroy(&ip6_dst_blackhole_ops);
3067 out_kmem_cache:
3068 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3069 goto out;
3070 }
3071
3072 void ip6_route_cleanup(void)
3073 {
3074 unregister_netdevice_notifier(&ip6_route_dev_notifier);
3075 unregister_pernet_subsys(&ip6_route_net_late_ops);
3076 fib6_rules_cleanup();
3077 xfrm6_fini();
3078 fib6_gc_cleanup();
3079 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3080 unregister_pernet_subsys(&ip6_route_net_ops);
3081 dst_entries_destroy(&ip6_dst_blackhole_ops);
3082 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3083 }