Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
[deliverable/linux.git] / net / ipv6 / route.c
1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14 /* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
23 * Ville Nuorvala
24 * Fixed routing subtrees.
25 */
26
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/export.h>
30 #include <linux/types.h>
31 #include <linux/times.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/route.h>
36 #include <linux/netdevice.h>
37 #include <linux/in6.h>
38 #include <linux/mroute6.h>
39 #include <linux/init.h>
40 #include <linux/if_arp.h>
41 #include <linux/proc_fs.h>
42 #include <linux/seq_file.h>
43 #include <linux/nsproxy.h>
44 #include <linux/slab.h>
45 #include <net/net_namespace.h>
46 #include <net/snmp.h>
47 #include <net/ipv6.h>
48 #include <net/ip6_fib.h>
49 #include <net/ip6_route.h>
50 #include <net/ndisc.h>
51 #include <net/addrconf.h>
52 #include <net/tcp.h>
53 #include <linux/rtnetlink.h>
54 #include <net/dst.h>
55 #include <net/xfrm.h>
56 #include <net/netevent.h>
57 #include <net/netlink.h>
58
59 #include <asm/uaccess.h>
60
61 #ifdef CONFIG_SYSCTL
62 #include <linux/sysctl.h>
63 #endif
64
65 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
66 const struct in6_addr *dest);
67 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
68 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
69 static unsigned int ip6_mtu(const struct dst_entry *dst);
70 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
71 static void ip6_dst_destroy(struct dst_entry *);
72 static void ip6_dst_ifdown(struct dst_entry *,
73 struct net_device *dev, int how);
74 static int ip6_dst_gc(struct dst_ops *ops);
75
76 static int ip6_pkt_discard(struct sk_buff *skb);
77 static int ip6_pkt_discard_out(struct sk_buff *skb);
78 static void ip6_link_failure(struct sk_buff *skb);
79 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
80
81 #ifdef CONFIG_IPV6_ROUTE_INFO
82 static struct rt6_info *rt6_add_route_info(struct net *net,
83 const struct in6_addr *prefix, int prefixlen,
84 const struct in6_addr *gwaddr, int ifindex,
85 unsigned pref);
86 static struct rt6_info *rt6_get_route_info(struct net *net,
87 const struct in6_addr *prefix, int prefixlen,
88 const struct in6_addr *gwaddr, int ifindex);
89 #endif
90
91 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
92 {
93 struct rt6_info *rt = (struct rt6_info *) dst;
94 struct inet_peer *peer;
95 u32 *p = NULL;
96
97 if (!(rt->dst.flags & DST_HOST))
98 return NULL;
99
100 if (!rt->rt6i_peer)
101 rt6_bind_peer(rt, 1);
102
103 peer = rt->rt6i_peer;
104 if (peer) {
105 u32 *old_p = __DST_METRICS_PTR(old);
106 unsigned long prev, new;
107
108 p = peer->metrics;
109 if (inet_metrics_new(peer))
110 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
111
112 new = (unsigned long) p;
113 prev = cmpxchg(&dst->_metrics, old, new);
114
115 if (prev != old) {
116 p = __DST_METRICS_PTR(prev);
117 if (prev & DST_METRICS_READ_ONLY)
118 p = NULL;
119 }
120 }
121 return p;
122 }
123
124 static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *daddr)
125 {
126 struct in6_addr *p = &rt->rt6i_gateway;
127
128 if (!ipv6_addr_any(p))
129 return (const void *) p;
130 return daddr;
131 }
132
133 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
134 {
135 struct rt6_info *rt = (struct rt6_info *) dst;
136 struct neighbour *n;
137
138 daddr = choose_neigh_daddr(rt, daddr);
139 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
140 if (n)
141 return n;
142 return neigh_create(&nd_tbl, daddr, dst->dev);
143 }
144
145 static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
146 {
147 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
148 if (!n) {
149 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
150 if (IS_ERR(n))
151 return PTR_ERR(n);
152 }
153 dst_set_neighbour(&rt->dst, n);
154
155 return 0;
156 }
157
158 static struct dst_ops ip6_dst_ops_template = {
159 .family = AF_INET6,
160 .protocol = cpu_to_be16(ETH_P_IPV6),
161 .gc = ip6_dst_gc,
162 .gc_thresh = 1024,
163 .check = ip6_dst_check,
164 .default_advmss = ip6_default_advmss,
165 .mtu = ip6_mtu,
166 .cow_metrics = ipv6_cow_metrics,
167 .destroy = ip6_dst_destroy,
168 .ifdown = ip6_dst_ifdown,
169 .negative_advice = ip6_negative_advice,
170 .link_failure = ip6_link_failure,
171 .update_pmtu = ip6_rt_update_pmtu,
172 .local_out = __ip6_local_out,
173 .neigh_lookup = ip6_neigh_lookup,
174 };
175
176 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
177 {
178 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
179
180 return mtu ? : dst->dev->mtu;
181 }
182
183 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
184 {
185 }
186
187 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
188 unsigned long old)
189 {
190 return NULL;
191 }
192
193 static struct dst_ops ip6_dst_blackhole_ops = {
194 .family = AF_INET6,
195 .protocol = cpu_to_be16(ETH_P_IPV6),
196 .destroy = ip6_dst_destroy,
197 .check = ip6_dst_check,
198 .mtu = ip6_blackhole_mtu,
199 .default_advmss = ip6_default_advmss,
200 .update_pmtu = ip6_rt_blackhole_update_pmtu,
201 .cow_metrics = ip6_rt_blackhole_cow_metrics,
202 .neigh_lookup = ip6_neigh_lookup,
203 };
204
205 static const u32 ip6_template_metrics[RTAX_MAX] = {
206 [RTAX_HOPLIMIT - 1] = 255,
207 };
208
209 static struct rt6_info ip6_null_entry_template = {
210 .dst = {
211 .__refcnt = ATOMIC_INIT(1),
212 .__use = 1,
213 .obsolete = -1,
214 .error = -ENETUNREACH,
215 .input = ip6_pkt_discard,
216 .output = ip6_pkt_discard_out,
217 },
218 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
219 .rt6i_protocol = RTPROT_KERNEL,
220 .rt6i_metric = ~(u32) 0,
221 .rt6i_ref = ATOMIC_INIT(1),
222 };
223
224 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
225
226 static int ip6_pkt_prohibit(struct sk_buff *skb);
227 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
228
229 static struct rt6_info ip6_prohibit_entry_template = {
230 .dst = {
231 .__refcnt = ATOMIC_INIT(1),
232 .__use = 1,
233 .obsolete = -1,
234 .error = -EACCES,
235 .input = ip6_pkt_prohibit,
236 .output = ip6_pkt_prohibit_out,
237 },
238 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
239 .rt6i_protocol = RTPROT_KERNEL,
240 .rt6i_metric = ~(u32) 0,
241 .rt6i_ref = ATOMIC_INIT(1),
242 };
243
244 static struct rt6_info ip6_blk_hole_entry_template = {
245 .dst = {
246 .__refcnt = ATOMIC_INIT(1),
247 .__use = 1,
248 .obsolete = -1,
249 .error = -EINVAL,
250 .input = dst_discard,
251 .output = dst_discard,
252 },
253 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
254 .rt6i_protocol = RTPROT_KERNEL,
255 .rt6i_metric = ~(u32) 0,
256 .rt6i_ref = ATOMIC_INIT(1),
257 };
258
259 #endif
260
261 /* allocate dst with ip6_dst_ops */
262 static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
263 struct net_device *dev,
264 int flags)
265 {
266 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
267
268 if (rt)
269 memset(&rt->rt6i_table, 0,
270 sizeof(*rt) - sizeof(struct dst_entry));
271
272 return rt;
273 }
274
275 static void ip6_dst_destroy(struct dst_entry *dst)
276 {
277 struct rt6_info *rt = (struct rt6_info *)dst;
278 struct inet6_dev *idev = rt->rt6i_idev;
279 struct inet_peer *peer = rt->rt6i_peer;
280
281 if (!(rt->dst.flags & DST_HOST))
282 dst_destroy_metrics_generic(dst);
283
284 if (idev) {
285 rt->rt6i_idev = NULL;
286 in6_dev_put(idev);
287 }
288
289 if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
290 dst_release(dst->from);
291
292 if (peer) {
293 rt->rt6i_peer = NULL;
294 inet_putpeer(peer);
295 }
296 }
297
298 static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
299
300 static u32 rt6_peer_genid(void)
301 {
302 return atomic_read(&__rt6_peer_genid);
303 }
304
305 void rt6_bind_peer(struct rt6_info *rt, int create)
306 {
307 struct inet_peer *peer;
308
309 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
310 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
311 inet_putpeer(peer);
312 else
313 rt->rt6i_peer_genid = rt6_peer_genid();
314 }
315
316 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
317 int how)
318 {
319 struct rt6_info *rt = (struct rt6_info *)dst;
320 struct inet6_dev *idev = rt->rt6i_idev;
321 struct net_device *loopback_dev =
322 dev_net(dev)->loopback_dev;
323
324 if (dev != loopback_dev && idev && idev->dev == dev) {
325 struct inet6_dev *loopback_idev =
326 in6_dev_get(loopback_dev);
327 if (loopback_idev) {
328 rt->rt6i_idev = loopback_idev;
329 in6_dev_put(idev);
330 }
331 }
332 }
333
334 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
335 {
336 struct rt6_info *ort = NULL;
337
338 if (rt->rt6i_flags & RTF_EXPIRES) {
339 if (time_after(jiffies, rt->dst.expires))
340 return 1;
341 } else if (rt->dst.from) {
342 ort = (struct rt6_info *) rt->dst.from;
343 return (ort->rt6i_flags & RTF_EXPIRES) &&
344 time_after(jiffies, ort->dst.expires);
345 }
346 return 0;
347 }
348
349 static inline int rt6_need_strict(const struct in6_addr *daddr)
350 {
351 return ipv6_addr_type(daddr) &
352 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
353 }
354
355 /*
356 * Route lookup. Any table->tb6_lock is implied.
357 */
358
359 static inline struct rt6_info *rt6_device_match(struct net *net,
360 struct rt6_info *rt,
361 const struct in6_addr *saddr,
362 int oif,
363 int flags)
364 {
365 struct rt6_info *local = NULL;
366 struct rt6_info *sprt;
367
368 if (!oif && ipv6_addr_any(saddr))
369 goto out;
370
371 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
372 struct net_device *dev = sprt->dst.dev;
373
374 if (oif) {
375 if (dev->ifindex == oif)
376 return sprt;
377 if (dev->flags & IFF_LOOPBACK) {
378 if (!sprt->rt6i_idev ||
379 sprt->rt6i_idev->dev->ifindex != oif) {
380 if (flags & RT6_LOOKUP_F_IFACE && oif)
381 continue;
382 if (local && (!oif ||
383 local->rt6i_idev->dev->ifindex == oif))
384 continue;
385 }
386 local = sprt;
387 }
388 } else {
389 if (ipv6_chk_addr(net, saddr, dev,
390 flags & RT6_LOOKUP_F_IFACE))
391 return sprt;
392 }
393 }
394
395 if (oif) {
396 if (local)
397 return local;
398
399 if (flags & RT6_LOOKUP_F_IFACE)
400 return net->ipv6.ip6_null_entry;
401 }
402 out:
403 return rt;
404 }
405
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing for @rt (which may be NULL).
 *
 * When the route has a neighbour entry that is not in a NUD_VALID state
 * and the last update is older than the interface's rtr_probe_interval,
 * a Neighbour Solicitation is sent to the solicited-node multicast
 * address of the neighbour and neigh->updated is set to now.
 *
 * neigh->updated is both tested and written here, so the check-and-stamp
 * sequence runs under the *write* side of neigh->lock.  Holding only the
 * read side would let several CPUs pass the time check together and each
 * send a probe, defeating the rate limit, and would modify the entry
 * without exclusive access.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	rcu_read_lock();
	neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;
	/* Cheap unlocked pre-check; repeated below under the lock. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		goto out;
	write_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		neigh->updated = jiffies;
		/* Drop the lock before transmitting. */
		write_unlock_bh(&neigh->lock);

		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
	} else {
		write_unlock_bh(&neigh->lock);
	}
out:
	rcu_read_unlock();
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF no probing is done. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
445
446 /*
447 * Default Router Selection (RFC 2461 6.3.6)
448 */
449 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
450 {
451 struct net_device *dev = rt->dst.dev;
452 if (!oif || dev->ifindex == oif)
453 return 2;
454 if ((dev->flags & IFF_LOOPBACK) &&
455 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
456 return 1;
457 return 0;
458 }
459
460 static inline int rt6_check_neigh(struct rt6_info *rt)
461 {
462 struct neighbour *neigh;
463 int m;
464
465 rcu_read_lock();
466 neigh = dst_get_neighbour_noref(&rt->dst);
467 if (rt->rt6i_flags & RTF_NONEXTHOP ||
468 !(rt->rt6i_flags & RTF_GATEWAY))
469 m = 1;
470 else if (neigh) {
471 read_lock_bh(&neigh->lock);
472 if (neigh->nud_state & NUD_VALID)
473 m = 2;
474 #ifdef CONFIG_IPV6_ROUTER_PREF
475 else if (neigh->nud_state & NUD_FAILED)
476 m = 0;
477 #endif
478 else
479 m = 1;
480 read_unlock_bh(&neigh->lock);
481 } else
482 m = 0;
483 rcu_read_unlock();
484 return m;
485 }
486
487 static int rt6_score_route(struct rt6_info *rt, int oif,
488 int strict)
489 {
490 int m, n;
491
492 m = rt6_check_dev(rt, oif);
493 if (!m && (strict & RT6_LOOKUP_F_IFACE))
494 return -1;
495 #ifdef CONFIG_IPV6_ROUTER_PREF
496 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
497 #endif
498 n = rt6_check_neigh(rt);
499 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
500 return -1;
501 return m;
502 }
503
504 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
505 int *mpri, struct rt6_info *match)
506 {
507 int m;
508
509 if (rt6_check_expired(rt))
510 goto out;
511
512 m = rt6_score_route(rt, oif, strict);
513 if (m < 0)
514 goto out;
515
516 if (m > *mpri) {
517 if (strict & RT6_LOOKUP_F_REACHABLE)
518 rt6_probe(match);
519 *mpri = m;
520 match = rt;
521 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
522 rt6_probe(rt);
523 }
524
525 out:
526 return match;
527 }
528
529 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
530 struct rt6_info *rr_head,
531 u32 metric, int oif, int strict)
532 {
533 struct rt6_info *rt, *match;
534 int mpri = -1;
535
536 match = NULL;
537 for (rt = rr_head; rt && rt->rt6i_metric == metric;
538 rt = rt->dst.rt6_next)
539 match = find_match(rt, oif, strict, &mpri, match);
540 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
541 rt = rt->dst.rt6_next)
542 match = find_match(rt, oif, strict, &mpri, match);
543
544 return match;
545 }
546
547 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
548 {
549 struct rt6_info *match, *rt0;
550 struct net *net;
551
552 rt0 = fn->rr_ptr;
553 if (!rt0)
554 fn->rr_ptr = rt0 = fn->leaf;
555
556 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
557
558 if (!match &&
559 (strict & RT6_LOOKUP_F_REACHABLE)) {
560 struct rt6_info *next = rt0->dst.rt6_next;
561
562 /* no entries matched; do round-robin */
563 if (!next || next->rt6i_metric != rt0->rt6i_metric)
564 next = fn->leaf;
565
566 if (next != rt0)
567 fn->rr_ptr = next;
568 }
569
570 net = dev_net(rt0->dst.dev);
571 return match ? match : net->ipv6.ip6_null_entry;
572 }
573
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a route information option received on @dev from @gwaddr.
 *
 * @opt points at the option (interpreted as struct route_info) and @len
 * is its length in bytes.  Returns -EINVAL when the option is shorter
 * than struct route_info, when rinfo->length exceeds 3 or is too small
 * for the prefix length (at least 2 for prefix_len > 64, at least 1 for
 * prefix_len > 0), when prefix_len exceeds 128, or when the preference
 * is ICMPV6_ROUTER_PREF_INVALID.  Otherwise returns 0 after
 *  - deleting an existing route when the lifetime is zero,
 *  - adding a route when none exists and the lifetime is non-zero, or
 *  - refreshing flags/preference of an existing route,
 * and then updating the expiry of the resulting route.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct net *net = dev_net(dev);
	struct in6_addr masked, *prefix;
	unsigned long lifetime;
	unsigned int pref;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length != 3) {
		/* this function is safe */
		ipv6_addr_prefix(&masked,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &masked;
	} else {
		prefix = (struct in6_addr *)rinfo->prefix;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* A zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (rt) {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
	} else if (lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev->ifindex, pref);
	}

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime))
		rt6_set_expires(rt, jiffies + HZ * lifetime);
	else
		rt6_clean_expires(rt);

	dst_release(&rt->dst);
	return 0;
}
#endif
646
/*
 * BACKTRACK(__net, saddr) - climb the fib6 tree after a failed selection.
 *
 * Must be expanded inside a function that has the local variables `rt`
 * (struct rt6_info *) and `fn` (struct fib6_node *) and the labels `out`
 * and `restart`; the macro reads and assigns `fn` and jumps to those
 * labels.
 *
 * It only acts when `rt` is the namespace's ip6_null_entry.  In that case
 * it loops: if `fn` is flagged RTN_TL_ROOT it jumps to `out`; otherwise it
 * moves to the parent node, or - when the parent has a subtree that is
 * not `fn` itself - to the result of looking up `saddr` in that subtree.
 * As soon as the new `fn` is flagged RTN_RTINFO it jumps to `restart`.
 */
647 #define BACKTRACK(__net, saddr) \
648 do { \
649 if (rt == __net->ipv6.ip6_null_entry) { \
650 struct fib6_node *pn; \
651 while (1) { \
652 if (fn->fn_flags & RTN_TL_ROOT) \
653 goto out; \
654 pn = fn->parent; \
655 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
656 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
657 else \
658 fn = pn; \
659 if (fn->fn_flags & RTN_RTINFO) \
660 goto restart; \
661 } \
662 } \
663 } while (0)
664
665 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
666 struct fib6_table *table,
667 struct flowi6 *fl6, int flags)
668 {
669 struct fib6_node *fn;
670 struct rt6_info *rt;
671
672 read_lock_bh(&table->tb6_lock);
673 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
674 restart:
675 rt = fn->leaf;
676 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
677 BACKTRACK(net, &fl6->saddr);
678 out:
679 dst_use(&rt->dst, jiffies);
680 read_unlock_bh(&table->tb6_lock);
681 return rt;
682
683 }
684
685 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
686 int flags)
687 {
688 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
689 }
690 EXPORT_SYMBOL_GPL(ip6_route_lookup);
691
692 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
693 const struct in6_addr *saddr, int oif, int strict)
694 {
695 struct flowi6 fl6 = {
696 .flowi6_oif = oif,
697 .daddr = *daddr,
698 };
699 struct dst_entry *dst;
700 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
701
702 if (saddr) {
703 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
704 flags |= RT6_LOOKUP_F_HAS_SADDR;
705 }
706
707 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
708 if (dst->error == 0)
709 return (struct rt6_info *) dst;
710
711 dst_release(dst);
712
713 return NULL;
714 }
715
716 EXPORT_SYMBOL(rt6_lookup);
717
718 /* ip6_ins_rt is called with FREE table->tb6_lock.
719 It takes new route entry, the addition fails by any reason the
720 route is freed. In any case, if caller does not hold it, it may
721 be destroyed.
722 */
723
724 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
725 {
726 int err;
727 struct fib6_table *table;
728
729 table = rt->rt6i_table;
730 write_lock_bh(&table->tb6_lock);
731 err = fib6_add(&table->tb6_root, rt, info);
732 write_unlock_bh(&table->tb6_lock);
733
734 return err;
735 }
736
737 int ip6_ins_rt(struct rt6_info *rt)
738 {
739 struct nl_info info = {
740 .nl_net = dev_net(rt->dst.dev),
741 };
742 return __ip6_ins_rt(rt, &info);
743 }
744
745 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
746 const struct in6_addr *daddr,
747 const struct in6_addr *saddr)
748 {
749 struct rt6_info *rt;
750
751 /*
752 * Clone the route.
753 */
754
755 rt = ip6_rt_copy(ort, daddr);
756
757 if (rt) {
758 int attempts = !in_softirq();
759
760 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
761 if (ort->rt6i_dst.plen != 128 &&
762 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
763 rt->rt6i_flags |= RTF_ANYCAST;
764 rt->rt6i_gateway = *daddr;
765 }
766
767 rt->rt6i_flags |= RTF_CACHE;
768
769 #ifdef CONFIG_IPV6_SUBTREES
770 if (rt->rt6i_src.plen && saddr) {
771 rt->rt6i_src.addr = *saddr;
772 rt->rt6i_src.plen = 128;
773 }
774 #endif
775
776 retry:
777 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
778 struct net *net = dev_net(rt->dst.dev);
779 int saved_rt_min_interval =
780 net->ipv6.sysctl.ip6_rt_gc_min_interval;
781 int saved_rt_elasticity =
782 net->ipv6.sysctl.ip6_rt_gc_elasticity;
783
784 if (attempts-- > 0) {
785 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
786 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
787
788 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
789
790 net->ipv6.sysctl.ip6_rt_gc_elasticity =
791 saved_rt_elasticity;
792 net->ipv6.sysctl.ip6_rt_gc_min_interval =
793 saved_rt_min_interval;
794 goto retry;
795 }
796
797 if (net_ratelimit())
798 printk(KERN_WARNING
799 "ipv6: Neighbour table overflow.\n");
800 dst_free(&rt->dst);
801 return NULL;
802 }
803 }
804
805 return rt;
806 }
807
808 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
809 const struct in6_addr *daddr)
810 {
811 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
812
813 if (rt) {
814 rt->rt6i_flags |= RTF_CACHE;
815 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
816 }
817 return rt;
818 }
819
/*
 * Core policy-routing lookup shared by the input and output paths.
 *
 * Looks up @fl6 in @table, selects a route with rt6_select() and, when
 * the selected route is neither the null entry nor already RTF_CACHE,
 * replaces it with a cache entry (rt6_alloc_cow() or rt6_alloc_clone())
 * that is inserted with ip6_ins_rt().
 *
 * Every return path takes a reference on the returned route with
 * dst_hold() and updates its lastuse/__use fields.
 */
820 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
821 struct flowi6 *fl6, int flags)
822 {
823 struct fib6_node *fn;
824 struct rt6_info *rt, *nrt;
825 int strict = 0;
826 int attempts = 3;
827 int err;
/* Reachability is only demanded while devconf_all->forwarding is off. */
828 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
829
830 strict |= flags & RT6_LOOKUP_F_IFACE;
831
832 relookup:
833 read_lock_bh(&table->tb6_lock);
834
835 restart_2:
836 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
837
838 restart:
839 rt = rt6_select(fn, oif, strict | reachable);
840
/* BACKTRACK() may jump to `restart` or `out`; it uses `fn` and `rt`. */
841 BACKTRACK(net, &fl6->saddr);
842 if (rt == net->ipv6.ip6_null_entry ||
843 rt->rt6i_flags & RTF_CACHE)
844 goto out;
845
/* Pin the selected route before dropping the table lock. */
846 dst_hold(&rt->dst);
847 read_unlock_bh(&table->tb6_lock);
848
/*
 * No neighbour bound and a next hop is expected: copy with a fresh
 * neighbour.  Otherwise non-DST_HOST routes are cloned, and DST_HOST
 * routes are returned as they are.
 */
849 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
850 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
851 else if (!(rt->dst.flags & DST_HOST))
852 nrt = rt6_alloc_clone(rt, &fl6->daddr);
853 else
854 goto out2;
855
/* Swap the reference over to the copy, or to the null entry on failure. */
856 dst_release(&rt->dst);
857 rt = nrt ? : net->ipv6.ip6_null_entry;
858
859 dst_hold(&rt->dst);
860 if (nrt) {
861 err = ip6_ins_rt(nrt);
862 if (!err)
863 goto out2;
864 }
865
/* Allocation or insertion failed: give up once `attempts` is used up. */
866 if (--attempts <= 0)
867 goto out2;
868
869 /*
870 * Race condition! In the gap, when table->tb6_lock was
871 * released someone could insert this route. Relookup.
872 */
873 dst_release(&rt->dst);
874 goto relookup;
875
876 out:
/*
 * Reached with the table lock still held.  If the lookup so far demanded
 * reachability, repeat it once without that requirement before accepting
 * the result.
 */
877 if (reachable) {
878 reachable = 0;
879 goto restart_2;
880 }
881 dst_hold(&rt->dst);
882 read_unlock_bh(&table->tb6_lock);
883 out2:
884 rt->dst.lastuse = jiffies;
885 rt->dst.__use++;
886
887 return rt;
888 }
889
890 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
891 struct flowi6 *fl6, int flags)
892 {
893 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
894 }
895
896 static struct dst_entry *ip6_route_input_lookup(struct net *net,
897 struct net_device *dev,
898 struct flowi6 *fl6, int flags)
899 {
900 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
901 flags |= RT6_LOOKUP_F_IFACE;
902
903 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
904 }
905
906 void ip6_route_input(struct sk_buff *skb)
907 {
908 const struct ipv6hdr *iph = ipv6_hdr(skb);
909 struct net *net = dev_net(skb->dev);
910 int flags = RT6_LOOKUP_F_HAS_SADDR;
911 struct flowi6 fl6 = {
912 .flowi6_iif = skb->dev->ifindex,
913 .daddr = iph->daddr,
914 .saddr = iph->saddr,
915 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
916 .flowi6_mark = skb->mark,
917 .flowi6_proto = iph->nexthdr,
918 };
919
920 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
921 }
922
923 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
924 struct flowi6 *fl6, int flags)
925 {
926 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
927 }
928
929 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
930 struct flowi6 *fl6)
931 {
932 int flags = 0;
933
934 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
935 flags |= RT6_LOOKUP_F_IFACE;
936
937 if (!ipv6_addr_any(&fl6->saddr))
938 flags |= RT6_LOOKUP_F_HAS_SADDR;
939 else if (sk)
940 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
941
942 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
943 }
944
945 EXPORT_SYMBOL(ip6_route_output);
946
947 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
948 {
949 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
950 struct dst_entry *new = NULL;
951
952 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
953 if (rt) {
954 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
955
956 new = &rt->dst;
957
958 new->__use = 1;
959 new->input = dst_discard;
960 new->output = dst_discard;
961
962 if (dst_metrics_read_only(&ort->dst))
963 new->_metrics = ort->dst._metrics;
964 else
965 dst_copy_metrics(new, &ort->dst);
966 rt->rt6i_idev = ort->rt6i_idev;
967 if (rt->rt6i_idev)
968 in6_dev_hold(rt->rt6i_idev);
969
970 rt->rt6i_gateway = ort->rt6i_gateway;
971 rt->rt6i_flags = ort->rt6i_flags;
972 rt6_clean_expires(rt);
973 rt->rt6i_metric = 0;
974
975 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
976 #ifdef CONFIG_IPV6_SUBTREES
977 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
978 #endif
979
980 dst_free(new);
981 }
982
983 dst_release(dst_orig);
984 return new ? new : ERR_PTR(-ENOMEM);
985 }
986
987 /*
988 * Destination cache support functions
989 */
990
991 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
992 {
993 struct rt6_info *rt;
994
995 rt = (struct rt6_info *) dst;
996
997 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
998 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
999 if (!rt->rt6i_peer)
1000 rt6_bind_peer(rt, 0);
1001 rt->rt6i_peer_genid = rt6_peer_genid();
1002 }
1003 return dst;
1004 }
1005 return NULL;
1006 }
1007
1008 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1009 {
1010 struct rt6_info *rt = (struct rt6_info *) dst;
1011
1012 if (rt) {
1013 if (rt->rt6i_flags & RTF_CACHE) {
1014 if (rt6_check_expired(rt)) {
1015 ip6_del_rt(rt);
1016 dst = NULL;
1017 }
1018 } else {
1019 dst_release(dst);
1020 dst = NULL;
1021 }
1022 }
1023 return dst;
1024 }
1025
1026 static void ip6_link_failure(struct sk_buff *skb)
1027 {
1028 struct rt6_info *rt;
1029
1030 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1031
1032 rt = (struct rt6_info *) skb_dst(skb);
1033 if (rt) {
1034 if (rt->rt6i_flags & RTF_CACHE)
1035 rt6_update_expires(rt, 0);
1036 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1037 rt->rt6i_node->fn_sernum = -1;
1038 }
1039 }
1040
1041 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1042 {
1043 struct rt6_info *rt6 = (struct rt6_info*)dst;
1044
1045 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1046 rt6->rt6i_flags |= RTF_MODIFIED;
1047 if (mtu < IPV6_MIN_MTU) {
1048 u32 features = dst_metric(dst, RTAX_FEATURES);
1049 mtu = IPV6_MIN_MTU;
1050 features |= RTAX_FEATURE_ALLFRAG;
1051 dst_metric_set(dst, RTAX_FEATURES, features);
1052 }
1053 dst_metric_set(dst, RTAX_MTU, mtu);
1054 }
1055 }
1056
1057 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1058 {
1059 struct net_device *dev = dst->dev;
1060 unsigned int mtu = dst_mtu(dst);
1061 struct net *net = dev_net(dev);
1062
1063 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1064
1065 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1066 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1067
1068 /*
1069 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1070 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1071 * IPV6_MAXPLEN is also valid and means: "any MSS,
1072 * rely only on pmtu discovery"
1073 */
1074 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1075 mtu = IPV6_MAXPLEN;
1076 return mtu;
1077 }
1078
1079 static unsigned int ip6_mtu(const struct dst_entry *dst)
1080 {
1081 struct inet6_dev *idev;
1082 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1083
1084 if (mtu)
1085 return mtu;
1086
1087 mtu = IPV6_MIN_MTU;
1088
1089 rcu_read_lock();
1090 idev = __in6_dev_get(dst->dev);
1091 if (idev)
1092 mtu = idev->cnf.mtu6;
1093 rcu_read_unlock();
1094
1095 return mtu;
1096 }
1097
1098 static struct dst_entry *icmp6_dst_gc_list;
1099 static DEFINE_SPINLOCK(icmp6_dst_lock);
1100
1101 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1102 struct neighbour *neigh,
1103 struct flowi6 *fl6)
1104 {
1105 struct dst_entry *dst;
1106 struct rt6_info *rt;
1107 struct inet6_dev *idev = in6_dev_get(dev);
1108 struct net *net = dev_net(dev);
1109
1110 if (unlikely(!idev))
1111 return ERR_PTR(-ENODEV);
1112
1113 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
1114 if (unlikely(!rt)) {
1115 in6_dev_put(idev);
1116 dst = ERR_PTR(-ENOMEM);
1117 goto out;
1118 }
1119
1120 if (neigh)
1121 neigh_hold(neigh);
1122 else {
1123 neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr);
1124 if (IS_ERR(neigh)) {
1125 in6_dev_put(idev);
1126 dst_free(&rt->dst);
1127 return ERR_CAST(neigh);
1128 }
1129 }
1130
1131 rt->dst.flags |= DST_HOST;
1132 rt->dst.output = ip6_output;
1133 dst_set_neighbour(&rt->dst, neigh);
1134 atomic_set(&rt->dst.__refcnt, 1);
1135 rt->rt6i_dst.addr = fl6->daddr;
1136 rt->rt6i_dst.plen = 128;
1137 rt->rt6i_idev = idev;
1138 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1139
1140 spin_lock_bh(&icmp6_dst_lock);
1141 rt->dst.next = icmp6_dst_gc_list;
1142 icmp6_dst_gc_list = &rt->dst;
1143 spin_unlock_bh(&icmp6_dst_lock);
1144
1145 fib6_force_start_gc(net);
1146
1147 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1148
1149 out:
1150 return dst;
1151 }
1152
1153 int icmp6_dst_gc(void)
1154 {
1155 struct dst_entry *dst, **pprev;
1156 int more = 0;
1157
1158 spin_lock_bh(&icmp6_dst_lock);
1159 pprev = &icmp6_dst_gc_list;
1160
1161 while ((dst = *pprev) != NULL) {
1162 if (!atomic_read(&dst->__refcnt)) {
1163 *pprev = dst->next;
1164 dst_free(dst);
1165 } else {
1166 pprev = &dst->next;
1167 ++more;
1168 }
1169 }
1170
1171 spin_unlock_bh(&icmp6_dst_lock);
1172
1173 return more;
1174 }
1175
1176 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1177 void *arg)
1178 {
1179 struct dst_entry *dst, **pprev;
1180
1181 spin_lock_bh(&icmp6_dst_lock);
1182 pprev = &icmp6_dst_gc_list;
1183 while ((dst = *pprev) != NULL) {
1184 struct rt6_info *rt = (struct rt6_info *) dst;
1185 if (func(rt, arg)) {
1186 *pprev = dst->next;
1187 dst_free(dst);
1188 } else {
1189 pprev = &dst->next;
1190 }
1191 }
1192 spin_unlock_bh(&icmp6_dst_lock);
1193 }
1194
1195 static int ip6_dst_gc(struct dst_ops *ops)
1196 {
1197 unsigned long now = jiffies;
1198 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1199 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1200 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1201 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1202 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1203 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1204 int entries;
1205
1206 entries = dst_entries_get_fast(ops);
1207 if (time_after(rt_last_gc + rt_min_interval, now) &&
1208 entries <= rt_max_size)
1209 goto out;
1210
1211 net->ipv6.ip6_rt_gc_expire++;
1212 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1213 net->ipv6.ip6_rt_last_gc = now;
1214 entries = dst_entries_get_slow(ops);
1215 if (entries < ops->gc_thresh)
1216 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1217 out:
1218 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1219 return entries > rt_max_size;
1220 }
1221
1222 /* Clean host part of a prefix. Not necessary in radix tree,
1223 but results in cleaner routing tables.
1224
1225 Remove it only when all the things will work!
1226 */
1227
1228 int ip6_dst_hoplimit(struct dst_entry *dst)
1229 {
1230 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1231 if (hoplimit == 0) {
1232 struct net_device *dev = dst->dev;
1233 struct inet6_dev *idev;
1234
1235 rcu_read_lock();
1236 idev = __in6_dev_get(dev);
1237 if (idev)
1238 hoplimit = idev->cnf.hop_limit;
1239 else
1240 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1241 rcu_read_unlock();
1242 }
1243 return hoplimit;
1244 }
1245 EXPORT_SYMBOL(ip6_dst_hoplimit);
1246
1247 /*
1248 *
1249 */
1250
1251 int ip6_route_add(struct fib6_config *cfg)
1252 {
1253 int err;
1254 struct net *net = cfg->fc_nlinfo.nl_net;
1255 struct rt6_info *rt = NULL;
1256 struct net_device *dev = NULL;
1257 struct inet6_dev *idev = NULL;
1258 struct fib6_table *table;
1259 int addr_type;
1260
1261 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1262 return -EINVAL;
1263 #ifndef CONFIG_IPV6_SUBTREES
1264 if (cfg->fc_src_len)
1265 return -EINVAL;
1266 #endif
1267 if (cfg->fc_ifindex) {
1268 err = -ENODEV;
1269 dev = dev_get_by_index(net, cfg->fc_ifindex);
1270 if (!dev)
1271 goto out;
1272 idev = in6_dev_get(dev);
1273 if (!idev)
1274 goto out;
1275 }
1276
1277 if (cfg->fc_metric == 0)
1278 cfg->fc_metric = IP6_RT_PRIO_USER;
1279
1280 err = -ENOBUFS;
1281 if (cfg->fc_nlinfo.nlh &&
1282 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1283 table = fib6_get_table(net, cfg->fc_table);
1284 if (!table) {
1285 printk(KERN_WARNING "IPv6: NLM_F_CREATE should be specified when creating new route\n");
1286 table = fib6_new_table(net, cfg->fc_table);
1287 }
1288 } else {
1289 table = fib6_new_table(net, cfg->fc_table);
1290 }
1291
1292 if (!table)
1293 goto out;
1294
1295 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
1296
1297 if (!rt) {
1298 err = -ENOMEM;
1299 goto out;
1300 }
1301
1302 rt->dst.obsolete = -1;
1303
1304 if (cfg->fc_flags & RTF_EXPIRES)
1305 rt6_set_expires(rt, jiffies +
1306 clock_t_to_jiffies(cfg->fc_expires));
1307 else
1308 rt6_clean_expires(rt);
1309
1310 if (cfg->fc_protocol == RTPROT_UNSPEC)
1311 cfg->fc_protocol = RTPROT_BOOT;
1312 rt->rt6i_protocol = cfg->fc_protocol;
1313
1314 addr_type = ipv6_addr_type(&cfg->fc_dst);
1315
1316 if (addr_type & IPV6_ADDR_MULTICAST)
1317 rt->dst.input = ip6_mc_input;
1318 else if (cfg->fc_flags & RTF_LOCAL)
1319 rt->dst.input = ip6_input;
1320 else
1321 rt->dst.input = ip6_forward;
1322
1323 rt->dst.output = ip6_output;
1324
1325 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1326 rt->rt6i_dst.plen = cfg->fc_dst_len;
1327 if (rt->rt6i_dst.plen == 128)
1328 rt->dst.flags |= DST_HOST;
1329
1330 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1331 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1332 if (!metrics) {
1333 err = -ENOMEM;
1334 goto out;
1335 }
1336 dst_init_metrics(&rt->dst, metrics, 0);
1337 }
1338 #ifdef CONFIG_IPV6_SUBTREES
1339 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1340 rt->rt6i_src.plen = cfg->fc_src_len;
1341 #endif
1342
1343 rt->rt6i_metric = cfg->fc_metric;
1344
1345 /* We cannot add true routes via loopback here,
1346 they would result in kernel looping; promote them to reject routes
1347 */
1348 if ((cfg->fc_flags & RTF_REJECT) ||
1349 (dev && (dev->flags & IFF_LOOPBACK) &&
1350 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1351 !(cfg->fc_flags & RTF_LOCAL))) {
1352 /* hold loopback dev/idev if we haven't done so. */
1353 if (dev != net->loopback_dev) {
1354 if (dev) {
1355 dev_put(dev);
1356 in6_dev_put(idev);
1357 }
1358 dev = net->loopback_dev;
1359 dev_hold(dev);
1360 idev = in6_dev_get(dev);
1361 if (!idev) {
1362 err = -ENODEV;
1363 goto out;
1364 }
1365 }
1366 rt->dst.output = ip6_pkt_discard_out;
1367 rt->dst.input = ip6_pkt_discard;
1368 rt->dst.error = -ENETUNREACH;
1369 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1370 goto install_route;
1371 }
1372
1373 if (cfg->fc_flags & RTF_GATEWAY) {
1374 const struct in6_addr *gw_addr;
1375 int gwa_type;
1376
1377 gw_addr = &cfg->fc_gateway;
1378 rt->rt6i_gateway = *gw_addr;
1379 gwa_type = ipv6_addr_type(gw_addr);
1380
1381 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1382 struct rt6_info *grt;
1383
1384 /* IPv6 strictly inhibits using not link-local
1385 addresses as nexthop address.
1386 Otherwise, router will not able to send redirects.
1387 It is very good, but in some (rare!) circumstances
1388 (SIT, PtP, NBMA NOARP links) it is handy to allow
1389 some exceptions. --ANK
1390 */
1391 err = -EINVAL;
1392 if (!(gwa_type & IPV6_ADDR_UNICAST))
1393 goto out;
1394
1395 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1396
1397 err = -EHOSTUNREACH;
1398 if (!grt)
1399 goto out;
1400 if (dev) {
1401 if (dev != grt->dst.dev) {
1402 dst_release(&grt->dst);
1403 goto out;
1404 }
1405 } else {
1406 dev = grt->dst.dev;
1407 idev = grt->rt6i_idev;
1408 dev_hold(dev);
1409 in6_dev_hold(grt->rt6i_idev);
1410 }
1411 if (!(grt->rt6i_flags & RTF_GATEWAY))
1412 err = 0;
1413 dst_release(&grt->dst);
1414
1415 if (err)
1416 goto out;
1417 }
1418 err = -EINVAL;
1419 if (!dev || (dev->flags & IFF_LOOPBACK))
1420 goto out;
1421 }
1422
1423 err = -ENODEV;
1424 if (!dev)
1425 goto out;
1426
1427 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1428 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1429 err = -EINVAL;
1430 goto out;
1431 }
1432 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1433 rt->rt6i_prefsrc.plen = 128;
1434 } else
1435 rt->rt6i_prefsrc.plen = 0;
1436
1437 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1438 err = rt6_bind_neighbour(rt, dev);
1439 if (err)
1440 goto out;
1441 }
1442
1443 rt->rt6i_flags = cfg->fc_flags;
1444
1445 install_route:
1446 if (cfg->fc_mx) {
1447 struct nlattr *nla;
1448 int remaining;
1449
1450 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1451 int type = nla_type(nla);
1452
1453 if (type) {
1454 if (type > RTAX_MAX) {
1455 err = -EINVAL;
1456 goto out;
1457 }
1458
1459 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1460 }
1461 }
1462 }
1463
1464 rt->dst.dev = dev;
1465 rt->rt6i_idev = idev;
1466 rt->rt6i_table = table;
1467
1468 cfg->fc_nlinfo.nl_net = dev_net(dev);
1469
1470 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1471
1472 out:
1473 if (dev)
1474 dev_put(dev);
1475 if (idev)
1476 in6_dev_put(idev);
1477 if (rt)
1478 dst_free(&rt->dst);
1479 return err;
1480 }
1481
1482 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1483 {
1484 int err;
1485 struct fib6_table *table;
1486 struct net *net = dev_net(rt->dst.dev);
1487
1488 if (rt == net->ipv6.ip6_null_entry)
1489 return -ENOENT;
1490
1491 table = rt->rt6i_table;
1492 write_lock_bh(&table->tb6_lock);
1493
1494 err = fib6_del(rt, info);
1495 dst_release(&rt->dst);
1496
1497 write_unlock_bh(&table->tb6_lock);
1498
1499 return err;
1500 }
1501
1502 int ip6_del_rt(struct rt6_info *rt)
1503 {
1504 struct nl_info info = {
1505 .nl_net = dev_net(rt->dst.dev),
1506 };
1507 return __ip6_del_rt(rt, &info);
1508 }
1509
1510 static int ip6_route_del(struct fib6_config *cfg)
1511 {
1512 struct fib6_table *table;
1513 struct fib6_node *fn;
1514 struct rt6_info *rt;
1515 int err = -ESRCH;
1516
1517 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1518 if (!table)
1519 return err;
1520
1521 read_lock_bh(&table->tb6_lock);
1522
1523 fn = fib6_locate(&table->tb6_root,
1524 &cfg->fc_dst, cfg->fc_dst_len,
1525 &cfg->fc_src, cfg->fc_src_len);
1526
1527 if (fn) {
1528 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1529 if (cfg->fc_ifindex &&
1530 (!rt->dst.dev ||
1531 rt->dst.dev->ifindex != cfg->fc_ifindex))
1532 continue;
1533 if (cfg->fc_flags & RTF_GATEWAY &&
1534 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1535 continue;
1536 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1537 continue;
1538 dst_hold(&rt->dst);
1539 read_unlock_bh(&table->tb6_lock);
1540
1541 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1542 }
1543 }
1544 read_unlock_bh(&table->tb6_lock);
1545
1546 return err;
1547 }
1548
1549 /*
1550 * Handle redirects
1551 */
1552 struct ip6rd_flowi {
1553 struct flowi6 fl6;
1554 struct in6_addr gateway;
1555 };
1556
1557 static struct rt6_info *__ip6_route_redirect(struct net *net,
1558 struct fib6_table *table,
1559 struct flowi6 *fl6,
1560 int flags)
1561 {
1562 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1563 struct rt6_info *rt;
1564 struct fib6_node *fn;
1565
1566 /*
1567 * Get the "current" route for this destination and
1568 * check if the redirect has come from approriate router.
1569 *
1570 * RFC 2461 specifies that redirects should only be
1571 * accepted if they come from the nexthop to the target.
1572 * Due to the way the routes are chosen, this notion
1573 * is a bit fuzzy and one might need to check all possible
1574 * routes.
1575 */
1576
1577 read_lock_bh(&table->tb6_lock);
1578 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1579 restart:
1580 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1581 /*
1582 * Current route is on-link; redirect is always invalid.
1583 *
1584 * Seems, previous statement is not true. It could
1585 * be node, which looks for us as on-link (f.e. proxy ndisc)
1586 * But then router serving it might decide, that we should
1587 * know truth 8)8) --ANK (980726).
1588 */
1589 if (rt6_check_expired(rt))
1590 continue;
1591 if (!(rt->rt6i_flags & RTF_GATEWAY))
1592 continue;
1593 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1594 continue;
1595 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1596 continue;
1597 break;
1598 }
1599
1600 if (!rt)
1601 rt = net->ipv6.ip6_null_entry;
1602 BACKTRACK(net, &fl6->saddr);
1603 out:
1604 dst_hold(&rt->dst);
1605
1606 read_unlock_bh(&table->tb6_lock);
1607
1608 return rt;
1609 };
1610
1611 static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1612 const struct in6_addr *src,
1613 const struct in6_addr *gateway,
1614 struct net_device *dev)
1615 {
1616 int flags = RT6_LOOKUP_F_HAS_SADDR;
1617 struct net *net = dev_net(dev);
1618 struct ip6rd_flowi rdfl = {
1619 .fl6 = {
1620 .flowi6_oif = dev->ifindex,
1621 .daddr = *dest,
1622 .saddr = *src,
1623 },
1624 };
1625
1626 rdfl.gateway = *gateway;
1627
1628 if (rt6_need_strict(dest))
1629 flags |= RT6_LOOKUP_F_IFACE;
1630
1631 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
1632 flags, __ip6_route_redirect);
1633 }
1634
1635 void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1636 const struct in6_addr *saddr,
1637 struct neighbour *neigh, u8 *lladdr, int on_link)
1638 {
1639 struct rt6_info *rt, *nrt = NULL;
1640 struct netevent_redirect netevent;
1641 struct net *net = dev_net(neigh->dev);
1642
1643 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1644
1645 if (rt == net->ipv6.ip6_null_entry) {
1646 if (net_ratelimit())
1647 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1648 "for redirect target\n");
1649 goto out;
1650 }
1651
1652 /*
1653 * We have finally decided to accept it.
1654 */
1655
1656 neigh_update(neigh, lladdr, NUD_STALE,
1657 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1658 NEIGH_UPDATE_F_OVERRIDE|
1659 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1660 NEIGH_UPDATE_F_ISROUTER))
1661 );
1662
1663 /*
1664 * Redirect received -> path was valid.
1665 * Look, redirects are sent only in response to data packets,
1666 * so that this nexthop apparently is reachable. --ANK
1667 */
1668 dst_confirm(&rt->dst);
1669
1670 /* Duplicate redirect: silently ignore. */
1671 if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
1672 goto out;
1673
1674 nrt = ip6_rt_copy(rt, dest);
1675 if (!nrt)
1676 goto out;
1677
1678 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1679 if (on_link)
1680 nrt->rt6i_flags &= ~RTF_GATEWAY;
1681
1682 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1683 dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1684
1685 if (ip6_ins_rt(nrt))
1686 goto out;
1687
1688 netevent.old = &rt->dst;
1689 netevent.new = &nrt->dst;
1690 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1691
1692 if (rt->rt6i_flags & RTF_CACHE) {
1693 ip6_del_rt(rt);
1694 return;
1695 }
1696
1697 out:
1698 dst_release(&rt->dst);
1699 }
1700
1701 /*
1702 * Handle ICMP "packet too big" messages
1703 * i.e. Path MTU discovery
1704 */
1705
1706 static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
1707 struct net *net, u32 pmtu, int ifindex)
1708 {
1709 struct rt6_info *rt, *nrt;
1710 int allfrag = 0;
1711 again:
1712 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1713 if (!rt)
1714 return;
1715
1716 if (rt6_check_expired(rt)) {
1717 ip6_del_rt(rt);
1718 goto again;
1719 }
1720
1721 if (pmtu >= dst_mtu(&rt->dst))
1722 goto out;
1723
1724 if (pmtu < IPV6_MIN_MTU) {
1725 /*
1726 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1727 * MTU (1280) and a fragment header should always be included
1728 * after a node receiving Too Big message reporting PMTU is
1729 * less than the IPv6 Minimum Link MTU.
1730 */
1731 pmtu = IPV6_MIN_MTU;
1732 allfrag = 1;
1733 }
1734
1735 /* New mtu received -> path was valid.
1736 They are sent only in response to data packets,
1737 so that this nexthop apparently is reachable. --ANK
1738 */
1739 dst_confirm(&rt->dst);
1740
1741 /* Host route. If it is static, it would be better
1742 not to override it, but add new one, so that
1743 when cache entry will expire old pmtu
1744 would return automatically.
1745 */
1746 if (rt->rt6i_flags & RTF_CACHE) {
1747 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1748 if (allfrag) {
1749 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1750 features |= RTAX_FEATURE_ALLFRAG;
1751 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1752 }
1753 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1754 rt->rt6i_flags |= RTF_MODIFIED;
1755 goto out;
1756 }
1757
1758 /* Network route.
1759 Two cases are possible:
1760 1. It is connected route. Action: COW
1761 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1762 */
1763 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
1764 nrt = rt6_alloc_cow(rt, daddr, saddr);
1765 else
1766 nrt = rt6_alloc_clone(rt, daddr);
1767
1768 if (nrt) {
1769 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1770 if (allfrag) {
1771 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1772 features |= RTAX_FEATURE_ALLFRAG;
1773 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1774 }
1775
1776 /* According to RFC 1981, detecting PMTU increase shouldn't be
1777 * happened within 5 mins, the recommended timer is 10 mins.
1778 * Here this route expiration time is set to ip6_rt_mtu_expires
1779 * which is 10 mins. After 10 mins the decreased pmtu is expired
1780 * and detecting PMTU increase will be automatically happened.
1781 */
1782 rt6_update_expires(nrt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1783 nrt->rt6i_flags |= RTF_DYNAMIC;
1784 ip6_ins_rt(nrt);
1785 }
1786 out:
1787 dst_release(&rt->dst);
1788 }
1789
1790 void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
1791 struct net_device *dev, u32 pmtu)
1792 {
1793 struct net *net = dev_net(dev);
1794
1795 /*
1796 * RFC 1981 states that a node "MUST reduce the size of the packets it
1797 * is sending along the path" that caused the Packet Too Big message.
1798 * Since it's not possible in the general case to determine which
1799 * interface was used to send the original packet, we update the MTU
1800 * on the interface that will be used to send future packets. We also
1801 * update the MTU on the interface that received the Packet Too Big in
1802 * case the original packet was forced out that interface with
1803 * SO_BINDTODEVICE or similar. This is the next best thing to the
1804 * correct behaviour, which would be to update the MTU on all
1805 * interfaces.
1806 */
1807 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1808 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1809 }
1810
1811 /*
1812 * Misc support functions
1813 */
1814
1815 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1816 const struct in6_addr *dest)
1817 {
1818 struct net *net = dev_net(ort->dst.dev);
1819 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
1820 ort->dst.dev, 0);
1821
1822 if (rt) {
1823 rt->dst.input = ort->dst.input;
1824 rt->dst.output = ort->dst.output;
1825 rt->dst.flags |= DST_HOST;
1826
1827 rt->rt6i_dst.addr = *dest;
1828 rt->rt6i_dst.plen = 128;
1829 dst_copy_metrics(&rt->dst, &ort->dst);
1830 rt->dst.error = ort->dst.error;
1831 rt->rt6i_idev = ort->rt6i_idev;
1832 if (rt->rt6i_idev)
1833 in6_dev_hold(rt->rt6i_idev);
1834 rt->dst.lastuse = jiffies;
1835
1836 rt->rt6i_gateway = ort->rt6i_gateway;
1837 rt->rt6i_flags = ort->rt6i_flags;
1838 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1839 (RTF_DEFAULT | RTF_ADDRCONF))
1840 rt6_set_from(rt, ort);
1841 else
1842 rt6_clean_expires(rt);
1843 rt->rt6i_metric = 0;
1844
1845 #ifdef CONFIG_IPV6_SUBTREES
1846 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1847 #endif
1848 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1849 rt->rt6i_table = ort->rt6i_table;
1850 }
1851 return rt;
1852 }
1853
1854 #ifdef CONFIG_IPV6_ROUTE_INFO
1855 static struct rt6_info *rt6_get_route_info(struct net *net,
1856 const struct in6_addr *prefix, int prefixlen,
1857 const struct in6_addr *gwaddr, int ifindex)
1858 {
1859 struct fib6_node *fn;
1860 struct rt6_info *rt = NULL;
1861 struct fib6_table *table;
1862
1863 table = fib6_get_table(net, RT6_TABLE_INFO);
1864 if (!table)
1865 return NULL;
1866
1867 write_lock_bh(&table->tb6_lock);
1868 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1869 if (!fn)
1870 goto out;
1871
1872 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1873 if (rt->dst.dev->ifindex != ifindex)
1874 continue;
1875 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1876 continue;
1877 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1878 continue;
1879 dst_hold(&rt->dst);
1880 break;
1881 }
1882 out:
1883 write_unlock_bh(&table->tb6_lock);
1884 return rt;
1885 }
1886
1887 static struct rt6_info *rt6_add_route_info(struct net *net,
1888 const struct in6_addr *prefix, int prefixlen,
1889 const struct in6_addr *gwaddr, int ifindex,
1890 unsigned pref)
1891 {
1892 struct fib6_config cfg = {
1893 .fc_table = RT6_TABLE_INFO,
1894 .fc_metric = IP6_RT_PRIO_USER,
1895 .fc_ifindex = ifindex,
1896 .fc_dst_len = prefixlen,
1897 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1898 RTF_UP | RTF_PREF(pref),
1899 .fc_nlinfo.pid = 0,
1900 .fc_nlinfo.nlh = NULL,
1901 .fc_nlinfo.nl_net = net,
1902 };
1903
1904 cfg.fc_dst = *prefix;
1905 cfg.fc_gateway = *gwaddr;
1906
1907 /* We should treat it as a default route if prefix length is 0. */
1908 if (!prefixlen)
1909 cfg.fc_flags |= RTF_DEFAULT;
1910
1911 ip6_route_add(&cfg);
1912
1913 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1914 }
1915 #endif
1916
1917 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1918 {
1919 struct rt6_info *rt;
1920 struct fib6_table *table;
1921
1922 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1923 if (!table)
1924 return NULL;
1925
1926 write_lock_bh(&table->tb6_lock);
1927 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1928 if (dev == rt->dst.dev &&
1929 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1930 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1931 break;
1932 }
1933 if (rt)
1934 dst_hold(&rt->dst);
1935 write_unlock_bh(&table->tb6_lock);
1936 return rt;
1937 }
1938
1939 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1940 struct net_device *dev,
1941 unsigned int pref)
1942 {
1943 struct fib6_config cfg = {
1944 .fc_table = RT6_TABLE_DFLT,
1945 .fc_metric = IP6_RT_PRIO_USER,
1946 .fc_ifindex = dev->ifindex,
1947 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1948 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1949 .fc_nlinfo.pid = 0,
1950 .fc_nlinfo.nlh = NULL,
1951 .fc_nlinfo.nl_net = dev_net(dev),
1952 };
1953
1954 cfg.fc_gateway = *gwaddr;
1955
1956 ip6_route_add(&cfg);
1957
1958 return rt6_get_dflt_router(gwaddr, dev);
1959 }
1960
1961 void rt6_purge_dflt_routers(struct net *net)
1962 {
1963 struct rt6_info *rt;
1964 struct fib6_table *table;
1965
1966 /* NOTE: Keep consistent with rt6_get_dflt_router */
1967 table = fib6_get_table(net, RT6_TABLE_DFLT);
1968 if (!table)
1969 return;
1970
1971 restart:
1972 read_lock_bh(&table->tb6_lock);
1973 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1974 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1975 dst_hold(&rt->dst);
1976 read_unlock_bh(&table->tb6_lock);
1977 ip6_del_rt(rt);
1978 goto restart;
1979 }
1980 }
1981 read_unlock_bh(&table->tb6_lock);
1982 }
1983
1984 static void rtmsg_to_fib6_config(struct net *net,
1985 struct in6_rtmsg *rtmsg,
1986 struct fib6_config *cfg)
1987 {
1988 memset(cfg, 0, sizeof(*cfg));
1989
1990 cfg->fc_table = RT6_TABLE_MAIN;
1991 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1992 cfg->fc_metric = rtmsg->rtmsg_metric;
1993 cfg->fc_expires = rtmsg->rtmsg_info;
1994 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1995 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1996 cfg->fc_flags = rtmsg->rtmsg_flags;
1997
1998 cfg->fc_nlinfo.nl_net = net;
1999
2000 cfg->fc_dst = rtmsg->rtmsg_dst;
2001 cfg->fc_src = rtmsg->rtmsg_src;
2002 cfg->fc_gateway = rtmsg->rtmsg_gateway;
2003 }
2004
2005 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2006 {
2007 struct fib6_config cfg;
2008 struct in6_rtmsg rtmsg;
2009 int err;
2010
2011 switch(cmd) {
2012 case SIOCADDRT: /* Add a route */
2013 case SIOCDELRT: /* Delete a route */
2014 if (!capable(CAP_NET_ADMIN))
2015 return -EPERM;
2016 err = copy_from_user(&rtmsg, arg,
2017 sizeof(struct in6_rtmsg));
2018 if (err)
2019 return -EFAULT;
2020
2021 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2022
2023 rtnl_lock();
2024 switch (cmd) {
2025 case SIOCADDRT:
2026 err = ip6_route_add(&cfg);
2027 break;
2028 case SIOCDELRT:
2029 err = ip6_route_del(&cfg);
2030 break;
2031 default:
2032 err = -EINVAL;
2033 }
2034 rtnl_unlock();
2035
2036 return err;
2037 }
2038
2039 return -EINVAL;
2040 }
2041
2042 /*
2043 * Drop the packet on the floor
2044 */
2045
2046 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2047 {
2048 int type;
2049 struct dst_entry *dst = skb_dst(skb);
2050 switch (ipstats_mib_noroutes) {
2051 case IPSTATS_MIB_INNOROUTES:
2052 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2053 if (type == IPV6_ADDR_ANY) {
2054 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2055 IPSTATS_MIB_INADDRERRORS);
2056 break;
2057 }
2058 /* FALLTHROUGH */
2059 case IPSTATS_MIB_OUTNOROUTES:
2060 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2061 ipstats_mib_noroutes);
2062 break;
2063 }
2064 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2065 kfree_skb(skb);
2066 return 0;
2067 }
2068
2069 static int ip6_pkt_discard(struct sk_buff *skb)
2070 {
2071 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2072 }
2073
2074 static int ip6_pkt_discard_out(struct sk_buff *skb)
2075 {
2076 skb->dev = skb_dst(skb)->dev;
2077 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2078 }
2079
2080 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2081
2082 static int ip6_pkt_prohibit(struct sk_buff *skb)
2083 {
2084 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2085 }
2086
2087 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2088 {
2089 skb->dev = skb_dst(skb)->dev;
2090 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2091 }
2092
2093 #endif
2094
2095 /*
2096 * Allocate a dst for local (unicast / anycast) address.
2097 */
2098
2099 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2100 const struct in6_addr *addr,
2101 bool anycast)
2102 {
2103 struct net *net = dev_net(idev->dev);
2104 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
2105 net->loopback_dev, 0);
2106 int err;
2107
2108 if (!rt) {
2109 if (net_ratelimit())
2110 pr_warning("IPv6: Maximum number of routes reached,"
2111 " consider increasing route/max_size.\n");
2112 return ERR_PTR(-ENOMEM);
2113 }
2114
2115 in6_dev_hold(idev);
2116
2117 rt->dst.flags |= DST_HOST;
2118 rt->dst.input = ip6_input;
2119 rt->dst.output = ip6_output;
2120 rt->rt6i_idev = idev;
2121 rt->dst.obsolete = -1;
2122
2123 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2124 if (anycast)
2125 rt->rt6i_flags |= RTF_ANYCAST;
2126 else
2127 rt->rt6i_flags |= RTF_LOCAL;
2128 err = rt6_bind_neighbour(rt, rt->dst.dev);
2129 if (err) {
2130 dst_free(&rt->dst);
2131 return ERR_PTR(err);
2132 }
2133
2134 rt->rt6i_dst.addr = *addr;
2135 rt->rt6i_dst.plen = 128;
2136 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2137
2138 atomic_set(&rt->dst.__refcnt, 1);
2139
2140 return rt;
2141 }
2142
2143 int ip6_route_get_saddr(struct net *net,
2144 struct rt6_info *rt,
2145 const struct in6_addr *daddr,
2146 unsigned int prefs,
2147 struct in6_addr *saddr)
2148 {
2149 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2150 int err = 0;
2151 if (rt->rt6i_prefsrc.plen)
2152 *saddr = rt->rt6i_prefsrc.addr;
2153 else
2154 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2155 daddr, prefs, saddr);
2156 return err;
2157 }
2158
2159 /* remove deleted ip from prefsrc entries */
/* Argument bundle passed to fib6_remove_prefsrc() via fib6_clean_all(). */
struct arg_dev_net_ip {
	struct net_device	*dev;	/* device to match; NULL matches any */
	struct net		*net;	/* namespace owning the null entry */
	struct in6_addr		*addr;	/* address being removed */
};
2165
2166 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2167 {
2168 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2169 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2170 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2171
2172 if (((void *)rt->dst.dev == dev || !dev) &&
2173 rt != net->ipv6.ip6_null_entry &&
2174 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2175 /* remove prefsrc entry */
2176 rt->rt6i_prefsrc.plen = 0;
2177 }
2178 return 0;
2179 }
2180
2181 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2182 {
2183 struct net *net = dev_net(ifp->idev->dev);
2184 struct arg_dev_net_ip adni = {
2185 .dev = ifp->idev->dev,
2186 .net = net,
2187 .addr = &ifp->addr,
2188 };
2189 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2190 }
2191
/* Argument bundle passed to fib6_ifdown(). */
struct arg_dev_net {
	struct net_device	*dev;	/* device going down; NULL matches any */
	struct net		*net;	/* namespace owning the null entry */
};
2196
2197 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2198 {
2199 const struct arg_dev_net *adn = arg;
2200 const struct net_device *dev = adn->dev;
2201
2202 if ((rt->dst.dev == dev || !dev) &&
2203 rt != adn->net->ipv6.ip6_null_entry)
2204 return -1;
2205
2206 return 0;
2207 }
2208
2209 void rt6_ifdown(struct net *net, struct net_device *dev)
2210 {
2211 struct arg_dev_net adn = {
2212 .dev = dev,
2213 .net = net,
2214 };
2215
2216 fib6_clean_all(net, fib6_ifdown, 0, &adn);
2217 icmp6_clean_all(fib6_ifdown, &adn);
2218 }
2219
/* Argument bundle passed to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned int		mtu;	/* its new MTU */
};
2225
2226 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2227 {
2228 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2229 struct inet6_dev *idev;
2230
2231 /* In IPv6 pmtu discovery is not optional,
2232 so that RTAX_MTU lock cannot disable it.
2233 We still use this lock to block changes
2234 caused by addrconf/ndisc.
2235 */
2236
2237 idev = __in6_dev_get(arg->dev);
2238 if (!idev)
2239 return 0;
2240
2241 /* For administrative MTU increase, there is no way to discover
2242 IPv6 PMTU increase, so PMTU increase should be updated here.
2243 Since RFC 1981 doesn't include administrative MTU increase
2244 update PMTU increase is a MUST. (i.e. jumbo frame)
2245 */
2246 /*
2247 If new MTU is less than route PMTU, this new MTU will be the
2248 lowest MTU in the path, update the route PMTU to reflect PMTU
2249 decreases; if new MTU is greater than route PMTU, and the
2250 old MTU is the lowest MTU in the path, update the route PMTU
2251 to reflect the increase. In this case if the other nodes' MTU
2252 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2253 PMTU discouvery.
2254 */
2255 if (rt->dst.dev == arg->dev &&
2256 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2257 (dst_mtu(&rt->dst) >= arg->mtu ||
2258 (dst_mtu(&rt->dst) < arg->mtu &&
2259 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2260 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2261 }
2262 return 0;
2263 }
2264
2265 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2266 {
2267 struct rt6_mtu_change_arg arg = {
2268 .dev = dev,
2269 .mtu = mtu,
2270 };
2271
2272 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2273 }
2274
2275 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2276 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
2277 [RTA_OIF] = { .type = NLA_U32 },
2278 [RTA_IIF] = { .type = NLA_U32 },
2279 [RTA_PRIORITY] = { .type = NLA_U32 },
2280 [RTA_METRICS] = { .type = NLA_NESTED },
2281 };
2282
2283 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2284 struct fib6_config *cfg)
2285 {
2286 struct rtmsg *rtm;
2287 struct nlattr *tb[RTA_MAX+1];
2288 int err;
2289
2290 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2291 if (err < 0)
2292 goto errout;
2293
2294 err = -EINVAL;
2295 rtm = nlmsg_data(nlh);
2296 memset(cfg, 0, sizeof(*cfg));
2297
2298 cfg->fc_table = rtm->rtm_table;
2299 cfg->fc_dst_len = rtm->rtm_dst_len;
2300 cfg->fc_src_len = rtm->rtm_src_len;
2301 cfg->fc_flags = RTF_UP;
2302 cfg->fc_protocol = rtm->rtm_protocol;
2303
2304 if (rtm->rtm_type == RTN_UNREACHABLE)
2305 cfg->fc_flags |= RTF_REJECT;
2306
2307 if (rtm->rtm_type == RTN_LOCAL)
2308 cfg->fc_flags |= RTF_LOCAL;
2309
2310 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2311 cfg->fc_nlinfo.nlh = nlh;
2312 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2313
2314 if (tb[RTA_GATEWAY]) {
2315 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2316 cfg->fc_flags |= RTF_GATEWAY;
2317 }
2318
2319 if (tb[RTA_DST]) {
2320 int plen = (rtm->rtm_dst_len + 7) >> 3;
2321
2322 if (nla_len(tb[RTA_DST]) < plen)
2323 goto errout;
2324
2325 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2326 }
2327
2328 if (tb[RTA_SRC]) {
2329 int plen = (rtm->rtm_src_len + 7) >> 3;
2330
2331 if (nla_len(tb[RTA_SRC]) < plen)
2332 goto errout;
2333
2334 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2335 }
2336
2337 if (tb[RTA_PREFSRC])
2338 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2339
2340 if (tb[RTA_OIF])
2341 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2342
2343 if (tb[RTA_PRIORITY])
2344 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2345
2346 if (tb[RTA_METRICS]) {
2347 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2348 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2349 }
2350
2351 if (tb[RTA_TABLE])
2352 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2353
2354 err = 0;
2355 errout:
2356 return err;
2357 }
2358
2359 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2360 {
2361 struct fib6_config cfg;
2362 int err;
2363
2364 err = rtm_to_fib6_config(skb, nlh, &cfg);
2365 if (err < 0)
2366 return err;
2367
2368 return ip6_route_del(&cfg);
2369 }
2370
2371 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2372 {
2373 struct fib6_config cfg;
2374 int err;
2375
2376 err = rtm_to_fib6_config(skb, nlh, &cfg);
2377 if (err < 0)
2378 return err;
2379
2380 return ip6_route_add(&cfg);
2381 }
2382
2383 static inline size_t rt6_nlmsg_size(void)
2384 {
2385 return NLMSG_ALIGN(sizeof(struct rtmsg))
2386 + nla_total_size(16) /* RTA_SRC */
2387 + nla_total_size(16) /* RTA_DST */
2388 + nla_total_size(16) /* RTA_GATEWAY */
2389 + nla_total_size(16) /* RTA_PREFSRC */
2390 + nla_total_size(4) /* RTA_TABLE */
2391 + nla_total_size(4) /* RTA_IIF */
2392 + nla_total_size(4) /* RTA_OIF */
2393 + nla_total_size(4) /* RTA_PRIORITY */
2394 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2395 + nla_total_size(sizeof(struct rta_cacheinfo));
2396 }
2397
/*
 *	Append one route message describing @rt to @skb.
 *
 *	@dst, @src: when non-NULL they are reported as RTA_DST / RTA_SRC
 *		    with the prefix length forced to 128 (RTA_SRC only with
 *		    CONFIG_IPV6_SUBTREES); when NULL the route's own prefix
 *		    is reported if its length is non-zero.
 *	@iif:	    non-zero adds RTA_IIF, or, with CONFIG_IPV6_MROUTE and a
 *		    multicast route destination, defers to ip6mr_get_route().
 *	@type, @pid, @seq, @flags: passed straight to nlmsg_put().
 *	@prefix:    non-zero means only RTF_PREFIX_RT routes are wanted.
 *	@nowait:    forwarded to ip6mr_get_route().
 *
 *	Returns 1 when @rt is skipped by the @prefix filter, -EMSGSIZE when
 *	the header does not fit or the nla_put_failure path is taken (the
 *	partial message is cancelled there), 0 from the multicast path when
 *	ip6mr_get_route() returns 0 and @nowait is not set, and the
 *	nlmsg_end() result otherwise.
 *
 *	NOTE(review): the NLA_PUT*() macros are presumed to jump to
 *	nla_put_failure when the attribute does not fit - confirm against
 *	their definition.
 */
2398 static int rt6_fill_node(struct net *net,
2399 struct sk_buff *skb, struct rt6_info *rt,
2400 struct in6_addr *dst, struct in6_addr *src,
2401 int iif, int type, u32 pid, u32 seq,
2402 int prefix, int nowait, unsigned int flags)
2403 {
2404 const struct inet_peer *peer;
2405 struct rtmsg *rtm;
2406 struct nlmsghdr *nlh;
2407 long expires;
2408 u32 table;
2409 struct neighbour *n;
2410 u32 ts, tsage;
2411
2412 if (prefix) { /* user wants prefix routes only */
2413 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2414 /* success since this is not a prefix route */
2415 return 1;
2416 }
2417 }
2418
2419 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2420 if (!nlh)
2421 return -EMSGSIZE;
2422
2423 rtm = nlmsg_data(nlh);
2424 rtm->rtm_family = AF_INET6;
2425 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2426 rtm->rtm_src_len = rt->rt6i_src.plen;
2427 rtm->rtm_tos = 0;
/* the table id goes both into the rtmsg header and into RTA_TABLE */
2428 if (rt->rt6i_table)
2429 table = rt->rt6i_table->tb6_id;
2430 else
2431 table = RT6_TABLE_UNSPEC;
2432 rtm->rtm_table = table;
2433 NLA_PUT_U32(skb, RTA_TABLE, table);
/* route type: reject and local flags win, then a loopback output device */
2434 if (rt->rt6i_flags & RTF_REJECT)
2435 rtm->rtm_type = RTN_UNREACHABLE;
2436 else if (rt->rt6i_flags & RTF_LOCAL)
2437 rtm->rtm_type = RTN_LOCAL;
2438 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2439 rtm->rtm_type = RTN_LOCAL;
2440 else
2441 rtm->rtm_type = RTN_UNICAST;
2442 rtm->rtm_flags = 0;
2443 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
/* the stored protocol is overridden when one of these route flags is set */
2444 rtm->rtm_protocol = rt->rt6i_protocol;
2445 if (rt->rt6i_flags & RTF_DYNAMIC)
2446 rtm->rtm_protocol = RTPROT_REDIRECT;
2447 else if (rt->rt6i_flags & RTF_ADDRCONF)
2448 rtm->rtm_protocol = RTPROT_KERNEL;
2449 else if (rt->rt6i_flags & RTF_DEFAULT)
2450 rtm->rtm_protocol = RTPROT_RA;
2451
2452 if (rt->rt6i_flags & RTF_CACHE)
2453 rtm->rtm_flags |= RTM_F_CLONED;
2454
2455 if (dst) {
2456 NLA_PUT(skb, RTA_DST, 16, dst);
2457 rtm->rtm_dst_len = 128;
2458 } else if (rtm->rtm_dst_len)
2459 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2460 #ifdef CONFIG_IPV6_SUBTREES
2461 if (src) {
2462 NLA_PUT(skb, RTA_SRC, 16, src);
2463 rtm->rtm_src_len = 128;
2464 } else if (rtm->rtm_src_len)
2465 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2466 #endif
2467 if (iif) {
2468 #ifdef CONFIG_IPV6_MROUTE
2469 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2470 int err = ip6mr_get_route(net, skb, rtm, nowait);
2471 if (err <= 0) {
2472 if (!nowait) {
/* returns 0 here without nlmsg_end() or nlmsg_cancel() */
2473 if (err == 0)
2474 return 0;
2475 goto nla_put_failure;
2476 } else {
2477 if (err == -EMSGSIZE)
2478 goto nla_put_failure;
2479 }
2480 }
2481 } else
2482 #endif
2483 NLA_PUT_U32(skb, RTA_IIF, iif);
2484 } else if (dst) {
2485 struct in6_addr saddr_buf;
2486 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
2487 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2488 }
2489
/* independent of the branch above, so RTA_PREFSRC may be emitted twice */
2490 if (rt->rt6i_prefsrc.plen) {
2491 struct in6_addr saddr_buf;
2492 saddr_buf = rt->rt6i_prefsrc.addr;
2493 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2494 }
2495
2496 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2497 goto nla_put_failure;
2498
/* the neighbour is looked up without a reference, so stay inside the RCU section */
2499 rcu_read_lock();
2500 n = dst_get_neighbour_noref(&rt->dst);
2501 if (n) {
2502 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
2503 rcu_read_unlock();
2504 goto nla_put_failure;
2505 }
2506 }
2507 rcu_read_unlock();
2508
2509 if (rt->dst.dev)
2510 NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex);
2511
2512 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2513
/* 0 when the route has no RTF_EXPIRES; otherwise time left in jiffies, capped at INT_MAX */
2514 if (!(rt->rt6i_flags & RTF_EXPIRES))
2515 expires = 0;
2516 else if (rt->dst.expires - jiffies < INT_MAX)
2517 expires = rt->dst.expires - jiffies;
2518 else
2519 expires = INT_MAX;
2520
/* TCP timestamp data is reported only when the peer has a non-zero tcp_ts_stamp */
2521 peer = rt->rt6i_peer;
2522 ts = tsage = 0;
2523 if (peer && peer->tcp_ts_stamp) {
2524 ts = peer->tcp_ts;
2525 tsage = get_seconds() - peer->tcp_ts_stamp;
2526 }
2527
2528 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage,
2529 expires, rt->dst.error) < 0)
2530 goto nla_put_failure;
2531
2532 return nlmsg_end(skb, nlh);
2533
2534 nla_put_failure:
2535 nlmsg_cancel(skb, nlh);
2536 return -EMSGSIZE;
2537 }
2538
2539 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2540 {
2541 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2542 int prefix;
2543
2544 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2545 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2546 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2547 } else
2548 prefix = 0;
2549
2550 return rt6_fill_node(arg->net,
2551 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2552 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2553 prefix, 0, NLM_F_MULTI);
2554 }
2555
2556 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2557 {
2558 struct net *net = sock_net(in_skb->sk);
2559 struct nlattr *tb[RTA_MAX+1];
2560 struct rt6_info *rt;
2561 struct sk_buff *skb;
2562 struct rtmsg *rtm;
2563 struct flowi6 fl6;
2564 int err, iif = 0, oif = 0;
2565
2566 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2567 if (err < 0)
2568 goto errout;
2569
2570 err = -EINVAL;
2571 memset(&fl6, 0, sizeof(fl6));
2572
2573 if (tb[RTA_SRC]) {
2574 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2575 goto errout;
2576
2577 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2578 }
2579
2580 if (tb[RTA_DST]) {
2581 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2582 goto errout;
2583
2584 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2585 }
2586
2587 if (tb[RTA_IIF])
2588 iif = nla_get_u32(tb[RTA_IIF]);
2589
2590 if (tb[RTA_OIF])
2591 oif = nla_get_u32(tb[RTA_OIF]);
2592
2593 if (iif) {
2594 struct net_device *dev;
2595 int flags = 0;
2596
2597 dev = __dev_get_by_index(net, iif);
2598 if (!dev) {
2599 err = -ENODEV;
2600 goto errout;
2601 }
2602
2603 fl6.flowi6_iif = iif;
2604
2605 if (!ipv6_addr_any(&fl6.saddr))
2606 flags |= RT6_LOOKUP_F_HAS_SADDR;
2607
2608 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2609 flags);
2610 } else {
2611 fl6.flowi6_oif = oif;
2612
2613 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2614 }
2615
2616 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2617 if (!skb) {
2618 err = -ENOBUFS;
2619 goto errout;
2620 }
2621
2622 /* Reserve room for dummy headers, this skb can pass
2623 through good chunk of routing engine.
2624 */
2625 skb_reset_mac_header(skb);
2626 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2627
2628 skb_dst_set(skb, &rt->dst);
2629
2630 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2631 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2632 nlh->nlmsg_seq, 0, 0, 0);
2633 if (err < 0) {
2634 kfree_skb(skb);
2635 goto errout;
2636 }
2637
2638 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2639 errout:
2640 return err;
2641 }
2642
2643 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2644 {
2645 struct sk_buff *skb;
2646 struct net *net = info->nl_net;
2647 u32 seq;
2648 int err;
2649
2650 err = -ENOBUFS;
2651 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2652
2653 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2654 if (!skb)
2655 goto errout;
2656
2657 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2658 event, info->pid, seq, 0, 0, 0);
2659 if (err < 0) {
2660 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2661 WARN_ON(err == -EMSGSIZE);
2662 kfree_skb(skb);
2663 goto errout;
2664 }
2665 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2666 info->nlh, gfp_any());
2667 return;
2668 errout:
2669 if (err < 0)
2670 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2671 }
2672
2673 static int ip6_route_dev_notify(struct notifier_block *this,
2674 unsigned long event, void *data)
2675 {
2676 struct net_device *dev = (struct net_device *)data;
2677 struct net *net = dev_net(dev);
2678
2679 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2680 net->ipv6.ip6_null_entry->dst.dev = dev;
2681 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2682 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2683 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2684 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2685 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2686 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2687 #endif
2688 }
2689
2690 return NOTIFY_OK;
2691 }
2692
2693 /*
2694 * /proc
2695 */
2696
2697 #ifdef CONFIG_PROC_FS
2698
/*
 * NOTE(review): no code in the visible part of this file references
 * this structure - confirm whether it is still needed.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2707
2708 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2709 {
2710 struct seq_file *m = p_arg;
2711 struct neighbour *n;
2712
2713 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2714
2715 #ifdef CONFIG_IPV6_SUBTREES
2716 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2717 #else
2718 seq_puts(m, "00000000000000000000000000000000 00 ");
2719 #endif
2720 rcu_read_lock();
2721 n = dst_get_neighbour_noref(&rt->dst);
2722 if (n) {
2723 seq_printf(m, "%pi6", n->primary_key);
2724 } else {
2725 seq_puts(m, "00000000000000000000000000000000");
2726 }
2727 rcu_read_unlock();
2728 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2729 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2730 rt->dst.__use, rt->rt6i_flags,
2731 rt->dst.dev ? rt->dst.dev->name : "");
2732 return 0;
2733 }
2734
2735 static int ipv6_route_show(struct seq_file *m, void *v)
2736 {
2737 struct net *net = (struct net *)m->private;
2738 fib6_clean_all_ro(net, rt6_info_route, 0, m);
2739 return 0;
2740 }
2741
2742 static int ipv6_route_open(struct inode *inode, struct file *file)
2743 {
2744 return single_open_net(inode, file, ipv6_route_show);
2745 }
2746
2747 static const struct file_operations ipv6_route_proc_fops = {
2748 .owner = THIS_MODULE,
2749 .open = ipv6_route_open,
2750 .read = seq_read,
2751 .llseek = seq_lseek,
2752 .release = single_release_net,
2753 };
2754
2755 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2756 {
2757 struct net *net = (struct net *)seq->private;
2758 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2759 net->ipv6.rt6_stats->fib_nodes,
2760 net->ipv6.rt6_stats->fib_route_nodes,
2761 net->ipv6.rt6_stats->fib_rt_alloc,
2762 net->ipv6.rt6_stats->fib_rt_entries,
2763 net->ipv6.rt6_stats->fib_rt_cache,
2764 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2765 net->ipv6.rt6_stats->fib_discarded_routes);
2766
2767 return 0;
2768 }
2769
2770 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2771 {
2772 return single_open_net(inode, file, rt6_stats_seq_show);
2773 }
2774
2775 static const struct file_operations rt6_stats_seq_fops = {
2776 .owner = THIS_MODULE,
2777 .open = rt6_stats_seq_open,
2778 .read = seq_read,
2779 .llseek = seq_lseek,
2780 .release = single_release_net,
2781 };
2782 #endif /* CONFIG_PROC_FS */
2783
2784 #ifdef CONFIG_SYSCTL
2785
2786 static
2787 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2788 void __user *buffer, size_t *lenp, loff_t *ppos)
2789 {
2790 struct net *net;
2791 int delay;
2792 if (!write)
2793 return -EINVAL;
2794
2795 net = (struct net *)ctl->extra1;
2796 delay = net->ipv6.sysctl.flush_delay;
2797 proc_dointvec(ctl, write, buffer, lenp, ppos);
2798 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2799 return 0;
2800 }
2801
2802 ctl_table ipv6_route_table_template[] = {
2803 {
2804 .procname = "flush",
2805 .data = &init_net.ipv6.sysctl.flush_delay,
2806 .maxlen = sizeof(int),
2807 .mode = 0200,
2808 .proc_handler = ipv6_sysctl_rtcache_flush
2809 },
2810 {
2811 .procname = "gc_thresh",
2812 .data = &ip6_dst_ops_template.gc_thresh,
2813 .maxlen = sizeof(int),
2814 .mode = 0644,
2815 .proc_handler = proc_dointvec,
2816 },
2817 {
2818 .procname = "max_size",
2819 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2820 .maxlen = sizeof(int),
2821 .mode = 0644,
2822 .proc_handler = proc_dointvec,
2823 },
2824 {
2825 .procname = "gc_min_interval",
2826 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2827 .maxlen = sizeof(int),
2828 .mode = 0644,
2829 .proc_handler = proc_dointvec_jiffies,
2830 },
2831 {
2832 .procname = "gc_timeout",
2833 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2834 .maxlen = sizeof(int),
2835 .mode = 0644,
2836 .proc_handler = proc_dointvec_jiffies,
2837 },
2838 {
2839 .procname = "gc_interval",
2840 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2841 .maxlen = sizeof(int),
2842 .mode = 0644,
2843 .proc_handler = proc_dointvec_jiffies,
2844 },
2845 {
2846 .procname = "gc_elasticity",
2847 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2848 .maxlen = sizeof(int),
2849 .mode = 0644,
2850 .proc_handler = proc_dointvec,
2851 },
2852 {
2853 .procname = "mtu_expires",
2854 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2855 .maxlen = sizeof(int),
2856 .mode = 0644,
2857 .proc_handler = proc_dointvec_jiffies,
2858 },
2859 {
2860 .procname = "min_adv_mss",
2861 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2862 .maxlen = sizeof(int),
2863 .mode = 0644,
2864 .proc_handler = proc_dointvec,
2865 },
2866 {
2867 .procname = "gc_min_interval_ms",
2868 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2869 .maxlen = sizeof(int),
2870 .mode = 0644,
2871 .proc_handler = proc_dointvec_ms_jiffies,
2872 },
2873 { }
2874 };
2875
2876 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2877 {
2878 struct ctl_table *table;
2879
2880 table = kmemdup(ipv6_route_table_template,
2881 sizeof(ipv6_route_table_template),
2882 GFP_KERNEL);
2883
2884 if (table) {
2885 table[0].data = &net->ipv6.sysctl.flush_delay;
2886 table[0].extra1 = net;
2887 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2888 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2889 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2890 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2891 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2892 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2893 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2894 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2895 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2896 }
2897
2898 return table;
2899 }
2900 #endif
2901
2902 static int __net_init ip6_route_net_init(struct net *net)
2903 {
2904 int ret = -ENOMEM;
2905
2906 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2907 sizeof(net->ipv6.ip6_dst_ops));
2908
2909 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2910 goto out_ip6_dst_ops;
2911
2912 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2913 sizeof(*net->ipv6.ip6_null_entry),
2914 GFP_KERNEL);
2915 if (!net->ipv6.ip6_null_entry)
2916 goto out_ip6_dst_entries;
2917 net->ipv6.ip6_null_entry->dst.path =
2918 (struct dst_entry *)net->ipv6.ip6_null_entry;
2919 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2920 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2921 ip6_template_metrics, true);
2922
2923 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2924 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2925 sizeof(*net->ipv6.ip6_prohibit_entry),
2926 GFP_KERNEL);
2927 if (!net->ipv6.ip6_prohibit_entry)
2928 goto out_ip6_null_entry;
2929 net->ipv6.ip6_prohibit_entry->dst.path =
2930 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2931 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2932 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2933 ip6_template_metrics, true);
2934
2935 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2936 sizeof(*net->ipv6.ip6_blk_hole_entry),
2937 GFP_KERNEL);
2938 if (!net->ipv6.ip6_blk_hole_entry)
2939 goto out_ip6_prohibit_entry;
2940 net->ipv6.ip6_blk_hole_entry->dst.path =
2941 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2942 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2943 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2944 ip6_template_metrics, true);
2945 #endif
2946
2947 net->ipv6.sysctl.flush_delay = 0;
2948 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2949 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2950 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2951 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2952 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2953 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2954 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2955
2956 #ifdef CONFIG_PROC_FS
2957 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2958 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2959 #endif
2960 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2961
2962 ret = 0;
2963 out:
2964 return ret;
2965
2966 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2967 out_ip6_prohibit_entry:
2968 kfree(net->ipv6.ip6_prohibit_entry);
2969 out_ip6_null_entry:
2970 kfree(net->ipv6.ip6_null_entry);
2971 #endif
2972 out_ip6_dst_entries:
2973 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2974 out_ip6_dst_ops:
2975 goto out;
2976 }
2977
2978 static void __net_exit ip6_route_net_exit(struct net *net)
2979 {
2980 #ifdef CONFIG_PROC_FS
2981 proc_net_remove(net, "ipv6_route");
2982 proc_net_remove(net, "rt6_stats");
2983 #endif
2984 kfree(net->ipv6.ip6_null_entry);
2985 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2986 kfree(net->ipv6.ip6_prohibit_entry);
2987 kfree(net->ipv6.ip6_blk_hole_entry);
2988 #endif
2989 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2990 }
2991
2992 static struct pernet_operations ip6_route_net_ops = {
2993 .init = ip6_route_net_init,
2994 .exit = ip6_route_net_exit,
2995 };
2996
2997 static struct notifier_block ip6_route_dev_notifier = {
2998 .notifier_call = ip6_route_dev_notify,
2999 .priority = 0,
3000 };
3001
3002 int __init ip6_route_init(void)
3003 {
3004 int ret;
3005
3006 ret = -ENOMEM;
3007 ip6_dst_ops_template.kmem_cachep =
3008 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3009 SLAB_HWCACHE_ALIGN, NULL);
3010 if (!ip6_dst_ops_template.kmem_cachep)
3011 goto out;
3012
3013 ret = dst_entries_init(&ip6_dst_blackhole_ops);
3014 if (ret)
3015 goto out_kmem_cache;
3016
3017 ret = register_pernet_subsys(&ip6_route_net_ops);
3018 if (ret)
3019 goto out_dst_entries;
3020
3021 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3022
3023 /* Registering of the loopback is done before this portion of code,
3024 * the loopback reference in rt6_info will not be taken, do it
3025 * manually for init_net */
3026 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3027 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3028 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3029 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3030 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3031 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3032 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3033 #endif
3034 ret = fib6_init();
3035 if (ret)
3036 goto out_register_subsys;
3037
3038 ret = xfrm6_init();
3039 if (ret)
3040 goto out_fib6_init;
3041
3042 ret = fib6_rules_init();
3043 if (ret)
3044 goto xfrm6_init;
3045
3046 ret = -ENOBUFS;
3047 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3048 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3049 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3050 goto fib6_rules_init;
3051
3052 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3053 if (ret)
3054 goto fib6_rules_init;
3055
3056 out:
3057 return ret;
3058
3059 fib6_rules_init:
3060 fib6_rules_cleanup();
3061 xfrm6_init:
3062 xfrm6_fini();
3063 out_fib6_init:
3064 fib6_gc_cleanup();
3065 out_register_subsys:
3066 unregister_pernet_subsys(&ip6_route_net_ops);
3067 out_dst_entries:
3068 dst_entries_destroy(&ip6_dst_blackhole_ops);
3069 out_kmem_cache:
3070 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3071 goto out;
3072 }
3073
/*
 *	Tear down what ip6_route_init() set up: the netdevice notifier is
 *	unregistered first, then the remaining steps run in the same order
 *	as the error unwinding at the end of ip6_route_init().
 */
3074 void ip6_route_cleanup(void)
3075 {
3076 unregister_netdevice_notifier(&ip6_route_dev_notifier);
3077 fib6_rules_cleanup();
3078 xfrm6_fini();
3079 fib6_gc_cleanup();
3080 unregister_pernet_subsys(&ip6_route_net_ops);
3081 dst_entries_destroy(&ip6_dst_blackhole_ops);
/* the slab cache goes last, after everything above has been shut down */
3082 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3083 }
This page took 0.186083 seconds and 5 git commands to generate.