net: remove k{un}map_skb_frag()
[deliverable/linux.git] / net / ipv6 / route.c
1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14 /* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
23 * Ville Nuorvala
24 * Fixed routing subtrees.
25 */
26
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/export.h>
30 #include <linux/types.h>
31 #include <linux/times.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/route.h>
36 #include <linux/netdevice.h>
37 #include <linux/in6.h>
38 #include <linux/mroute6.h>
39 #include <linux/init.h>
40 #include <linux/if_arp.h>
41 #include <linux/proc_fs.h>
42 #include <linux/seq_file.h>
43 #include <linux/nsproxy.h>
44 #include <linux/slab.h>
45 #include <net/net_namespace.h>
46 #include <net/snmp.h>
47 #include <net/ipv6.h>
48 #include <net/ip6_fib.h>
49 #include <net/ip6_route.h>
50 #include <net/ndisc.h>
51 #include <net/addrconf.h>
52 #include <net/tcp.h>
53 #include <linux/rtnetlink.h>
54 #include <net/dst.h>
55 #include <net/xfrm.h>
56 #include <net/netevent.h>
57 #include <net/netlink.h>
58
59 #include <asm/uaccess.h>
60
61 #ifdef CONFIG_SYSCTL
62 #include <linux/sysctl.h>
63 #endif
64
65 static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
66 const struct in6_addr *dest);
67 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
68 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
69 static unsigned int ip6_mtu(const struct dst_entry *dst);
70 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
71 static void ip6_dst_destroy(struct dst_entry *);
72 static void ip6_dst_ifdown(struct dst_entry *,
73 struct net_device *dev, int how);
74 static int ip6_dst_gc(struct dst_ops *ops);
75
76 static int ip6_pkt_discard(struct sk_buff *skb);
77 static int ip6_pkt_discard_out(struct sk_buff *skb);
78 static void ip6_link_failure(struct sk_buff *skb);
79 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
80
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Helpers used by rt6_route_rcv() to look up and create routes described
 * by a received route information option.
 */
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   int ifindex);
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   int ifindex,
					   unsigned int pref);
#endif
90
91 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
92 {
93 struct rt6_info *rt = (struct rt6_info *) dst;
94 struct inet_peer *peer;
95 u32 *p = NULL;
96
97 if (!(rt->dst.flags & DST_HOST))
98 return NULL;
99
100 if (!rt->rt6i_peer)
101 rt6_bind_peer(rt, 1);
102
103 peer = rt->rt6i_peer;
104 if (peer) {
105 u32 *old_p = __DST_METRICS_PTR(old);
106 unsigned long prev, new;
107
108 p = peer->metrics;
109 if (inet_metrics_new(peer))
110 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
111
112 new = (unsigned long) p;
113 prev = cmpxchg(&dst->_metrics, old, new);
114
115 if (prev != old) {
116 p = __DST_METRICS_PTR(prev);
117 if (prev & DST_METRICS_READ_ONLY)
118 p = NULL;
119 }
120 }
121 return p;
122 }
123
124 static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *daddr)
125 {
126 struct in6_addr *p = &rt->rt6i_gateway;
127
128 if (!ipv6_addr_any(p))
129 return (const void *) p;
130 return daddr;
131 }
132
133 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
134 {
135 struct rt6_info *rt = (struct rt6_info *) dst;
136 struct neighbour *n;
137
138 daddr = choose_neigh_daddr(rt, daddr);
139 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
140 if (n)
141 return n;
142 return neigh_create(&nd_tbl, daddr, dst->dev);
143 }
144
145 static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
146 {
147 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
148 if (!n) {
149 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
150 if (IS_ERR(n))
151 return PTR_ERR(n);
152 }
153 dst_set_neighbour(&rt->dst, n);
154
155 return 0;
156 }
157
158 static struct dst_ops ip6_dst_ops_template = {
159 .family = AF_INET6,
160 .protocol = cpu_to_be16(ETH_P_IPV6),
161 .gc = ip6_dst_gc,
162 .gc_thresh = 1024,
163 .check = ip6_dst_check,
164 .default_advmss = ip6_default_advmss,
165 .mtu = ip6_mtu,
166 .cow_metrics = ipv6_cow_metrics,
167 .destroy = ip6_dst_destroy,
168 .ifdown = ip6_dst_ifdown,
169 .negative_advice = ip6_negative_advice,
170 .link_failure = ip6_link_failure,
171 .update_pmtu = ip6_rt_update_pmtu,
172 .local_out = __ip6_local_out,
173 .neigh_lookup = ip6_neigh_lookup,
174 };
175
176 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
177 {
178 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
179
180 return mtu ? : dst->dev->mtu;
181 }
182
183 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
184 {
185 }
186
187 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
188 unsigned long old)
189 {
190 return NULL;
191 }
192
193 static struct dst_ops ip6_dst_blackhole_ops = {
194 .family = AF_INET6,
195 .protocol = cpu_to_be16(ETH_P_IPV6),
196 .destroy = ip6_dst_destroy,
197 .check = ip6_dst_check,
198 .mtu = ip6_blackhole_mtu,
199 .default_advmss = ip6_default_advmss,
200 .update_pmtu = ip6_rt_blackhole_update_pmtu,
201 .cow_metrics = ip6_rt_blackhole_cow_metrics,
202 .neigh_lookup = ip6_neigh_lookup,
203 };
204
205 static const u32 ip6_template_metrics[RTAX_MAX] = {
206 [RTAX_HOPLIMIT - 1] = 255,
207 };
208
209 static struct rt6_info ip6_null_entry_template = {
210 .dst = {
211 .__refcnt = ATOMIC_INIT(1),
212 .__use = 1,
213 .obsolete = -1,
214 .error = -ENETUNREACH,
215 .input = ip6_pkt_discard,
216 .output = ip6_pkt_discard_out,
217 },
218 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
219 .rt6i_protocol = RTPROT_KERNEL,
220 .rt6i_metric = ~(u32) 0,
221 .rt6i_ref = ATOMIC_INIT(1),
222 };
223
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/* Handlers installed in the prohibit template below. */
static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Template for the "prohibit" route: a reject route carrying -EACCES whose
 * handlers are the ip6_pkt_prohibit pair.
 */
static struct rt6_info ip6_prohibit_entry_template = {
	.rt6i_ref	= ATOMIC_INIT(1),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.dst = {
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
		.error		= -EACCES,
		.obsolete	= -1,
		.__use		= 1,
		.__refcnt	= ATOMIC_INIT(1),
	},
};

/*
 * Template for the "blackhole" route: a reject route carrying -EINVAL that
 * hands every packet to dst_discard in both directions.
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.rt6i_ref	= ATOMIC_INIT(1),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.dst = {
		.input		= dst_discard,
		.output		= dst_discard,
		.error		= -EINVAL,
		.obsolete	= -1,
		.__use		= 1,
		.__refcnt	= ATOMIC_INIT(1),
	},
};

#endif
260
261 /* allocate dst with ip6_dst_ops */
262 static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
263 struct net_device *dev,
264 int flags)
265 {
266 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
267
268 if (rt)
269 memset(&rt->rt6i_table, 0,
270 sizeof(*rt) - sizeof(struct dst_entry));
271
272 return rt;
273 }
274
275 static void ip6_dst_destroy(struct dst_entry *dst)
276 {
277 struct rt6_info *rt = (struct rt6_info *)dst;
278 struct inet6_dev *idev = rt->rt6i_idev;
279 struct inet_peer *peer = rt->rt6i_peer;
280
281 if (!(rt->dst.flags & DST_HOST))
282 dst_destroy_metrics_generic(dst);
283
284 if (idev) {
285 rt->rt6i_idev = NULL;
286 in6_dev_put(idev);
287 }
288 if (peer) {
289 rt->rt6i_peer = NULL;
290 inet_putpeer(peer);
291 }
292 }
293
294 static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
295
296 static u32 rt6_peer_genid(void)
297 {
298 return atomic_read(&__rt6_peer_genid);
299 }
300
301 void rt6_bind_peer(struct rt6_info *rt, int create)
302 {
303 struct inet_peer *peer;
304
305 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
306 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
307 inet_putpeer(peer);
308 else
309 rt->rt6i_peer_genid = rt6_peer_genid();
310 }
311
312 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
313 int how)
314 {
315 struct rt6_info *rt = (struct rt6_info *)dst;
316 struct inet6_dev *idev = rt->rt6i_idev;
317 struct net_device *loopback_dev =
318 dev_net(dev)->loopback_dev;
319
320 if (dev != loopback_dev && idev && idev->dev == dev) {
321 struct inet6_dev *loopback_idev =
322 in6_dev_get(loopback_dev);
323 if (loopback_idev) {
324 rt->rt6i_idev = loopback_idev;
325 in6_dev_put(idev);
326 }
327 }
328 }
329
330 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
331 {
332 return (rt->rt6i_flags & RTF_EXPIRES) &&
333 time_after(jiffies, rt->dst.expires);
334 }
335
336 static inline int rt6_need_strict(const struct in6_addr *daddr)
337 {
338 return ipv6_addr_type(daddr) &
339 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
340 }
341
342 /*
343 * Route lookup. Any table->tb6_lock is implied.
344 */
345
346 static inline struct rt6_info *rt6_device_match(struct net *net,
347 struct rt6_info *rt,
348 const struct in6_addr *saddr,
349 int oif,
350 int flags)
351 {
352 struct rt6_info *local = NULL;
353 struct rt6_info *sprt;
354
355 if (!oif && ipv6_addr_any(saddr))
356 goto out;
357
358 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
359 struct net_device *dev = sprt->dst.dev;
360
361 if (oif) {
362 if (dev->ifindex == oif)
363 return sprt;
364 if (dev->flags & IFF_LOOPBACK) {
365 if (!sprt->rt6i_idev ||
366 sprt->rt6i_idev->dev->ifindex != oif) {
367 if (flags & RT6_LOOKUP_F_IFACE && oif)
368 continue;
369 if (local && (!oif ||
370 local->rt6i_idev->dev->ifindex == oif))
371 continue;
372 }
373 local = sprt;
374 }
375 } else {
376 if (ipv6_chk_addr(net, saddr, dev,
377 flags & RT6_LOOKUP_F_IFACE))
378 return sprt;
379 }
380 }
381
382 if (oif) {
383 if (local)
384 return local;
385
386 if (flags & RT6_LOOKUP_F_IFACE)
387 return net->ipv6.ip6_null_entry;
388 }
389 out:
390 return rt;
391 }
392
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * Okay, this does not seem to be appropriate for now, however, we need to
 * check if it is really so.  Router Reachability Probe MUST be
 * rate-limited to no more than one per minute.
 *
 * If @rt has a neighbour entry that is not NUD_VALID and more than
 * rtr_probe_interval jiffies have passed since neigh->updated, stamp the
 * entry with the current time and send a neighbour solicitation for its
 * key to the corresponding solicited-node multicast address.
 * A NULL @rt is accepted and ignored.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	int probe_due;

	rcu_read_lock();
	neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;
	if (!neigh || (neigh->nud_state & NUD_VALID))
		goto out;

	/* Re-check the state and the rate limit under the neighbour lock. */
	read_lock_bh(&neigh->lock);
	probe_due = !(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (probe_due)
		neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	if (probe_due) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
	}
out:
	rcu_read_unlock();
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF no probing is done. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
432
433 /*
434 * Default Router Selection (RFC 2461 6.3.6)
435 */
436 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
437 {
438 struct net_device *dev = rt->dst.dev;
439 if (!oif || dev->ifindex == oif)
440 return 2;
441 if ((dev->flags & IFF_LOOPBACK) &&
442 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
443 return 1;
444 return 0;
445 }
446
447 static inline int rt6_check_neigh(struct rt6_info *rt)
448 {
449 struct neighbour *neigh;
450 int m;
451
452 rcu_read_lock();
453 neigh = dst_get_neighbour_noref(&rt->dst);
454 if (rt->rt6i_flags & RTF_NONEXTHOP ||
455 !(rt->rt6i_flags & RTF_GATEWAY))
456 m = 1;
457 else if (neigh) {
458 read_lock_bh(&neigh->lock);
459 if (neigh->nud_state & NUD_VALID)
460 m = 2;
461 #ifdef CONFIG_IPV6_ROUTER_PREF
462 else if (neigh->nud_state & NUD_FAILED)
463 m = 0;
464 #endif
465 else
466 m = 1;
467 read_unlock_bh(&neigh->lock);
468 } else
469 m = 0;
470 rcu_read_unlock();
471 return m;
472 }
473
474 static int rt6_score_route(struct rt6_info *rt, int oif,
475 int strict)
476 {
477 int m, n;
478
479 m = rt6_check_dev(rt, oif);
480 if (!m && (strict & RT6_LOOKUP_F_IFACE))
481 return -1;
482 #ifdef CONFIG_IPV6_ROUTER_PREF
483 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
484 #endif
485 n = rt6_check_neigh(rt);
486 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
487 return -1;
488 return m;
489 }
490
491 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
492 int *mpri, struct rt6_info *match)
493 {
494 int m;
495
496 if (rt6_check_expired(rt))
497 goto out;
498
499 m = rt6_score_route(rt, oif, strict);
500 if (m < 0)
501 goto out;
502
503 if (m > *mpri) {
504 if (strict & RT6_LOOKUP_F_REACHABLE)
505 rt6_probe(match);
506 *mpri = m;
507 match = rt;
508 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
509 rt6_probe(rt);
510 }
511
512 out:
513 return match;
514 }
515
516 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
517 struct rt6_info *rr_head,
518 u32 metric, int oif, int strict)
519 {
520 struct rt6_info *rt, *match;
521 int mpri = -1;
522
523 match = NULL;
524 for (rt = rr_head; rt && rt->rt6i_metric == metric;
525 rt = rt->dst.rt6_next)
526 match = find_match(rt, oif, strict, &mpri, match);
527 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
528 rt = rt->dst.rt6_next)
529 match = find_match(rt, oif, strict, &mpri, match);
530
531 return match;
532 }
533
534 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
535 {
536 struct rt6_info *match, *rt0;
537 struct net *net;
538
539 rt0 = fn->rr_ptr;
540 if (!rt0)
541 fn->rr_ptr = rt0 = fn->leaf;
542
543 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
544
545 if (!match &&
546 (strict & RT6_LOOKUP_F_REACHABLE)) {
547 struct rt6_info *next = rt0->dst.rt6_next;
548
549 /* no entries matched; do round-robin */
550 if (!next || next->rt6i_metric != rt0->rt6i_metric)
551 next = fn->leaf;
552
553 if (next != rt0)
554 fn->rr_ptr = next;
555 }
556
557 net = dev_net(rt0->dst.dev);
558 return match ? match : net->ipv6.ip6_null_entry;
559 }
560
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a received route information option.
 *
 * @dev:    device the option arrived on
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the router that sent it
 *
 * The option is validated, then the matching route is deleted (lifetime
 * 0), created, or has its preference and expiry refreshed.
 *
 * Returns 0 on success and -EINVAL for a malformed option or an invalid
 * preference value.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3)
		return -EINVAL;
	if (rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* A zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt) {
		if (lifetime)
			rt = rt6_add_route_info(net, prefix, rinfo->prefix_len,
						gwaddr, dev->ifindex, pref);
	} else {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
	}

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime)) {
		rt->dst.expires = jiffies + HZ * lifetime;
		rt->rt6i_flags |= RTF_EXPIRES;
	} else {
		rt->rt6i_flags &= ~RTF_EXPIRES;
	}
	dst_release(&rt->dst);

	return 0;
}
#endif
634
/*
 * BACKTRACK - move up the fib6 tree when the current node produced the
 * null entry.
 *
 * Must be expanded inside a function that has the locals 'rt' and 'fn'
 * and the labels 'out' and 'restart'.  Starting at 'fn' it walks towards
 * the root, descending into a parent's subtree (looked up by @saddr) when
 * there is one that is not the node just left.  It jumps to 'restart' at
 * the first node flagged RTN_RTINFO and to 'out' once a node flagged
 * RTN_TL_ROOT is reached.  Does nothing when 'rt' is not the null entry.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == __net->ipv6.ip6_null_entry) {				\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (!FIB6_SUBTREE(pn) || FIB6_SUBTREE(pn) == fn) \
				fn = pn;				\
			else						\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
652
653 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
654 struct fib6_table *table,
655 struct flowi6 *fl6, int flags)
656 {
657 struct fib6_node *fn;
658 struct rt6_info *rt;
659
660 read_lock_bh(&table->tb6_lock);
661 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
662 restart:
663 rt = fn->leaf;
664 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
665 BACKTRACK(net, &fl6->saddr);
666 out:
667 dst_use(&rt->dst, jiffies);
668 read_unlock_bh(&table->tb6_lock);
669 return rt;
670
671 }
672
673 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
674 int flags)
675 {
676 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
677 }
678 EXPORT_SYMBOL_GPL(ip6_route_lookup);
679
680 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
681 const struct in6_addr *saddr, int oif, int strict)
682 {
683 struct flowi6 fl6 = {
684 .flowi6_oif = oif,
685 .daddr = *daddr,
686 };
687 struct dst_entry *dst;
688 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
689
690 if (saddr) {
691 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
692 flags |= RT6_LOOKUP_F_HAS_SADDR;
693 }
694
695 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
696 if (dst->error == 0)
697 return (struct rt6_info *) dst;
698
699 dst_release(dst);
700
701 return NULL;
702 }
703
704 EXPORT_SYMBOL(rt6_lookup);
705
706 /* ip6_ins_rt is called with FREE table->tb6_lock.
707 It takes new route entry, the addition fails by any reason the
708 route is freed. In any case, if caller does not hold it, it may
709 be destroyed.
710 */
711
712 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
713 {
714 int err;
715 struct fib6_table *table;
716
717 table = rt->rt6i_table;
718 write_lock_bh(&table->tb6_lock);
719 err = fib6_add(&table->tb6_root, rt, info);
720 write_unlock_bh(&table->tb6_lock);
721
722 return err;
723 }
724
725 int ip6_ins_rt(struct rt6_info *rt)
726 {
727 struct nl_info info = {
728 .nl_net = dev_net(rt->dst.dev),
729 };
730 return __ip6_ins_rt(rt, &info);
731 }
732
733 static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
734 const struct in6_addr *daddr,
735 const struct in6_addr *saddr)
736 {
737 struct rt6_info *rt;
738
739 /*
740 * Clone the route.
741 */
742
743 rt = ip6_rt_copy(ort, daddr);
744
745 if (rt) {
746 int attempts = !in_softirq();
747
748 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
749 if (ort->rt6i_dst.plen != 128 &&
750 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
751 rt->rt6i_flags |= RTF_ANYCAST;
752 rt->rt6i_gateway = *daddr;
753 }
754
755 rt->rt6i_flags |= RTF_CACHE;
756
757 #ifdef CONFIG_IPV6_SUBTREES
758 if (rt->rt6i_src.plen && saddr) {
759 rt->rt6i_src.addr = *saddr;
760 rt->rt6i_src.plen = 128;
761 }
762 #endif
763
764 retry:
765 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
766 struct net *net = dev_net(rt->dst.dev);
767 int saved_rt_min_interval =
768 net->ipv6.sysctl.ip6_rt_gc_min_interval;
769 int saved_rt_elasticity =
770 net->ipv6.sysctl.ip6_rt_gc_elasticity;
771
772 if (attempts-- > 0) {
773 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
774 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
775
776 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
777
778 net->ipv6.sysctl.ip6_rt_gc_elasticity =
779 saved_rt_elasticity;
780 net->ipv6.sysctl.ip6_rt_gc_min_interval =
781 saved_rt_min_interval;
782 goto retry;
783 }
784
785 if (net_ratelimit())
786 printk(KERN_WARNING
787 "ipv6: Neighbour table overflow.\n");
788 dst_free(&rt->dst);
789 return NULL;
790 }
791 }
792
793 return rt;
794 }
795
796 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
797 const struct in6_addr *daddr)
798 {
799 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
800
801 if (rt) {
802 rt->rt6i_flags |= RTF_CACHE;
803 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
804 }
805 return rt;
806 }
807
808 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
809 struct flowi6 *fl6, int flags)
810 {
811 struct fib6_node *fn;
812 struct rt6_info *rt, *nrt;
813 int strict = 0;
814 int attempts = 3;
815 int err;
816 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
817
818 strict |= flags & RT6_LOOKUP_F_IFACE;
819
820 relookup:
821 read_lock_bh(&table->tb6_lock);
822
823 restart_2:
824 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
825
826 restart:
827 rt = rt6_select(fn, oif, strict | reachable);
828
829 BACKTRACK(net, &fl6->saddr);
830 if (rt == net->ipv6.ip6_null_entry ||
831 rt->rt6i_flags & RTF_CACHE)
832 goto out;
833
834 dst_hold(&rt->dst);
835 read_unlock_bh(&table->tb6_lock);
836
837 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
838 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
839 else if (!(rt->dst.flags & DST_HOST))
840 nrt = rt6_alloc_clone(rt, &fl6->daddr);
841 else
842 goto out2;
843
844 dst_release(&rt->dst);
845 rt = nrt ? : net->ipv6.ip6_null_entry;
846
847 dst_hold(&rt->dst);
848 if (nrt) {
849 err = ip6_ins_rt(nrt);
850 if (!err)
851 goto out2;
852 }
853
854 if (--attempts <= 0)
855 goto out2;
856
857 /*
858 * Race condition! In the gap, when table->tb6_lock was
859 * released someone could insert this route. Relookup.
860 */
861 dst_release(&rt->dst);
862 goto relookup;
863
864 out:
865 if (reachable) {
866 reachable = 0;
867 goto restart_2;
868 }
869 dst_hold(&rt->dst);
870 read_unlock_bh(&table->tb6_lock);
871 out2:
872 rt->dst.lastuse = jiffies;
873 rt->dst.__use++;
874
875 return rt;
876 }
877
878 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
879 struct flowi6 *fl6, int flags)
880 {
881 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
882 }
883
884 static struct dst_entry *ip6_route_input_lookup(struct net *net,
885 struct net_device *dev,
886 struct flowi6 *fl6, int flags)
887 {
888 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
889 flags |= RT6_LOOKUP_F_IFACE;
890
891 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
892 }
893
894 void ip6_route_input(struct sk_buff *skb)
895 {
896 const struct ipv6hdr *iph = ipv6_hdr(skb);
897 struct net *net = dev_net(skb->dev);
898 int flags = RT6_LOOKUP_F_HAS_SADDR;
899 struct flowi6 fl6 = {
900 .flowi6_iif = skb->dev->ifindex,
901 .daddr = iph->daddr,
902 .saddr = iph->saddr,
903 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
904 .flowi6_mark = skb->mark,
905 .flowi6_proto = iph->nexthdr,
906 };
907
908 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
909 }
910
911 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
912 struct flowi6 *fl6, int flags)
913 {
914 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
915 }
916
917 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
918 struct flowi6 *fl6)
919 {
920 int flags = 0;
921
922 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
923 flags |= RT6_LOOKUP_F_IFACE;
924
925 if (!ipv6_addr_any(&fl6->saddr))
926 flags |= RT6_LOOKUP_F_HAS_SADDR;
927 else if (sk)
928 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
929
930 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
931 }
932
933 EXPORT_SYMBOL(ip6_route_output);
934
935 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
936 {
937 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
938 struct dst_entry *new = NULL;
939
940 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
941 if (rt) {
942 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
943
944 new = &rt->dst;
945
946 new->__use = 1;
947 new->input = dst_discard;
948 new->output = dst_discard;
949
950 if (dst_metrics_read_only(&ort->dst))
951 new->_metrics = ort->dst._metrics;
952 else
953 dst_copy_metrics(new, &ort->dst);
954 rt->rt6i_idev = ort->rt6i_idev;
955 if (rt->rt6i_idev)
956 in6_dev_hold(rt->rt6i_idev);
957 rt->dst.expires = 0;
958
959 rt->rt6i_gateway = ort->rt6i_gateway;
960 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
961 rt->rt6i_metric = 0;
962
963 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
964 #ifdef CONFIG_IPV6_SUBTREES
965 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
966 #endif
967
968 dst_free(new);
969 }
970
971 dst_release(dst_orig);
972 return new ? new : ERR_PTR(-ENOMEM);
973 }
974
975 /*
976 * Destination cache support functions
977 */
978
979 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
980 {
981 struct rt6_info *rt;
982
983 rt = (struct rt6_info *) dst;
984
985 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
986 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
987 if (!rt->rt6i_peer)
988 rt6_bind_peer(rt, 0);
989 rt->rt6i_peer_genid = rt6_peer_genid();
990 }
991 return dst;
992 }
993 return NULL;
994 }
995
996 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
997 {
998 struct rt6_info *rt = (struct rt6_info *) dst;
999
1000 if (rt) {
1001 if (rt->rt6i_flags & RTF_CACHE) {
1002 if (rt6_check_expired(rt)) {
1003 ip6_del_rt(rt);
1004 dst = NULL;
1005 }
1006 } else {
1007 dst_release(dst);
1008 dst = NULL;
1009 }
1010 }
1011 return dst;
1012 }
1013
1014 static void ip6_link_failure(struct sk_buff *skb)
1015 {
1016 struct rt6_info *rt;
1017
1018 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1019
1020 rt = (struct rt6_info *) skb_dst(skb);
1021 if (rt) {
1022 if (rt->rt6i_flags & RTF_CACHE) {
1023 dst_set_expires(&rt->dst, 0);
1024 rt->rt6i_flags |= RTF_EXPIRES;
1025 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1026 rt->rt6i_node->fn_sernum = -1;
1027 }
1028 }
1029
1030 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1031 {
1032 struct rt6_info *rt6 = (struct rt6_info*)dst;
1033
1034 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1035 rt6->rt6i_flags |= RTF_MODIFIED;
1036 if (mtu < IPV6_MIN_MTU) {
1037 u32 features = dst_metric(dst, RTAX_FEATURES);
1038 mtu = IPV6_MIN_MTU;
1039 features |= RTAX_FEATURE_ALLFRAG;
1040 dst_metric_set(dst, RTAX_FEATURES, features);
1041 }
1042 dst_metric_set(dst, RTAX_MTU, mtu);
1043 }
1044 }
1045
1046 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1047 {
1048 struct net_device *dev = dst->dev;
1049 unsigned int mtu = dst_mtu(dst);
1050 struct net *net = dev_net(dev);
1051
1052 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1053
1054 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1055 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1056
1057 /*
1058 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1059 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1060 * IPV6_MAXPLEN is also valid and means: "any MSS,
1061 * rely only on pmtu discovery"
1062 */
1063 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1064 mtu = IPV6_MAXPLEN;
1065 return mtu;
1066 }
1067
1068 static unsigned int ip6_mtu(const struct dst_entry *dst)
1069 {
1070 struct inet6_dev *idev;
1071 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1072
1073 if (mtu)
1074 return mtu;
1075
1076 mtu = IPV6_MIN_MTU;
1077
1078 rcu_read_lock();
1079 idev = __in6_dev_get(dst->dev);
1080 if (idev)
1081 mtu = idev->cnf.mtu6;
1082 rcu_read_unlock();
1083
1084 return mtu;
1085 }
1086
1087 static struct dst_entry *icmp6_dst_gc_list;
1088 static DEFINE_SPINLOCK(icmp6_dst_lock);
1089
1090 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1091 struct neighbour *neigh,
1092 struct flowi6 *fl6)
1093 {
1094 struct dst_entry *dst;
1095 struct rt6_info *rt;
1096 struct inet6_dev *idev = in6_dev_get(dev);
1097 struct net *net = dev_net(dev);
1098
1099 if (unlikely(!idev))
1100 return ERR_PTR(-ENODEV);
1101
1102 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
1103 if (unlikely(!rt)) {
1104 in6_dev_put(idev);
1105 dst = ERR_PTR(-ENOMEM);
1106 goto out;
1107 }
1108
1109 if (neigh)
1110 neigh_hold(neigh);
1111 else {
1112 neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr);
1113 if (IS_ERR(neigh)) {
1114 in6_dev_put(idev);
1115 dst_free(&rt->dst);
1116 return ERR_CAST(neigh);
1117 }
1118 }
1119
1120 rt->dst.flags |= DST_HOST;
1121 rt->dst.output = ip6_output;
1122 dst_set_neighbour(&rt->dst, neigh);
1123 atomic_set(&rt->dst.__refcnt, 1);
1124 rt->rt6i_dst.addr = fl6->daddr;
1125 rt->rt6i_dst.plen = 128;
1126 rt->rt6i_idev = idev;
1127 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1128
1129 spin_lock_bh(&icmp6_dst_lock);
1130 rt->dst.next = icmp6_dst_gc_list;
1131 icmp6_dst_gc_list = &rt->dst;
1132 spin_unlock_bh(&icmp6_dst_lock);
1133
1134 fib6_force_start_gc(net);
1135
1136 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1137
1138 out:
1139 return dst;
1140 }
1141
1142 int icmp6_dst_gc(void)
1143 {
1144 struct dst_entry *dst, **pprev;
1145 int more = 0;
1146
1147 spin_lock_bh(&icmp6_dst_lock);
1148 pprev = &icmp6_dst_gc_list;
1149
1150 while ((dst = *pprev) != NULL) {
1151 if (!atomic_read(&dst->__refcnt)) {
1152 *pprev = dst->next;
1153 dst_free(dst);
1154 } else {
1155 pprev = &dst->next;
1156 ++more;
1157 }
1158 }
1159
1160 spin_unlock_bh(&icmp6_dst_lock);
1161
1162 return more;
1163 }
1164
1165 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1166 void *arg)
1167 {
1168 struct dst_entry *dst, **pprev;
1169
1170 spin_lock_bh(&icmp6_dst_lock);
1171 pprev = &icmp6_dst_gc_list;
1172 while ((dst = *pprev) != NULL) {
1173 struct rt6_info *rt = (struct rt6_info *) dst;
1174 if (func(rt, arg)) {
1175 *pprev = dst->next;
1176 dst_free(dst);
1177 } else {
1178 pprev = &dst->next;
1179 }
1180 }
1181 spin_unlock_bh(&icmp6_dst_lock);
1182 }
1183
1184 static int ip6_dst_gc(struct dst_ops *ops)
1185 {
1186 unsigned long now = jiffies;
1187 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1188 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1189 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1190 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1191 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1192 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1193 int entries;
1194
1195 entries = dst_entries_get_fast(ops);
1196 if (time_after(rt_last_gc + rt_min_interval, now) &&
1197 entries <= rt_max_size)
1198 goto out;
1199
1200 net->ipv6.ip6_rt_gc_expire++;
1201 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1202 net->ipv6.ip6_rt_last_gc = now;
1203 entries = dst_entries_get_slow(ops);
1204 if (entries < ops->gc_thresh)
1205 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1206 out:
1207 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1208 return entries > rt_max_size;
1209 }
1210
/* Clean host part of a prefix. Not necessary in a radix tree,
   but results in cleaner routing tables.

   Remove it only once everything works!
 */
1216
1217 int ip6_dst_hoplimit(struct dst_entry *dst)
1218 {
1219 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1220 if (hoplimit == 0) {
1221 struct net_device *dev = dst->dev;
1222 struct inet6_dev *idev;
1223
1224 rcu_read_lock();
1225 idev = __in6_dev_get(dev);
1226 if (idev)
1227 hoplimit = idev->cnf.hop_limit;
1228 else
1229 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1230 rcu_read_unlock();
1231 }
1232 return hoplimit;
1233 }
1234 EXPORT_SYMBOL(ip6_dst_hoplimit);
1235
1236 /*
1237 *
1238 */
1239
1240 int ip6_route_add(struct fib6_config *cfg)
1241 {
1242 int err;
1243 struct net *net = cfg->fc_nlinfo.nl_net;
1244 struct rt6_info *rt = NULL;
1245 struct net_device *dev = NULL;
1246 struct inet6_dev *idev = NULL;
1247 struct fib6_table *table;
1248 int addr_type;
1249
1250 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1251 return -EINVAL;
1252 #ifndef CONFIG_IPV6_SUBTREES
1253 if (cfg->fc_src_len)
1254 return -EINVAL;
1255 #endif
1256 if (cfg->fc_ifindex) {
1257 err = -ENODEV;
1258 dev = dev_get_by_index(net, cfg->fc_ifindex);
1259 if (!dev)
1260 goto out;
1261 idev = in6_dev_get(dev);
1262 if (!idev)
1263 goto out;
1264 }
1265
1266 if (cfg->fc_metric == 0)
1267 cfg->fc_metric = IP6_RT_PRIO_USER;
1268
1269 err = -ENOBUFS;
1270 if (cfg->fc_nlinfo.nlh &&
1271 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1272 table = fib6_get_table(net, cfg->fc_table);
1273 if (!table) {
1274 printk(KERN_WARNING "IPv6: NLM_F_CREATE should be specified when creating new route\n");
1275 table = fib6_new_table(net, cfg->fc_table);
1276 }
1277 } else {
1278 table = fib6_new_table(net, cfg->fc_table);
1279 }
1280
1281 if (!table)
1282 goto out;
1283
1284 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
1285
1286 if (!rt) {
1287 err = -ENOMEM;
1288 goto out;
1289 }
1290
1291 rt->dst.obsolete = -1;
1292 rt->dst.expires = (cfg->fc_flags & RTF_EXPIRES) ?
1293 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1294 0;
1295
1296 if (cfg->fc_protocol == RTPROT_UNSPEC)
1297 cfg->fc_protocol = RTPROT_BOOT;
1298 rt->rt6i_protocol = cfg->fc_protocol;
1299
1300 addr_type = ipv6_addr_type(&cfg->fc_dst);
1301
1302 if (addr_type & IPV6_ADDR_MULTICAST)
1303 rt->dst.input = ip6_mc_input;
1304 else if (cfg->fc_flags & RTF_LOCAL)
1305 rt->dst.input = ip6_input;
1306 else
1307 rt->dst.input = ip6_forward;
1308
1309 rt->dst.output = ip6_output;
1310
1311 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1312 rt->rt6i_dst.plen = cfg->fc_dst_len;
1313 if (rt->rt6i_dst.plen == 128)
1314 rt->dst.flags |= DST_HOST;
1315
1316 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1317 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1318 if (!metrics) {
1319 err = -ENOMEM;
1320 goto out;
1321 }
1322 dst_init_metrics(&rt->dst, metrics, 0);
1323 }
1324 #ifdef CONFIG_IPV6_SUBTREES
1325 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1326 rt->rt6i_src.plen = cfg->fc_src_len;
1327 #endif
1328
1329 rt->rt6i_metric = cfg->fc_metric;
1330
1331 /* We cannot add true routes via loopback here,
1332 they would result in kernel looping; promote them to reject routes
1333 */
1334 if ((cfg->fc_flags & RTF_REJECT) ||
1335 (dev && (dev->flags & IFF_LOOPBACK) &&
1336 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1337 !(cfg->fc_flags & RTF_LOCAL))) {
1338 /* hold loopback dev/idev if we haven't done so. */
1339 if (dev != net->loopback_dev) {
1340 if (dev) {
1341 dev_put(dev);
1342 in6_dev_put(idev);
1343 }
1344 dev = net->loopback_dev;
1345 dev_hold(dev);
1346 idev = in6_dev_get(dev);
1347 if (!idev) {
1348 err = -ENODEV;
1349 goto out;
1350 }
1351 }
1352 rt->dst.output = ip6_pkt_discard_out;
1353 rt->dst.input = ip6_pkt_discard;
1354 rt->dst.error = -ENETUNREACH;
1355 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1356 goto install_route;
1357 }
1358
1359 if (cfg->fc_flags & RTF_GATEWAY) {
1360 const struct in6_addr *gw_addr;
1361 int gwa_type;
1362
1363 gw_addr = &cfg->fc_gateway;
1364 rt->rt6i_gateway = *gw_addr;
1365 gwa_type = ipv6_addr_type(gw_addr);
1366
1367 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1368 struct rt6_info *grt;
1369
1370 /* IPv6 strictly inhibits using not link-local
1371 addresses as nexthop address.
1372 Otherwise, router will not able to send redirects.
1373 It is very good, but in some (rare!) circumstances
1374 (SIT, PtP, NBMA NOARP links) it is handy to allow
1375 some exceptions. --ANK
1376 */
1377 err = -EINVAL;
1378 if (!(gwa_type & IPV6_ADDR_UNICAST))
1379 goto out;
1380
1381 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1382
1383 err = -EHOSTUNREACH;
1384 if (!grt)
1385 goto out;
1386 if (dev) {
1387 if (dev != grt->dst.dev) {
1388 dst_release(&grt->dst);
1389 goto out;
1390 }
1391 } else {
1392 dev = grt->dst.dev;
1393 idev = grt->rt6i_idev;
1394 dev_hold(dev);
1395 in6_dev_hold(grt->rt6i_idev);
1396 }
1397 if (!(grt->rt6i_flags & RTF_GATEWAY))
1398 err = 0;
1399 dst_release(&grt->dst);
1400
1401 if (err)
1402 goto out;
1403 }
1404 err = -EINVAL;
1405 if (!dev || (dev->flags & IFF_LOOPBACK))
1406 goto out;
1407 }
1408
1409 err = -ENODEV;
1410 if (!dev)
1411 goto out;
1412
1413 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1414 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1415 err = -EINVAL;
1416 goto out;
1417 }
1418 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1419 rt->rt6i_prefsrc.plen = 128;
1420 } else
1421 rt->rt6i_prefsrc.plen = 0;
1422
1423 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1424 err = rt6_bind_neighbour(rt, dev);
1425 if (err)
1426 goto out;
1427 }
1428
1429 rt->rt6i_flags = cfg->fc_flags;
1430
1431 install_route:
1432 if (cfg->fc_mx) {
1433 struct nlattr *nla;
1434 int remaining;
1435
1436 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1437 int type = nla_type(nla);
1438
1439 if (type) {
1440 if (type > RTAX_MAX) {
1441 err = -EINVAL;
1442 goto out;
1443 }
1444
1445 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1446 }
1447 }
1448 }
1449
1450 rt->dst.dev = dev;
1451 rt->rt6i_idev = idev;
1452 rt->rt6i_table = table;
1453
1454 cfg->fc_nlinfo.nl_net = dev_net(dev);
1455
1456 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1457
1458 out:
1459 if (dev)
1460 dev_put(dev);
1461 if (idev)
1462 in6_dev_put(idev);
1463 if (rt)
1464 dst_free(&rt->dst);
1465 return err;
1466 }
1467
1468 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1469 {
1470 int err;
1471 struct fib6_table *table;
1472 struct net *net = dev_net(rt->dst.dev);
1473
1474 if (rt == net->ipv6.ip6_null_entry)
1475 return -ENOENT;
1476
1477 table = rt->rt6i_table;
1478 write_lock_bh(&table->tb6_lock);
1479
1480 err = fib6_del(rt, info);
1481 dst_release(&rt->dst);
1482
1483 write_unlock_bh(&table->tb6_lock);
1484
1485 return err;
1486 }
1487
1488 int ip6_del_rt(struct rt6_info *rt)
1489 {
1490 struct nl_info info = {
1491 .nl_net = dev_net(rt->dst.dev),
1492 };
1493 return __ip6_del_rt(rt, &info);
1494 }
1495
1496 static int ip6_route_del(struct fib6_config *cfg)
1497 {
1498 struct fib6_table *table;
1499 struct fib6_node *fn;
1500 struct rt6_info *rt;
1501 int err = -ESRCH;
1502
1503 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1504 if (!table)
1505 return err;
1506
1507 read_lock_bh(&table->tb6_lock);
1508
1509 fn = fib6_locate(&table->tb6_root,
1510 &cfg->fc_dst, cfg->fc_dst_len,
1511 &cfg->fc_src, cfg->fc_src_len);
1512
1513 if (fn) {
1514 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1515 if (cfg->fc_ifindex &&
1516 (!rt->dst.dev ||
1517 rt->dst.dev->ifindex != cfg->fc_ifindex))
1518 continue;
1519 if (cfg->fc_flags & RTF_GATEWAY &&
1520 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1521 continue;
1522 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1523 continue;
1524 dst_hold(&rt->dst);
1525 read_unlock_bh(&table->tb6_lock);
1526
1527 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1528 }
1529 }
1530 read_unlock_bh(&table->tb6_lock);
1531
1532 return err;
1533 }
1534
1535 /*
1536 * Handle redirects
1537 */
/*
 * Flow key used for redirect lookups: a flowi6 followed by the address of
 * the router that sent the redirect.  __ip6_route_redirect() casts the
 * flowi6 pointer it is handed back to this type, so fl6 must stay the
 * first member.
 */
1538 struct ip6rd_flowi {
1539 struct flowi6 fl6; /* ordinary flow key; must be first */
1540 struct in6_addr gateway; /* router the redirect came from */
1541 };
1542
/*
 * Per-table lookup callback used by ip6_route_redirect().
 *
 * Searches the routes at the FIB node found for fl6->daddr / fl6->saddr for
 * one that is not expired, has RTF_GATEWAY set, uses the interface given by
 * fl6->flowi6_oif and whose gateway equals the gateway stored behind @fl6
 * (see struct ip6rd_flowi).  If none matches, net->ipv6.ip6_null_entry is
 * used instead.  The returned route always has a reference taken on it via
 * dst_hold(), so the caller must release it.
 *
 * NOTE(review): BACKTRACK is a macro defined outside this block; it is
 * presumably what jumps to the "restart" and "out" labels below and relies
 * on the local names fn and rt -- confirm before renaming anything here.
 * @flags is not referenced directly in this body.
 */
1543 static struct rt6_info *__ip6_route_redirect(struct net *net,
1544 struct fib6_table *table,
1545 struct flowi6 *fl6,
1546 int flags)
1547 {
1548 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6; /* fl6 is the first member of an ip6rd_flowi */
1549 struct rt6_info *rt;
1550 struct fib6_node *fn;
1551
1552 /*
1553 * Get the "current" route for this destination and
1554 * check if the redirect has come from appropriate router.
1555 *
1556 * RFC 2461 specifies that redirects should only be
1557 * accepted if they come from the nexthop to the target.
1558 * Due to the way the routes are chosen, this notion
1559 * is a bit fuzzy and one might need to check all possible
1560 * routes.
1561 */
1562
1563 read_lock_bh(&table->tb6_lock);
1564 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1565 restart:
1566 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1567 /*
1568 * Current route is on-link; redirect is always invalid.
1569 *
1570 * Seems, previous statement is not true. It could
1571 * be node, which looks for us as on-link (f.e. proxy ndisc)
1572 * But then router serving it might decide, that we should
1573 * know truth 8)8) --ANK (980726).
1574 */
1575 if (rt6_check_expired(rt))
1576 continue;
1577 if (!(rt->rt6i_flags & RTF_GATEWAY))
1578 continue;
1579 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1580 continue;
1581 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1582 continue;
1583 break;
1584 }
1585
/* no candidate at this node: fall back to the null entry */
1586 if (!rt)
1587 rt = net->ipv6.ip6_null_entry;
1588 BACKTRACK(net, &fl6->saddr);
1589 out:
/* take the reference while the table lock is still held */
1590 dst_hold(&rt->dst);
1591
1592 read_unlock_bh(&table->tb6_lock);
1593
1594 return rt;
1595 };
1596
1597 static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1598 const struct in6_addr *src,
1599 const struct in6_addr *gateway,
1600 struct net_device *dev)
1601 {
1602 int flags = RT6_LOOKUP_F_HAS_SADDR;
1603 struct net *net = dev_net(dev);
1604 struct ip6rd_flowi rdfl = {
1605 .fl6 = {
1606 .flowi6_oif = dev->ifindex,
1607 .daddr = *dest,
1608 .saddr = *src,
1609 },
1610 };
1611
1612 rdfl.gateway = *gateway;
1613
1614 if (rt6_need_strict(dest))
1615 flags |= RT6_LOOKUP_F_IFACE;
1616
1617 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
1618 flags, __ip6_route_redirect);
1619 }
1620
1621 void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1622 const struct in6_addr *saddr,
1623 struct neighbour *neigh, u8 *lladdr, int on_link)
1624 {
1625 struct rt6_info *rt, *nrt = NULL;
1626 struct netevent_redirect netevent;
1627 struct net *net = dev_net(neigh->dev);
1628
1629 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1630
1631 if (rt == net->ipv6.ip6_null_entry) {
1632 if (net_ratelimit())
1633 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1634 "for redirect target\n");
1635 goto out;
1636 }
1637
1638 /*
1639 * We have finally decided to accept it.
1640 */
1641
1642 neigh_update(neigh, lladdr, NUD_STALE,
1643 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1644 NEIGH_UPDATE_F_OVERRIDE|
1645 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1646 NEIGH_UPDATE_F_ISROUTER))
1647 );
1648
1649 /*
1650 * Redirect received -> path was valid.
1651 * Look, redirects are sent only in response to data packets,
1652 * so that this nexthop apparently is reachable. --ANK
1653 */
1654 dst_confirm(&rt->dst);
1655
1656 /* Duplicate redirect: silently ignore. */
1657 if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
1658 goto out;
1659
1660 nrt = ip6_rt_copy(rt, dest);
1661 if (!nrt)
1662 goto out;
1663
1664 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1665 if (on_link)
1666 nrt->rt6i_flags &= ~RTF_GATEWAY;
1667
1668 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1669 dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1670
1671 if (ip6_ins_rt(nrt))
1672 goto out;
1673
1674 netevent.old = &rt->dst;
1675 netevent.new = &nrt->dst;
1676 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1677
1678 if (rt->rt6i_flags & RTF_CACHE) {
1679 ip6_del_rt(rt);
1680 return;
1681 }
1682
1683 out:
1684 dst_release(&rt->dst);
1685 }
1686
1687 /*
1688 * Handle ICMP "packet too big" messages
1689 * i.e. Path MTU discovery
1690 */
1691
1692 static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
1693 struct net *net, u32 pmtu, int ifindex)
1694 {
1695 struct rt6_info *rt, *nrt;
1696 int allfrag = 0;
1697 again:
1698 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1699 if (!rt)
1700 return;
1701
1702 if (rt6_check_expired(rt)) {
1703 ip6_del_rt(rt);
1704 goto again;
1705 }
1706
1707 if (pmtu >= dst_mtu(&rt->dst))
1708 goto out;
1709
1710 if (pmtu < IPV6_MIN_MTU) {
1711 /*
1712 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1713 * MTU (1280) and a fragment header should always be included
1714 * after a node receiving Too Big message reporting PMTU is
1715 * less than the IPv6 Minimum Link MTU.
1716 */
1717 pmtu = IPV6_MIN_MTU;
1718 allfrag = 1;
1719 }
1720
1721 /* New mtu received -> path was valid.
1722 They are sent only in response to data packets,
1723 so that this nexthop apparently is reachable. --ANK
1724 */
1725 dst_confirm(&rt->dst);
1726
1727 /* Host route. If it is static, it would be better
1728 not to override it, but add new one, so that
1729 when cache entry will expire old pmtu
1730 would return automatically.
1731 */
1732 if (rt->rt6i_flags & RTF_CACHE) {
1733 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1734 if (allfrag) {
1735 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1736 features |= RTAX_FEATURE_ALLFRAG;
1737 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1738 }
1739 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1740 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1741 goto out;
1742 }
1743
1744 /* Network route.
1745 Two cases are possible:
1746 1. It is connected route. Action: COW
1747 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1748 */
1749 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
1750 nrt = rt6_alloc_cow(rt, daddr, saddr);
1751 else
1752 nrt = rt6_alloc_clone(rt, daddr);
1753
1754 if (nrt) {
1755 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1756 if (allfrag) {
1757 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1758 features |= RTAX_FEATURE_ALLFRAG;
1759 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1760 }
1761
1762 /* According to RFC 1981, detecting PMTU increase shouldn't be
1763 * happened within 5 mins, the recommended timer is 10 mins.
1764 * Here this route expiration time is set to ip6_rt_mtu_expires
1765 * which is 10 mins. After 10 mins the decreased pmtu is expired
1766 * and detecting PMTU increase will be automatically happened.
1767 */
1768 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1769 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1770
1771 ip6_ins_rt(nrt);
1772 }
1773 out:
1774 dst_release(&rt->dst);
1775 }
1776
1777 void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
1778 struct net_device *dev, u32 pmtu)
1779 {
1780 struct net *net = dev_net(dev);
1781
1782 /*
1783 * RFC 1981 states that a node "MUST reduce the size of the packets it
1784 * is sending along the path" that caused the Packet Too Big message.
1785 * Since it's not possible in the general case to determine which
1786 * interface was used to send the original packet, we update the MTU
1787 * on the interface that will be used to send future packets. We also
1788 * update the MTU on the interface that received the Packet Too Big in
1789 * case the original packet was forced out that interface with
1790 * SO_BINDTODEVICE or similar. This is the next best thing to the
1791 * correct behaviour, which would be to update the MTU on all
1792 * interfaces.
1793 */
1794 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1795 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1796 }
1797
1798 /*
1799 * Misc support functions
1800 */
1801
1802 static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
1803 const struct in6_addr *dest)
1804 {
1805 struct net *net = dev_net(ort->dst.dev);
1806 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
1807 ort->dst.dev, 0);
1808
1809 if (rt) {
1810 rt->dst.input = ort->dst.input;
1811 rt->dst.output = ort->dst.output;
1812 rt->dst.flags |= DST_HOST;
1813
1814 rt->rt6i_dst.addr = *dest;
1815 rt->rt6i_dst.plen = 128;
1816 dst_copy_metrics(&rt->dst, &ort->dst);
1817 rt->dst.error = ort->dst.error;
1818 rt->rt6i_idev = ort->rt6i_idev;
1819 if (rt->rt6i_idev)
1820 in6_dev_hold(rt->rt6i_idev);
1821 rt->dst.lastuse = jiffies;
1822 rt->dst.expires = 0;
1823
1824 rt->rt6i_gateway = ort->rt6i_gateway;
1825 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1826 rt->rt6i_metric = 0;
1827
1828 #ifdef CONFIG_IPV6_SUBTREES
1829 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1830 #endif
1831 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1832 rt->rt6i_table = ort->rt6i_table;
1833 }
1834 return rt;
1835 }
1836
1837 #ifdef CONFIG_IPV6_ROUTE_INFO
1838 static struct rt6_info *rt6_get_route_info(struct net *net,
1839 const struct in6_addr *prefix, int prefixlen,
1840 const struct in6_addr *gwaddr, int ifindex)
1841 {
1842 struct fib6_node *fn;
1843 struct rt6_info *rt = NULL;
1844 struct fib6_table *table;
1845
1846 table = fib6_get_table(net, RT6_TABLE_INFO);
1847 if (!table)
1848 return NULL;
1849
1850 write_lock_bh(&table->tb6_lock);
1851 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1852 if (!fn)
1853 goto out;
1854
1855 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1856 if (rt->dst.dev->ifindex != ifindex)
1857 continue;
1858 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1859 continue;
1860 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1861 continue;
1862 dst_hold(&rt->dst);
1863 break;
1864 }
1865 out:
1866 write_unlock_bh(&table->tb6_lock);
1867 return rt;
1868 }
1869
1870 static struct rt6_info *rt6_add_route_info(struct net *net,
1871 const struct in6_addr *prefix, int prefixlen,
1872 const struct in6_addr *gwaddr, int ifindex,
1873 unsigned pref)
1874 {
1875 struct fib6_config cfg = {
1876 .fc_table = RT6_TABLE_INFO,
1877 .fc_metric = IP6_RT_PRIO_USER,
1878 .fc_ifindex = ifindex,
1879 .fc_dst_len = prefixlen,
1880 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1881 RTF_UP | RTF_PREF(pref),
1882 .fc_nlinfo.pid = 0,
1883 .fc_nlinfo.nlh = NULL,
1884 .fc_nlinfo.nl_net = net,
1885 };
1886
1887 cfg.fc_dst = *prefix;
1888 cfg.fc_gateway = *gwaddr;
1889
1890 /* We should treat it as a default route if prefix length is 0. */
1891 if (!prefixlen)
1892 cfg.fc_flags |= RTF_DEFAULT;
1893
1894 ip6_route_add(&cfg);
1895
1896 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1897 }
1898 #endif
1899
1900 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1901 {
1902 struct rt6_info *rt;
1903 struct fib6_table *table;
1904
1905 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1906 if (!table)
1907 return NULL;
1908
1909 write_lock_bh(&table->tb6_lock);
1910 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1911 if (dev == rt->dst.dev &&
1912 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1913 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1914 break;
1915 }
1916 if (rt)
1917 dst_hold(&rt->dst);
1918 write_unlock_bh(&table->tb6_lock);
1919 return rt;
1920 }
1921
1922 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1923 struct net_device *dev,
1924 unsigned int pref)
1925 {
1926 struct fib6_config cfg = {
1927 .fc_table = RT6_TABLE_DFLT,
1928 .fc_metric = IP6_RT_PRIO_USER,
1929 .fc_ifindex = dev->ifindex,
1930 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1931 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1932 .fc_nlinfo.pid = 0,
1933 .fc_nlinfo.nlh = NULL,
1934 .fc_nlinfo.nl_net = dev_net(dev),
1935 };
1936
1937 cfg.fc_gateway = *gwaddr;
1938
1939 ip6_route_add(&cfg);
1940
1941 return rt6_get_dflt_router(gwaddr, dev);
1942 }
1943
1944 void rt6_purge_dflt_routers(struct net *net)
1945 {
1946 struct rt6_info *rt;
1947 struct fib6_table *table;
1948
1949 /* NOTE: Keep consistent with rt6_get_dflt_router */
1950 table = fib6_get_table(net, RT6_TABLE_DFLT);
1951 if (!table)
1952 return;
1953
1954 restart:
1955 read_lock_bh(&table->tb6_lock);
1956 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1957 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1958 dst_hold(&rt->dst);
1959 read_unlock_bh(&table->tb6_lock);
1960 ip6_del_rt(rt);
1961 goto restart;
1962 }
1963 }
1964 read_unlock_bh(&table->tb6_lock);
1965 }
1966
1967 static void rtmsg_to_fib6_config(struct net *net,
1968 struct in6_rtmsg *rtmsg,
1969 struct fib6_config *cfg)
1970 {
1971 memset(cfg, 0, sizeof(*cfg));
1972
1973 cfg->fc_table = RT6_TABLE_MAIN;
1974 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1975 cfg->fc_metric = rtmsg->rtmsg_metric;
1976 cfg->fc_expires = rtmsg->rtmsg_info;
1977 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1978 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1979 cfg->fc_flags = rtmsg->rtmsg_flags;
1980
1981 cfg->fc_nlinfo.nl_net = net;
1982
1983 cfg->fc_dst = rtmsg->rtmsg_dst;
1984 cfg->fc_src = rtmsg->rtmsg_src;
1985 cfg->fc_gateway = rtmsg->rtmsg_gateway;
1986 }
1987
1988 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1989 {
1990 struct fib6_config cfg;
1991 struct in6_rtmsg rtmsg;
1992 int err;
1993
1994 switch(cmd) {
1995 case SIOCADDRT: /* Add a route */
1996 case SIOCDELRT: /* Delete a route */
1997 if (!capable(CAP_NET_ADMIN))
1998 return -EPERM;
1999 err = copy_from_user(&rtmsg, arg,
2000 sizeof(struct in6_rtmsg));
2001 if (err)
2002 return -EFAULT;
2003
2004 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2005
2006 rtnl_lock();
2007 switch (cmd) {
2008 case SIOCADDRT:
2009 err = ip6_route_add(&cfg);
2010 break;
2011 case SIOCDELRT:
2012 err = ip6_route_del(&cfg);
2013 break;
2014 default:
2015 err = -EINVAL;
2016 }
2017 rtnl_unlock();
2018
2019 return err;
2020 }
2021
2022 return -EINVAL;
2023 }
2024
2025 /*
2026 * Drop the packet on the floor
2027 */
2028
2029 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2030 {
2031 int type;
2032 struct dst_entry *dst = skb_dst(skb);
2033 switch (ipstats_mib_noroutes) {
2034 case IPSTATS_MIB_INNOROUTES:
2035 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2036 if (type == IPV6_ADDR_ANY) {
2037 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2038 IPSTATS_MIB_INADDRERRORS);
2039 break;
2040 }
2041 /* FALLTHROUGH */
2042 case IPSTATS_MIB_OUTNOROUTES:
2043 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2044 ipstats_mib_noroutes);
2045 break;
2046 }
2047 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2048 kfree_skb(skb);
2049 return 0;
2050 }
2051
2052 static int ip6_pkt_discard(struct sk_buff *skb)
2053 {
2054 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2055 }
2056
2057 static int ip6_pkt_discard_out(struct sk_buff *skb)
2058 {
2059 skb->dev = skb_dst(skb)->dev;
2060 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2061 }
2062
2063 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2064
2065 static int ip6_pkt_prohibit(struct sk_buff *skb)
2066 {
2067 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2068 }
2069
2070 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2071 {
2072 skb->dev = skb_dst(skb)->dev;
2073 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2074 }
2075
2076 #endif
2077
2078 /*
2079 * Allocate a dst for local (unicast / anycast) address.
2080 */
2081
2082 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2083 const struct in6_addr *addr,
2084 bool anycast)
2085 {
2086 struct net *net = dev_net(idev->dev);
2087 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
2088 net->loopback_dev, 0);
2089 int err;
2090
2091 if (!rt) {
2092 if (net_ratelimit())
2093 pr_warning("IPv6: Maximum number of routes reached,"
2094 " consider increasing route/max_size.\n");
2095 return ERR_PTR(-ENOMEM);
2096 }
2097
2098 in6_dev_hold(idev);
2099
2100 rt->dst.flags |= DST_HOST;
2101 rt->dst.input = ip6_input;
2102 rt->dst.output = ip6_output;
2103 rt->rt6i_idev = idev;
2104 rt->dst.obsolete = -1;
2105
2106 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2107 if (anycast)
2108 rt->rt6i_flags |= RTF_ANYCAST;
2109 else
2110 rt->rt6i_flags |= RTF_LOCAL;
2111 err = rt6_bind_neighbour(rt, rt->dst.dev);
2112 if (err) {
2113 dst_free(&rt->dst);
2114 return ERR_PTR(err);
2115 }
2116
2117 rt->rt6i_dst.addr = *addr;
2118 rt->rt6i_dst.plen = 128;
2119 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2120
2121 atomic_set(&rt->dst.__refcnt, 1);
2122
2123 return rt;
2124 }
2125
2126 int ip6_route_get_saddr(struct net *net,
2127 struct rt6_info *rt,
2128 const struct in6_addr *daddr,
2129 unsigned int prefs,
2130 struct in6_addr *saddr)
2131 {
2132 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2133 int err = 0;
2134 if (rt->rt6i_prefsrc.plen)
2135 *saddr = rt->rt6i_prefsrc.addr;
2136 else
2137 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2138 daddr, prefs, saddr);
2139 return err;
2140 }
2141
2142 /* remove deleted ip from prefsrc entries */
/*
 * Argument bundle handed to fib6_remove_prefsrc() through fib6_clean_all()
 * by rt6_remove_prefsrc().
 */
2143 struct arg_dev_net_ip {
2144 struct net_device *dev; /* only routes on this device are touched; NULL matches every device */
2145 struct net *net; /* namespace whose ip6_null_entry is skipped */
2146 struct in6_addr *addr; /* address compared against each route's prefsrc */
2147 };
2148
2149 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2150 {
2151 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2152 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2153 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2154
2155 if (((void *)rt->dst.dev == dev || !dev) &&
2156 rt != net->ipv6.ip6_null_entry &&
2157 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2158 /* remove prefsrc entry */
2159 rt->rt6i_prefsrc.plen = 0;
2160 }
2161 return 0;
2162 }
2163
2164 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2165 {
2166 struct net *net = dev_net(ifp->idev->dev);
2167 struct arg_dev_net_ip adni = {
2168 .dev = ifp->idev->dev,
2169 .net = net,
2170 .addr = &ifp->addr,
2171 };
2172 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2173 }
2174
/* Argument bundle passed to fib6_ifdown() by rt6_ifdown(). */
2175 struct arg_dev_net {
2176 struct net_device *dev; /* device to match; NULL matches every route */
2177 struct net *net; /* namespace whose ip6_null_entry is never selected */
2178 };
2179
2180 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2181 {
2182 const struct arg_dev_net *adn = arg;
2183 const struct net_device *dev = adn->dev;
2184
2185 if ((rt->dst.dev == dev || !dev) &&
2186 rt != adn->net->ipv6.ip6_null_entry)
2187 return -1;
2188
2189 return 0;
2190 }
2191
2192 void rt6_ifdown(struct net *net, struct net_device *dev)
2193 {
2194 struct arg_dev_net adn = {
2195 .dev = dev,
2196 .net = net,
2197 };
2198
2199 fib6_clean_all(net, fib6_ifdown, 0, &adn);
2200 icmp6_clean_all(fib6_ifdown, &adn);
2201 }
2202
/* Argument bundle passed to rt6_mtu_change_route() by rt6_mtu_change(). */
2203 struct rt6_mtu_change_arg
2204 {
2205 struct net_device *dev; /* device whose MTU changed */
2206 unsigned mtu; /* the new MTU value */
2207 };
2208
2209 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2210 {
2211 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2212 struct inet6_dev *idev;
2213
2214 /* In IPv6 pmtu discovery is not optional,
2215 so that RTAX_MTU lock cannot disable it.
2216 We still use this lock to block changes
2217 caused by addrconf/ndisc.
2218 */
2219
2220 idev = __in6_dev_get(arg->dev);
2221 if (!idev)
2222 return 0;
2223
2224 /* For administrative MTU increase, there is no way to discover
2225 IPv6 PMTU increase, so PMTU increase should be updated here.
2226 Since RFC 1981 doesn't include administrative MTU increase
2227 update PMTU increase is a MUST. (i.e. jumbo frame)
2228 */
2229 /*
2230 If new MTU is less than route PMTU, this new MTU will be the
2231 lowest MTU in the path, update the route PMTU to reflect PMTU
2232 decreases; if new MTU is greater than route PMTU, and the
2233 old MTU is the lowest MTU in the path, update the route PMTU
2234 to reflect the increase. In this case if the other nodes' MTU
2235 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2236 PMTU discouvery.
2237 */
2238 if (rt->dst.dev == arg->dev &&
2239 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2240 (dst_mtu(&rt->dst) >= arg->mtu ||
2241 (dst_mtu(&rt->dst) < arg->mtu &&
2242 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2243 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2244 }
2245 return 0;
2246 }
2247
2248 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2249 {
2250 struct rt6_mtu_change_arg arg = {
2251 .dev = dev,
2252 .mtu = mtu,
2253 };
2254
2255 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2256 }
2257
2258 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2259 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
2260 [RTA_OIF] = { .type = NLA_U32 },
2261 [RTA_IIF] = { .type = NLA_U32 },
2262 [RTA_PRIORITY] = { .type = NLA_U32 },
2263 [RTA_METRICS] = { .type = NLA_NESTED },
2264 };
2265
2266 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2267 struct fib6_config *cfg)
2268 {
2269 struct rtmsg *rtm;
2270 struct nlattr *tb[RTA_MAX+1];
2271 int err;
2272
2273 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2274 if (err < 0)
2275 goto errout;
2276
2277 err = -EINVAL;
2278 rtm = nlmsg_data(nlh);
2279 memset(cfg, 0, sizeof(*cfg));
2280
2281 cfg->fc_table = rtm->rtm_table;
2282 cfg->fc_dst_len = rtm->rtm_dst_len;
2283 cfg->fc_src_len = rtm->rtm_src_len;
2284 cfg->fc_flags = RTF_UP;
2285 cfg->fc_protocol = rtm->rtm_protocol;
2286
2287 if (rtm->rtm_type == RTN_UNREACHABLE)
2288 cfg->fc_flags |= RTF_REJECT;
2289
2290 if (rtm->rtm_type == RTN_LOCAL)
2291 cfg->fc_flags |= RTF_LOCAL;
2292
2293 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2294 cfg->fc_nlinfo.nlh = nlh;
2295 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2296
2297 if (tb[RTA_GATEWAY]) {
2298 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2299 cfg->fc_flags |= RTF_GATEWAY;
2300 }
2301
2302 if (tb[RTA_DST]) {
2303 int plen = (rtm->rtm_dst_len + 7) >> 3;
2304
2305 if (nla_len(tb[RTA_DST]) < plen)
2306 goto errout;
2307
2308 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2309 }
2310
2311 if (tb[RTA_SRC]) {
2312 int plen = (rtm->rtm_src_len + 7) >> 3;
2313
2314 if (nla_len(tb[RTA_SRC]) < plen)
2315 goto errout;
2316
2317 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2318 }
2319
2320 if (tb[RTA_PREFSRC])
2321 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2322
2323 if (tb[RTA_OIF])
2324 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2325
2326 if (tb[RTA_PRIORITY])
2327 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2328
2329 if (tb[RTA_METRICS]) {
2330 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2331 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2332 }
2333
2334 if (tb[RTA_TABLE])
2335 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2336
2337 err = 0;
2338 errout:
2339 return err;
2340 }
2341
2342 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2343 {
2344 struct fib6_config cfg;
2345 int err;
2346
2347 err = rtm_to_fib6_config(skb, nlh, &cfg);
2348 if (err < 0)
2349 return err;
2350
2351 return ip6_route_del(&cfg);
2352 }
2353
2354 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2355 {
2356 struct fib6_config cfg;
2357 int err;
2358
2359 err = rtm_to_fib6_config(skb, nlh, &cfg);
2360 if (err < 0)
2361 return err;
2362
2363 return ip6_route_add(&cfg);
2364 }
2365
2366 static inline size_t rt6_nlmsg_size(void)
2367 {
2368 return NLMSG_ALIGN(sizeof(struct rtmsg))
2369 + nla_total_size(16) /* RTA_SRC */
2370 + nla_total_size(16) /* RTA_DST */
2371 + nla_total_size(16) /* RTA_GATEWAY */
2372 + nla_total_size(16) /* RTA_PREFSRC */
2373 + nla_total_size(4) /* RTA_TABLE */
2374 + nla_total_size(4) /* RTA_IIF */
2375 + nla_total_size(4) /* RTA_OIF */
2376 + nla_total_size(4) /* RTA_PRIORITY */
2377 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2378 + nla_total_size(sizeof(struct rta_cacheinfo));
2379 }
2380
2381 static int rt6_fill_node(struct net *net,
2382 struct sk_buff *skb, struct rt6_info *rt,
2383 struct in6_addr *dst, struct in6_addr *src,
2384 int iif, int type, u32 pid, u32 seq,
2385 int prefix, int nowait, unsigned int flags)
2386 {
2387 const struct inet_peer *peer;
2388 struct rtmsg *rtm;
2389 struct nlmsghdr *nlh;
2390 long expires;
2391 u32 table;
2392 struct neighbour *n;
2393 u32 ts, tsage;
2394
2395 if (prefix) { /* user wants prefix routes only */
2396 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2397 /* success since this is not a prefix route */
2398 return 1;
2399 }
2400 }
2401
2402 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2403 if (!nlh)
2404 return -EMSGSIZE;
2405
2406 rtm = nlmsg_data(nlh);
2407 rtm->rtm_family = AF_INET6;
2408 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2409 rtm->rtm_src_len = rt->rt6i_src.plen;
2410 rtm->rtm_tos = 0;
2411 if (rt->rt6i_table)
2412 table = rt->rt6i_table->tb6_id;
2413 else
2414 table = RT6_TABLE_UNSPEC;
2415 rtm->rtm_table = table;
2416 if (nla_put_u32(skb, RTA_TABLE, table))
2417 goto nla_put_failure;
2418 if (rt->rt6i_flags & RTF_REJECT)
2419 rtm->rtm_type = RTN_UNREACHABLE;
2420 else if (rt->rt6i_flags & RTF_LOCAL)
2421 rtm->rtm_type = RTN_LOCAL;
2422 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2423 rtm->rtm_type = RTN_LOCAL;
2424 else
2425 rtm->rtm_type = RTN_UNICAST;
2426 rtm->rtm_flags = 0;
2427 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2428 rtm->rtm_protocol = rt->rt6i_protocol;
2429 if (rt->rt6i_flags & RTF_DYNAMIC)
2430 rtm->rtm_protocol = RTPROT_REDIRECT;
2431 else if (rt->rt6i_flags & RTF_ADDRCONF)
2432 rtm->rtm_protocol = RTPROT_KERNEL;
2433 else if (rt->rt6i_flags & RTF_DEFAULT)
2434 rtm->rtm_protocol = RTPROT_RA;
2435
2436 if (rt->rt6i_flags & RTF_CACHE)
2437 rtm->rtm_flags |= RTM_F_CLONED;
2438
2439 if (dst) {
2440 if (nla_put(skb, RTA_DST, 16, dst))
2441 goto nla_put_failure;
2442 rtm->rtm_dst_len = 128;
2443 } else if (rtm->rtm_dst_len)
2444 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2445 goto nla_put_failure;
2446 #ifdef CONFIG_IPV6_SUBTREES
2447 if (src) {
2448 if (nla_put(skb, RTA_SRC, 16, src))
2449 goto nla_put_failure;
2450 rtm->rtm_src_len = 128;
2451 } else if (rtm->rtm_src_len &&
2452 nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2453 goto nla_put_failure;
2454 #endif
2455 if (iif) {
2456 #ifdef CONFIG_IPV6_MROUTE
2457 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2458 int err = ip6mr_get_route(net, skb, rtm, nowait);
2459 if (err <= 0) {
2460 if (!nowait) {
2461 if (err == 0)
2462 return 0;
2463 goto nla_put_failure;
2464 } else {
2465 if (err == -EMSGSIZE)
2466 goto nla_put_failure;
2467 }
2468 }
2469 } else
2470 #endif
2471 if (nla_put_u32(skb, RTA_IIF, iif))
2472 goto nla_put_failure;
2473 } else if (dst) {
2474 struct in6_addr saddr_buf;
2475 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2476 nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2477 goto nla_put_failure;
2478 }
2479
2480 if (rt->rt6i_prefsrc.plen) {
2481 struct in6_addr saddr_buf;
2482 saddr_buf = rt->rt6i_prefsrc.addr;
2483 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2484 goto nla_put_failure;
2485 }
2486
2487 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2488 goto nla_put_failure;
2489
2490 rcu_read_lock();
2491 n = dst_get_neighbour_noref(&rt->dst);
2492 if (n) {
2493 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
2494 rcu_read_unlock();
2495 goto nla_put_failure;
2496 }
2497 }
2498 rcu_read_unlock();
2499
2500 if (rt->dst.dev &&
2501 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2502 goto nla_put_failure;
2503 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2504 goto nla_put_failure;
2505 if (!(rt->rt6i_flags & RTF_EXPIRES))
2506 expires = 0;
2507 else if (rt->dst.expires - jiffies < INT_MAX)
2508 expires = rt->dst.expires - jiffies;
2509 else
2510 expires = INT_MAX;
2511
2512 peer = rt->rt6i_peer;
2513 ts = tsage = 0;
2514 if (peer && peer->tcp_ts_stamp) {
2515 ts = peer->tcp_ts;
2516 tsage = get_seconds() - peer->tcp_ts_stamp;
2517 }
2518
2519 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage,
2520 expires, rt->dst.error) < 0)
2521 goto nla_put_failure;
2522
2523 return nlmsg_end(skb, nlh);
2524
2525 nla_put_failure:
2526 nlmsg_cancel(skb, nlh);
2527 return -EMSGSIZE;
2528 }
2529
2530 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2531 {
2532 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2533 int prefix;
2534
2535 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2536 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2537 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2538 } else
2539 prefix = 0;
2540
2541 return rt6_fill_node(arg->net,
2542 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2543 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2544 prefix, 0, NLM_F_MULTI);
2545 }
2546
2547 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2548 {
2549 struct net *net = sock_net(in_skb->sk);
2550 struct nlattr *tb[RTA_MAX+1];
2551 struct rt6_info *rt;
2552 struct sk_buff *skb;
2553 struct rtmsg *rtm;
2554 struct flowi6 fl6;
2555 int err, iif = 0, oif = 0;
2556
2557 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2558 if (err < 0)
2559 goto errout;
2560
2561 err = -EINVAL;
2562 memset(&fl6, 0, sizeof(fl6));
2563
2564 if (tb[RTA_SRC]) {
2565 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2566 goto errout;
2567
2568 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2569 }
2570
2571 if (tb[RTA_DST]) {
2572 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2573 goto errout;
2574
2575 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2576 }
2577
2578 if (tb[RTA_IIF])
2579 iif = nla_get_u32(tb[RTA_IIF]);
2580
2581 if (tb[RTA_OIF])
2582 oif = nla_get_u32(tb[RTA_OIF]);
2583
2584 if (iif) {
2585 struct net_device *dev;
2586 int flags = 0;
2587
2588 dev = __dev_get_by_index(net, iif);
2589 if (!dev) {
2590 err = -ENODEV;
2591 goto errout;
2592 }
2593
2594 fl6.flowi6_iif = iif;
2595
2596 if (!ipv6_addr_any(&fl6.saddr))
2597 flags |= RT6_LOOKUP_F_HAS_SADDR;
2598
2599 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2600 flags);
2601 } else {
2602 fl6.flowi6_oif = oif;
2603
2604 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2605 }
2606
2607 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2608 if (!skb) {
2609 dst_release(&rt->dst);
2610 err = -ENOBUFS;
2611 goto errout;
2612 }
2613
2614 /* Reserve room for dummy headers, this skb can pass
2615 through good chunk of routing engine.
2616 */
2617 skb_reset_mac_header(skb);
2618 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2619
2620 skb_dst_set(skb, &rt->dst);
2621
2622 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2623 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2624 nlh->nlmsg_seq, 0, 0, 0);
2625 if (err < 0) {
2626 kfree_skb(skb);
2627 goto errout;
2628 }
2629
2630 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2631 errout:
2632 return err;
2633 }
2634
2635 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2636 {
2637 struct sk_buff *skb;
2638 struct net *net = info->nl_net;
2639 u32 seq;
2640 int err;
2641
2642 err = -ENOBUFS;
2643 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2644
2645 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2646 if (!skb)
2647 goto errout;
2648
2649 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2650 event, info->pid, seq, 0, 0, 0);
2651 if (err < 0) {
2652 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2653 WARN_ON(err == -EMSGSIZE);
2654 kfree_skb(skb);
2655 goto errout;
2656 }
2657 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2658 info->nlh, gfp_any());
2659 return;
2660 errout:
2661 if (err < 0)
2662 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2663 }
2664
2665 static int ip6_route_dev_notify(struct notifier_block *this,
2666 unsigned long event, void *data)
2667 {
2668 struct net_device *dev = (struct net_device *)data;
2669 struct net *net = dev_net(dev);
2670
2671 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2672 net->ipv6.ip6_null_entry->dst.dev = dev;
2673 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2674 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2675 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2676 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2677 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2678 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2679 #endif
2680 }
2681
2682 return NOTIFY_OK;
2683 }
2684
2685 /*
2686 * /proc
2687 */
2688
2689 #ifdef CONFIG_PROC_FS
2690
/*
 * Buffer bookkeeping for /proc output.
 * NOTE(review): no user of this structure is visible in this part of
 * the file -- confirm whether it is still needed.
 */
struct rt6_proc_arg {
	char	*buffer;
	int	offset;
	int	length;
	int	skip;
	int	len;
};
2699
2700 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2701 {
2702 struct seq_file *m = p_arg;
2703 struct neighbour *n;
2704
2705 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2706
2707 #ifdef CONFIG_IPV6_SUBTREES
2708 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2709 #else
2710 seq_puts(m, "00000000000000000000000000000000 00 ");
2711 #endif
2712 rcu_read_lock();
2713 n = dst_get_neighbour_noref(&rt->dst);
2714 if (n) {
2715 seq_printf(m, "%pi6", n->primary_key);
2716 } else {
2717 seq_puts(m, "00000000000000000000000000000000");
2718 }
2719 rcu_read_unlock();
2720 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2721 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2722 rt->dst.__use, rt->rt6i_flags,
2723 rt->dst.dev ? rt->dst.dev->name : "");
2724 return 0;
2725 }
2726
2727 static int ipv6_route_show(struct seq_file *m, void *v)
2728 {
2729 struct net *net = (struct net *)m->private;
2730 fib6_clean_all_ro(net, rt6_info_route, 0, m);
2731 return 0;
2732 }
2733
2734 static int ipv6_route_open(struct inode *inode, struct file *file)
2735 {
2736 return single_open_net(inode, file, ipv6_route_show);
2737 }
2738
2739 static const struct file_operations ipv6_route_proc_fops = {
2740 .owner = THIS_MODULE,
2741 .open = ipv6_route_open,
2742 .read = seq_read,
2743 .llseek = seq_lseek,
2744 .release = single_release_net,
2745 };
2746
2747 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2748 {
2749 struct net *net = (struct net *)seq->private;
2750 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2751 net->ipv6.rt6_stats->fib_nodes,
2752 net->ipv6.rt6_stats->fib_route_nodes,
2753 net->ipv6.rt6_stats->fib_rt_alloc,
2754 net->ipv6.rt6_stats->fib_rt_entries,
2755 net->ipv6.rt6_stats->fib_rt_cache,
2756 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2757 net->ipv6.rt6_stats->fib_discarded_routes);
2758
2759 return 0;
2760 }
2761
2762 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2763 {
2764 return single_open_net(inode, file, rt6_stats_seq_show);
2765 }
2766
2767 static const struct file_operations rt6_stats_seq_fops = {
2768 .owner = THIS_MODULE,
2769 .open = rt6_stats_seq_open,
2770 .read = seq_read,
2771 .llseek = seq_lseek,
2772 .release = single_release_net,
2773 };
2774 #endif /* CONFIG_PROC_FS */
2775
2776 #ifdef CONFIG_SYSCTL
2777
2778 static
2779 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2780 void __user *buffer, size_t *lenp, loff_t *ppos)
2781 {
2782 struct net *net;
2783 int delay;
2784 if (!write)
2785 return -EINVAL;
2786
2787 net = (struct net *)ctl->extra1;
2788 delay = net->ipv6.sysctl.flush_delay;
2789 proc_dointvec(ctl, write, buffer, lenp, ppos);
2790 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2791 return 0;
2792 }
2793
2794 ctl_table ipv6_route_table_template[] = {
2795 {
2796 .procname = "flush",
2797 .data = &init_net.ipv6.sysctl.flush_delay,
2798 .maxlen = sizeof(int),
2799 .mode = 0200,
2800 .proc_handler = ipv6_sysctl_rtcache_flush
2801 },
2802 {
2803 .procname = "gc_thresh",
2804 .data = &ip6_dst_ops_template.gc_thresh,
2805 .maxlen = sizeof(int),
2806 .mode = 0644,
2807 .proc_handler = proc_dointvec,
2808 },
2809 {
2810 .procname = "max_size",
2811 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2812 .maxlen = sizeof(int),
2813 .mode = 0644,
2814 .proc_handler = proc_dointvec,
2815 },
2816 {
2817 .procname = "gc_min_interval",
2818 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2819 .maxlen = sizeof(int),
2820 .mode = 0644,
2821 .proc_handler = proc_dointvec_jiffies,
2822 },
2823 {
2824 .procname = "gc_timeout",
2825 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2826 .maxlen = sizeof(int),
2827 .mode = 0644,
2828 .proc_handler = proc_dointvec_jiffies,
2829 },
2830 {
2831 .procname = "gc_interval",
2832 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2833 .maxlen = sizeof(int),
2834 .mode = 0644,
2835 .proc_handler = proc_dointvec_jiffies,
2836 },
2837 {
2838 .procname = "gc_elasticity",
2839 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2840 .maxlen = sizeof(int),
2841 .mode = 0644,
2842 .proc_handler = proc_dointvec,
2843 },
2844 {
2845 .procname = "mtu_expires",
2846 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2847 .maxlen = sizeof(int),
2848 .mode = 0644,
2849 .proc_handler = proc_dointvec_jiffies,
2850 },
2851 {
2852 .procname = "min_adv_mss",
2853 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2854 .maxlen = sizeof(int),
2855 .mode = 0644,
2856 .proc_handler = proc_dointvec,
2857 },
2858 {
2859 .procname = "gc_min_interval_ms",
2860 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2861 .maxlen = sizeof(int),
2862 .mode = 0644,
2863 .proc_handler = proc_dointvec_ms_jiffies,
2864 },
2865 { }
2866 };
2867
2868 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2869 {
2870 struct ctl_table *table;
2871
2872 table = kmemdup(ipv6_route_table_template,
2873 sizeof(ipv6_route_table_template),
2874 GFP_KERNEL);
2875
2876 if (table) {
2877 table[0].data = &net->ipv6.sysctl.flush_delay;
2878 table[0].extra1 = net;
2879 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2880 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2881 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2882 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2883 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2884 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2885 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2886 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2887 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2888 }
2889
2890 return table;
2891 }
2892 #endif
2893
2894 static int __net_init ip6_route_net_init(struct net *net)
2895 {
2896 int ret = -ENOMEM;
2897
2898 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2899 sizeof(net->ipv6.ip6_dst_ops));
2900
2901 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2902 goto out_ip6_dst_ops;
2903
2904 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2905 sizeof(*net->ipv6.ip6_null_entry),
2906 GFP_KERNEL);
2907 if (!net->ipv6.ip6_null_entry)
2908 goto out_ip6_dst_entries;
2909 net->ipv6.ip6_null_entry->dst.path =
2910 (struct dst_entry *)net->ipv6.ip6_null_entry;
2911 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2912 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2913 ip6_template_metrics, true);
2914
2915 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2916 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2917 sizeof(*net->ipv6.ip6_prohibit_entry),
2918 GFP_KERNEL);
2919 if (!net->ipv6.ip6_prohibit_entry)
2920 goto out_ip6_null_entry;
2921 net->ipv6.ip6_prohibit_entry->dst.path =
2922 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2923 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2924 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2925 ip6_template_metrics, true);
2926
2927 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2928 sizeof(*net->ipv6.ip6_blk_hole_entry),
2929 GFP_KERNEL);
2930 if (!net->ipv6.ip6_blk_hole_entry)
2931 goto out_ip6_prohibit_entry;
2932 net->ipv6.ip6_blk_hole_entry->dst.path =
2933 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2934 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2935 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2936 ip6_template_metrics, true);
2937 #endif
2938
2939 net->ipv6.sysctl.flush_delay = 0;
2940 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2941 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2942 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2943 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2944 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2945 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2946 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2947
2948 #ifdef CONFIG_PROC_FS
2949 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2950 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2951 #endif
2952 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2953
2954 ret = 0;
2955 out:
2956 return ret;
2957
2958 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2959 out_ip6_prohibit_entry:
2960 kfree(net->ipv6.ip6_prohibit_entry);
2961 out_ip6_null_entry:
2962 kfree(net->ipv6.ip6_null_entry);
2963 #endif
2964 out_ip6_dst_entries:
2965 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2966 out_ip6_dst_ops:
2967 goto out;
2968 }
2969
2970 static void __net_exit ip6_route_net_exit(struct net *net)
2971 {
2972 #ifdef CONFIG_PROC_FS
2973 proc_net_remove(net, "ipv6_route");
2974 proc_net_remove(net, "rt6_stats");
2975 #endif
2976 kfree(net->ipv6.ip6_null_entry);
2977 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2978 kfree(net->ipv6.ip6_prohibit_entry);
2979 kfree(net->ipv6.ip6_blk_hole_entry);
2980 #endif
2981 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2982 }
2983
2984 static struct pernet_operations ip6_route_net_ops = {
2985 .init = ip6_route_net_init,
2986 .exit = ip6_route_net_exit,
2987 };
2988
2989 static struct notifier_block ip6_route_dev_notifier = {
2990 .notifier_call = ip6_route_dev_notify,
2991 .priority = 0,
2992 };
2993
2994 int __init ip6_route_init(void)
2995 {
2996 int ret;
2997
2998 ret = -ENOMEM;
2999 ip6_dst_ops_template.kmem_cachep =
3000 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3001 SLAB_HWCACHE_ALIGN, NULL);
3002 if (!ip6_dst_ops_template.kmem_cachep)
3003 goto out;
3004
3005 ret = dst_entries_init(&ip6_dst_blackhole_ops);
3006 if (ret)
3007 goto out_kmem_cache;
3008
3009 ret = register_pernet_subsys(&ip6_route_net_ops);
3010 if (ret)
3011 goto out_dst_entries;
3012
3013 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3014
3015 /* Registering of the loopback is done before this portion of code,
3016 * the loopback reference in rt6_info will not be taken, do it
3017 * manually for init_net */
3018 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3019 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3020 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3021 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3022 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3023 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3024 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3025 #endif
3026 ret = fib6_init();
3027 if (ret)
3028 goto out_register_subsys;
3029
3030 ret = xfrm6_init();
3031 if (ret)
3032 goto out_fib6_init;
3033
3034 ret = fib6_rules_init();
3035 if (ret)
3036 goto xfrm6_init;
3037
3038 ret = -ENOBUFS;
3039 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3040 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3041 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3042 goto fib6_rules_init;
3043
3044 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3045 if (ret)
3046 goto fib6_rules_init;
3047
3048 out:
3049 return ret;
3050
3051 fib6_rules_init:
3052 fib6_rules_cleanup();
3053 xfrm6_init:
3054 xfrm6_fini();
3055 out_fib6_init:
3056 fib6_gc_cleanup();
3057 out_register_subsys:
3058 unregister_pernet_subsys(&ip6_route_net_ops);
3059 out_dst_entries:
3060 dst_entries_destroy(&ip6_dst_blackhole_ops);
3061 out_kmem_cache:
3062 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3063 goto out;
3064 }
3065
3066 void ip6_route_cleanup(void)
3067 {
3068 unregister_netdevice_notifier(&ip6_route_dev_notifier);
3069 fib6_rules_cleanup();
3070 xfrm6_fini();
3071 fib6_gc_cleanup();
3072 unregister_pernet_subsys(&ip6_route_net_ops);
3073 dst_entries_destroy(&ip6_dst_blackhole_ops);
3074 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3075 }
This page took 0.177678 seconds and 5 git commands to generate.