inetpeer: Add redirect and PMTU discovery cached info.
[deliverable/linux.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4
LT
28#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
7bc570c8 37#include <linux/mroute6.h>
1da177e4 38#include <linux/init.h>
1da177e4 39#include <linux/if_arp.h>
1da177e4
LT
40#include <linux/proc_fs.h>
41#include <linux/seq_file.h>
5b7c931d 42#include <linux/nsproxy.h>
5a0e3ad6 43#include <linux/slab.h>
457c4cbc 44#include <net/net_namespace.h>
1da177e4
LT
45#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
54#include <net/xfrm.h>
8d71740c 55#include <net/netevent.h>
21713ebc 56#include <net/netlink.h>
1da177e4
LT
57
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
/* Debug verbosity for this file; raise to 3 to enable tracing. */
#define RT6_DEBUG 2

#if RT6_DEBUG >= 3
/* RDBG expects a parenthesised printk argument list: RDBG(("fmt", ...)). */
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
/* Tracing disabled: RDBG vanishes, RT6_TRACE becomes an empty statement. */
#define RDBG(x)
#define RT6_TRACE(x...) do { } while (0)
#endif
74
1da177e4
LT
75static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
76static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 77static unsigned int ip6_default_advmss(const struct dst_entry *dst);
d33e4553 78static unsigned int ip6_default_mtu(const struct dst_entry *dst);
1da177e4
LT
79static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80static void ip6_dst_destroy(struct dst_entry *);
81static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
569d3645 83static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
84
85static int ip6_pkt_discard(struct sk_buff *skb);
86static int ip6_pkt_discard_out(struct sk_buff *skb);
87static void ip6_link_failure(struct sk_buff *skb);
88static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89
70ceb4f5 90#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0
DL
91static struct rt6_info *rt6_add_route_info(struct net *net,
92 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
93 struct in6_addr *gwaddr, int ifindex,
94 unsigned pref);
efa2cea0
DL
95static struct rt6_info *rt6_get_route_info(struct net *net,
96 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
97 struct in6_addr *gwaddr, int ifindex);
98#endif
99
06582540
DM
100static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
101{
102 struct rt6_info *rt = (struct rt6_info *) dst;
103 struct inet_peer *peer;
104 u32 *p = NULL;
105
106 if (!rt->rt6i_peer)
107 rt6_bind_peer(rt, 1);
108
109 peer = rt->rt6i_peer;
110 if (peer) {
111 u32 *old_p = __DST_METRICS_PTR(old);
112 unsigned long prev, new;
113
114 p = peer->metrics;
115 if (inet_metrics_new(peer))
116 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
117
118 new = (unsigned long) p;
119 prev = cmpxchg(&dst->_metrics, old, new);
120
121 if (prev != old) {
122 p = __DST_METRICS_PTR(prev);
123 if (prev & DST_METRICS_READ_ONLY)
124 p = NULL;
125 }
126 }
127 return p;
128}
129
9a7ec3a9 130static struct dst_ops ip6_dst_ops_template = {
1da177e4 131 .family = AF_INET6,
09640e63 132 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
133 .gc = ip6_dst_gc,
134 .gc_thresh = 1024,
135 .check = ip6_dst_check,
0dbaee3b 136 .default_advmss = ip6_default_advmss,
d33e4553 137 .default_mtu = ip6_default_mtu,
06582540 138 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
139 .destroy = ip6_dst_destroy,
140 .ifdown = ip6_dst_ifdown,
141 .negative_advice = ip6_negative_advice,
142 .link_failure = ip6_link_failure,
143 .update_pmtu = ip6_rt_update_pmtu,
1ac06e03 144 .local_out = __ip6_local_out,
1da177e4
LT
145};
146
ec831ea7
RD
/* default_mtu callback for blackhole dsts: always reports 0. */
static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
{
	const unsigned int no_mtu = 0;

	return no_mtu;
}
151
14e50e57
DM
152static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
153{
154}
155
156static struct dst_ops ip6_dst_blackhole_ops = {
157 .family = AF_INET6,
09640e63 158 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
159 .destroy = ip6_dst_destroy,
160 .check = ip6_dst_check,
ec831ea7 161 .default_mtu = ip6_blackhole_default_mtu,
14e50e57 162 .update_pmtu = ip6_rt_blackhole_update_pmtu,
14e50e57
DM
163};
164
62fa8a84
DM
165static const u32 ip6_template_metrics[RTAX_MAX] = {
166 [RTAX_HOPLIMIT - 1] = 255,
167};
168
bdb3289f 169static struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
170 .dst = {
171 .__refcnt = ATOMIC_INIT(1),
172 .__use = 1,
173 .obsolete = -1,
174 .error = -ENETUNREACH,
d8d1f30b
CG
175 .input = ip6_pkt_discard,
176 .output = ip6_pkt_discard_out,
1da177e4
LT
177 },
178 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 179 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
180 .rt6i_metric = ~(u32) 0,
181 .rt6i_ref = ATOMIC_INIT(1),
182};
183
101367c2
TG
184#ifdef CONFIG_IPV6_MULTIPLE_TABLES
185
6723ab54
DM
186static int ip6_pkt_prohibit(struct sk_buff *skb);
187static int ip6_pkt_prohibit_out(struct sk_buff *skb);
6723ab54 188
280a34c8 189static struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
190 .dst = {
191 .__refcnt = ATOMIC_INIT(1),
192 .__use = 1,
193 .obsolete = -1,
194 .error = -EACCES,
d8d1f30b
CG
195 .input = ip6_pkt_prohibit,
196 .output = ip6_pkt_prohibit_out,
101367c2
TG
197 },
198 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 199 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
200 .rt6i_metric = ~(u32) 0,
201 .rt6i_ref = ATOMIC_INIT(1),
202};
203
bdb3289f 204static struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
205 .dst = {
206 .__refcnt = ATOMIC_INIT(1),
207 .__use = 1,
208 .obsolete = -1,
209 .error = -EINVAL,
d8d1f30b
CG
210 .input = dst_discard,
211 .output = dst_discard,
101367c2
TG
212 },
213 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 214 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
215 .rt6i_metric = ~(u32) 0,
216 .rt6i_ref = ATOMIC_INIT(1),
217};
218
219#endif
220
/* Allocate an entry from @ops via dst_alloc() and return it typed as rt6_info. */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	struct rt6_info *rt = (struct rt6_info *)dst_alloc(ops);

	return rt;
}
226
227static void ip6_dst_destroy(struct dst_entry *dst)
228{
229 struct rt6_info *rt = (struct rt6_info *)dst;
230 struct inet6_dev *idev = rt->rt6i_idev;
b3419363 231 struct inet_peer *peer = rt->rt6i_peer;
1da177e4
LT
232
233 if (idev != NULL) {
234 rt->rt6i_idev = NULL;
235 in6_dev_put(idev);
1ab1457c 236 }
b3419363 237 if (peer) {
b3419363
DM
238 rt->rt6i_peer = NULL;
239 inet_putpeer(peer);
240 }
241}
242
243void rt6_bind_peer(struct rt6_info *rt, int create)
244{
245 struct inet_peer *peer;
246
b3419363
DM
247 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
248 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
249 inet_putpeer(peer);
1da177e4
LT
250}
251
252static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
253 int how)
254{
255 struct rt6_info *rt = (struct rt6_info *)dst;
256 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 257 struct net_device *loopback_dev =
c346dca1 258 dev_net(dev)->loopback_dev;
1da177e4 259
5a3e55d6
DL
260 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
261 struct inet6_dev *loopback_idev =
262 in6_dev_get(loopback_dev);
1da177e4
LT
263 if (loopback_idev != NULL) {
264 rt->rt6i_idev = loopback_idev;
265 in6_dev_put(idev);
266 }
267 }
268}
269
270static __inline__ int rt6_check_expired(const struct rt6_info *rt)
271{
a02cec21
ED
272 return (rt->rt6i_flags & RTF_EXPIRES) &&
273 time_after(jiffies, rt->rt6i_expires);
1da177e4
LT
274}
275
c71099ac
TG
276static inline int rt6_need_strict(struct in6_addr *daddr)
277{
a02cec21
ED
278 return ipv6_addr_type(daddr) &
279 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
280}
281
1da177e4 282/*
c71099ac 283 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
284 */
285
8ed67789
DL
286static inline struct rt6_info *rt6_device_match(struct net *net,
287 struct rt6_info *rt,
dd3abc4e 288 struct in6_addr *saddr,
1da177e4 289 int oif,
d420895e 290 int flags)
1da177e4
LT
291{
292 struct rt6_info *local = NULL;
293 struct rt6_info *sprt;
294
dd3abc4e
YH
295 if (!oif && ipv6_addr_any(saddr))
296 goto out;
297
d8d1f30b 298 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
dd3abc4e
YH
299 struct net_device *dev = sprt->rt6i_dev;
300
301 if (oif) {
1da177e4
LT
302 if (dev->ifindex == oif)
303 return sprt;
304 if (dev->flags & IFF_LOOPBACK) {
305 if (sprt->rt6i_idev == NULL ||
306 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 307 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 308 continue;
1ab1457c 309 if (local && (!oif ||
1da177e4
LT
310 local->rt6i_idev->dev->ifindex == oif))
311 continue;
312 }
313 local = sprt;
314 }
dd3abc4e
YH
315 } else {
316 if (ipv6_chk_addr(net, saddr, dev,
317 flags & RT6_LOOKUP_F_IFACE))
318 return sprt;
1da177e4 319 }
dd3abc4e 320 }
1da177e4 321
dd3abc4e 322 if (oif) {
1da177e4
LT
323 if (local)
324 return local;
325
d420895e 326 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 327 return net->ipv6.ip6_null_entry;
1da177e4 328 }
dd3abc4e 329out:
1da177e4
LT
330 return rt;
331}
332
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: send a neighbour solicitation towards the
 * route's next hop when its neighbour entry is not in a NUD_VALID state.
 *
 * Probes MUST be rate-limited; one is only sent when more than
 * rtr_probe_interval jiffies have passed since neigh->updated.
 *
 * neigh->updated is modified here, so the neighbour lock is taken for
 * writing rather than for reading.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;

	/* Cheap unlocked check first; re-checked under the lock below. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	write_lock_bh(&neigh->lock);
	if ((neigh->nud_state & NUD_VALID) ||
	    !time_after(jiffies,
			neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		write_unlock_bh(&neigh->lock);
		return;
	}
	neigh->updated = jiffies;
	write_unlock_bh(&neigh->lock);

	/* The solicitation itself is sent after the lock is dropped. */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support disabled: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
367
1da177e4 368/*
554cfb7e 369 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 370 */
b6f99a21 371static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e
YH
372{
373 struct net_device *dev = rt->rt6i_dev;
161980f4 374 if (!oif || dev->ifindex == oif)
554cfb7e 375 return 2;
161980f4
DM
376 if ((dev->flags & IFF_LOOPBACK) &&
377 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
378 return 1;
379 return 0;
554cfb7e 380}
1da177e4 381
b6f99a21 382static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 383{
554cfb7e 384 struct neighbour *neigh = rt->rt6i_nexthop;
398bcbeb 385 int m;
4d0c5911
YH
386 if (rt->rt6i_flags & RTF_NONEXTHOP ||
387 !(rt->rt6i_flags & RTF_GATEWAY))
388 m = 1;
389 else if (neigh) {
554cfb7e
YH
390 read_lock_bh(&neigh->lock);
391 if (neigh->nud_state & NUD_VALID)
4d0c5911 392 m = 2;
398bcbeb
YH
393#ifdef CONFIG_IPV6_ROUTER_PREF
394 else if (neigh->nud_state & NUD_FAILED)
395 m = 0;
396#endif
397 else
ea73ee23 398 m = 1;
554cfb7e 399 read_unlock_bh(&neigh->lock);
398bcbeb
YH
400 } else
401 m = 0;
554cfb7e 402 return m;
1da177e4
LT
403}
404
554cfb7e
YH
405static int rt6_score_route(struct rt6_info *rt, int oif,
406 int strict)
1da177e4 407{
4d0c5911 408 int m, n;
1ab1457c 409
4d0c5911 410 m = rt6_check_dev(rt, oif);
77d16f45 411 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 412 return -1;
ebacaaa0
YH
413#ifdef CONFIG_IPV6_ROUTER_PREF
414 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
415#endif
4d0c5911 416 n = rt6_check_neigh(rt);
557e92ef 417 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
418 return -1;
419 return m;
420}
421
f11e6659
DM
422static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
423 int *mpri, struct rt6_info *match)
554cfb7e 424{
f11e6659
DM
425 int m;
426
427 if (rt6_check_expired(rt))
428 goto out;
429
430 m = rt6_score_route(rt, oif, strict);
431 if (m < 0)
432 goto out;
433
434 if (m > *mpri) {
435 if (strict & RT6_LOOKUP_F_REACHABLE)
436 rt6_probe(match);
437 *mpri = m;
438 match = rt;
439 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
440 rt6_probe(rt);
441 }
442
443out:
444 return match;
445}
446
447static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
448 struct rt6_info *rr_head,
449 u32 metric, int oif, int strict)
450{
451 struct rt6_info *rt, *match;
554cfb7e 452 int mpri = -1;
1da177e4 453
f11e6659
DM
454 match = NULL;
455 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 456 rt = rt->dst.rt6_next)
f11e6659
DM
457 match = find_match(rt, oif, strict, &mpri, match);
458 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 459 rt = rt->dst.rt6_next)
f11e6659 460 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 461
f11e6659
DM
462 return match;
463}
1da177e4 464
f11e6659
DM
465static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
466{
467 struct rt6_info *match, *rt0;
8ed67789 468 struct net *net;
1da177e4 469
f11e6659 470 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
0dc47877 471 __func__, fn->leaf, oif);
554cfb7e 472
f11e6659
DM
473 rt0 = fn->rr_ptr;
474 if (!rt0)
475 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 476
f11e6659 477 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 478
554cfb7e 479 if (!match &&
f11e6659 480 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 481 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 482
554cfb7e 483 /* no entries matched; do round-robin */
f11e6659
DM
484 if (!next || next->rt6i_metric != rt0->rt6i_metric)
485 next = fn->leaf;
486
487 if (next != rt0)
488 fn->rr_ptr = next;
1da177e4 489 }
1da177e4 490
f11e6659 491 RT6_TRACE("%s() => %p\n",
0dc47877 492 __func__, match);
1da177e4 493
c346dca1 494 net = dev_net(rt0->rt6i_dev);
a02cec21 495 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
496}
497
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a route information option @opt of @len bytes received on @dev
 * from gateway @gwaddr.
 *
 * Returns -EINVAL when the option is too short, its length / prefix_len
 * fields are inconsistent, or the preference is invalid; 0 otherwise.
 * A zero lifetime deletes an existing route; a non-zero lifetime adds the
 * route if missing, or refreshes its preference flags and expiry.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex, pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) |
				 RTF_PREF(pref);

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime)) {
		rt->rt6i_expires = jiffies + HZ * lifetime;
		rt->rt6i_flags |= RTF_EXPIRES;
	} else {
		rt->rt6i_flags &= ~RTF_EXPIRES;
	}
	dst_release(&rt->dst);

	return 0;
}
#endif
571
/*
 * BACKTRACK(net, saddr) - climb the fib6 tree after a failed lookup.
 *
 * Only acts when the enclosing function's `rt` equals the namespace's
 * null entry.  It then walks from `fn` towards the root: jumps to the
 * `out` label on reaching a node flagged RTN_TL_ROOT, descends into a
 * parent's source subtree (looked up with @saddr) when there is one that
 * is not the current node, and jumps to `restart` as soon as the new
 * `fn` carries RTN_RTINFO.
 *
 * The expanding function must provide the locals `rt` and `fn` and the
 * labels `out` and `restart`.
 */
#define BACKTRACK(__net, saddr) \
do { \
	if (rt == (__net)->ipv6.ip6_null_entry) { \
		struct fib6_node *pn; \
		while (1) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, (saddr)); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
c71099ac 589
8ed67789
DL
590static struct rt6_info *ip6_pol_route_lookup(struct net *net,
591 struct fib6_table *table,
c71099ac 592 struct flowi *fl, int flags)
1da177e4
LT
593{
594 struct fib6_node *fn;
595 struct rt6_info *rt;
596
c71099ac
TG
597 read_lock_bh(&table->tb6_lock);
598 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
599restart:
600 rt = fn->leaf;
dd3abc4e 601 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
8ed67789 602 BACKTRACK(net, &fl->fl6_src);
c71099ac 603out:
d8d1f30b 604 dst_use(&rt->dst, jiffies);
c71099ac 605 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
606 return rt;
607
608}
609
9acd9f3a
YH
610struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
611 const struct in6_addr *saddr, int oif, int strict)
c71099ac
TG
612{
613 struct flowi fl = {
614 .oif = oif,
5811662b 615 .fl6_dst = *daddr,
c71099ac
TG
616 };
617 struct dst_entry *dst;
77d16f45 618 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 619
adaa70bb
TG
620 if (saddr) {
621 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
622 flags |= RT6_LOOKUP_F_HAS_SADDR;
623 }
624
606a2b48 625 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
c71099ac
TG
626 if (dst->error == 0)
627 return (struct rt6_info *) dst;
628
629 dst_release(dst);
630
1da177e4
LT
631 return NULL;
632}
633
7159039a
YH
634EXPORT_SYMBOL(rt6_lookup);
635
c71099ac 636/* ip6_ins_rt is called with FREE table->tb6_lock.
1da177e4
LT
637 It takes new route entry, the addition fails by any reason the
638 route is freed. In any case, if caller does not hold it, it may
639 be destroyed.
640 */
641
86872cb5 642static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
643{
644 int err;
c71099ac 645 struct fib6_table *table;
1da177e4 646
c71099ac
TG
647 table = rt->rt6i_table;
648 write_lock_bh(&table->tb6_lock);
86872cb5 649 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 650 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
651
652 return err;
653}
654
40e22e8f
TG
655int ip6_ins_rt(struct rt6_info *rt)
656{
4d1169c1 657 struct nl_info info = {
c346dca1 658 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 659 };
528c4ceb 660 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
661}
662
95a9a5ba
YH
663static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
664 struct in6_addr *saddr)
1da177e4 665{
1da177e4
LT
666 struct rt6_info *rt;
667
668 /*
669 * Clone the route.
670 */
671
672 rt = ip6_rt_copy(ort);
673
674 if (rt) {
14deae41
DM
675 struct neighbour *neigh;
676 int attempts = !in_softirq();
677
58c4fb86
YH
678 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
679 if (rt->rt6i_dst.plen != 128 &&
680 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
681 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 682 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
58c4fb86 683 }
1da177e4 684
58c4fb86 685 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
1da177e4
LT
686 rt->rt6i_dst.plen = 128;
687 rt->rt6i_flags |= RTF_CACHE;
d8d1f30b 688 rt->dst.flags |= DST_HOST;
1da177e4
LT
689
690#ifdef CONFIG_IPV6_SUBTREES
691 if (rt->rt6i_src.plen && saddr) {
692 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
693 rt->rt6i_src.plen = 128;
694 }
695#endif
696
14deae41
DM
697 retry:
698 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
699 if (IS_ERR(neigh)) {
700 struct net *net = dev_net(rt->rt6i_dev);
701 int saved_rt_min_interval =
702 net->ipv6.sysctl.ip6_rt_gc_min_interval;
703 int saved_rt_elasticity =
704 net->ipv6.sysctl.ip6_rt_gc_elasticity;
705
706 if (attempts-- > 0) {
707 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
708 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
709
86393e52 710 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
711
712 net->ipv6.sysctl.ip6_rt_gc_elasticity =
713 saved_rt_elasticity;
714 net->ipv6.sysctl.ip6_rt_gc_min_interval =
715 saved_rt_min_interval;
716 goto retry;
717 }
718
719 if (net_ratelimit())
720 printk(KERN_WARNING
7e1b33e5 721 "ipv6: Neighbour table overflow.\n");
d8d1f30b 722 dst_free(&rt->dst);
14deae41
DM
723 return NULL;
724 }
725 rt->rt6i_nexthop = neigh;
1da177e4 726
95a9a5ba 727 }
1da177e4 728
95a9a5ba
YH
729 return rt;
730}
1da177e4 731
299d9939
YH
732static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
733{
734 struct rt6_info *rt = ip6_rt_copy(ort);
735 if (rt) {
736 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
737 rt->rt6i_dst.plen = 128;
738 rt->rt6i_flags |= RTF_CACHE;
d8d1f30b 739 rt->dst.flags |= DST_HOST;
299d9939
YH
740 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
741 }
742 return rt;
743}
744
8ed67789
DL
745static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
746 struct flowi *fl, int flags)
1da177e4
LT
747{
748 struct fib6_node *fn;
519fbd87 749 struct rt6_info *rt, *nrt;
c71099ac 750 int strict = 0;
1da177e4 751 int attempts = 3;
519fbd87 752 int err;
53b7997f 753 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 754
77d16f45 755 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
756
757relookup:
c71099ac 758 read_lock_bh(&table->tb6_lock);
1da177e4 759
8238dd06 760restart_2:
c71099ac 761 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1da177e4
LT
762
763restart:
4acad72d 764 rt = rt6_select(fn, oif, strict | reachable);
8ed67789
DL
765
766 BACKTRACK(net, &fl->fl6_src);
767 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 768 rt->rt6i_flags & RTF_CACHE)
1ddef044 769 goto out;
1da177e4 770
d8d1f30b 771 dst_hold(&rt->dst);
c71099ac 772 read_unlock_bh(&table->tb6_lock);
fb9de91e 773
519fbd87 774 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
c71099ac 775 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
d80bc0fd 776 else
c71099ac 777 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
e40cf353 778
d8d1f30b 779 dst_release(&rt->dst);
8ed67789 780 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 781
d8d1f30b 782 dst_hold(&rt->dst);
519fbd87 783 if (nrt) {
40e22e8f 784 err = ip6_ins_rt(nrt);
519fbd87 785 if (!err)
1da177e4 786 goto out2;
1da177e4 787 }
1da177e4 788
519fbd87
YH
789 if (--attempts <= 0)
790 goto out2;
791
792 /*
c71099ac 793 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
794 * released someone could insert this route. Relookup.
795 */
d8d1f30b 796 dst_release(&rt->dst);
519fbd87
YH
797 goto relookup;
798
799out:
8238dd06
YH
800 if (reachable) {
801 reachable = 0;
802 goto restart_2;
803 }
d8d1f30b 804 dst_hold(&rt->dst);
c71099ac 805 read_unlock_bh(&table->tb6_lock);
1da177e4 806out2:
d8d1f30b
CG
807 rt->dst.lastuse = jiffies;
808 rt->dst.__use++;
c71099ac
TG
809
810 return rt;
1da177e4
LT
811}
812
8ed67789 813static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4acad72d
PE
814 struct flowi *fl, int flags)
815{
8ed67789 816 return ip6_pol_route(net, table, fl->iif, fl, flags);
4acad72d
PE
817}
818
c71099ac
TG
819void ip6_route_input(struct sk_buff *skb)
820{
0660e03f 821 struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 822 struct net *net = dev_net(skb->dev);
adaa70bb 823 int flags = RT6_LOOKUP_F_HAS_SADDR;
c71099ac
TG
824 struct flowi fl = {
825 .iif = skb->dev->ifindex,
5811662b
CG
826 .fl6_dst = iph->daddr,
827 .fl6_src = iph->saddr,
828 .fl6_flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
1ab1457c 829 .mark = skb->mark,
c71099ac
TG
830 .proto = iph->nexthdr,
831 };
adaa70bb 832
1d6e55f1 833 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
adaa70bb 834 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 835
adf30907 836 skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input));
c71099ac
TG
837}
838
8ed67789 839static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
c71099ac 840 struct flowi *fl, int flags)
1da177e4 841{
8ed67789 842 return ip6_pol_route(net, table, fl->oif, fl, flags);
c71099ac
TG
843}
844
4591db4f
DL
845struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
846 struct flowi *fl)
c71099ac
TG
847{
848 int flags = 0;
849
6057fd78 850 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst))
77d16f45 851 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 852
adaa70bb
TG
853 if (!ipv6_addr_any(&fl->fl6_src))
854 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
855 else if (sk)
856 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 857
4591db4f 858 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
1da177e4
LT
859}
860
7159039a 861EXPORT_SYMBOL(ip6_route_output);
1da177e4 862
14e50e57
DM
863int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
864{
865 struct rt6_info *ort = (struct rt6_info *) *dstp;
866 struct rt6_info *rt = (struct rt6_info *)
867 dst_alloc(&ip6_dst_blackhole_ops);
868 struct dst_entry *new = NULL;
869
870 if (rt) {
d8d1f30b 871 new = &rt->dst;
14e50e57
DM
872
873 atomic_set(&new->__refcnt, 1);
874 new->__use = 1;
352e512c
HX
875 new->input = dst_discard;
876 new->output = dst_discard;
14e50e57 877
defb3519 878 dst_copy_metrics(new, &ort->dst);
d8d1f30b 879 new->dev = ort->dst.dev;
14e50e57
DM
880 if (new->dev)
881 dev_hold(new->dev);
882 rt->rt6i_idev = ort->rt6i_idev;
883 if (rt->rt6i_idev)
884 in6_dev_hold(rt->rt6i_idev);
885 rt->rt6i_expires = 0;
886
887 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
888 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
889 rt->rt6i_metric = 0;
890
891 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
892#ifdef CONFIG_IPV6_SUBTREES
893 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
894#endif
895
896 dst_free(new);
897 }
898
899 dst_release(*dstp);
900 *dstp = new;
a02cec21 901 return new ? 0 : -ENOMEM;
14e50e57
DM
902}
903EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
904
1da177e4
LT
905/*
906 * Destination cache support functions
907 */
908
909static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
910{
911 struct rt6_info *rt;
912
913 rt = (struct rt6_info *) dst;
914
10414444 915 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
1da177e4
LT
916 return dst;
917
918 return NULL;
919}
920
921static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
922{
923 struct rt6_info *rt = (struct rt6_info *) dst;
924
925 if (rt) {
54c1a859
YH
926 if (rt->rt6i_flags & RTF_CACHE) {
927 if (rt6_check_expired(rt)) {
928 ip6_del_rt(rt);
929 dst = NULL;
930 }
931 } else {
1da177e4 932 dst_release(dst);
54c1a859
YH
933 dst = NULL;
934 }
1da177e4 935 }
54c1a859 936 return dst;
1da177e4
LT
937}
938
939static void ip6_link_failure(struct sk_buff *skb)
940{
941 struct rt6_info *rt;
942
3ffe533c 943 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 944
adf30907 945 rt = (struct rt6_info *) skb_dst(skb);
1da177e4
LT
946 if (rt) {
947 if (rt->rt6i_flags&RTF_CACHE) {
d8d1f30b 948 dst_set_expires(&rt->dst, 0);
1da177e4
LT
949 rt->rt6i_flags |= RTF_EXPIRES;
950 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
951 rt->rt6i_node->fn_sernum = -1;
952 }
953}
954
955static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
956{
957 struct rt6_info *rt6 = (struct rt6_info*)dst;
958
959 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
960 rt6->rt6i_flags |= RTF_MODIFIED;
961 if (mtu < IPV6_MIN_MTU) {
defb3519 962 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 963 mtu = IPV6_MIN_MTU;
defb3519
DM
964 features |= RTAX_FEATURE_ALLFRAG;
965 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 966 }
defb3519 967 dst_metric_set(dst, RTAX_MTU, mtu);
1da177e4
LT
968 }
969}
970
0dbaee3b 971static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 972{
0dbaee3b
DM
973 struct net_device *dev = dst->dev;
974 unsigned int mtu = dst_mtu(dst);
975 struct net *net = dev_net(dev);
976
1da177e4
LT
977 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
978
5578689a
DL
979 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
980 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
981
982 /*
1ab1457c
YH
983 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
984 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
985 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
986 * rely only on pmtu discovery"
987 */
988 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
989 mtu = IPV6_MAXPLEN;
990 return mtu;
991}
992
d33e4553
DM
993static unsigned int ip6_default_mtu(const struct dst_entry *dst)
994{
995 unsigned int mtu = IPV6_MIN_MTU;
996 struct inet6_dev *idev;
997
998 rcu_read_lock();
999 idev = __in6_dev_get(dst->dev);
1000 if (idev)
1001 mtu = idev->cnf.mtu6;
1002 rcu_read_unlock();
1003
1004 return mtu;
1005}
1006
3b00944c
YH
1007static struct dst_entry *icmp6_dst_gc_list;
1008static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1009
3b00944c 1010struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 1011 struct neighbour *neigh,
9acd9f3a 1012 const struct in6_addr *addr)
1da177e4
LT
1013{
1014 struct rt6_info *rt;
1015 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1016 struct net *net = dev_net(dev);
1da177e4
LT
1017
1018 if (unlikely(idev == NULL))
1019 return NULL;
1020
86393e52 1021 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
1022 if (unlikely(rt == NULL)) {
1023 in6_dev_put(idev);
1024 goto out;
1025 }
1026
1027 dev_hold(dev);
1028 if (neigh)
1029 neigh_hold(neigh);
14deae41 1030 else {
1da177e4 1031 neigh = ndisc_get_neigh(dev, addr);
14deae41
DM
1032 if (IS_ERR(neigh))
1033 neigh = NULL;
1034 }
1da177e4
LT
1035
1036 rt->rt6i_dev = dev;
1037 rt->rt6i_idev = idev;
1038 rt->rt6i_nexthop = neigh;
d8d1f30b 1039 atomic_set(&rt->dst.__refcnt, 1);
defb3519 1040 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
d8d1f30b 1041 rt->dst.output = ip6_output;
1da177e4
LT
1042
1043#if 0 /* there's no chance to use these for ndisc */
d8d1f30b 1044 rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
1ab1457c 1045 ? DST_HOST
1da177e4
LT
1046 : 0;
1047 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1048 rt->rt6i_dst.plen = 128;
1049#endif
1050
3b00944c 1051 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1052 rt->dst.next = icmp6_dst_gc_list;
1053 icmp6_dst_gc_list = &rt->dst;
3b00944c 1054 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1055
5578689a 1056 fib6_force_start_gc(net);
1da177e4
LT
1057
1058out:
d8d1f30b 1059 return &rt->dst;
1da177e4
LT
1060}
1061
3d0f24a7 1062int icmp6_dst_gc(void)
1da177e4
LT
1063{
1064 struct dst_entry *dst, *next, **pprev;
3d0f24a7 1065 int more = 0;
1da177e4
LT
1066
1067 next = NULL;
5d0bbeeb 1068
3b00944c
YH
1069 spin_lock_bh(&icmp6_dst_lock);
1070 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1071
1da177e4
LT
1072 while ((dst = *pprev) != NULL) {
1073 if (!atomic_read(&dst->__refcnt)) {
1074 *pprev = dst->next;
1075 dst_free(dst);
1da177e4
LT
1076 } else {
1077 pprev = &dst->next;
3d0f24a7 1078 ++more;
1da177e4
LT
1079 }
1080 }
1081
3b00944c 1082 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1083
3d0f24a7 1084 return more;
1da177e4
LT
1085}
1086
1e493d19
DM
1087static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1088 void *arg)
1089{
1090 struct dst_entry *dst, **pprev;
1091
1092 spin_lock_bh(&icmp6_dst_lock);
1093 pprev = &icmp6_dst_gc_list;
1094 while ((dst = *pprev) != NULL) {
1095 struct rt6_info *rt = (struct rt6_info *) dst;
1096 if (func(rt, arg)) {
1097 *pprev = dst->next;
1098 dst_free(dst);
1099 } else {
1100 pprev = &dst->next;
1101 }
1102 }
1103 spin_unlock_bh(&icmp6_dst_lock);
1104}
1105
569d3645 1106static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1107{
1da177e4 1108 unsigned long now = jiffies;
86393e52 1109 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1110 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1111 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1112 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1113 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1114 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1115 int entries;
7019b78e 1116
fc66f95c 1117 entries = dst_entries_get_fast(ops);
7019b78e 1118 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1119 entries <= rt_max_size)
1da177e4
LT
1120 goto out;
1121
6891a346
BT
1122 net->ipv6.ip6_rt_gc_expire++;
1123 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1124 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1125 entries = dst_entries_get_slow(ops);
1126 if (entries < ops->gc_thresh)
7019b78e 1127 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1128out:
7019b78e 1129 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1130 return entries > rt_max_size;
1da177e4
LT
1131}
1132
1133/* Clean host part of a prefix. Not necessary in radix tree,
1134 but results in cleaner routing tables.
1135
1136 Remove it only when all the things will work!
1137 */
1138
6b75d090 1139int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1140{
5170ae82 1141 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1142 if (hoplimit == 0) {
6b75d090 1143 struct net_device *dev = dst->dev;
c68f24cc
ED
1144 struct inet6_dev *idev;
1145
1146 rcu_read_lock();
1147 idev = __in6_dev_get(dev);
1148 if (idev)
6b75d090 1149 hoplimit = idev->cnf.hop_limit;
c68f24cc 1150 else
53b7997f 1151 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1152 rcu_read_unlock();
1da177e4
LT
1153 }
1154 return hoplimit;
1155}
abbf46ae 1156EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1157
1158/*
1159 *
1160 */
1161
86872cb5 1162int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1163{
1164 int err;
5578689a 1165 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1166 struct rt6_info *rt = NULL;
1167 struct net_device *dev = NULL;
1168 struct inet6_dev *idev = NULL;
c71099ac 1169 struct fib6_table *table;
1da177e4
LT
1170 int addr_type;
1171
86872cb5 1172 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1173 return -EINVAL;
1174#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1175 if (cfg->fc_src_len)
1da177e4
LT
1176 return -EINVAL;
1177#endif
86872cb5 1178 if (cfg->fc_ifindex) {
1da177e4 1179 err = -ENODEV;
5578689a 1180 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1181 if (!dev)
1182 goto out;
1183 idev = in6_dev_get(dev);
1184 if (!idev)
1185 goto out;
1186 }
1187
86872cb5
TG
1188 if (cfg->fc_metric == 0)
1189 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1190
5578689a 1191 table = fib6_new_table(net, cfg->fc_table);
c71099ac
TG
1192 if (table == NULL) {
1193 err = -ENOBUFS;
1194 goto out;
1195 }
1196
86393e52 1197 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
1198
1199 if (rt == NULL) {
1200 err = -ENOMEM;
1201 goto out;
1202 }
1203
d8d1f30b 1204 rt->dst.obsolete = -1;
6f704992
YH
1205 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1206 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1207 0;
1da177e4 1208
86872cb5
TG
1209 if (cfg->fc_protocol == RTPROT_UNSPEC)
1210 cfg->fc_protocol = RTPROT_BOOT;
1211 rt->rt6i_protocol = cfg->fc_protocol;
1212
1213 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1214
1215 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1216 rt->dst.input = ip6_mc_input;
ab79ad14
1217 else if (cfg->fc_flags & RTF_LOCAL)
1218 rt->dst.input = ip6_input;
1da177e4 1219 else
d8d1f30b 1220 rt->dst.input = ip6_forward;
1da177e4 1221
d8d1f30b 1222 rt->dst.output = ip6_output;
1da177e4 1223
86872cb5
TG
1224 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1225 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1226 if (rt->rt6i_dst.plen == 128)
d8d1f30b 1227 rt->dst.flags = DST_HOST;
1da177e4
LT
1228
1229#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1230 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1231 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1232#endif
1233
86872cb5 1234 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1235
1236 /* We cannot add true routes via loopback here,
1237 they would result in kernel looping; promote them to reject routes
1238 */
86872cb5 1239 if ((cfg->fc_flags & RTF_REJECT) ||
ab79ad14
1240 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1241 && !(cfg->fc_flags&RTF_LOCAL))) {
1da177e4 1242 /* hold loopback dev/idev if we haven't done so. */
5578689a 1243 if (dev != net->loopback_dev) {
1da177e4
LT
1244 if (dev) {
1245 dev_put(dev);
1246 in6_dev_put(idev);
1247 }
5578689a 1248 dev = net->loopback_dev;
1da177e4
LT
1249 dev_hold(dev);
1250 idev = in6_dev_get(dev);
1251 if (!idev) {
1252 err = -ENODEV;
1253 goto out;
1254 }
1255 }
d8d1f30b
CG
1256 rt->dst.output = ip6_pkt_discard_out;
1257 rt->dst.input = ip6_pkt_discard;
1258 rt->dst.error = -ENETUNREACH;
1da177e4
LT
1259 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1260 goto install_route;
1261 }
1262
86872cb5 1263 if (cfg->fc_flags & RTF_GATEWAY) {
1da177e4
LT
1264 struct in6_addr *gw_addr;
1265 int gwa_type;
1266
86872cb5
TG
1267 gw_addr = &cfg->fc_gateway;
1268 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1da177e4
LT
1269 gwa_type = ipv6_addr_type(gw_addr);
1270
1271 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1272 struct rt6_info *grt;
1273
1274 /* IPv6 strictly inhibits using not link-local
1275 addresses as nexthop address.
1276 Otherwise, router will not able to send redirects.
1277 It is very good, but in some (rare!) circumstances
1278 (SIT, PtP, NBMA NOARP links) it is handy to allow
1279 some exceptions. --ANK
1280 */
1281 err = -EINVAL;
1282 if (!(gwa_type&IPV6_ADDR_UNICAST))
1283 goto out;
1284
5578689a 1285 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1286
1287 err = -EHOSTUNREACH;
1288 if (grt == NULL)
1289 goto out;
1290 if (dev) {
1291 if (dev != grt->rt6i_dev) {
d8d1f30b 1292 dst_release(&grt->dst);
1da177e4
LT
1293 goto out;
1294 }
1295 } else {
1296 dev = grt->rt6i_dev;
1297 idev = grt->rt6i_idev;
1298 dev_hold(dev);
1299 in6_dev_hold(grt->rt6i_idev);
1300 }
1301 if (!(grt->rt6i_flags&RTF_GATEWAY))
1302 err = 0;
d8d1f30b 1303 dst_release(&grt->dst);
1da177e4
LT
1304
1305 if (err)
1306 goto out;
1307 }
1308 err = -EINVAL;
1309 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1310 goto out;
1311 }
1312
1313 err = -ENODEV;
1314 if (dev == NULL)
1315 goto out;
1316
86872cb5 1317 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1da177e4
LT
1318 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1319 if (IS_ERR(rt->rt6i_nexthop)) {
1320 err = PTR_ERR(rt->rt6i_nexthop);
1321 rt->rt6i_nexthop = NULL;
1322 goto out;
1323 }
1324 }
1325
86872cb5 1326 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1327
1328install_route:
86872cb5
TG
1329 if (cfg->fc_mx) {
1330 struct nlattr *nla;
1331 int remaining;
1332
1333 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1334 int type = nla_type(nla);
86872cb5
TG
1335
1336 if (type) {
1337 if (type > RTAX_MAX) {
1da177e4
LT
1338 err = -EINVAL;
1339 goto out;
1340 }
86872cb5 1341
defb3519 1342 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1343 }
1da177e4
LT
1344 }
1345 }
1346
d8d1f30b 1347 rt->dst.dev = dev;
1da177e4 1348 rt->rt6i_idev = idev;
c71099ac 1349 rt->rt6i_table = table;
63152fc0 1350
c346dca1 1351 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1352
86872cb5 1353 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1354
1355out:
1356 if (dev)
1357 dev_put(dev);
1358 if (idev)
1359 in6_dev_put(idev);
1360 if (rt)
d8d1f30b 1361 dst_free(&rt->dst);
1da177e4
LT
1362 return err;
1363}
1364
86872cb5 1365static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1366{
1367 int err;
c71099ac 1368 struct fib6_table *table;
c346dca1 1369 struct net *net = dev_net(rt->rt6i_dev);
1da177e4 1370
8ed67789 1371 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1372 return -ENOENT;
1373
c71099ac
TG
1374 table = rt->rt6i_table;
1375 write_lock_bh(&table->tb6_lock);
1da177e4 1376
86872cb5 1377 err = fib6_del(rt, info);
d8d1f30b 1378 dst_release(&rt->dst);
1da177e4 1379
c71099ac 1380 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1381
1382 return err;
1383}
1384
e0a1ad73
TG
1385int ip6_del_rt(struct rt6_info *rt)
1386{
4d1169c1 1387 struct nl_info info = {
c346dca1 1388 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 1389 };
528c4ceb 1390 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1391}
1392
86872cb5 1393static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1394{
c71099ac 1395 struct fib6_table *table;
1da177e4
LT
1396 struct fib6_node *fn;
1397 struct rt6_info *rt;
1398 int err = -ESRCH;
1399
5578689a 1400 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
c71099ac
TG
1401 if (table == NULL)
1402 return err;
1403
1404 read_lock_bh(&table->tb6_lock);
1da177e4 1405
c71099ac 1406 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1407 &cfg->fc_dst, cfg->fc_dst_len,
1408 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1409
1da177e4 1410 if (fn) {
d8d1f30b 1411 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1412 if (cfg->fc_ifindex &&
1da177e4 1413 (rt->rt6i_dev == NULL ||
86872cb5 1414 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1da177e4 1415 continue;
86872cb5
TG
1416 if (cfg->fc_flags & RTF_GATEWAY &&
1417 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1418 continue;
86872cb5 1419 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1420 continue;
d8d1f30b 1421 dst_hold(&rt->dst);
c71099ac 1422 read_unlock_bh(&table->tb6_lock);
1da177e4 1423
86872cb5 1424 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1425 }
1426 }
c71099ac 1427 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1428
1429 return err;
1430}
1431
1432/*
1433 * Handle redirects
1434 */
a6279458
YH
1435struct ip6rd_flowi {
1436 struct flowi fl;
1437 struct in6_addr gateway;
1438};
1439
8ed67789
DL
1440static struct rt6_info *__ip6_route_redirect(struct net *net,
1441 struct fib6_table *table,
a6279458
YH
1442 struct flowi *fl,
1443 int flags)
1da177e4 1444{
a6279458
YH
1445 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1446 struct rt6_info *rt;
e843b9e1 1447 struct fib6_node *fn;
c71099ac 1448
1da177e4 1449 /*
e843b9e1
YH
1450 * Get the "current" route for this destination and
1451 * check if the redirect has come from approriate router.
1452 *
1453 * RFC 2461 specifies that redirects should only be
1454 * accepted if they come from the nexthop to the target.
1455 * Due to the way the routes are chosen, this notion
1456 * is a bit fuzzy and one might need to check all possible
1457 * routes.
1da177e4 1458 */
1da177e4 1459
c71099ac 1460 read_lock_bh(&table->tb6_lock);
a6279458 1461 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
e843b9e1 1462restart:
d8d1f30b 1463 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
e843b9e1
YH
1464 /*
1465 * Current route is on-link; redirect is always invalid.
1466 *
1467 * Seems, previous statement is not true. It could
1468 * be node, which looks for us as on-link (f.e. proxy ndisc)
1469 * But then router serving it might decide, that we should
1470 * know truth 8)8) --ANK (980726).
1471 */
1472 if (rt6_check_expired(rt))
1473 continue;
1474 if (!(rt->rt6i_flags & RTF_GATEWAY))
1475 continue;
a6279458 1476 if (fl->oif != rt->rt6i_dev->ifindex)
e843b9e1 1477 continue;
a6279458 1478 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1479 continue;
1480 break;
1481 }
a6279458 1482
cb15d9c2 1483 if (!rt)
8ed67789
DL
1484 rt = net->ipv6.ip6_null_entry;
1485 BACKTRACK(net, &fl->fl6_src);
cb15d9c2 1486out:
d8d1f30b 1487 dst_hold(&rt->dst);
a6279458 1488
c71099ac 1489 read_unlock_bh(&table->tb6_lock);
e843b9e1 1490
a6279458
YH
1491 return rt;
1492};
1493
1494static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1495 struct in6_addr *src,
1496 struct in6_addr *gateway,
1497 struct net_device *dev)
1498{
adaa70bb 1499 int flags = RT6_LOOKUP_F_HAS_SADDR;
c346dca1 1500 struct net *net = dev_net(dev);
a6279458
YH
1501 struct ip6rd_flowi rdfl = {
1502 .fl = {
1503 .oif = dev->ifindex,
5811662b
CG
1504 .fl6_dst = *dest,
1505 .fl6_src = *src,
a6279458 1506 },
a6279458 1507 };
adaa70bb 1508
86c36ce4
BH
1509 ipv6_addr_copy(&rdfl.gateway, gateway);
1510
adaa70bb
TG
1511 if (rt6_need_strict(dest))
1512 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1513
5578689a 1514 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
58f09b78 1515 flags, __ip6_route_redirect);
a6279458
YH
1516}
1517
1518void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1519 struct in6_addr *saddr,
1520 struct neighbour *neigh, u8 *lladdr, int on_link)
1521{
1522 struct rt6_info *rt, *nrt = NULL;
1523 struct netevent_redirect netevent;
c346dca1 1524 struct net *net = dev_net(neigh->dev);
a6279458
YH
1525
1526 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1527
8ed67789 1528 if (rt == net->ipv6.ip6_null_entry) {
1da177e4
LT
1529 if (net_ratelimit())
1530 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1531 "for redirect target\n");
a6279458 1532 goto out;
1da177e4
LT
1533 }
1534
1da177e4
LT
1535 /*
1536 * We have finally decided to accept it.
1537 */
1538
1ab1457c 1539 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1540 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1541 NEIGH_UPDATE_F_OVERRIDE|
1542 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1543 NEIGH_UPDATE_F_ISROUTER))
1544 );
1545
1546 /*
1547 * Redirect received -> path was valid.
1548 * Look, redirects are sent only in response to data packets,
1549 * so that this nexthop apparently is reachable. --ANK
1550 */
d8d1f30b 1551 dst_confirm(&rt->dst);
1da177e4
LT
1552
1553 /* Duplicate redirect: silently ignore. */
d8d1f30b 1554 if (neigh == rt->dst.neighbour)
1da177e4
LT
1555 goto out;
1556
1557 nrt = ip6_rt_copy(rt);
1558 if (nrt == NULL)
1559 goto out;
1560
1561 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1562 if (on_link)
1563 nrt->rt6i_flags &= ~RTF_GATEWAY;
1564
1565 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1566 nrt->rt6i_dst.plen = 128;
d8d1f30b 1567 nrt->dst.flags |= DST_HOST;
1da177e4
LT
1568
1569 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1570 nrt->rt6i_nexthop = neigh_clone(neigh);
1da177e4 1571
40e22e8f 1572 if (ip6_ins_rt(nrt))
1da177e4
LT
1573 goto out;
1574
d8d1f30b
CG
1575 netevent.old = &rt->dst;
1576 netevent.new = &nrt->dst;
8d71740c
TT
1577 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1578
1da177e4 1579 if (rt->rt6i_flags&RTF_CACHE) {
e0a1ad73 1580 ip6_del_rt(rt);
1da177e4
LT
1581 return;
1582 }
1583
1584out:
d8d1f30b 1585 dst_release(&rt->dst);
1da177e4
LT
1586}
1587
1588/*
1589 * Handle ICMP "packet too big" messages
1590 * i.e. Path MTU discovery
1591 */
1592
ae878ae2
1593static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
1594 struct net *net, u32 pmtu, int ifindex)
1da177e4
LT
1595{
1596 struct rt6_info *rt, *nrt;
1597 int allfrag = 0;
d3052b55 1598again:
ae878ae2 1599 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1da177e4
LT
1600 if (rt == NULL)
1601 return;
1602
d3052b55
AV
1603 if (rt6_check_expired(rt)) {
1604 ip6_del_rt(rt);
1605 goto again;
1606 }
1607
d8d1f30b 1608 if (pmtu >= dst_mtu(&rt->dst))
1da177e4
LT
1609 goto out;
1610
1611 if (pmtu < IPV6_MIN_MTU) {
1612 /*
1ab1457c 1613 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1614 * MTU (1280) and a fragment header should always be included
1615 * after a node receiving Too Big message reporting PMTU is
1616 * less than the IPv6 Minimum Link MTU.
1617 */
1618 pmtu = IPV6_MIN_MTU;
1619 allfrag = 1;
1620 }
1621
1622 /* New mtu received -> path was valid.
1623 They are sent only in response to data packets,
1624 so that this nexthop apparently is reachable. --ANK
1625 */
d8d1f30b 1626 dst_confirm(&rt->dst);
1da177e4
LT
1627
1628 /* Host route. If it is static, it would be better
1629 not to override it, but add new one, so that
1630 when cache entry will expire old pmtu
1631 would return automatically.
1632 */
1633 if (rt->rt6i_flags & RTF_CACHE) {
defb3519
DM
1634 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1635 if (allfrag) {
1636 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1637 features |= RTAX_FEATURE_ALLFRAG;
1638 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1639 }
d8d1f30b 1640 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1641 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1642 goto out;
1643 }
1644
1645 /* Network route.
1646 Two cases are possible:
1647 1. It is connected route. Action: COW
1648 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1649 */
d5315b50 1650 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1651 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1652 else
1653 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1654
d5315b50 1655 if (nrt) {
defb3519
DM
1656 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1657 if (allfrag) {
1658 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1659 features |= RTAX_FEATURE_ALLFRAG;
1660 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1661 }
a1e78363
YH
1662
1663 /* According to RFC 1981, detecting PMTU increase shouldn't be
1664 * happened within 5 mins, the recommended timer is 10 mins.
1665 * Here this route expiration time is set to ip6_rt_mtu_expires
1666 * which is 10 mins. After 10 mins the decreased pmtu is expired
1667 * and detecting PMTU increase will be automatically happened.
1668 */
d8d1f30b 1669 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
a1e78363
YH
1670 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1671
40e22e8f 1672 ip6_ins_rt(nrt);
1da177e4 1673 }
1da177e4 1674out:
d8d1f30b 1675 dst_release(&rt->dst);
1da177e4
LT
1676}
1677
ae878ae2
1678void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1679 struct net_device *dev, u32 pmtu)
1680{
1681 struct net *net = dev_net(dev);
1682
1683 /*
1684 * RFC 1981 states that a node "MUST reduce the size of the packets it
1685 * is sending along the path" that caused the Packet Too Big message.
1686 * Since it's not possible in the general case to determine which
1687 * interface was used to send the original packet, we update the MTU
1688 * on the interface that will be used to send future packets. We also
1689 * update the MTU on the interface that received the Packet Too Big in
1690 * case the original packet was forced out that interface with
1691 * SO_BINDTODEVICE or similar. This is the next best thing to the
1692 * correct behaviour, which would be to update the MTU on all
1693 * interfaces.
1694 */
1695 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1696 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1697}
1698
1da177e4
LT
1699/*
1700 * Misc support functions
1701 */
1702
1703static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1704{
c346dca1 1705 struct net *net = dev_net(ort->rt6i_dev);
86393e52 1706 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
1707
1708 if (rt) {
d8d1f30b
CG
1709 rt->dst.input = ort->dst.input;
1710 rt->dst.output = ort->dst.output;
1711
defb3519 1712 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b
CG
1713 rt->dst.error = ort->dst.error;
1714 rt->dst.dev = ort->dst.dev;
1715 if (rt->dst.dev)
1716 dev_hold(rt->dst.dev);
1da177e4
LT
1717 rt->rt6i_idev = ort->rt6i_idev;
1718 if (rt->rt6i_idev)
1719 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1720 rt->dst.lastuse = jiffies;
1da177e4
LT
1721 rt->rt6i_expires = 0;
1722
1723 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1724 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1725 rt->rt6i_metric = 0;
1726
1727 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1728#ifdef CONFIG_IPV6_SUBTREES
1729 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1730#endif
c71099ac 1731 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1732 }
1733 return rt;
1734}
1735
70ceb4f5 1736#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0
DL
1737static struct rt6_info *rt6_get_route_info(struct net *net,
1738 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1739 struct in6_addr *gwaddr, int ifindex)
1740{
1741 struct fib6_node *fn;
1742 struct rt6_info *rt = NULL;
c71099ac
TG
1743 struct fib6_table *table;
1744
efa2cea0 1745 table = fib6_get_table(net, RT6_TABLE_INFO);
c71099ac
TG
1746 if (table == NULL)
1747 return NULL;
70ceb4f5 1748
c71099ac
TG
1749 write_lock_bh(&table->tb6_lock);
1750 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1751 if (!fn)
1752 goto out;
1753
d8d1f30b 1754 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
70ceb4f5
YH
1755 if (rt->rt6i_dev->ifindex != ifindex)
1756 continue;
1757 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1758 continue;
1759 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1760 continue;
d8d1f30b 1761 dst_hold(&rt->dst);
70ceb4f5
YH
1762 break;
1763 }
1764out:
c71099ac 1765 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1766 return rt;
1767}
1768
efa2cea0
DL
1769static struct rt6_info *rt6_add_route_info(struct net *net,
1770 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1771 struct in6_addr *gwaddr, int ifindex,
1772 unsigned pref)
1773{
86872cb5
TG
1774 struct fib6_config cfg = {
1775 .fc_table = RT6_TABLE_INFO,
238fc7ea 1776 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1777 .fc_ifindex = ifindex,
1778 .fc_dst_len = prefixlen,
1779 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1780 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1781 .fc_nlinfo.pid = 0,
1782 .fc_nlinfo.nlh = NULL,
1783 .fc_nlinfo.nl_net = net,
86872cb5
TG
1784 };
1785
1786 ipv6_addr_copy(&cfg.fc_dst, prefix);
1787 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
70ceb4f5 1788
e317da96
YH
1789 /* We should treat it as a default route if prefix length is 0. */
1790 if (!prefixlen)
86872cb5 1791 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1792
86872cb5 1793 ip6_route_add(&cfg);
70ceb4f5 1794
efa2cea0 1795 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1796}
1797#endif
1798
1da177e4 1799struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1ab1457c 1800{
1da177e4 1801 struct rt6_info *rt;
c71099ac 1802 struct fib6_table *table;
1da177e4 1803
c346dca1 1804 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
c71099ac
TG
1805 if (table == NULL)
1806 return NULL;
1da177e4 1807
c71099ac 1808 write_lock_bh(&table->tb6_lock);
d8d1f30b 1809 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1da177e4 1810 if (dev == rt->rt6i_dev &&
045927ff 1811 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1812 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1813 break;
1814 }
1815 if (rt)
d8d1f30b 1816 dst_hold(&rt->dst);
c71099ac 1817 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1818 return rt;
1819}
1820
1821struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
ebacaaa0
YH
1822 struct net_device *dev,
1823 unsigned int pref)
1da177e4 1824{
86872cb5
TG
1825 struct fib6_config cfg = {
1826 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1827 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1828 .fc_ifindex = dev->ifindex,
1829 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1830 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1831 .fc_nlinfo.pid = 0,
1832 .fc_nlinfo.nlh = NULL,
c346dca1 1833 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1834 };
1da177e4 1835
86872cb5 1836 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1da177e4 1837
86872cb5 1838 ip6_route_add(&cfg);
1da177e4 1839
1da177e4
LT
1840 return rt6_get_dflt_router(gwaddr, dev);
1841}
1842
7b4da532 1843void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1844{
1845 struct rt6_info *rt;
c71099ac
TG
1846 struct fib6_table *table;
1847
1848 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1849 table = fib6_get_table(net, RT6_TABLE_DFLT);
c71099ac
TG
1850 if (table == NULL)
1851 return;
1da177e4
LT
1852
1853restart:
c71099ac 1854 read_lock_bh(&table->tb6_lock);
d8d1f30b 1855 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1856 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1857 dst_hold(&rt->dst);
c71099ac 1858 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1859 ip6_del_rt(rt);
1da177e4
LT
1860 goto restart;
1861 }
1862 }
c71099ac 1863 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1864}
1865
5578689a
DL
1866static void rtmsg_to_fib6_config(struct net *net,
1867 struct in6_rtmsg *rtmsg,
86872cb5
TG
1868 struct fib6_config *cfg)
1869{
1870 memset(cfg, 0, sizeof(*cfg));
1871
1872 cfg->fc_table = RT6_TABLE_MAIN;
1873 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1874 cfg->fc_metric = rtmsg->rtmsg_metric;
1875 cfg->fc_expires = rtmsg->rtmsg_info;
1876 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1877 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1878 cfg->fc_flags = rtmsg->rtmsg_flags;
1879
5578689a 1880 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1881
86872cb5
TG
1882 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1883 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1884 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1885}
1886
5578689a 1887int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1888{
86872cb5 1889 struct fib6_config cfg;
1da177e4
LT
1890 struct in6_rtmsg rtmsg;
1891 int err;
1892
1893 switch(cmd) {
1894 case SIOCADDRT: /* Add a route */
1895 case SIOCDELRT: /* Delete a route */
1896 if (!capable(CAP_NET_ADMIN))
1897 return -EPERM;
1898 err = copy_from_user(&rtmsg, arg,
1899 sizeof(struct in6_rtmsg));
1900 if (err)
1901 return -EFAULT;
86872cb5 1902
5578689a 1903 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1904
1da177e4
LT
1905 rtnl_lock();
1906 switch (cmd) {
1907 case SIOCADDRT:
86872cb5 1908 err = ip6_route_add(&cfg);
1da177e4
LT
1909 break;
1910 case SIOCDELRT:
86872cb5 1911 err = ip6_route_del(&cfg);
1da177e4
LT
1912 break;
1913 default:
1914 err = -EINVAL;
1915 }
1916 rtnl_unlock();
1917
1918 return err;
3ff50b79 1919 }
1da177e4
LT
1920
1921 return -EINVAL;
1922}
1923
1924/*
1925 * Drop the packet on the floor
1926 */
1927
d5fdd6ba 1928static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 1929{
612f09e8 1930 int type;
adf30907 1931 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
1932 switch (ipstats_mib_noroutes) {
1933 case IPSTATS_MIB_INNOROUTES:
0660e03f 1934 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 1935 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
1936 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1937 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
1938 break;
1939 }
1940 /* FALLTHROUGH */
1941 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
1942 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1943 ipstats_mib_noroutes);
612f09e8
YH
1944 break;
1945 }
3ffe533c 1946 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
1947 kfree_skb(skb);
1948 return 0;
1949}
1950
9ce8ade0
TG
1951static int ip6_pkt_discard(struct sk_buff *skb)
1952{
612f09e8 1953 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1954}
1955
20380731 1956static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 1957{
adf30907 1958 skb->dev = skb_dst(skb)->dev;
612f09e8 1959 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
1960}
1961
6723ab54
DM
1962#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1963
9ce8ade0
TG
1964static int ip6_pkt_prohibit(struct sk_buff *skb)
1965{
612f09e8 1966 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1967}
1968
1969static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1970{
adf30907 1971 skb->dev = skb_dst(skb)->dev;
612f09e8 1972 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
1973}
1974
6723ab54
DM
1975#endif
1976
1da177e4
LT
1977/*
1978 * Allocate a dst for local (unicast / anycast) address.
1979 */
1980
1981struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1982 const struct in6_addr *addr,
1983 int anycast)
1984{
c346dca1 1985 struct net *net = dev_net(idev->dev);
86393e52 1986 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
14deae41 1987 struct neighbour *neigh;
1da177e4 1988
40385653
BG
1989 if (rt == NULL) {
1990 if (net_ratelimit())
1991 pr_warning("IPv6: Maximum number of routes reached,"
1992 " consider increasing route/max_size.\n");
1da177e4 1993 return ERR_PTR(-ENOMEM);
40385653 1994 }
1da177e4 1995
5578689a 1996 dev_hold(net->loopback_dev);
1da177e4
LT
1997 in6_dev_hold(idev);
1998
d8d1f30b
CG
1999 rt->dst.flags = DST_HOST;
2000 rt->dst.input = ip6_input;
2001 rt->dst.output = ip6_output;
5578689a 2002 rt->rt6i_dev = net->loopback_dev;
1da177e4 2003 rt->rt6i_idev = idev;
defb3519 2004 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1);
d8d1f30b 2005 rt->dst.obsolete = -1;
1da177e4
LT
2006
2007 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2008 if (anycast)
2009 rt->rt6i_flags |= RTF_ANYCAST;
2010 else
1da177e4 2011 rt->rt6i_flags |= RTF_LOCAL;
14deae41
DM
2012 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
2013 if (IS_ERR(neigh)) {
d8d1f30b 2014 dst_free(&rt->dst);
14deae41
DM
2015
2016 /* We are casting this because that is the return
2017 * value type. But an errno encoded pointer is the
2018 * same regardless of the underlying pointer type,
2019 * and that's what we are returning. So this is OK.
2020 */
2021 return (struct rt6_info *) neigh;
1da177e4 2022 }
14deae41 2023 rt->rt6i_nexthop = neigh;
1da177e4
LT
2024
2025 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
2026 rt->rt6i_dst.plen = 128;
5578689a 2027 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2028
d8d1f30b 2029 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2030
2031 return rt;
2032}
2033
8ed67789
DL
/* Argument bundle handed to fib6_ifdown() through the void *arg cookie. */
struct arg_dev_net {
	struct net_device *dev;	/* device going down; NULL matches any device */
	struct net *net;	/* namespace whose null entry must be spared */
};
2038
1da177e4
LT
2039static int fib6_ifdown(struct rt6_info *rt, void *arg)
2040{
bc3ef660 2041 const struct arg_dev_net *adn = arg;
2042 const struct net_device *dev = adn->dev;
8ed67789 2043
bc3ef660 2044 if ((rt->rt6i_dev == dev || dev == NULL) &&
2045 rt != adn->net->ipv6.ip6_null_entry) {
1da177e4
LT
2046 RT6_TRACE("deleted by ifdown %p\n", rt);
2047 return -1;
2048 }
2049 return 0;
2050}
2051
f3db4851 2052void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2053{
8ed67789
DL
2054 struct arg_dev_net adn = {
2055 .dev = dev,
2056 .net = net,
2057 };
2058
2059 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2060 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2061}
2062
/* Argument bundle handed to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* its new MTU */
};
2068
2069static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2070{
2071 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2072 struct inet6_dev *idev;
2073
2074 /* In IPv6 pmtu discovery is not optional,
2075 so that RTAX_MTU lock cannot disable it.
2076 We still use this lock to block changes
2077 caused by addrconf/ndisc.
2078 */
2079
2080 idev = __in6_dev_get(arg->dev);
2081 if (idev == NULL)
2082 return 0;
2083
2084 /* For administrative MTU increase, there is no way to discover
2085 IPv6 PMTU increase, so PMTU increase should be updated here.
2086 Since RFC 1981 doesn't include administrative MTU increase
2087 update PMTU increase is a MUST. (i.e. jumbo frame)
2088 */
2089 /*
2090 If new MTU is less than route PMTU, this new MTU will be the
2091 lowest MTU in the path, update the route PMTU to reflect PMTU
2092 decreases; if new MTU is greater than route PMTU, and the
2093 old MTU is the lowest MTU in the path, update the route PMTU
2094 to reflect the increase. In this case if the other nodes' MTU
2095 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2096 PMTU discouvery.
2097 */
2098 if (rt->rt6i_dev == arg->dev &&
d8d1f30b
CG
2099 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2100 (dst_mtu(&rt->dst) >= arg->mtu ||
2101 (dst_mtu(&rt->dst) < arg->mtu &&
2102 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2103 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2104 }
1da177e4
LT
2105 return 0;
2106}
2107
2108void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2109{
c71099ac
TG
2110 struct rt6_mtu_change_arg arg = {
2111 .dev = dev,
2112 .mtu = mtu,
2113 };
1da177e4 2114
c346dca1 2115 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2116}
2117
ef7c79ed 2118static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2119 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2120 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2121 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2122 [RTA_PRIORITY] = { .type = NLA_U32 },
2123 [RTA_METRICS] = { .type = NLA_NESTED },
2124};
2125
2126static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2127 struct fib6_config *cfg)
1da177e4 2128{
86872cb5
TG
2129 struct rtmsg *rtm;
2130 struct nlattr *tb[RTA_MAX+1];
2131 int err;
1da177e4 2132
86872cb5
TG
2133 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2134 if (err < 0)
2135 goto errout;
1da177e4 2136
86872cb5
TG
2137 err = -EINVAL;
2138 rtm = nlmsg_data(nlh);
2139 memset(cfg, 0, sizeof(*cfg));
2140
2141 cfg->fc_table = rtm->rtm_table;
2142 cfg->fc_dst_len = rtm->rtm_dst_len;
2143 cfg->fc_src_len = rtm->rtm_src_len;
2144 cfg->fc_flags = RTF_UP;
2145 cfg->fc_protocol = rtm->rtm_protocol;
2146
2147 if (rtm->rtm_type == RTN_UNREACHABLE)
2148 cfg->fc_flags |= RTF_REJECT;
2149
ab79ad14
2150 if (rtm->rtm_type == RTN_LOCAL)
2151 cfg->fc_flags |= RTF_LOCAL;
2152
86872cb5
TG
2153 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2154 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2155 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2156
2157 if (tb[RTA_GATEWAY]) {
2158 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2159 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2160 }
86872cb5
TG
2161
2162 if (tb[RTA_DST]) {
2163 int plen = (rtm->rtm_dst_len + 7) >> 3;
2164
2165 if (nla_len(tb[RTA_DST]) < plen)
2166 goto errout;
2167
2168 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2169 }
86872cb5
TG
2170
2171 if (tb[RTA_SRC]) {
2172 int plen = (rtm->rtm_src_len + 7) >> 3;
2173
2174 if (nla_len(tb[RTA_SRC]) < plen)
2175 goto errout;
2176
2177 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2178 }
86872cb5
TG
2179
2180 if (tb[RTA_OIF])
2181 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2182
2183 if (tb[RTA_PRIORITY])
2184 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2185
2186 if (tb[RTA_METRICS]) {
2187 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2188 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2189 }
86872cb5
TG
2190
2191 if (tb[RTA_TABLE])
2192 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2193
2194 err = 0;
2195errout:
2196 return err;
1da177e4
LT
2197}
2198
c127ea2c 2199static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2200{
86872cb5
TG
2201 struct fib6_config cfg;
2202 int err;
1da177e4 2203
86872cb5
TG
2204 err = rtm_to_fib6_config(skb, nlh, &cfg);
2205 if (err < 0)
2206 return err;
2207
2208 return ip6_route_del(&cfg);
1da177e4
LT
2209}
2210
c127ea2c 2211static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2212{
86872cb5
TG
2213 struct fib6_config cfg;
2214 int err;
1da177e4 2215
86872cb5
TG
2216 err = rtm_to_fib6_config(skb, nlh, &cfg);
2217 if (err < 0)
2218 return err;
2219
2220 return ip6_route_add(&cfg);
1da177e4
LT
2221}
2222
339bf98f
TG
2223static inline size_t rt6_nlmsg_size(void)
2224{
2225 return NLMSG_ALIGN(sizeof(struct rtmsg))
2226 + nla_total_size(16) /* RTA_SRC */
2227 + nla_total_size(16) /* RTA_DST */
2228 + nla_total_size(16) /* RTA_GATEWAY */
2229 + nla_total_size(16) /* RTA_PREFSRC */
2230 + nla_total_size(4) /* RTA_TABLE */
2231 + nla_total_size(4) /* RTA_IIF */
2232 + nla_total_size(4) /* RTA_OIF */
2233 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2234 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2235 + nla_total_size(sizeof(struct rta_cacheinfo));
2236}
2237
191cd582
BH
2238static int rt6_fill_node(struct net *net,
2239 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2240 struct in6_addr *dst, struct in6_addr *src,
2241 int iif, int type, u32 pid, u32 seq,
7bc570c8 2242 int prefix, int nowait, unsigned int flags)
1da177e4
LT
2243{
2244 struct rtmsg *rtm;
2d7202bf 2245 struct nlmsghdr *nlh;
e3703b3d 2246 long expires;
9e762a4a 2247 u32 table;
1da177e4
LT
2248
2249 if (prefix) { /* user wants prefix routes only */
2250 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2251 /* success since this is not a prefix route */
2252 return 1;
2253 }
2254 }
2255
2d7202bf
TG
2256 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2257 if (nlh == NULL)
26932566 2258 return -EMSGSIZE;
2d7202bf
TG
2259
2260 rtm = nlmsg_data(nlh);
1da177e4
LT
2261 rtm->rtm_family = AF_INET6;
2262 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2263 rtm->rtm_src_len = rt->rt6i_src.plen;
2264 rtm->rtm_tos = 0;
c71099ac 2265 if (rt->rt6i_table)
9e762a4a 2266 table = rt->rt6i_table->tb6_id;
c71099ac 2267 else
9e762a4a
PM
2268 table = RT6_TABLE_UNSPEC;
2269 rtm->rtm_table = table;
2d7202bf 2270 NLA_PUT_U32(skb, RTA_TABLE, table);
1da177e4
LT
2271 if (rt->rt6i_flags&RTF_REJECT)
2272 rtm->rtm_type = RTN_UNREACHABLE;
ab79ad14
2273 else if (rt->rt6i_flags&RTF_LOCAL)
2274 rtm->rtm_type = RTN_LOCAL;
1da177e4
LT
2275 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2276 rtm->rtm_type = RTN_LOCAL;
2277 else
2278 rtm->rtm_type = RTN_UNICAST;
2279 rtm->rtm_flags = 0;
2280 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2281 rtm->rtm_protocol = rt->rt6i_protocol;
2282 if (rt->rt6i_flags&RTF_DYNAMIC)
2283 rtm->rtm_protocol = RTPROT_REDIRECT;
2284 else if (rt->rt6i_flags & RTF_ADDRCONF)
2285 rtm->rtm_protocol = RTPROT_KERNEL;
2286 else if (rt->rt6i_flags&RTF_DEFAULT)
2287 rtm->rtm_protocol = RTPROT_RA;
2288
2289 if (rt->rt6i_flags&RTF_CACHE)
2290 rtm->rtm_flags |= RTM_F_CLONED;
2291
2292 if (dst) {
2d7202bf 2293 NLA_PUT(skb, RTA_DST, 16, dst);
1ab1457c 2294 rtm->rtm_dst_len = 128;
1da177e4 2295 } else if (rtm->rtm_dst_len)
2d7202bf 2296 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2297#ifdef CONFIG_IPV6_SUBTREES
2298 if (src) {
2d7202bf 2299 NLA_PUT(skb, RTA_SRC, 16, src);
1ab1457c 2300 rtm->rtm_src_len = 128;
1da177e4 2301 } else if (rtm->rtm_src_len)
2d7202bf 2302 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4 2303#endif
7bc570c8
YH
2304 if (iif) {
2305#ifdef CONFIG_IPV6_MROUTE
2306 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2307 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2308 if (err <= 0) {
2309 if (!nowait) {
2310 if (err == 0)
2311 return 0;
2312 goto nla_put_failure;
2313 } else {
2314 if (err == -EMSGSIZE)
2315 goto nla_put_failure;
2316 }
2317 }
2318 } else
2319#endif
2320 NLA_PUT_U32(skb, RTA_IIF, iif);
2321 } else if (dst) {
d8d1f30b 2322 struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
1da177e4 2323 struct in6_addr saddr_buf;
191cd582 2324 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
7cbca67c 2325 dst, 0, &saddr_buf) == 0)
2d7202bf 2326 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2327 }
2d7202bf 2328
defb3519 2329 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2330 goto nla_put_failure;
2331
d8d1f30b
CG
2332 if (rt->dst.neighbour)
2333 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);
2d7202bf 2334
d8d1f30b 2335 if (rt->dst.dev)
2d7202bf
TG
2336 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2337
2338 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
e3703b3d 2339
36e3deae
YH
2340 if (!(rt->rt6i_flags & RTF_EXPIRES))
2341 expires = 0;
2342 else if (rt->rt6i_expires - jiffies < INT_MAX)
2343 expires = rt->rt6i_expires - jiffies;
2344 else
2345 expires = INT_MAX;
69cdf8f9 2346
d8d1f30b
CG
2347 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2348 expires, rt->dst.error) < 0)
e3703b3d 2349 goto nla_put_failure;
2d7202bf
TG
2350
2351 return nlmsg_end(skb, nlh);
2352
2353nla_put_failure:
26932566
PM
2354 nlmsg_cancel(skb, nlh);
2355 return -EMSGSIZE;
1da177e4
LT
2356}
2357
1b43af54 2358int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2359{
2360 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2361 int prefix;
2362
2d7202bf
TG
2363 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2364 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2365 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2366 } else
2367 prefix = 0;
2368
191cd582
BH
2369 return rt6_fill_node(arg->net,
2370 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2371 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2372 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2373}
2374
c127ea2c 2375static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2376{
3b1e0a65 2377 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2378 struct nlattr *tb[RTA_MAX+1];
2379 struct rt6_info *rt;
1da177e4 2380 struct sk_buff *skb;
ab364a6f 2381 struct rtmsg *rtm;
1da177e4 2382 struct flowi fl;
ab364a6f 2383 int err, iif = 0;
1da177e4 2384
ab364a6f
TG
2385 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2386 if (err < 0)
2387 goto errout;
1da177e4 2388
ab364a6f 2389 err = -EINVAL;
1da177e4 2390 memset(&fl, 0, sizeof(fl));
1da177e4 2391
ab364a6f
TG
2392 if (tb[RTA_SRC]) {
2393 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2394 goto errout;
2395
2396 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2397 }
2398
2399 if (tb[RTA_DST]) {
2400 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2401 goto errout;
2402
2403 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2404 }
2405
2406 if (tb[RTA_IIF])
2407 iif = nla_get_u32(tb[RTA_IIF]);
2408
2409 if (tb[RTA_OIF])
2410 fl.oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2411
2412 if (iif) {
2413 struct net_device *dev;
5578689a 2414 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2415 if (!dev) {
2416 err = -ENODEV;
ab364a6f 2417 goto errout;
1da177e4
LT
2418 }
2419 }
2420
ab364a6f
TG
2421 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2422 if (skb == NULL) {
2423 err = -ENOBUFS;
2424 goto errout;
2425 }
1da177e4 2426
ab364a6f
TG
2427 /* Reserve room for dummy headers, this skb can pass
2428 through good chunk of routing engine.
2429 */
459a98ed 2430 skb_reset_mac_header(skb);
ab364a6f 2431 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2432
8a3edd80 2433 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
d8d1f30b 2434 skb_dst_set(skb, &rt->dst);
1da177e4 2435
191cd582 2436 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
1da177e4 2437 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2438 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2439 if (err < 0) {
ab364a6f
TG
2440 kfree_skb(skb);
2441 goto errout;
1da177e4
LT
2442 }
2443
5578689a 2444 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2445errout:
1da177e4 2446 return err;
1da177e4
LT
2447}
2448
86872cb5 2449void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2450{
2451 struct sk_buff *skb;
5578689a 2452 struct net *net = info->nl_net;
528c4ceb
DL
2453 u32 seq;
2454 int err;
2455
2456 err = -ENOBUFS;
2457 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
86872cb5 2458
339bf98f 2459 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
21713ebc
TG
2460 if (skb == NULL)
2461 goto errout;
2462
191cd582 2463 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2464 event, info->pid, seq, 0, 0, 0);
26932566
PM
2465 if (err < 0) {
2466 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2467 WARN_ON(err == -EMSGSIZE);
2468 kfree_skb(skb);
2469 goto errout;
2470 }
1ce85fe4
PNA
2471 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2472 info->nlh, gfp_any());
2473 return;
21713ebc
TG
2474errout:
2475 if (err < 0)
5578689a 2476 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2477}
2478
8ed67789
DL
2479static int ip6_route_dev_notify(struct notifier_block *this,
2480 unsigned long event, void *data)
2481{
2482 struct net_device *dev = (struct net_device *)data;
c346dca1 2483 struct net *net = dev_net(dev);
8ed67789
DL
2484
2485 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2486 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2487 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2488#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2489 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2490 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2491 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2492 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2493#endif
2494 }
2495
2496 return NOTIFY_OK;
2497}
2498
1da177e4
LT
2499/*
2500 * /proc
2501 */
2502
2503#ifdef CONFIG_PROC_FS
2504
1da177e4
LT
/*
 * Buffer-style /proc argument.
 * NOTE(review): nothing in the visible part of this file references
 * this struct; it may be a leftover from before the seq_file
 * conversion -- confirm before removing.
 */
struct rt6_proc_arg
{
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2513
2514static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2515{
33120b30 2516 struct seq_file *m = p_arg;
1da177e4 2517
4b7a4274 2518 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2519
2520#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2521 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2522#else
33120b30 2523 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4
LT
2524#endif
2525
2526 if (rt->rt6i_nexthop) {
4b7a4274 2527 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
1da177e4 2528 } else {
33120b30 2529 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2530 }
33120b30 2531 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2532 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2533 rt->dst.__use, rt->rt6i_flags,
33120b30 2534 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1da177e4
LT
2535 return 0;
2536}
2537
33120b30 2538static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2539{
f3db4851
DL
2540 struct net *net = (struct net *)m->private;
2541 fib6_clean_all(net, rt6_info_route, 0, m);
33120b30
AD
2542 return 0;
2543}
1da177e4 2544
33120b30
AD
2545static int ipv6_route_open(struct inode *inode, struct file *file)
2546{
de05c557 2547 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2548}
2549
33120b30
AD
2550static const struct file_operations ipv6_route_proc_fops = {
2551 .owner = THIS_MODULE,
2552 .open = ipv6_route_open,
2553 .read = seq_read,
2554 .llseek = seq_lseek,
b6fcbdb4 2555 .release = single_release_net,
33120b30
AD
2556};
2557
1da177e4
LT
2558static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2559{
69ddb805 2560 struct net *net = (struct net *)seq->private;
1da177e4 2561 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2562 net->ipv6.rt6_stats->fib_nodes,
2563 net->ipv6.rt6_stats->fib_route_nodes,
2564 net->ipv6.rt6_stats->fib_rt_alloc,
2565 net->ipv6.rt6_stats->fib_rt_entries,
2566 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2567 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2568 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2569
2570 return 0;
2571}
2572
2573static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2574{
de05c557 2575 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2576}
2577
9a32144e 2578static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2579 .owner = THIS_MODULE,
2580 .open = rt6_stats_seq_open,
2581 .read = seq_read,
2582 .llseek = seq_lseek,
b6fcbdb4 2583 .release = single_release_net,
1da177e4
LT
2584};
2585#endif /* CONFIG_PROC_FS */
2586
2587#ifdef CONFIG_SYSCTL
2588
1da177e4 2589static
8d65af78 2590int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2591 void __user *buffer, size_t *lenp, loff_t *ppos)
2592{
5b7c931d
DL
2593 struct net *net = current->nsproxy->net_ns;
2594 int delay = net->ipv6.sysctl.flush_delay;
1da177e4 2595 if (write) {
8d65af78 2596 proc_dointvec(ctl, write, buffer, lenp, ppos);
5b7c931d 2597 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
1da177e4
LT
2598 return 0;
2599 } else
2600 return -EINVAL;
2601}
2602
760f2d01 2603ctl_table ipv6_route_table_template[] = {
1ab1457c 2604 {
1da177e4 2605 .procname = "flush",
4990509f 2606 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2607 .maxlen = sizeof(int),
89c8b3a1 2608 .mode = 0200,
6d9f239a 2609 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2610 },
2611 {
1da177e4 2612 .procname = "gc_thresh",
9a7ec3a9 2613 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2614 .maxlen = sizeof(int),
2615 .mode = 0644,
6d9f239a 2616 .proc_handler = proc_dointvec,
1da177e4
LT
2617 },
2618 {
1da177e4 2619 .procname = "max_size",
4990509f 2620 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2621 .maxlen = sizeof(int),
2622 .mode = 0644,
6d9f239a 2623 .proc_handler = proc_dointvec,
1da177e4
LT
2624 },
2625 {
1da177e4 2626 .procname = "gc_min_interval",
4990509f 2627 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2628 .maxlen = sizeof(int),
2629 .mode = 0644,
6d9f239a 2630 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2631 },
2632 {
1da177e4 2633 .procname = "gc_timeout",
4990509f 2634 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2635 .maxlen = sizeof(int),
2636 .mode = 0644,
6d9f239a 2637 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2638 },
2639 {
1da177e4 2640 .procname = "gc_interval",
4990509f 2641 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2642 .maxlen = sizeof(int),
2643 .mode = 0644,
6d9f239a 2644 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2645 },
2646 {
1da177e4 2647 .procname = "gc_elasticity",
4990509f 2648 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2649 .maxlen = sizeof(int),
2650 .mode = 0644,
f3d3f616 2651 .proc_handler = proc_dointvec,
1da177e4
LT
2652 },
2653 {
1da177e4 2654 .procname = "mtu_expires",
4990509f 2655 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2656 .maxlen = sizeof(int),
2657 .mode = 0644,
6d9f239a 2658 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2659 },
2660 {
1da177e4 2661 .procname = "min_adv_mss",
4990509f 2662 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2663 .maxlen = sizeof(int),
2664 .mode = 0644,
f3d3f616 2665 .proc_handler = proc_dointvec,
1da177e4
LT
2666 },
2667 {
1da177e4 2668 .procname = "gc_min_interval_ms",
4990509f 2669 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2670 .maxlen = sizeof(int),
2671 .mode = 0644,
6d9f239a 2672 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2673 },
f8572d8f 2674 { }
1da177e4
LT
2675};
2676
2c8c1e72 2677struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2678{
2679 struct ctl_table *table;
2680
2681 table = kmemdup(ipv6_route_table_template,
2682 sizeof(ipv6_route_table_template),
2683 GFP_KERNEL);
5ee09105
YH
2684
2685 if (table) {
2686 table[0].data = &net->ipv6.sysctl.flush_delay;
86393e52 2687 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2688 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2689 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2690 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2691 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2692 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2693 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2694 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2695 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2696 }
2697
760f2d01
DL
2698 return table;
2699}
1da177e4
LT
2700#endif
2701
2c8c1e72 2702static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2703{
633d424b 2704 int ret = -ENOMEM;
8ed67789 2705
86393e52
AD
2706 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2707 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2708
fc66f95c
ED
2709 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2710 goto out_ip6_dst_ops;
2711
8ed67789
DL
2712 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2713 sizeof(*net->ipv6.ip6_null_entry),
2714 GFP_KERNEL);
2715 if (!net->ipv6.ip6_null_entry)
fc66f95c 2716 goto out_ip6_dst_entries;
d8d1f30b 2717 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2718 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2719 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2720 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2721 ip6_template_metrics, true);
8ed67789
DL
2722
2723#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2724 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2725 sizeof(*net->ipv6.ip6_prohibit_entry),
2726 GFP_KERNEL);
68fffc67
PZ
2727 if (!net->ipv6.ip6_prohibit_entry)
2728 goto out_ip6_null_entry;
d8d1f30b 2729 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2730 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2731 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2732 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2733 ip6_template_metrics, true);
8ed67789
DL
2734
2735 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2736 sizeof(*net->ipv6.ip6_blk_hole_entry),
2737 GFP_KERNEL);
68fffc67
PZ
2738 if (!net->ipv6.ip6_blk_hole_entry)
2739 goto out_ip6_prohibit_entry;
d8d1f30b 2740 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2741 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2742 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2743 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2744 ip6_template_metrics, true);
8ed67789
DL
2745#endif
2746
b339a47c
PZ
2747 net->ipv6.sysctl.flush_delay = 0;
2748 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2749 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2750 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2751 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2752 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2753 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2754 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2755
cdb18761
DL
2756#ifdef CONFIG_PROC_FS
2757 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2758 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2759#endif
6891a346
BT
2760 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2761
8ed67789
DL
2762 ret = 0;
2763out:
2764 return ret;
f2fc6a54 2765
68fffc67
PZ
2766#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2767out_ip6_prohibit_entry:
2768 kfree(net->ipv6.ip6_prohibit_entry);
2769out_ip6_null_entry:
2770 kfree(net->ipv6.ip6_null_entry);
2771#endif
fc66f95c
ED
2772out_ip6_dst_entries:
2773 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2774out_ip6_dst_ops:
f2fc6a54 2775 goto out;
cdb18761
DL
2776}
2777
2c8c1e72 2778static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761
DL
2779{
2780#ifdef CONFIG_PROC_FS
2781 proc_net_remove(net, "ipv6_route");
2782 proc_net_remove(net, "rt6_stats");
2783#endif
8ed67789
DL
2784 kfree(net->ipv6.ip6_null_entry);
2785#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2786 kfree(net->ipv6.ip6_prohibit_entry);
2787 kfree(net->ipv6.ip6_blk_hole_entry);
2788#endif
41bb78b4 2789 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
2790}
2791
2792static struct pernet_operations ip6_route_net_ops = {
2793 .init = ip6_route_net_init,
2794 .exit = ip6_route_net_exit,
2795};
2796
8ed67789
DL
2797static struct notifier_block ip6_route_dev_notifier = {
2798 .notifier_call = ip6_route_dev_notify,
2799 .priority = 0,
2800};
2801
433d49c3 2802int __init ip6_route_init(void)
1da177e4 2803{
433d49c3
DL
2804 int ret;
2805
9a7ec3a9
DL
2806 ret = -ENOMEM;
2807 ip6_dst_ops_template.kmem_cachep =
e5d679f3 2808 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 2809 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 2810 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 2811 goto out;
14e50e57 2812
fc66f95c 2813 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 2814 if (ret)
bdb3289f 2815 goto out_kmem_cache;
bdb3289f 2816
fc66f95c
ED
2817 ret = register_pernet_subsys(&ip6_route_net_ops);
2818 if (ret)
2819 goto out_dst_entries;
2820
5dc121e9
AE
2821 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2822
8ed67789
DL
2823 /* Registering of the loopback is done before this portion of code,
2824 * the loopback reference in rt6_info will not be taken, do it
2825 * manually for init_net */
d8d1f30b 2826 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2827 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2828 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2829 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 2830 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 2831 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2832 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2833 #endif
433d49c3
DL
2834 ret = fib6_init();
2835 if (ret)
8ed67789 2836 goto out_register_subsys;
433d49c3 2837
433d49c3
DL
2838 ret = xfrm6_init();
2839 if (ret)
cdb18761 2840 goto out_fib6_init;
c35b7e72 2841
433d49c3
DL
2842 ret = fib6_rules_init();
2843 if (ret)
2844 goto xfrm6_init;
7e5449c2 2845
433d49c3
DL
2846 ret = -ENOBUFS;
2847 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2848 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2849 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2850 goto fib6_rules_init;
c127ea2c 2851
8ed67789 2852 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761
DL
2853 if (ret)
2854 goto fib6_rules_init;
8ed67789 2855
433d49c3
DL
2856out:
2857 return ret;
2858
2859fib6_rules_init:
433d49c3
DL
2860 fib6_rules_cleanup();
2861xfrm6_init:
433d49c3 2862 xfrm6_fini();
433d49c3 2863out_fib6_init:
433d49c3 2864 fib6_gc_cleanup();
8ed67789
DL
2865out_register_subsys:
2866 unregister_pernet_subsys(&ip6_route_net_ops);
fc66f95c
ED
2867out_dst_entries:
2868 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 2869out_kmem_cache:
f2fc6a54 2870 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 2871 goto out;
1da177e4
LT
2872}
2873
2874void ip6_route_cleanup(void)
2875{
8ed67789 2876 unregister_netdevice_notifier(&ip6_route_dev_notifier);
101367c2 2877 fib6_rules_cleanup();
1da177e4 2878 xfrm6_fini();
1da177e4 2879 fib6_gc_cleanup();
8ed67789 2880 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 2881 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 2882 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 2883}
This page took 0.842808 seconds and 5 git commands to generate.