net: Make dst_alloc() take more explicit initializations.
[deliverable/linux.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4
LT
28#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
7bc570c8 37#include <linux/mroute6.h>
1da177e4 38#include <linux/init.h>
1da177e4 39#include <linux/if_arp.h>
1da177e4
LT
40#include <linux/proc_fs.h>
41#include <linux/seq_file.h>
5b7c931d 42#include <linux/nsproxy.h>
5a0e3ad6 43#include <linux/slab.h>
457c4cbc 44#include <net/net_namespace.h>
1da177e4
LT
45#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
54#include <net/xfrm.h>
8d71740c 55#include <net/netevent.h>
21713ebc 56#include <net/netlink.h>
1da177e4
LT
57
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
64/* Set to 3 to get tracing. */
65#define RT6_DEBUG 2
66
67#if RT6_DEBUG >= 3
68#define RDBG(x) printk x
69#define RT6_TRACE(x...) printk(KERN_DEBUG x)
70#else
71#define RDBG(x)
72#define RT6_TRACE(x...) do { ; } while (0)
73#endif
74
1da177e4
LT
75static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
76static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 77static unsigned int ip6_default_advmss(const struct dst_entry *dst);
d33e4553 78static unsigned int ip6_default_mtu(const struct dst_entry *dst);
1da177e4
LT
79static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80static void ip6_dst_destroy(struct dst_entry *);
81static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
569d3645 83static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
84
85static int ip6_pkt_discard(struct sk_buff *skb);
86static int ip6_pkt_discard_out(struct sk_buff *skb);
87static void ip6_link_failure(struct sk_buff *skb);
88static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89
70ceb4f5 90#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 91static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
92 const struct in6_addr *prefix, int prefixlen,
93 const struct in6_addr *gwaddr, int ifindex,
70ceb4f5 94 unsigned pref);
efa2cea0 95static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
96 const struct in6_addr *prefix, int prefixlen,
97 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
98#endif
99
06582540
DM
100static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
101{
102 struct rt6_info *rt = (struct rt6_info *) dst;
103 struct inet_peer *peer;
104 u32 *p = NULL;
105
106 if (!rt->rt6i_peer)
107 rt6_bind_peer(rt, 1);
108
109 peer = rt->rt6i_peer;
110 if (peer) {
111 u32 *old_p = __DST_METRICS_PTR(old);
112 unsigned long prev, new;
113
114 p = peer->metrics;
115 if (inet_metrics_new(peer))
116 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
117
118 new = (unsigned long) p;
119 prev = cmpxchg(&dst->_metrics, old, new);
120
121 if (prev != old) {
122 p = __DST_METRICS_PTR(prev);
123 if (prev & DST_METRICS_READ_ONLY)
124 p = NULL;
125 }
126 }
127 return p;
128}
129
9a7ec3a9 130static struct dst_ops ip6_dst_ops_template = {
1da177e4 131 .family = AF_INET6,
09640e63 132 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
133 .gc = ip6_dst_gc,
134 .gc_thresh = 1024,
135 .check = ip6_dst_check,
0dbaee3b 136 .default_advmss = ip6_default_advmss,
d33e4553 137 .default_mtu = ip6_default_mtu,
06582540 138 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
139 .destroy = ip6_dst_destroy,
140 .ifdown = ip6_dst_ifdown,
141 .negative_advice = ip6_negative_advice,
142 .link_failure = ip6_link_failure,
143 .update_pmtu = ip6_rt_update_pmtu,
1ac06e03 144 .local_out = __ip6_local_out,
1da177e4
LT
145};
146
ec831ea7
RD
/* default_mtu hook for blackhole routes: always reports 0. */
static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
{
	return 0;
}
151
14e50e57
DM
152static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
153{
154}
155
0972ddb2
HB
156static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
157 unsigned long old)
158{
159 return NULL;
160}
161
14e50e57
DM
162static struct dst_ops ip6_dst_blackhole_ops = {
163 .family = AF_INET6,
09640e63 164 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
165 .destroy = ip6_dst_destroy,
166 .check = ip6_dst_check,
ec831ea7 167 .default_mtu = ip6_blackhole_default_mtu,
214f45c9 168 .default_advmss = ip6_default_advmss,
14e50e57 169 .update_pmtu = ip6_rt_blackhole_update_pmtu,
0972ddb2 170 .cow_metrics = ip6_rt_blackhole_cow_metrics,
14e50e57
DM
171};
172
62fa8a84
DM
173static const u32 ip6_template_metrics[RTAX_MAX] = {
174 [RTAX_HOPLIMIT - 1] = 255,
175};
176
bdb3289f 177static struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
178 .dst = {
179 .__refcnt = ATOMIC_INIT(1),
180 .__use = 1,
181 .obsolete = -1,
182 .error = -ENETUNREACH,
d8d1f30b
CG
183 .input = ip6_pkt_discard,
184 .output = ip6_pkt_discard_out,
1da177e4
LT
185 },
186 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 187 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
188 .rt6i_metric = ~(u32) 0,
189 .rt6i_ref = ATOMIC_INIT(1),
190};
191
101367c2
TG
192#ifdef CONFIG_IPV6_MULTIPLE_TABLES
193
6723ab54
DM
194static int ip6_pkt_prohibit(struct sk_buff *skb);
195static int ip6_pkt_prohibit_out(struct sk_buff *skb);
6723ab54 196
280a34c8 197static struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
198 .dst = {
199 .__refcnt = ATOMIC_INIT(1),
200 .__use = 1,
201 .obsolete = -1,
202 .error = -EACCES,
d8d1f30b
CG
203 .input = ip6_pkt_prohibit,
204 .output = ip6_pkt_prohibit_out,
101367c2
TG
205 },
206 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 207 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
208 .rt6i_metric = ~(u32) 0,
209 .rt6i_ref = ATOMIC_INIT(1),
210};
211
bdb3289f 212static struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
213 .dst = {
214 .__refcnt = ATOMIC_INIT(1),
215 .__use = 1,
216 .obsolete = -1,
217 .error = -EINVAL,
d8d1f30b
CG
218 .input = dst_discard,
219 .output = dst_discard,
101367c2
TG
220 },
221 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 222 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
223 .rt6i_metric = ~(u32) 0,
224 .rt6i_ref = ATOMIC_INIT(1),
225};
226
227#endif
228
1da177e4 229/* allocate dst with ip6_dst_ops */
5c1e6aa3
DM
/*
 * Allocate an rt6_info through dst_alloc() for the given ops and device.
 * The three trailing dst_alloc() arguments are all passed as 0.
 * Returns whatever dst_alloc() returned, viewed as a struct rt6_info pointer.
 */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
					     struct net_device *dev)
{
	void *entry = dst_alloc(ops, dev, 0, 0, 0);

	return entry;
}
235
236static void ip6_dst_destroy(struct dst_entry *dst)
237{
238 struct rt6_info *rt = (struct rt6_info *)dst;
239 struct inet6_dev *idev = rt->rt6i_idev;
b3419363 240 struct inet_peer *peer = rt->rt6i_peer;
1da177e4
LT
241
242 if (idev != NULL) {
243 rt->rt6i_idev = NULL;
244 in6_dev_put(idev);
1ab1457c 245 }
b3419363 246 if (peer) {
b3419363
DM
247 rt->rt6i_peer = NULL;
248 inet_putpeer(peer);
249 }
250}
251
6431cbc2
DM
252static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
253
254static u32 rt6_peer_genid(void)
255{
256 return atomic_read(&__rt6_peer_genid);
257}
258
b3419363
DM
259void rt6_bind_peer(struct rt6_info *rt, int create)
260{
261 struct inet_peer *peer;
262
b3419363
DM
263 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
264 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
265 inet_putpeer(peer);
6431cbc2
DM
266 else
267 rt->rt6i_peer_genid = rt6_peer_genid();
1da177e4
LT
268}
269
270static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
271 int how)
272{
273 struct rt6_info *rt = (struct rt6_info *)dst;
274 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 275 struct net_device *loopback_dev =
c346dca1 276 dev_net(dev)->loopback_dev;
1da177e4 277
5a3e55d6
DL
278 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
279 struct inet6_dev *loopback_idev =
280 in6_dev_get(loopback_dev);
1da177e4
LT
281 if (loopback_idev != NULL) {
282 rt->rt6i_idev = loopback_idev;
283 in6_dev_put(idev);
284 }
285 }
286}
287
288static __inline__ int rt6_check_expired(const struct rt6_info *rt)
289{
a02cec21
ED
290 return (rt->rt6i_flags & RTF_EXPIRES) &&
291 time_after(jiffies, rt->rt6i_expires);
1da177e4
LT
292}
293
b71d1d42 294static inline int rt6_need_strict(const struct in6_addr *daddr)
c71099ac 295{
a02cec21
ED
296 return ipv6_addr_type(daddr) &
297 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
298}
299
1da177e4 300/*
c71099ac 301 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
302 */
303
8ed67789
DL
304static inline struct rt6_info *rt6_device_match(struct net *net,
305 struct rt6_info *rt,
b71d1d42 306 const struct in6_addr *saddr,
1da177e4 307 int oif,
d420895e 308 int flags)
1da177e4
LT
309{
310 struct rt6_info *local = NULL;
311 struct rt6_info *sprt;
312
dd3abc4e
YH
313 if (!oif && ipv6_addr_any(saddr))
314 goto out;
315
d8d1f30b 316 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
dd3abc4e
YH
317 struct net_device *dev = sprt->rt6i_dev;
318
319 if (oif) {
1da177e4
LT
320 if (dev->ifindex == oif)
321 return sprt;
322 if (dev->flags & IFF_LOOPBACK) {
323 if (sprt->rt6i_idev == NULL ||
324 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 325 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 326 continue;
1ab1457c 327 if (local && (!oif ||
1da177e4
LT
328 local->rt6i_idev->dev->ifindex == oif))
329 continue;
330 }
331 local = sprt;
332 }
dd3abc4e
YH
333 } else {
334 if (ipv6_chk_addr(net, saddr, dev,
335 flags & RT6_LOOKUP_F_IFACE))
336 return sprt;
1da177e4 337 }
dd3abc4e 338 }
1da177e4 339
dd3abc4e 340 if (oif) {
1da177e4
LT
341 if (local)
342 return local;
343
d420895e 344 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 345 return net->ipv6.ip6_null_entry;
1da177e4 346 }
dd3abc4e 347out:
1da177e4
LT
348 return rt;
349}
350
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: send a neighbour solicitation towards the
 * route's next hop when its neighbour entry is not in a NUD_VALID state
 * and the last update is older than the interface's rtr_probe_interval.
 *
 * The original author's note: probing MUST be rate-limited to no more
 * than one per minute.
 *
 * Accepts a NULL @rt, in which case nothing happens.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;

	/* Cheap unlocked pre-check. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	read_lock_bh(&neigh->lock);
	if ((neigh->nud_state & NUD_VALID) ||
	    !time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		read_unlock_bh(&neigh->lock);
		return;
	}

	/* NOTE(review): neigh->updated is written while only the read lock is held. */
	neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
385
1da177e4 386/*
554cfb7e 387 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 388 */
b6f99a21 389static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e
YH
390{
391 struct net_device *dev = rt->rt6i_dev;
161980f4 392 if (!oif || dev->ifindex == oif)
554cfb7e 393 return 2;
161980f4
DM
394 if ((dev->flags & IFF_LOOPBACK) &&
395 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
396 return 1;
397 return 0;
554cfb7e 398}
1da177e4 399
b6f99a21 400static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 401{
554cfb7e 402 struct neighbour *neigh = rt->rt6i_nexthop;
398bcbeb 403 int m;
4d0c5911
YH
404 if (rt->rt6i_flags & RTF_NONEXTHOP ||
405 !(rt->rt6i_flags & RTF_GATEWAY))
406 m = 1;
407 else if (neigh) {
554cfb7e
YH
408 read_lock_bh(&neigh->lock);
409 if (neigh->nud_state & NUD_VALID)
4d0c5911 410 m = 2;
398bcbeb
YH
411#ifdef CONFIG_IPV6_ROUTER_PREF
412 else if (neigh->nud_state & NUD_FAILED)
413 m = 0;
414#endif
415 else
ea73ee23 416 m = 1;
554cfb7e 417 read_unlock_bh(&neigh->lock);
398bcbeb
YH
418 } else
419 m = 0;
554cfb7e 420 return m;
1da177e4
LT
421}
422
554cfb7e
YH
423static int rt6_score_route(struct rt6_info *rt, int oif,
424 int strict)
1da177e4 425{
4d0c5911 426 int m, n;
1ab1457c 427
4d0c5911 428 m = rt6_check_dev(rt, oif);
77d16f45 429 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 430 return -1;
ebacaaa0
YH
431#ifdef CONFIG_IPV6_ROUTER_PREF
432 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
433#endif
4d0c5911 434 n = rt6_check_neigh(rt);
557e92ef 435 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
436 return -1;
437 return m;
438}
439
f11e6659
DM
440static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
441 int *mpri, struct rt6_info *match)
554cfb7e 442{
f11e6659
DM
443 int m;
444
445 if (rt6_check_expired(rt))
446 goto out;
447
448 m = rt6_score_route(rt, oif, strict);
449 if (m < 0)
450 goto out;
451
452 if (m > *mpri) {
453 if (strict & RT6_LOOKUP_F_REACHABLE)
454 rt6_probe(match);
455 *mpri = m;
456 match = rt;
457 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
458 rt6_probe(rt);
459 }
460
461out:
462 return match;
463}
464
465static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
466 struct rt6_info *rr_head,
467 u32 metric, int oif, int strict)
468{
469 struct rt6_info *rt, *match;
554cfb7e 470 int mpri = -1;
1da177e4 471
f11e6659
DM
472 match = NULL;
473 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 474 rt = rt->dst.rt6_next)
f11e6659
DM
475 match = find_match(rt, oif, strict, &mpri, match);
476 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 477 rt = rt->dst.rt6_next)
f11e6659 478 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 479
f11e6659
DM
480 return match;
481}
1da177e4 482
f11e6659
DM
483static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
484{
485 struct rt6_info *match, *rt0;
8ed67789 486 struct net *net;
1da177e4 487
f11e6659 488 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
0dc47877 489 __func__, fn->leaf, oif);
554cfb7e 490
f11e6659
DM
491 rt0 = fn->rr_ptr;
492 if (!rt0)
493 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 494
f11e6659 495 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 496
554cfb7e 497 if (!match &&
f11e6659 498 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 499 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 500
554cfb7e 501 /* no entries matched; do round-robin */
f11e6659
DM
502 if (!next || next->rt6i_metric != rt0->rt6i_metric)
503 next = fn->leaf;
504
505 if (next != rt0)
506 fn->rr_ptr = next;
1da177e4 507 }
1da177e4 508
f11e6659 509 RT6_TRACE("%s() => %p\n",
0dc47877 510 __func__, match);
1da177e4 511
c346dca1 512 net = dev_net(rt0->rt6i_dev);
a02cec21 513 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
514}
515
70ceb4f5
YH
516#ifdef CONFIG_IPV6_ROUTE_INFO
517int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 518 const struct in6_addr *gwaddr)
70ceb4f5 519{
c346dca1 520 struct net *net = dev_net(dev);
70ceb4f5
YH
521 struct route_info *rinfo = (struct route_info *) opt;
522 struct in6_addr prefix_buf, *prefix;
523 unsigned int pref;
4bed72e4 524 unsigned long lifetime;
70ceb4f5
YH
525 struct rt6_info *rt;
526
527 if (len < sizeof(struct route_info)) {
528 return -EINVAL;
529 }
530
531 /* Sanity check for prefix_len and length */
532 if (rinfo->length > 3) {
533 return -EINVAL;
534 } else if (rinfo->prefix_len > 128) {
535 return -EINVAL;
536 } else if (rinfo->prefix_len > 64) {
537 if (rinfo->length < 2) {
538 return -EINVAL;
539 }
540 } else if (rinfo->prefix_len > 0) {
541 if (rinfo->length < 1) {
542 return -EINVAL;
543 }
544 }
545
546 pref = rinfo->route_pref;
547 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 548 return -EINVAL;
70ceb4f5 549
4bed72e4 550 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
551
552 if (rinfo->length == 3)
553 prefix = (struct in6_addr *)rinfo->prefix;
554 else {
555 /* this function is safe */
556 ipv6_addr_prefix(&prefix_buf,
557 (struct in6_addr *)rinfo->prefix,
558 rinfo->prefix_len);
559 prefix = &prefix_buf;
560 }
561
efa2cea0
DL
562 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
563 dev->ifindex);
70ceb4f5
YH
564
565 if (rt && !lifetime) {
e0a1ad73 566 ip6_del_rt(rt);
70ceb4f5
YH
567 rt = NULL;
568 }
569
570 if (!rt && lifetime)
efa2cea0 571 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
70ceb4f5
YH
572 pref);
573 else if (rt)
574 rt->rt6i_flags = RTF_ROUTEINFO |
575 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
576
577 if (rt) {
4bed72e4 578 if (!addrconf_finite_timeout(lifetime)) {
70ceb4f5
YH
579 rt->rt6i_flags &= ~RTF_EXPIRES;
580 } else {
581 rt->rt6i_expires = jiffies + HZ * lifetime;
582 rt->rt6i_flags |= RTF_EXPIRES;
583 }
d8d1f30b 584 dst_release(&rt->dst);
70ceb4f5
YH
585 }
586 return 0;
587}
588#endif
589
/*
 * BACKTRACK - climb the fib6 tree after a lookup yielded the null entry.
 *
 * Expects the expanding function to provide local variables `rt` and `fn`
 * and the labels `out` and `restart`. While `rt` is the namespace's null
 * entry, moves `fn` to its parent (or into the parent's source subtree when
 * that subtree exists and is not where we came from) and jumps to `restart`
 * at the first node flagged RTN_RTINFO; jumps to `out` on reaching a node
 * flagged RTN_TL_ROOT.
 */
#define BACKTRACK(__net, saddr) \
do { \
	if (rt == __net->ipv6.ip6_null_entry) { \
		struct fib6_node *pn; \
		for (;;) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
c71099ac 607
8ed67789
DL
608static struct rt6_info *ip6_pol_route_lookup(struct net *net,
609 struct fib6_table *table,
4c9483b2 610 struct flowi6 *fl6, int flags)
1da177e4
LT
611{
612 struct fib6_node *fn;
613 struct rt6_info *rt;
614
c71099ac 615 read_lock_bh(&table->tb6_lock);
4c9483b2 616 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
617restart:
618 rt = fn->leaf;
4c9483b2
DM
619 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
620 BACKTRACK(net, &fl6->saddr);
c71099ac 621out:
d8d1f30b 622 dst_use(&rt->dst, jiffies);
c71099ac 623 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
624 return rt;
625
626}
627
9acd9f3a
YH
628struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
629 const struct in6_addr *saddr, int oif, int strict)
c71099ac 630{
4c9483b2
DM
631 struct flowi6 fl6 = {
632 .flowi6_oif = oif,
633 .daddr = *daddr,
c71099ac
TG
634 };
635 struct dst_entry *dst;
77d16f45 636 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 637
adaa70bb 638 if (saddr) {
4c9483b2 639 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
640 flags |= RT6_LOOKUP_F_HAS_SADDR;
641 }
642
4c9483b2 643 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
644 if (dst->error == 0)
645 return (struct rt6_info *) dst;
646
647 dst_release(dst);
648
1da177e4
LT
649 return NULL;
650}
651
7159039a
YH
652EXPORT_SYMBOL(rt6_lookup);
653
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In any case, if the caller does not hold a reference
   to it, it may be destroyed.
 */
659
86872cb5 660static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
661{
662 int err;
c71099ac 663 struct fib6_table *table;
1da177e4 664
c71099ac
TG
665 table = rt->rt6i_table;
666 write_lock_bh(&table->tb6_lock);
86872cb5 667 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 668 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
669
670 return err;
671}
672
40e22e8f
TG
673int ip6_ins_rt(struct rt6_info *rt)
674{
4d1169c1 675 struct nl_info info = {
c346dca1 676 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 677 };
528c4ceb 678 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
679}
680
b71d1d42
ED
681static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, const struct in6_addr *daddr,
682 const struct in6_addr *saddr)
1da177e4 683{
1da177e4
LT
684 struct rt6_info *rt;
685
686 /*
687 * Clone the route.
688 */
689
690 rt = ip6_rt_copy(ort);
691
692 if (rt) {
14deae41
DM
693 struct neighbour *neigh;
694 int attempts = !in_softirq();
695
58c4fb86
YH
696 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
697 if (rt->rt6i_dst.plen != 128 &&
698 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
699 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 700 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
58c4fb86 701 }
1da177e4 702
58c4fb86 703 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
1da177e4
LT
704 rt->rt6i_dst.plen = 128;
705 rt->rt6i_flags |= RTF_CACHE;
d8d1f30b 706 rt->dst.flags |= DST_HOST;
1da177e4
LT
707
708#ifdef CONFIG_IPV6_SUBTREES
709 if (rt->rt6i_src.plen && saddr) {
710 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
711 rt->rt6i_src.plen = 128;
712 }
713#endif
714
14deae41
DM
715 retry:
716 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
717 if (IS_ERR(neigh)) {
718 struct net *net = dev_net(rt->rt6i_dev);
719 int saved_rt_min_interval =
720 net->ipv6.sysctl.ip6_rt_gc_min_interval;
721 int saved_rt_elasticity =
722 net->ipv6.sysctl.ip6_rt_gc_elasticity;
723
724 if (attempts-- > 0) {
725 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
726 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
727
86393e52 728 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
729
730 net->ipv6.sysctl.ip6_rt_gc_elasticity =
731 saved_rt_elasticity;
732 net->ipv6.sysctl.ip6_rt_gc_min_interval =
733 saved_rt_min_interval;
734 goto retry;
735 }
736
737 if (net_ratelimit())
738 printk(KERN_WARNING
7e1b33e5 739 "ipv6: Neighbour table overflow.\n");
d8d1f30b 740 dst_free(&rt->dst);
14deae41
DM
741 return NULL;
742 }
743 rt->rt6i_nexthop = neigh;
1da177e4 744
95a9a5ba 745 }
1da177e4 746
95a9a5ba
YH
747 return rt;
748}
1da177e4 749
b71d1d42 750static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, const struct in6_addr *daddr)
299d9939
YH
751{
752 struct rt6_info *rt = ip6_rt_copy(ort);
753 if (rt) {
754 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
755 rt->rt6i_dst.plen = 128;
756 rt->rt6i_flags |= RTF_CACHE;
d8d1f30b 757 rt->dst.flags |= DST_HOST;
299d9939
YH
758 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
759 }
760 return rt;
761}
762
8ed67789 763static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 764 struct flowi6 *fl6, int flags)
1da177e4
LT
765{
766 struct fib6_node *fn;
519fbd87 767 struct rt6_info *rt, *nrt;
c71099ac 768 int strict = 0;
1da177e4 769 int attempts = 3;
519fbd87 770 int err;
53b7997f 771 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 772
77d16f45 773 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
774
775relookup:
c71099ac 776 read_lock_bh(&table->tb6_lock);
1da177e4 777
8238dd06 778restart_2:
4c9483b2 779 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1da177e4
LT
780
781restart:
4acad72d 782 rt = rt6_select(fn, oif, strict | reachable);
8ed67789 783
4c9483b2 784 BACKTRACK(net, &fl6->saddr);
8ed67789 785 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 786 rt->rt6i_flags & RTF_CACHE)
1ddef044 787 goto out;
1da177e4 788
d8d1f30b 789 dst_hold(&rt->dst);
c71099ac 790 read_unlock_bh(&table->tb6_lock);
fb9de91e 791
519fbd87 792 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
4c9483b2 793 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
7343ff31 794 else if (!(rt->dst.flags & DST_HOST))
4c9483b2 795 nrt = rt6_alloc_clone(rt, &fl6->daddr);
7343ff31
DM
796 else
797 goto out2;
e40cf353 798
d8d1f30b 799 dst_release(&rt->dst);
8ed67789 800 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 801
d8d1f30b 802 dst_hold(&rt->dst);
519fbd87 803 if (nrt) {
40e22e8f 804 err = ip6_ins_rt(nrt);
519fbd87 805 if (!err)
1da177e4 806 goto out2;
1da177e4 807 }
1da177e4 808
519fbd87
YH
809 if (--attempts <= 0)
810 goto out2;
811
812 /*
c71099ac 813 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
814 * released someone could insert this route. Relookup.
815 */
d8d1f30b 816 dst_release(&rt->dst);
519fbd87
YH
817 goto relookup;
818
819out:
8238dd06
YH
820 if (reachable) {
821 reachable = 0;
822 goto restart_2;
823 }
d8d1f30b 824 dst_hold(&rt->dst);
c71099ac 825 read_unlock_bh(&table->tb6_lock);
1da177e4 826out2:
d8d1f30b
CG
827 rt->dst.lastuse = jiffies;
828 rt->dst.__use++;
c71099ac
TG
829
830 return rt;
1da177e4
LT
831}
832
8ed67789 833static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 834 struct flowi6 *fl6, int flags)
4acad72d 835{
4c9483b2 836 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
837}
838
c71099ac
TG
839void ip6_route_input(struct sk_buff *skb)
840{
b71d1d42 841 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 842 struct net *net = dev_net(skb->dev);
adaa70bb 843 int flags = RT6_LOOKUP_F_HAS_SADDR;
4c9483b2
DM
844 struct flowi6 fl6 = {
845 .flowi6_iif = skb->dev->ifindex,
846 .daddr = iph->daddr,
847 .saddr = iph->saddr,
848 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
849 .flowi6_mark = skb->mark,
850 .flowi6_proto = iph->nexthdr,
c71099ac 851 };
adaa70bb 852
1d6e55f1 853 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
adaa70bb 854 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 855
4c9483b2 856 skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
c71099ac
TG
857}
858
8ed67789 859static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 860 struct flowi6 *fl6, int flags)
1da177e4 861{
4c9483b2 862 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
863}
864
9c7a4f9c 865struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 866 struct flowi6 *fl6)
c71099ac
TG
867{
868 int flags = 0;
869
4c9483b2 870 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
77d16f45 871 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 872
4c9483b2 873 if (!ipv6_addr_any(&fl6->saddr))
adaa70bb 874 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
875 else if (sk)
876 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 877
4c9483b2 878 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4
LT
879}
880
7159039a 881EXPORT_SYMBOL(ip6_route_output);
1da177e4 882
2774c131 883struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 884{
5c1e6aa3 885 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
886 struct dst_entry *new = NULL;
887
5c1e6aa3 888 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
14e50e57 889 if (rt) {
d8d1f30b 890 new = &rt->dst;
14e50e57 891
14e50e57 892 new->__use = 1;
352e512c
HX
893 new->input = dst_discard;
894 new->output = dst_discard;
14e50e57 895
defb3519 896 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
897 rt->rt6i_idev = ort->rt6i_idev;
898 if (rt->rt6i_idev)
899 in6_dev_hold(rt->rt6i_idev);
900 rt->rt6i_expires = 0;
901
902 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
903 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
904 rt->rt6i_metric = 0;
905
906 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
907#ifdef CONFIG_IPV6_SUBTREES
908 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
909#endif
910
911 dst_free(new);
912 }
913
69ead7af
DM
914 dst_release(dst_orig);
915 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 916}
14e50e57 917
1da177e4
LT
918/*
919 * Destination cache support functions
920 */
921
922static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
923{
924 struct rt6_info *rt;
925
926 rt = (struct rt6_info *) dst;
927
6431cbc2
DM
928 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
929 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
930 if (!rt->rt6i_peer)
931 rt6_bind_peer(rt, 0);
932 rt->rt6i_peer_genid = rt6_peer_genid();
933 }
1da177e4 934 return dst;
6431cbc2 935 }
1da177e4
LT
936 return NULL;
937}
938
939static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
940{
941 struct rt6_info *rt = (struct rt6_info *) dst;
942
943 if (rt) {
54c1a859
YH
944 if (rt->rt6i_flags & RTF_CACHE) {
945 if (rt6_check_expired(rt)) {
946 ip6_del_rt(rt);
947 dst = NULL;
948 }
949 } else {
1da177e4 950 dst_release(dst);
54c1a859
YH
951 dst = NULL;
952 }
1da177e4 953 }
54c1a859 954 return dst;
1da177e4
LT
955}
956
957static void ip6_link_failure(struct sk_buff *skb)
958{
959 struct rt6_info *rt;
960
3ffe533c 961 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 962
adf30907 963 rt = (struct rt6_info *) skb_dst(skb);
1da177e4
LT
964 if (rt) {
965 if (rt->rt6i_flags&RTF_CACHE) {
d8d1f30b 966 dst_set_expires(&rt->dst, 0);
1da177e4
LT
967 rt->rt6i_flags |= RTF_EXPIRES;
968 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
969 rt->rt6i_node->fn_sernum = -1;
970 }
971}
972
973static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
974{
975 struct rt6_info *rt6 = (struct rt6_info*)dst;
976
977 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
978 rt6->rt6i_flags |= RTF_MODIFIED;
979 if (mtu < IPV6_MIN_MTU) {
defb3519 980 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 981 mtu = IPV6_MIN_MTU;
defb3519
DM
982 features |= RTAX_FEATURE_ALLFRAG;
983 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 984 }
defb3519 985 dst_metric_set(dst, RTAX_MTU, mtu);
1da177e4
LT
986 }
987}
988
0dbaee3b 989static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 990{
0dbaee3b
DM
991 struct net_device *dev = dst->dev;
992 unsigned int mtu = dst_mtu(dst);
993 struct net *net = dev_net(dev);
994
1da177e4
LT
995 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
996
5578689a
DL
997 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
998 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
999
1000 /*
1ab1457c
YH
1001 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1002 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1003 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1004 * rely only on pmtu discovery"
1005 */
1006 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1007 mtu = IPV6_MAXPLEN;
1008 return mtu;
1009}
1010
d33e4553
DM
1011static unsigned int ip6_default_mtu(const struct dst_entry *dst)
1012{
1013 unsigned int mtu = IPV6_MIN_MTU;
1014 struct inet6_dev *idev;
1015
1016 rcu_read_lock();
1017 idev = __in6_dev_get(dst->dev);
1018 if (idev)
1019 mtu = idev->cnf.mtu6;
1020 rcu_read_unlock();
1021
1022 return mtu;
1023}
1024
3b00944c
YH
1025static struct dst_entry *icmp6_dst_gc_list;
1026static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1027
3b00944c 1028struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 1029 struct neighbour *neigh,
9acd9f3a 1030 const struct in6_addr *addr)
1da177e4
LT
1031{
1032 struct rt6_info *rt;
1033 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1034 struct net *net = dev_net(dev);
1da177e4
LT
1035
1036 if (unlikely(idev == NULL))
1037 return NULL;
1038
5c1e6aa3 1039 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev);
1da177e4
LT
1040 if (unlikely(rt == NULL)) {
1041 in6_dev_put(idev);
1042 goto out;
1043 }
1044
1da177e4
LT
1045 if (neigh)
1046 neigh_hold(neigh);
14deae41 1047 else {
1da177e4 1048 neigh = ndisc_get_neigh(dev, addr);
14deae41
DM
1049 if (IS_ERR(neigh))
1050 neigh = NULL;
1051 }
1da177e4 1052
1da177e4
LT
1053 rt->rt6i_idev = idev;
1054 rt->rt6i_nexthop = neigh;
d8d1f30b 1055 atomic_set(&rt->dst.__refcnt, 1);
defb3519 1056 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
d8d1f30b 1057 rt->dst.output = ip6_output;
1da177e4
LT
1058
1059#if 0 /* there's no chance to use these for ndisc */
d8d1f30b 1060 rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
1ab1457c 1061 ? DST_HOST
1da177e4
LT
1062 : 0;
1063 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1064 rt->rt6i_dst.plen = 128;
1065#endif
1066
3b00944c 1067 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1068 rt->dst.next = icmp6_dst_gc_list;
1069 icmp6_dst_gc_list = &rt->dst;
3b00944c 1070 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1071
5578689a 1072 fib6_force_start_gc(net);
1da177e4
LT
1073
1074out:
d8d1f30b 1075 return &rt->dst;
1da177e4
LT
1076}
1077
3d0f24a7 1078int icmp6_dst_gc(void)
1da177e4 1079{
e9476e95 1080 struct dst_entry *dst, **pprev;
3d0f24a7 1081 int more = 0;
1da177e4 1082
3b00944c
YH
1083 spin_lock_bh(&icmp6_dst_lock);
1084 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1085
1da177e4
LT
1086 while ((dst = *pprev) != NULL) {
1087 if (!atomic_read(&dst->__refcnt)) {
1088 *pprev = dst->next;
1089 dst_free(dst);
1da177e4
LT
1090 } else {
1091 pprev = &dst->next;
3d0f24a7 1092 ++more;
1da177e4
LT
1093 }
1094 }
1095
3b00944c 1096 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1097
3d0f24a7 1098 return more;
1da177e4
LT
1099}
1100
1e493d19
DM
1101static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1102 void *arg)
1103{
1104 struct dst_entry *dst, **pprev;
1105
1106 spin_lock_bh(&icmp6_dst_lock);
1107 pprev = &icmp6_dst_gc_list;
1108 while ((dst = *pprev) != NULL) {
1109 struct rt6_info *rt = (struct rt6_info *) dst;
1110 if (func(rt, arg)) {
1111 *pprev = dst->next;
1112 dst_free(dst);
1113 } else {
1114 pprev = &dst->next;
1115 }
1116 }
1117 spin_unlock_bh(&icmp6_dst_lock);
1118}
1119
569d3645 1120static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1121{
1da177e4 1122 unsigned long now = jiffies;
86393e52 1123 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1124 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1125 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1126 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1127 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1128 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1129 int entries;
7019b78e 1130
fc66f95c 1131 entries = dst_entries_get_fast(ops);
7019b78e 1132 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1133 entries <= rt_max_size)
1da177e4
LT
1134 goto out;
1135
6891a346
BT
1136 net->ipv6.ip6_rt_gc_expire++;
1137 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1138 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1139 entries = dst_entries_get_slow(ops);
1140 if (entries < ops->gc_thresh)
7019b78e 1141 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1142out:
7019b78e 1143 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1144 return entries > rt_max_size;
1da177e4
LT
1145}
1146
1147/* Clean host part of a prefix. Not necessary in radix tree,
1148 but results in cleaner routing tables.
1149
1150 Remove it only when all the things will work!
1151 */
1152
6b75d090 1153int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1154{
5170ae82 1155 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1156 if (hoplimit == 0) {
6b75d090 1157 struct net_device *dev = dst->dev;
c68f24cc
ED
1158 struct inet6_dev *idev;
1159
1160 rcu_read_lock();
1161 idev = __in6_dev_get(dev);
1162 if (idev)
6b75d090 1163 hoplimit = idev->cnf.hop_limit;
c68f24cc 1164 else
53b7997f 1165 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1166 rcu_read_unlock();
1da177e4
LT
1167 }
1168 return hoplimit;
1169}
abbf46ae 1170EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1171
1172/*
1173 *
1174 */
1175
86872cb5 1176int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1177{
1178 int err;
5578689a 1179 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1180 struct rt6_info *rt = NULL;
1181 struct net_device *dev = NULL;
1182 struct inet6_dev *idev = NULL;
c71099ac 1183 struct fib6_table *table;
1da177e4
LT
1184 int addr_type;
1185
86872cb5 1186 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1187 return -EINVAL;
1188#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1189 if (cfg->fc_src_len)
1da177e4
LT
1190 return -EINVAL;
1191#endif
86872cb5 1192 if (cfg->fc_ifindex) {
1da177e4 1193 err = -ENODEV;
5578689a 1194 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1195 if (!dev)
1196 goto out;
1197 idev = in6_dev_get(dev);
1198 if (!idev)
1199 goto out;
1200 }
1201
86872cb5
TG
1202 if (cfg->fc_metric == 0)
1203 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1204
5578689a 1205 table = fib6_new_table(net, cfg->fc_table);
c71099ac
TG
1206 if (table == NULL) {
1207 err = -ENOBUFS;
1208 goto out;
1209 }
1210
5c1e6aa3 1211 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL);
1da177e4
LT
1212
1213 if (rt == NULL) {
1214 err = -ENOMEM;
1215 goto out;
1216 }
1217
d8d1f30b 1218 rt->dst.obsolete = -1;
6f704992
YH
1219 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1220 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1221 0;
1da177e4 1222
86872cb5
TG
1223 if (cfg->fc_protocol == RTPROT_UNSPEC)
1224 cfg->fc_protocol = RTPROT_BOOT;
1225 rt->rt6i_protocol = cfg->fc_protocol;
1226
1227 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1228
1229 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1230 rt->dst.input = ip6_mc_input;
ab79ad14
1231 else if (cfg->fc_flags & RTF_LOCAL)
1232 rt->dst.input = ip6_input;
1da177e4 1233 else
d8d1f30b 1234 rt->dst.input = ip6_forward;
1da177e4 1235
d8d1f30b 1236 rt->dst.output = ip6_output;
1da177e4 1237
86872cb5
TG
1238 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1239 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1240 if (rt->rt6i_dst.plen == 128)
d8d1f30b 1241 rt->dst.flags = DST_HOST;
1da177e4
LT
1242
1243#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1244 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1245 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1246#endif
1247
86872cb5 1248 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1249
1250 /* We cannot add true routes via loopback here,
1251 they would result in kernel looping; promote them to reject routes
1252 */
86872cb5 1253 if ((cfg->fc_flags & RTF_REJECT) ||
ab79ad14
1254 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1255 && !(cfg->fc_flags&RTF_LOCAL))) {
1da177e4 1256 /* hold loopback dev/idev if we haven't done so. */
5578689a 1257 if (dev != net->loopback_dev) {
1da177e4
LT
1258 if (dev) {
1259 dev_put(dev);
1260 in6_dev_put(idev);
1261 }
5578689a 1262 dev = net->loopback_dev;
1da177e4
LT
1263 dev_hold(dev);
1264 idev = in6_dev_get(dev);
1265 if (!idev) {
1266 err = -ENODEV;
1267 goto out;
1268 }
1269 }
d8d1f30b
CG
1270 rt->dst.output = ip6_pkt_discard_out;
1271 rt->dst.input = ip6_pkt_discard;
1272 rt->dst.error = -ENETUNREACH;
1da177e4
LT
1273 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1274 goto install_route;
1275 }
1276
86872cb5 1277 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1278 const struct in6_addr *gw_addr;
1da177e4
LT
1279 int gwa_type;
1280
86872cb5
TG
1281 gw_addr = &cfg->fc_gateway;
1282 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1da177e4
LT
1283 gwa_type = ipv6_addr_type(gw_addr);
1284
1285 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1286 struct rt6_info *grt;
1287
1288 /* IPv6 strictly inhibits using not link-local
1289 addresses as nexthop address.
1290 Otherwise, router will not able to send redirects.
1291 It is very good, but in some (rare!) circumstances
1292 (SIT, PtP, NBMA NOARP links) it is handy to allow
1293 some exceptions. --ANK
1294 */
1295 err = -EINVAL;
1296 if (!(gwa_type&IPV6_ADDR_UNICAST))
1297 goto out;
1298
5578689a 1299 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1300
1301 err = -EHOSTUNREACH;
1302 if (grt == NULL)
1303 goto out;
1304 if (dev) {
1305 if (dev != grt->rt6i_dev) {
d8d1f30b 1306 dst_release(&grt->dst);
1da177e4
LT
1307 goto out;
1308 }
1309 } else {
1310 dev = grt->rt6i_dev;
1311 idev = grt->rt6i_idev;
1312 dev_hold(dev);
1313 in6_dev_hold(grt->rt6i_idev);
1314 }
1315 if (!(grt->rt6i_flags&RTF_GATEWAY))
1316 err = 0;
d8d1f30b 1317 dst_release(&grt->dst);
1da177e4
LT
1318
1319 if (err)
1320 goto out;
1321 }
1322 err = -EINVAL;
1323 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1324 goto out;
1325 }
1326
1327 err = -ENODEV;
1328 if (dev == NULL)
1329 goto out;
1330
c3968a85
DW
1331 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1332 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1333 err = -EINVAL;
1334 goto out;
1335 }
1336 ipv6_addr_copy(&rt->rt6i_prefsrc.addr, &cfg->fc_prefsrc);
1337 rt->rt6i_prefsrc.plen = 128;
1338 } else
1339 rt->rt6i_prefsrc.plen = 0;
1340
86872cb5 1341 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1da177e4
LT
1342 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1343 if (IS_ERR(rt->rt6i_nexthop)) {
1344 err = PTR_ERR(rt->rt6i_nexthop);
1345 rt->rt6i_nexthop = NULL;
1346 goto out;
1347 }
1348 }
1349
86872cb5 1350 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1351
1352install_route:
86872cb5
TG
1353 if (cfg->fc_mx) {
1354 struct nlattr *nla;
1355 int remaining;
1356
1357 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1358 int type = nla_type(nla);
86872cb5
TG
1359
1360 if (type) {
1361 if (type > RTAX_MAX) {
1da177e4
LT
1362 err = -EINVAL;
1363 goto out;
1364 }
86872cb5 1365
defb3519 1366 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1367 }
1da177e4
LT
1368 }
1369 }
1370
d8d1f30b 1371 rt->dst.dev = dev;
1da177e4 1372 rt->rt6i_idev = idev;
c71099ac 1373 rt->rt6i_table = table;
63152fc0 1374
c346dca1 1375 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1376
86872cb5 1377 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1378
1379out:
1380 if (dev)
1381 dev_put(dev);
1382 if (idev)
1383 in6_dev_put(idev);
1384 if (rt)
d8d1f30b 1385 dst_free(&rt->dst);
1da177e4
LT
1386 return err;
1387}
1388
86872cb5 1389static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1390{
1391 int err;
c71099ac 1392 struct fib6_table *table;
c346dca1 1393 struct net *net = dev_net(rt->rt6i_dev);
1da177e4 1394
8ed67789 1395 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1396 return -ENOENT;
1397
c71099ac
TG
1398 table = rt->rt6i_table;
1399 write_lock_bh(&table->tb6_lock);
1da177e4 1400
86872cb5 1401 err = fib6_del(rt, info);
d8d1f30b 1402 dst_release(&rt->dst);
1da177e4 1403
c71099ac 1404 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1405
1406 return err;
1407}
1408
e0a1ad73
TG
1409int ip6_del_rt(struct rt6_info *rt)
1410{
4d1169c1 1411 struct nl_info info = {
c346dca1 1412 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 1413 };
528c4ceb 1414 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1415}
1416
86872cb5 1417static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1418{
c71099ac 1419 struct fib6_table *table;
1da177e4
LT
1420 struct fib6_node *fn;
1421 struct rt6_info *rt;
1422 int err = -ESRCH;
1423
5578689a 1424 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
c71099ac
TG
1425 if (table == NULL)
1426 return err;
1427
1428 read_lock_bh(&table->tb6_lock);
1da177e4 1429
c71099ac 1430 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1431 &cfg->fc_dst, cfg->fc_dst_len,
1432 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1433
1da177e4 1434 if (fn) {
d8d1f30b 1435 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1436 if (cfg->fc_ifindex &&
1da177e4 1437 (rt->rt6i_dev == NULL ||
86872cb5 1438 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1da177e4 1439 continue;
86872cb5
TG
1440 if (cfg->fc_flags & RTF_GATEWAY &&
1441 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1442 continue;
86872cb5 1443 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1444 continue;
d8d1f30b 1445 dst_hold(&rt->dst);
c71099ac 1446 read_unlock_bh(&table->tb6_lock);
1da177e4 1447
86872cb5 1448 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1449 }
1450 }
c71099ac 1451 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1452
1453 return err;
1454}
1455
1456/*
1457 * Handle redirects
1458 */
a6279458 1459struct ip6rd_flowi {
4c9483b2 1460 struct flowi6 fl6;
a6279458
YH
1461 struct in6_addr gateway;
1462};
1463
8ed67789
DL
1464static struct rt6_info *__ip6_route_redirect(struct net *net,
1465 struct fib6_table *table,
4c9483b2 1466 struct flowi6 *fl6,
a6279458 1467 int flags)
1da177e4 1468{
4c9483b2 1469 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
a6279458 1470 struct rt6_info *rt;
e843b9e1 1471 struct fib6_node *fn;
c71099ac 1472
1da177e4 1473 /*
e843b9e1
YH
1474 * Get the "current" route for this destination and
1475 * check if the redirect has come from approriate router.
1476 *
1477 * RFC 2461 specifies that redirects should only be
1478 * accepted if they come from the nexthop to the target.
1479 * Due to the way the routes are chosen, this notion
1480 * is a bit fuzzy and one might need to check all possible
1481 * routes.
1da177e4 1482 */
1da177e4 1483
c71099ac 1484 read_lock_bh(&table->tb6_lock);
4c9483b2 1485 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
e843b9e1 1486restart:
d8d1f30b 1487 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
e843b9e1
YH
1488 /*
1489 * Current route is on-link; redirect is always invalid.
1490 *
1491 * Seems, previous statement is not true. It could
1492 * be node, which looks for us as on-link (f.e. proxy ndisc)
1493 * But then router serving it might decide, that we should
1494 * know truth 8)8) --ANK (980726).
1495 */
1496 if (rt6_check_expired(rt))
1497 continue;
1498 if (!(rt->rt6i_flags & RTF_GATEWAY))
1499 continue;
4c9483b2 1500 if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
e843b9e1 1501 continue;
a6279458 1502 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1503 continue;
1504 break;
1505 }
a6279458 1506
cb15d9c2 1507 if (!rt)
8ed67789 1508 rt = net->ipv6.ip6_null_entry;
4c9483b2 1509 BACKTRACK(net, &fl6->saddr);
cb15d9c2 1510out:
d8d1f30b 1511 dst_hold(&rt->dst);
a6279458 1512
c71099ac 1513 read_unlock_bh(&table->tb6_lock);
e843b9e1 1514
a6279458
YH
1515 return rt;
1516};
1517
b71d1d42
ED
1518static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1519 const struct in6_addr *src,
1520 const struct in6_addr *gateway,
a6279458
YH
1521 struct net_device *dev)
1522{
adaa70bb 1523 int flags = RT6_LOOKUP_F_HAS_SADDR;
c346dca1 1524 struct net *net = dev_net(dev);
a6279458 1525 struct ip6rd_flowi rdfl = {
4c9483b2
DM
1526 .fl6 = {
1527 .flowi6_oif = dev->ifindex,
1528 .daddr = *dest,
1529 .saddr = *src,
a6279458 1530 },
a6279458 1531 };
adaa70bb 1532
86c36ce4
BH
1533 ipv6_addr_copy(&rdfl.gateway, gateway);
1534
adaa70bb
TG
1535 if (rt6_need_strict(dest))
1536 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1537
4c9483b2 1538 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
58f09b78 1539 flags, __ip6_route_redirect);
a6279458
YH
1540}
1541
b71d1d42
ED
1542void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1543 const struct in6_addr *saddr,
a6279458
YH
1544 struct neighbour *neigh, u8 *lladdr, int on_link)
1545{
1546 struct rt6_info *rt, *nrt = NULL;
1547 struct netevent_redirect netevent;
c346dca1 1548 struct net *net = dev_net(neigh->dev);
a6279458
YH
1549
1550 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1551
8ed67789 1552 if (rt == net->ipv6.ip6_null_entry) {
1da177e4
LT
1553 if (net_ratelimit())
1554 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1555 "for redirect target\n");
a6279458 1556 goto out;
1da177e4
LT
1557 }
1558
1da177e4
LT
1559 /*
1560 * We have finally decided to accept it.
1561 */
1562
1ab1457c 1563 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1564 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1565 NEIGH_UPDATE_F_OVERRIDE|
1566 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1567 NEIGH_UPDATE_F_ISROUTER))
1568 );
1569
1570 /*
1571 * Redirect received -> path was valid.
1572 * Look, redirects are sent only in response to data packets,
1573 * so that this nexthop apparently is reachable. --ANK
1574 */
d8d1f30b 1575 dst_confirm(&rt->dst);
1da177e4
LT
1576
1577 /* Duplicate redirect: silently ignore. */
d8d1f30b 1578 if (neigh == rt->dst.neighbour)
1da177e4
LT
1579 goto out;
1580
1581 nrt = ip6_rt_copy(rt);
1582 if (nrt == NULL)
1583 goto out;
1584
1585 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1586 if (on_link)
1587 nrt->rt6i_flags &= ~RTF_GATEWAY;
1588
1589 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1590 nrt->rt6i_dst.plen = 128;
d8d1f30b 1591 nrt->dst.flags |= DST_HOST;
1da177e4
LT
1592
1593 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1594 nrt->rt6i_nexthop = neigh_clone(neigh);
1da177e4 1595
40e22e8f 1596 if (ip6_ins_rt(nrt))
1da177e4
LT
1597 goto out;
1598
d8d1f30b
CG
1599 netevent.old = &rt->dst;
1600 netevent.new = &nrt->dst;
8d71740c
TT
1601 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1602
1da177e4 1603 if (rt->rt6i_flags&RTF_CACHE) {
e0a1ad73 1604 ip6_del_rt(rt);
1da177e4
LT
1605 return;
1606 }
1607
1608out:
d8d1f30b 1609 dst_release(&rt->dst);
1da177e4
LT
1610}
1611
1612/*
1613 * Handle ICMP "packet too big" messages
1614 * i.e. Path MTU discovery
1615 */
1616
b71d1d42 1617static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
ae878ae2 1618 struct net *net, u32 pmtu, int ifindex)
1da177e4
LT
1619{
1620 struct rt6_info *rt, *nrt;
1621 int allfrag = 0;
d3052b55 1622again:
ae878ae2 1623 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1da177e4
LT
1624 if (rt == NULL)
1625 return;
1626
d3052b55
AV
1627 if (rt6_check_expired(rt)) {
1628 ip6_del_rt(rt);
1629 goto again;
1630 }
1631
d8d1f30b 1632 if (pmtu >= dst_mtu(&rt->dst))
1da177e4
LT
1633 goto out;
1634
1635 if (pmtu < IPV6_MIN_MTU) {
1636 /*
1ab1457c 1637 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1638 * MTU (1280) and a fragment header should always be included
1639 * after a node receiving Too Big message reporting PMTU is
1640 * less than the IPv6 Minimum Link MTU.
1641 */
1642 pmtu = IPV6_MIN_MTU;
1643 allfrag = 1;
1644 }
1645
1646 /* New mtu received -> path was valid.
1647 They are sent only in response to data packets,
1648 so that this nexthop apparently is reachable. --ANK
1649 */
d8d1f30b 1650 dst_confirm(&rt->dst);
1da177e4
LT
1651
1652 /* Host route. If it is static, it would be better
1653 not to override it, but add new one, so that
1654 when cache entry will expire old pmtu
1655 would return automatically.
1656 */
1657 if (rt->rt6i_flags & RTF_CACHE) {
defb3519
DM
1658 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1659 if (allfrag) {
1660 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1661 features |= RTAX_FEATURE_ALLFRAG;
1662 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1663 }
d8d1f30b 1664 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1665 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1666 goto out;
1667 }
1668
1669 /* Network route.
1670 Two cases are possible:
1671 1. It is connected route. Action: COW
1672 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1673 */
d5315b50 1674 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1675 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1676 else
1677 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1678
d5315b50 1679 if (nrt) {
defb3519
DM
1680 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1681 if (allfrag) {
1682 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1683 features |= RTAX_FEATURE_ALLFRAG;
1684 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1685 }
a1e78363
YH
1686
1687 /* According to RFC 1981, detecting PMTU increase shouldn't be
1688 * happened within 5 mins, the recommended timer is 10 mins.
1689 * Here this route expiration time is set to ip6_rt_mtu_expires
1690 * which is 10 mins. After 10 mins the decreased pmtu is expired
1691 * and detecting PMTU increase will be automatically happened.
1692 */
d8d1f30b 1693 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
a1e78363
YH
1694 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1695
40e22e8f 1696 ip6_ins_rt(nrt);
1da177e4 1697 }
1da177e4 1698out:
d8d1f30b 1699 dst_release(&rt->dst);
1da177e4
LT
1700}
1701
b71d1d42 1702void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
ae878ae2
1703 struct net_device *dev, u32 pmtu)
1704{
1705 struct net *net = dev_net(dev);
1706
1707 /*
1708 * RFC 1981 states that a node "MUST reduce the size of the packets it
1709 * is sending along the path" that caused the Packet Too Big message.
1710 * Since it's not possible in the general case to determine which
1711 * interface was used to send the original packet, we update the MTU
1712 * on the interface that will be used to send future packets. We also
1713 * update the MTU on the interface that received the Packet Too Big in
1714 * case the original packet was forced out that interface with
1715 * SO_BINDTODEVICE or similar. This is the next best thing to the
1716 * correct behaviour, which would be to update the MTU on all
1717 * interfaces.
1718 */
1719 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1720 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1721}
1722
1da177e4
LT
1723/*
1724 * Misc support functions
1725 */
1726
1727static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1728{
c346dca1 1729 struct net *net = dev_net(ort->rt6i_dev);
5c1e6aa3
DM
1730 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
1731 ort->dst.dev);
1da177e4
LT
1732
1733 if (rt) {
d8d1f30b
CG
1734 rt->dst.input = ort->dst.input;
1735 rt->dst.output = ort->dst.output;
1736
defb3519 1737 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b 1738 rt->dst.error = ort->dst.error;
1da177e4
LT
1739 rt->rt6i_idev = ort->rt6i_idev;
1740 if (rt->rt6i_idev)
1741 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1742 rt->dst.lastuse = jiffies;
1da177e4
LT
1743 rt->rt6i_expires = 0;
1744
1745 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1746 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1747 rt->rt6i_metric = 0;
1748
1749 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1750#ifdef CONFIG_IPV6_SUBTREES
1751 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1752#endif
c71099ac 1753 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1754 }
1755 return rt;
1756}
1757
70ceb4f5 1758#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 1759static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
1760 const struct in6_addr *prefix, int prefixlen,
1761 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
1762{
1763 struct fib6_node *fn;
1764 struct rt6_info *rt = NULL;
c71099ac
TG
1765 struct fib6_table *table;
1766
efa2cea0 1767 table = fib6_get_table(net, RT6_TABLE_INFO);
c71099ac
TG
1768 if (table == NULL)
1769 return NULL;
70ceb4f5 1770
c71099ac
TG
1771 write_lock_bh(&table->tb6_lock);
1772 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1773 if (!fn)
1774 goto out;
1775
d8d1f30b 1776 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
70ceb4f5
YH
1777 if (rt->rt6i_dev->ifindex != ifindex)
1778 continue;
1779 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1780 continue;
1781 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1782 continue;
d8d1f30b 1783 dst_hold(&rt->dst);
70ceb4f5
YH
1784 break;
1785 }
1786out:
c71099ac 1787 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1788 return rt;
1789}
1790
efa2cea0 1791static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
1792 const struct in6_addr *prefix, int prefixlen,
1793 const struct in6_addr *gwaddr, int ifindex,
70ceb4f5
YH
1794 unsigned pref)
1795{
86872cb5
TG
1796 struct fib6_config cfg = {
1797 .fc_table = RT6_TABLE_INFO,
238fc7ea 1798 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1799 .fc_ifindex = ifindex,
1800 .fc_dst_len = prefixlen,
1801 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1802 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1803 .fc_nlinfo.pid = 0,
1804 .fc_nlinfo.nlh = NULL,
1805 .fc_nlinfo.nl_net = net,
86872cb5
TG
1806 };
1807
1808 ipv6_addr_copy(&cfg.fc_dst, prefix);
1809 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
70ceb4f5 1810
e317da96
YH
1811 /* We should treat it as a default route if prefix length is 0. */
1812 if (!prefixlen)
86872cb5 1813 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1814
86872cb5 1815 ip6_route_add(&cfg);
70ceb4f5 1816
efa2cea0 1817 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1818}
1819#endif
1820
b71d1d42 1821struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 1822{
1da177e4 1823 struct rt6_info *rt;
c71099ac 1824 struct fib6_table *table;
1da177e4 1825
c346dca1 1826 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
c71099ac
TG
1827 if (table == NULL)
1828 return NULL;
1da177e4 1829
c71099ac 1830 write_lock_bh(&table->tb6_lock);
d8d1f30b 1831 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1da177e4 1832 if (dev == rt->rt6i_dev &&
045927ff 1833 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1834 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1835 break;
1836 }
1837 if (rt)
d8d1f30b 1838 dst_hold(&rt->dst);
c71099ac 1839 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1840 return rt;
1841}
1842
b71d1d42 1843struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
1844 struct net_device *dev,
1845 unsigned int pref)
1da177e4 1846{
86872cb5
TG
1847 struct fib6_config cfg = {
1848 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1849 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1850 .fc_ifindex = dev->ifindex,
1851 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1852 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1853 .fc_nlinfo.pid = 0,
1854 .fc_nlinfo.nlh = NULL,
c346dca1 1855 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1856 };
1da177e4 1857
86872cb5 1858 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1da177e4 1859
86872cb5 1860 ip6_route_add(&cfg);
1da177e4 1861
1da177e4
LT
1862 return rt6_get_dflt_router(gwaddr, dev);
1863}
1864
7b4da532 1865void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1866{
1867 struct rt6_info *rt;
c71099ac
TG
1868 struct fib6_table *table;
1869
1870 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1871 table = fib6_get_table(net, RT6_TABLE_DFLT);
c71099ac
TG
1872 if (table == NULL)
1873 return;
1da177e4
LT
1874
1875restart:
c71099ac 1876 read_lock_bh(&table->tb6_lock);
d8d1f30b 1877 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1878 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1879 dst_hold(&rt->dst);
c71099ac 1880 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1881 ip6_del_rt(rt);
1da177e4
LT
1882 goto restart;
1883 }
1884 }
c71099ac 1885 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1886}
1887
5578689a
DL
1888static void rtmsg_to_fib6_config(struct net *net,
1889 struct in6_rtmsg *rtmsg,
86872cb5
TG
1890 struct fib6_config *cfg)
1891{
1892 memset(cfg, 0, sizeof(*cfg));
1893
1894 cfg->fc_table = RT6_TABLE_MAIN;
1895 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1896 cfg->fc_metric = rtmsg->rtmsg_metric;
1897 cfg->fc_expires = rtmsg->rtmsg_info;
1898 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1899 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1900 cfg->fc_flags = rtmsg->rtmsg_flags;
1901
5578689a 1902 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1903
86872cb5
TG
1904 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1905 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1906 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1907}
1908
5578689a 1909int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1910{
86872cb5 1911 struct fib6_config cfg;
1da177e4
LT
1912 struct in6_rtmsg rtmsg;
1913 int err;
1914
1915 switch(cmd) {
1916 case SIOCADDRT: /* Add a route */
1917 case SIOCDELRT: /* Delete a route */
1918 if (!capable(CAP_NET_ADMIN))
1919 return -EPERM;
1920 err = copy_from_user(&rtmsg, arg,
1921 sizeof(struct in6_rtmsg));
1922 if (err)
1923 return -EFAULT;
86872cb5 1924
5578689a 1925 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1926
1da177e4
LT
1927 rtnl_lock();
1928 switch (cmd) {
1929 case SIOCADDRT:
86872cb5 1930 err = ip6_route_add(&cfg);
1da177e4
LT
1931 break;
1932 case SIOCDELRT:
86872cb5 1933 err = ip6_route_del(&cfg);
1da177e4
LT
1934 break;
1935 default:
1936 err = -EINVAL;
1937 }
1938 rtnl_unlock();
1939
1940 return err;
3ff50b79 1941 }
1da177e4
LT
1942
1943 return -EINVAL;
1944}
1945
1946/*
1947 * Drop the packet on the floor
1948 */
1949
d5fdd6ba 1950static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 1951{
612f09e8 1952 int type;
adf30907 1953 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
1954 switch (ipstats_mib_noroutes) {
1955 case IPSTATS_MIB_INNOROUTES:
0660e03f 1956 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 1957 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
1958 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1959 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
1960 break;
1961 }
1962 /* FALLTHROUGH */
1963 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
1964 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1965 ipstats_mib_noroutes);
612f09e8
YH
1966 break;
1967 }
3ffe533c 1968 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
1969 kfree_skb(skb);
1970 return 0;
1971}
1972
9ce8ade0
TG
1973static int ip6_pkt_discard(struct sk_buff *skb)
1974{
612f09e8 1975 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1976}
1977
20380731 1978static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 1979{
adf30907 1980 skb->dev = skb_dst(skb)->dev;
612f09e8 1981 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
1982}
1983
6723ab54
DM
1984#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1985
9ce8ade0
TG
1986static int ip6_pkt_prohibit(struct sk_buff *skb)
1987{
612f09e8 1988 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1989}
1990
1991static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1992{
adf30907 1993 skb->dev = skb_dst(skb)->dev;
612f09e8 1994 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
1995}
1996
6723ab54
DM
1997#endif
1998
1da177e4
LT
1999/*
2000 * Allocate a dst for local (unicast / anycast) address.
2001 */
2002
2003struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2004 const struct in6_addr *addr,
2005 int anycast)
2006{
c346dca1 2007 struct net *net = dev_net(idev->dev);
5c1e6aa3
DM
2008 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
2009 net->loopback_dev);
14deae41 2010 struct neighbour *neigh;
1da177e4 2011
40385653
BG
2012 if (rt == NULL) {
2013 if (net_ratelimit())
2014 pr_warning("IPv6: Maximum number of routes reached,"
2015 " consider increasing route/max_size.\n");
1da177e4 2016 return ERR_PTR(-ENOMEM);
40385653 2017 }
1da177e4 2018
1da177e4
LT
2019 in6_dev_hold(idev);
2020
d8d1f30b
CG
2021 rt->dst.flags = DST_HOST;
2022 rt->dst.input = ip6_input;
2023 rt->dst.output = ip6_output;
1da177e4 2024 rt->rt6i_idev = idev;
d8d1f30b 2025 rt->dst.obsolete = -1;
1da177e4
LT
2026
2027 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2028 if (anycast)
2029 rt->rt6i_flags |= RTF_ANYCAST;
2030 else
1da177e4 2031 rt->rt6i_flags |= RTF_LOCAL;
14deae41
DM
2032 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
2033 if (IS_ERR(neigh)) {
d8d1f30b 2034 dst_free(&rt->dst);
14deae41 2035
29546a64 2036 return ERR_CAST(neigh);
1da177e4 2037 }
14deae41 2038 rt->rt6i_nexthop = neigh;
1da177e4
LT
2039
2040 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
2041 rt->rt6i_dst.plen = 128;
5578689a 2042 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2043
d8d1f30b 2044 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2045
2046 return rt;
2047}
2048
c3968a85
DW
2049int ip6_route_get_saddr(struct net *net,
2050 struct rt6_info *rt,
b71d1d42 2051 const struct in6_addr *daddr,
c3968a85
DW
2052 unsigned int prefs,
2053 struct in6_addr *saddr)
2054{
2055 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2056 int err = 0;
2057 if (rt->rt6i_prefsrc.plen)
2058 ipv6_addr_copy(saddr, &rt->rt6i_prefsrc.addr);
2059 else
2060 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2061 daddr, prefs, saddr);
2062 return err;
2063}
2064
2065/* remove deleted ip from prefsrc entries */
/* Argument bundle handed to fib6_remove_prefsrc() through fib6_clean_all(). */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device filter; NULL matches any device */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* address being removed */
};
2071
2072static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2073{
2074 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2075 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2076 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2077
2078 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
2079 rt != net->ipv6.ip6_null_entry &&
2080 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2081 /* remove prefsrc entry */
2082 rt->rt6i_prefsrc.plen = 0;
2083 }
2084 return 0;
2085}
2086
2087void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2088{
2089 struct net *net = dev_net(ifp->idev->dev);
2090 struct arg_dev_net_ip adni = {
2091 .dev = ifp->idev->dev,
2092 .net = net,
2093 .addr = &ifp->addr,
2094 };
2095 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2096}
2097
8ed67789
DL
/* Argument bundle handed to fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device going down; NULL matches any device */
	struct net *net;		/* namespace whose null entry is kept */
};
2102
1da177e4
LT
2103static int fib6_ifdown(struct rt6_info *rt, void *arg)
2104{
bc3ef660 2105 const struct arg_dev_net *adn = arg;
2106 const struct net_device *dev = adn->dev;
8ed67789 2107
bc3ef660 2108 if ((rt->rt6i_dev == dev || dev == NULL) &&
2109 rt != adn->net->ipv6.ip6_null_entry) {
1da177e4
LT
2110 RT6_TRACE("deleted by ifdown %p\n", rt);
2111 return -1;
2112 }
2113 return 0;
2114}
2115
f3db4851 2116void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2117{
8ed67789
DL
2118 struct arg_dev_net adn = {
2119 .dev = dev,
2120 .net = net,
2121 };
2122
2123 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2124 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2125}
2126
/* Argument bundle handed to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
2132
2133static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2134{
2135 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2136 struct inet6_dev *idev;
2137
2138 /* In IPv6 pmtu discovery is not optional,
2139 so that RTAX_MTU lock cannot disable it.
2140 We still use this lock to block changes
2141 caused by addrconf/ndisc.
2142 */
2143
2144 idev = __in6_dev_get(arg->dev);
2145 if (idev == NULL)
2146 return 0;
2147
2148 /* For administrative MTU increase, there is no way to discover
2149 IPv6 PMTU increase, so PMTU increase should be updated here.
2150 Since RFC 1981 doesn't include administrative MTU increase
2151 update PMTU increase is a MUST. (i.e. jumbo frame)
2152 */
2153 /*
2154 If new MTU is less than route PMTU, this new MTU will be the
2155 lowest MTU in the path, update the route PMTU to reflect PMTU
2156 decreases; if new MTU is greater than route PMTU, and the
2157 old MTU is the lowest MTU in the path, update the route PMTU
2158 to reflect the increase. In this case if the other nodes' MTU
2159 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2160 PMTU discouvery.
2161 */
2162 if (rt->rt6i_dev == arg->dev &&
d8d1f30b
CG
2163 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2164 (dst_mtu(&rt->dst) >= arg->mtu ||
2165 (dst_mtu(&rt->dst) < arg->mtu &&
2166 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2167 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2168 }
1da177e4
LT
2169 return 0;
2170}
2171
2172void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2173{
c71099ac
TG
2174 struct rt6_mtu_change_arg arg = {
2175 .dev = dev,
2176 .mtu = mtu,
2177 };
1da177e4 2178
c346dca1 2179 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2180}
2181
ef7c79ed 2182static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2183 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2184 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2185 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2186 [RTA_PRIORITY] = { .type = NLA_U32 },
2187 [RTA_METRICS] = { .type = NLA_NESTED },
2188};
2189
2190static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2191 struct fib6_config *cfg)
1da177e4 2192{
86872cb5
TG
2193 struct rtmsg *rtm;
2194 struct nlattr *tb[RTA_MAX+1];
2195 int err;
1da177e4 2196
86872cb5
TG
2197 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2198 if (err < 0)
2199 goto errout;
1da177e4 2200
86872cb5
TG
2201 err = -EINVAL;
2202 rtm = nlmsg_data(nlh);
2203 memset(cfg, 0, sizeof(*cfg));
2204
2205 cfg->fc_table = rtm->rtm_table;
2206 cfg->fc_dst_len = rtm->rtm_dst_len;
2207 cfg->fc_src_len = rtm->rtm_src_len;
2208 cfg->fc_flags = RTF_UP;
2209 cfg->fc_protocol = rtm->rtm_protocol;
2210
2211 if (rtm->rtm_type == RTN_UNREACHABLE)
2212 cfg->fc_flags |= RTF_REJECT;
2213
ab79ad14
2214 if (rtm->rtm_type == RTN_LOCAL)
2215 cfg->fc_flags |= RTF_LOCAL;
2216
86872cb5
TG
2217 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2218 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2219 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2220
2221 if (tb[RTA_GATEWAY]) {
2222 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2223 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2224 }
86872cb5
TG
2225
2226 if (tb[RTA_DST]) {
2227 int plen = (rtm->rtm_dst_len + 7) >> 3;
2228
2229 if (nla_len(tb[RTA_DST]) < plen)
2230 goto errout;
2231
2232 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2233 }
86872cb5
TG
2234
2235 if (tb[RTA_SRC]) {
2236 int plen = (rtm->rtm_src_len + 7) >> 3;
2237
2238 if (nla_len(tb[RTA_SRC]) < plen)
2239 goto errout;
2240
2241 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2242 }
86872cb5 2243
c3968a85
DW
2244 if (tb[RTA_PREFSRC])
2245 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2246
86872cb5
TG
2247 if (tb[RTA_OIF])
2248 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2249
2250 if (tb[RTA_PRIORITY])
2251 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2252
2253 if (tb[RTA_METRICS]) {
2254 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2255 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2256 }
86872cb5
TG
2257
2258 if (tb[RTA_TABLE])
2259 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2260
2261 err = 0;
2262errout:
2263 return err;
1da177e4
LT
2264}
2265
c127ea2c 2266static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2267{
86872cb5
TG
2268 struct fib6_config cfg;
2269 int err;
1da177e4 2270
86872cb5
TG
2271 err = rtm_to_fib6_config(skb, nlh, &cfg);
2272 if (err < 0)
2273 return err;
2274
2275 return ip6_route_del(&cfg);
1da177e4
LT
2276}
2277
c127ea2c 2278static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2279{
86872cb5
TG
2280 struct fib6_config cfg;
2281 int err;
1da177e4 2282
86872cb5
TG
2283 err = rtm_to_fib6_config(skb, nlh, &cfg);
2284 if (err < 0)
2285 return err;
2286
2287 return ip6_route_add(&cfg);
1da177e4
LT
2288}
2289
339bf98f
TG
2290static inline size_t rt6_nlmsg_size(void)
2291{
2292 return NLMSG_ALIGN(sizeof(struct rtmsg))
2293 + nla_total_size(16) /* RTA_SRC */
2294 + nla_total_size(16) /* RTA_DST */
2295 + nla_total_size(16) /* RTA_GATEWAY */
2296 + nla_total_size(16) /* RTA_PREFSRC */
2297 + nla_total_size(4) /* RTA_TABLE */
2298 + nla_total_size(4) /* RTA_IIF */
2299 + nla_total_size(4) /* RTA_OIF */
2300 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2301 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2302 + nla_total_size(sizeof(struct rta_cacheinfo));
2303}
2304
191cd582
BH
2305static int rt6_fill_node(struct net *net,
2306 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2307 struct in6_addr *dst, struct in6_addr *src,
2308 int iif, int type, u32 pid, u32 seq,
7bc570c8 2309 int prefix, int nowait, unsigned int flags)
1da177e4
LT
2310{
2311 struct rtmsg *rtm;
2d7202bf 2312 struct nlmsghdr *nlh;
e3703b3d 2313 long expires;
9e762a4a 2314 u32 table;
1da177e4
LT
2315
2316 if (prefix) { /* user wants prefix routes only */
2317 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2318 /* success since this is not a prefix route */
2319 return 1;
2320 }
2321 }
2322
2d7202bf
TG
2323 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2324 if (nlh == NULL)
26932566 2325 return -EMSGSIZE;
2d7202bf
TG
2326
2327 rtm = nlmsg_data(nlh);
1da177e4
LT
2328 rtm->rtm_family = AF_INET6;
2329 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2330 rtm->rtm_src_len = rt->rt6i_src.plen;
2331 rtm->rtm_tos = 0;
c71099ac 2332 if (rt->rt6i_table)
9e762a4a 2333 table = rt->rt6i_table->tb6_id;
c71099ac 2334 else
9e762a4a
PM
2335 table = RT6_TABLE_UNSPEC;
2336 rtm->rtm_table = table;
2d7202bf 2337 NLA_PUT_U32(skb, RTA_TABLE, table);
1da177e4
LT
2338 if (rt->rt6i_flags&RTF_REJECT)
2339 rtm->rtm_type = RTN_UNREACHABLE;
ab79ad14
2340 else if (rt->rt6i_flags&RTF_LOCAL)
2341 rtm->rtm_type = RTN_LOCAL;
1da177e4
LT
2342 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2343 rtm->rtm_type = RTN_LOCAL;
2344 else
2345 rtm->rtm_type = RTN_UNICAST;
2346 rtm->rtm_flags = 0;
2347 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2348 rtm->rtm_protocol = rt->rt6i_protocol;
2349 if (rt->rt6i_flags&RTF_DYNAMIC)
2350 rtm->rtm_protocol = RTPROT_REDIRECT;
2351 else if (rt->rt6i_flags & RTF_ADDRCONF)
2352 rtm->rtm_protocol = RTPROT_KERNEL;
2353 else if (rt->rt6i_flags&RTF_DEFAULT)
2354 rtm->rtm_protocol = RTPROT_RA;
2355
2356 if (rt->rt6i_flags&RTF_CACHE)
2357 rtm->rtm_flags |= RTM_F_CLONED;
2358
2359 if (dst) {
2d7202bf 2360 NLA_PUT(skb, RTA_DST, 16, dst);
1ab1457c 2361 rtm->rtm_dst_len = 128;
1da177e4 2362 } else if (rtm->rtm_dst_len)
2d7202bf 2363 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2364#ifdef CONFIG_IPV6_SUBTREES
2365 if (src) {
2d7202bf 2366 NLA_PUT(skb, RTA_SRC, 16, src);
1ab1457c 2367 rtm->rtm_src_len = 128;
1da177e4 2368 } else if (rtm->rtm_src_len)
2d7202bf 2369 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4 2370#endif
7bc570c8
YH
2371 if (iif) {
2372#ifdef CONFIG_IPV6_MROUTE
2373 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2374 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2375 if (err <= 0) {
2376 if (!nowait) {
2377 if (err == 0)
2378 return 0;
2379 goto nla_put_failure;
2380 } else {
2381 if (err == -EMSGSIZE)
2382 goto nla_put_failure;
2383 }
2384 }
2385 } else
2386#endif
2387 NLA_PUT_U32(skb, RTA_IIF, iif);
2388 } else if (dst) {
1da177e4 2389 struct in6_addr saddr_buf;
c3968a85 2390 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
2d7202bf 2391 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2392 }
2d7202bf 2393
c3968a85
DW
2394 if (rt->rt6i_prefsrc.plen) {
2395 struct in6_addr saddr_buf;
2396 ipv6_addr_copy(&saddr_buf, &rt->rt6i_prefsrc.addr);
2397 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2398 }
2399
defb3519 2400 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2401 goto nla_put_failure;
2402
d8d1f30b
CG
2403 if (rt->dst.neighbour)
2404 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);
2d7202bf 2405
d8d1f30b 2406 if (rt->dst.dev)
2d7202bf
TG
2407 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2408
2409 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
e3703b3d 2410
36e3deae
YH
2411 if (!(rt->rt6i_flags & RTF_EXPIRES))
2412 expires = 0;
2413 else if (rt->rt6i_expires - jiffies < INT_MAX)
2414 expires = rt->rt6i_expires - jiffies;
2415 else
2416 expires = INT_MAX;
69cdf8f9 2417
d8d1f30b
CG
2418 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2419 expires, rt->dst.error) < 0)
e3703b3d 2420 goto nla_put_failure;
2d7202bf
TG
2421
2422 return nlmsg_end(skb, nlh);
2423
2424nla_put_failure:
26932566
PM
2425 nlmsg_cancel(skb, nlh);
2426 return -EMSGSIZE;
1da177e4
LT
2427}
2428
1b43af54 2429int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2430{
2431 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2432 int prefix;
2433
2d7202bf
TG
2434 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2435 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2436 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2437 } else
2438 prefix = 0;
2439
191cd582
BH
2440 return rt6_fill_node(arg->net,
2441 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2442 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2443 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2444}
2445
c127ea2c 2446static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2447{
3b1e0a65 2448 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2449 struct nlattr *tb[RTA_MAX+1];
2450 struct rt6_info *rt;
1da177e4 2451 struct sk_buff *skb;
ab364a6f 2452 struct rtmsg *rtm;
4c9483b2 2453 struct flowi6 fl6;
ab364a6f 2454 int err, iif = 0;
1da177e4 2455
ab364a6f
TG
2456 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2457 if (err < 0)
2458 goto errout;
1da177e4 2459
ab364a6f 2460 err = -EINVAL;
4c9483b2 2461 memset(&fl6, 0, sizeof(fl6));
1da177e4 2462
ab364a6f
TG
2463 if (tb[RTA_SRC]) {
2464 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2465 goto errout;
2466
4c9483b2 2467 ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC]));
ab364a6f
TG
2468 }
2469
2470 if (tb[RTA_DST]) {
2471 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2472 goto errout;
2473
4c9483b2 2474 ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST]));
ab364a6f
TG
2475 }
2476
2477 if (tb[RTA_IIF])
2478 iif = nla_get_u32(tb[RTA_IIF]);
2479
2480 if (tb[RTA_OIF])
4c9483b2 2481 fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2482
2483 if (iif) {
2484 struct net_device *dev;
5578689a 2485 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2486 if (!dev) {
2487 err = -ENODEV;
ab364a6f 2488 goto errout;
1da177e4
LT
2489 }
2490 }
2491
ab364a6f
TG
2492 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2493 if (skb == NULL) {
2494 err = -ENOBUFS;
2495 goto errout;
2496 }
1da177e4 2497
ab364a6f
TG
2498 /* Reserve room for dummy headers, this skb can pass
2499 through good chunk of routing engine.
2500 */
459a98ed 2501 skb_reset_mac_header(skb);
ab364a6f 2502 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2503
4c9483b2 2504 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
d8d1f30b 2505 skb_dst_set(skb, &rt->dst);
1da177e4 2506
4c9483b2 2507 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
1da177e4 2508 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2509 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2510 if (err < 0) {
ab364a6f
TG
2511 kfree_skb(skb);
2512 goto errout;
1da177e4
LT
2513 }
2514
5578689a 2515 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2516errout:
1da177e4 2517 return err;
1da177e4
LT
2518}
2519
86872cb5 2520void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2521{
2522 struct sk_buff *skb;
5578689a 2523 struct net *net = info->nl_net;
528c4ceb
DL
2524 u32 seq;
2525 int err;
2526
2527 err = -ENOBUFS;
2528 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
86872cb5 2529
339bf98f 2530 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
21713ebc
TG
2531 if (skb == NULL)
2532 goto errout;
2533
191cd582 2534 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2535 event, info->pid, seq, 0, 0, 0);
26932566
PM
2536 if (err < 0) {
2537 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2538 WARN_ON(err == -EMSGSIZE);
2539 kfree_skb(skb);
2540 goto errout;
2541 }
1ce85fe4
PNA
2542 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2543 info->nlh, gfp_any());
2544 return;
21713ebc
TG
2545errout:
2546 if (err < 0)
5578689a 2547 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2548}
2549
8ed67789
DL
2550static int ip6_route_dev_notify(struct notifier_block *this,
2551 unsigned long event, void *data)
2552{
2553 struct net_device *dev = (struct net_device *)data;
c346dca1 2554 struct net *net = dev_net(dev);
8ed67789
DL
2555
2556 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2557 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2558 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2559#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2560 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2561 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2562 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2563 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2564#endif
2565 }
2566
2567 return NOTIFY_OK;
2568}
2569
1da177e4
LT
2570/*
2571 * /proc
2572 */
2573
2574#ifdef CONFIG_PROC_FS
2575
1da177e4
LT
/*
 * NOTE(review): none of the /proc handlers visible in this part of the
 * file reference this structure; confirm whether it is still needed.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2584
2585static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2586{
33120b30 2587 struct seq_file *m = p_arg;
1da177e4 2588
4b7a4274 2589 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2590
2591#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2592 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2593#else
33120b30 2594 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4
LT
2595#endif
2596
2597 if (rt->rt6i_nexthop) {
4b7a4274 2598 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
1da177e4 2599 } else {
33120b30 2600 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2601 }
33120b30 2602 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2603 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2604 rt->dst.__use, rt->rt6i_flags,
33120b30 2605 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1da177e4
LT
2606 return 0;
2607}
2608
33120b30 2609static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2610{
f3db4851
DL
2611 struct net *net = (struct net *)m->private;
2612 fib6_clean_all(net, rt6_info_route, 0, m);
33120b30
AD
2613 return 0;
2614}
1da177e4 2615
33120b30
AD
2616static int ipv6_route_open(struct inode *inode, struct file *file)
2617{
de05c557 2618 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2619}
2620
33120b30
AD
2621static const struct file_operations ipv6_route_proc_fops = {
2622 .owner = THIS_MODULE,
2623 .open = ipv6_route_open,
2624 .read = seq_read,
2625 .llseek = seq_lseek,
b6fcbdb4 2626 .release = single_release_net,
33120b30
AD
2627};
2628
1da177e4
LT
2629static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2630{
69ddb805 2631 struct net *net = (struct net *)seq->private;
1da177e4 2632 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2633 net->ipv6.rt6_stats->fib_nodes,
2634 net->ipv6.rt6_stats->fib_route_nodes,
2635 net->ipv6.rt6_stats->fib_rt_alloc,
2636 net->ipv6.rt6_stats->fib_rt_entries,
2637 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2638 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2639 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2640
2641 return 0;
2642}
2643
2644static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2645{
de05c557 2646 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2647}
2648
9a32144e 2649static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2650 .owner = THIS_MODULE,
2651 .open = rt6_stats_seq_open,
2652 .read = seq_read,
2653 .llseek = seq_lseek,
b6fcbdb4 2654 .release = single_release_net,
1da177e4
LT
2655};
2656#endif /* CONFIG_PROC_FS */
2657
2658#ifdef CONFIG_SYSCTL
2659
1da177e4 2660static
8d65af78 2661int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2662 void __user *buffer, size_t *lenp, loff_t *ppos)
2663{
c486da34
LAG
2664 struct net *net;
2665 int delay;
2666 if (!write)
1da177e4 2667 return -EINVAL;
c486da34
LAG
2668
2669 net = (struct net *)ctl->extra1;
2670 delay = net->ipv6.sysctl.flush_delay;
2671 proc_dointvec(ctl, write, buffer, lenp, ppos);
2672 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2673 return 0;
1da177e4
LT
2674}
2675
760f2d01 2676ctl_table ipv6_route_table_template[] = {
1ab1457c 2677 {
1da177e4 2678 .procname = "flush",
4990509f 2679 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2680 .maxlen = sizeof(int),
89c8b3a1 2681 .mode = 0200,
6d9f239a 2682 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2683 },
2684 {
1da177e4 2685 .procname = "gc_thresh",
9a7ec3a9 2686 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2687 .maxlen = sizeof(int),
2688 .mode = 0644,
6d9f239a 2689 .proc_handler = proc_dointvec,
1da177e4
LT
2690 },
2691 {
1da177e4 2692 .procname = "max_size",
4990509f 2693 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2694 .maxlen = sizeof(int),
2695 .mode = 0644,
6d9f239a 2696 .proc_handler = proc_dointvec,
1da177e4
LT
2697 },
2698 {
1da177e4 2699 .procname = "gc_min_interval",
4990509f 2700 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2701 .maxlen = sizeof(int),
2702 .mode = 0644,
6d9f239a 2703 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2704 },
2705 {
1da177e4 2706 .procname = "gc_timeout",
4990509f 2707 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2708 .maxlen = sizeof(int),
2709 .mode = 0644,
6d9f239a 2710 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2711 },
2712 {
1da177e4 2713 .procname = "gc_interval",
4990509f 2714 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2715 .maxlen = sizeof(int),
2716 .mode = 0644,
6d9f239a 2717 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2718 },
2719 {
1da177e4 2720 .procname = "gc_elasticity",
4990509f 2721 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2722 .maxlen = sizeof(int),
2723 .mode = 0644,
f3d3f616 2724 .proc_handler = proc_dointvec,
1da177e4
LT
2725 },
2726 {
1da177e4 2727 .procname = "mtu_expires",
4990509f 2728 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2729 .maxlen = sizeof(int),
2730 .mode = 0644,
6d9f239a 2731 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2732 },
2733 {
1da177e4 2734 .procname = "min_adv_mss",
4990509f 2735 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2736 .maxlen = sizeof(int),
2737 .mode = 0644,
f3d3f616 2738 .proc_handler = proc_dointvec,
1da177e4
LT
2739 },
2740 {
1da177e4 2741 .procname = "gc_min_interval_ms",
4990509f 2742 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2743 .maxlen = sizeof(int),
2744 .mode = 0644,
6d9f239a 2745 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2746 },
f8572d8f 2747 { }
1da177e4
LT
2748};
2749
2c8c1e72 2750struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2751{
2752 struct ctl_table *table;
2753
2754 table = kmemdup(ipv6_route_table_template,
2755 sizeof(ipv6_route_table_template),
2756 GFP_KERNEL);
5ee09105
YH
2757
2758 if (table) {
2759 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 2760 table[0].extra1 = net;
86393e52 2761 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2762 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2763 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2764 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2765 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2766 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2767 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2768 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2769 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2770 }
2771
760f2d01
DL
2772 return table;
2773}
1da177e4
LT
2774#endif
2775
2c8c1e72 2776static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2777{
633d424b 2778 int ret = -ENOMEM;
8ed67789 2779
86393e52
AD
2780 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2781 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2782
fc66f95c
ED
2783 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2784 goto out_ip6_dst_ops;
2785
8ed67789
DL
2786 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2787 sizeof(*net->ipv6.ip6_null_entry),
2788 GFP_KERNEL);
2789 if (!net->ipv6.ip6_null_entry)
fc66f95c 2790 goto out_ip6_dst_entries;
d8d1f30b 2791 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2792 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2793 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2794 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2795 ip6_template_metrics, true);
8ed67789
DL
2796
2797#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2798 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2799 sizeof(*net->ipv6.ip6_prohibit_entry),
2800 GFP_KERNEL);
68fffc67
PZ
2801 if (!net->ipv6.ip6_prohibit_entry)
2802 goto out_ip6_null_entry;
d8d1f30b 2803 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2804 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2805 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2806 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2807 ip6_template_metrics, true);
8ed67789
DL
2808
2809 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2810 sizeof(*net->ipv6.ip6_blk_hole_entry),
2811 GFP_KERNEL);
68fffc67
PZ
2812 if (!net->ipv6.ip6_blk_hole_entry)
2813 goto out_ip6_prohibit_entry;
d8d1f30b 2814 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2815 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2816 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2817 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2818 ip6_template_metrics, true);
8ed67789
DL
2819#endif
2820
b339a47c
PZ
2821 net->ipv6.sysctl.flush_delay = 0;
2822 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2823 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2824 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2825 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2826 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2827 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2828 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2829
cdb18761
DL
2830#ifdef CONFIG_PROC_FS
2831 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2832 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2833#endif
6891a346
BT
2834 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2835
8ed67789
DL
2836 ret = 0;
2837out:
2838 return ret;
f2fc6a54 2839
68fffc67
PZ
2840#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2841out_ip6_prohibit_entry:
2842 kfree(net->ipv6.ip6_prohibit_entry);
2843out_ip6_null_entry:
2844 kfree(net->ipv6.ip6_null_entry);
2845#endif
fc66f95c
ED
2846out_ip6_dst_entries:
2847 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2848out_ip6_dst_ops:
f2fc6a54 2849 goto out;
cdb18761
DL
2850}
2851
2c8c1e72 2852static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761
DL
2853{
2854#ifdef CONFIG_PROC_FS
2855 proc_net_remove(net, "ipv6_route");
2856 proc_net_remove(net, "rt6_stats");
2857#endif
8ed67789
DL
2858 kfree(net->ipv6.ip6_null_entry);
2859#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2860 kfree(net->ipv6.ip6_prohibit_entry);
2861 kfree(net->ipv6.ip6_blk_hole_entry);
2862#endif
41bb78b4 2863 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
2864}
2865
2866static struct pernet_operations ip6_route_net_ops = {
2867 .init = ip6_route_net_init,
2868 .exit = ip6_route_net_exit,
2869};
2870
8ed67789
DL
2871static struct notifier_block ip6_route_dev_notifier = {
2872 .notifier_call = ip6_route_dev_notify,
2873 .priority = 0,
2874};
2875
433d49c3 2876int __init ip6_route_init(void)
1da177e4 2877{
433d49c3
DL
2878 int ret;
2879
9a7ec3a9
DL
2880 ret = -ENOMEM;
2881 ip6_dst_ops_template.kmem_cachep =
e5d679f3 2882 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 2883 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 2884 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 2885 goto out;
14e50e57 2886
fc66f95c 2887 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 2888 if (ret)
bdb3289f 2889 goto out_kmem_cache;
bdb3289f 2890
fc66f95c
ED
2891 ret = register_pernet_subsys(&ip6_route_net_ops);
2892 if (ret)
2893 goto out_dst_entries;
2894
5dc121e9
AE
2895 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2896
8ed67789
DL
2897 /* Registering of the loopback is done before this portion of code,
2898 * the loopback reference in rt6_info will not be taken, do it
2899 * manually for init_net */
d8d1f30b 2900 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2901 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2902 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2903 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 2904 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 2905 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2906 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2907 #endif
433d49c3
DL
2908 ret = fib6_init();
2909 if (ret)
8ed67789 2910 goto out_register_subsys;
433d49c3 2911
433d49c3
DL
2912 ret = xfrm6_init();
2913 if (ret)
cdb18761 2914 goto out_fib6_init;
c35b7e72 2915
433d49c3
DL
2916 ret = fib6_rules_init();
2917 if (ret)
2918 goto xfrm6_init;
7e5449c2 2919
433d49c3
DL
2920 ret = -ENOBUFS;
2921 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2922 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2923 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2924 goto fib6_rules_init;
c127ea2c 2925
8ed67789 2926 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761
DL
2927 if (ret)
2928 goto fib6_rules_init;
8ed67789 2929
433d49c3
DL
2930out:
2931 return ret;
2932
2933fib6_rules_init:
433d49c3
DL
2934 fib6_rules_cleanup();
2935xfrm6_init:
433d49c3 2936 xfrm6_fini();
433d49c3 2937out_fib6_init:
433d49c3 2938 fib6_gc_cleanup();
8ed67789
DL
2939out_register_subsys:
2940 unregister_pernet_subsys(&ip6_route_net_ops);
fc66f95c
ED
2941out_dst_entries:
2942 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 2943out_kmem_cache:
f2fc6a54 2944 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 2945 goto out;
1da177e4
LT
2946}
2947
2948void ip6_route_cleanup(void)
2949{
8ed67789 2950 unregister_netdevice_notifier(&ip6_route_dev_notifier);
101367c2 2951 fib6_rules_cleanup();
1da177e4 2952 xfrm6_fini();
1da177e4 2953 fib6_gc_cleanup();
8ed67789 2954 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 2955 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 2956 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 2957}
This page took 1.387082 seconds and 5 git commands to generate.