ipv6: Move xfrm_lookup() call down into icmp6_dst_alloc().
[deliverable/linux.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4 28#include <linux/errno.h>
bc3b2d7f 29#include <linux/export.h>
1da177e4
LT
30#include <linux/types.h>
31#include <linux/times.h>
32#include <linux/socket.h>
33#include <linux/sockios.h>
34#include <linux/net.h>
35#include <linux/route.h>
36#include <linux/netdevice.h>
37#include <linux/in6.h>
7bc570c8 38#include <linux/mroute6.h>
1da177e4 39#include <linux/init.h>
1da177e4 40#include <linux/if_arp.h>
1da177e4
LT
41#include <linux/proc_fs.h>
42#include <linux/seq_file.h>
5b7c931d 43#include <linux/nsproxy.h>
5a0e3ad6 44#include <linux/slab.h>
457c4cbc 45#include <net/net_namespace.h>
1da177e4
LT
46#include <net/snmp.h>
47#include <net/ipv6.h>
48#include <net/ip6_fib.h>
49#include <net/ip6_route.h>
50#include <net/ndisc.h>
51#include <net/addrconf.h>
52#include <net/tcp.h>
53#include <linux/rtnetlink.h>
54#include <net/dst.h>
55#include <net/xfrm.h>
8d71740c 56#include <net/netevent.h>
21713ebc 57#include <net/netlink.h>
1da177e4
LT
58
59#include <asm/uaccess.h>
60
61#ifdef CONFIG_SYSCTL
62#include <linux/sysctl.h>
63#endif
64
/* Debug verbosity for this file; a value of 3 or more enables tracing. */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
/* Tracing disabled: both helpers expand to nothing. */
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
/* Tracing enabled: forward to printk. */
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
75
21efcfa0
ED
76static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
77 const struct in6_addr *dest);
1da177e4 78static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 79static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 80static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
81static struct dst_entry *ip6_negative_advice(struct dst_entry *);
82static void ip6_dst_destroy(struct dst_entry *);
83static void ip6_dst_ifdown(struct dst_entry *,
84 struct net_device *dev, int how);
569d3645 85static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
86
87static int ip6_pkt_discard(struct sk_buff *skb);
88static int ip6_pkt_discard_out(struct sk_buff *skb);
89static void ip6_link_failure(struct sk_buff *skb);
90static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
91
70ceb4f5 92#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 93static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
94 const struct in6_addr *prefix, int prefixlen,
95 const struct in6_addr *gwaddr, int ifindex,
70ceb4f5 96 unsigned pref);
efa2cea0 97static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
98 const struct in6_addr *prefix, int prefixlen,
99 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
100#endif
101
06582540
DM
102static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
103{
104 struct rt6_info *rt = (struct rt6_info *) dst;
105 struct inet_peer *peer;
106 u32 *p = NULL;
107
8e2ec639
YZ
108 if (!(rt->dst.flags & DST_HOST))
109 return NULL;
110
06582540
DM
111 if (!rt->rt6i_peer)
112 rt6_bind_peer(rt, 1);
113
114 peer = rt->rt6i_peer;
115 if (peer) {
116 u32 *old_p = __DST_METRICS_PTR(old);
117 unsigned long prev, new;
118
119 p = peer->metrics;
120 if (inet_metrics_new(peer))
121 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
122
123 new = (unsigned long) p;
124 prev = cmpxchg(&dst->_metrics, old, new);
125
126 if (prev != old) {
127 p = __DST_METRICS_PTR(prev);
128 if (prev & DST_METRICS_READ_ONLY)
129 p = NULL;
130 }
131 }
132 return p;
133}
134
d3aaeb38
DM
135static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
136{
137 return __neigh_lookup_errno(&nd_tbl, daddr, dst->dev);
138}
139
9a7ec3a9 140static struct dst_ops ip6_dst_ops_template = {
1da177e4 141 .family = AF_INET6,
09640e63 142 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
143 .gc = ip6_dst_gc,
144 .gc_thresh = 1024,
145 .check = ip6_dst_check,
0dbaee3b 146 .default_advmss = ip6_default_advmss,
ebb762f2 147 .mtu = ip6_mtu,
06582540 148 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
149 .destroy = ip6_dst_destroy,
150 .ifdown = ip6_dst_ifdown,
151 .negative_advice = ip6_negative_advice,
152 .link_failure = ip6_link_failure,
153 .update_pmtu = ip6_rt_update_pmtu,
1ac06e03 154 .local_out = __ip6_local_out,
d3aaeb38 155 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
156};
157
ebb762f2 158static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 159{
618f9bc7
SK
160 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
161
162 return mtu ? : dst->dev->mtu;
ec831ea7
RD
163}
164
14e50e57
DM
165static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
166{
167}
168
0972ddb2
HB
169static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
170 unsigned long old)
171{
172 return NULL;
173}
174
14e50e57
DM
175static struct dst_ops ip6_dst_blackhole_ops = {
176 .family = AF_INET6,
09640e63 177 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
178 .destroy = ip6_dst_destroy,
179 .check = ip6_dst_check,
ebb762f2 180 .mtu = ip6_blackhole_mtu,
214f45c9 181 .default_advmss = ip6_default_advmss,
14e50e57 182 .update_pmtu = ip6_rt_blackhole_update_pmtu,
0972ddb2 183 .cow_metrics = ip6_rt_blackhole_cow_metrics,
d3aaeb38 184 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
185};
186
62fa8a84
DM
187static const u32 ip6_template_metrics[RTAX_MAX] = {
188 [RTAX_HOPLIMIT - 1] = 255,
189};
190
bdb3289f 191static struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
192 .dst = {
193 .__refcnt = ATOMIC_INIT(1),
194 .__use = 1,
195 .obsolete = -1,
196 .error = -ENETUNREACH,
d8d1f30b
CG
197 .input = ip6_pkt_discard,
198 .output = ip6_pkt_discard_out,
1da177e4
LT
199 },
200 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 201 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
202 .rt6i_metric = ~(u32) 0,
203 .rt6i_ref = ATOMIC_INIT(1),
204};
205
101367c2
TG
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Template for the "prohibit" route: a reject route whose dst carries
 * -EACCES and uses the ip6_pkt_prohibit handlers.
 */
static struct rt6_info ip6_prohibit_entry_template = {
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
};

/*
 * Template for the "blackhole" route: a reject route whose dst carries
 * -EINVAL and hands packets to dst_discard in both directions.
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard,
	},
};

#endif
242
1da177e4 243/* allocate dst with ip6_dst_ops */
5c1e6aa3 244static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
957c665f
DM
245 struct net_device *dev,
246 int flags)
1da177e4 247{
957c665f 248 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
cf911662 249
38308473 250 if (rt)
fbe58186 251 memset(&rt->rt6i_table, 0,
38308473 252 sizeof(*rt) - sizeof(struct dst_entry));
cf911662
DM
253
254 return rt;
1da177e4
LT
255}
256
257static void ip6_dst_destroy(struct dst_entry *dst)
258{
259 struct rt6_info *rt = (struct rt6_info *)dst;
260 struct inet6_dev *idev = rt->rt6i_idev;
b3419363 261 struct inet_peer *peer = rt->rt6i_peer;
1da177e4 262
8e2ec639
YZ
263 if (!(rt->dst.flags & DST_HOST))
264 dst_destroy_metrics_generic(dst);
265
38308473 266 if (idev) {
1da177e4
LT
267 rt->rt6i_idev = NULL;
268 in6_dev_put(idev);
1ab1457c 269 }
b3419363 270 if (peer) {
b3419363
DM
271 rt->rt6i_peer = NULL;
272 inet_putpeer(peer);
273 }
274}
275
6431cbc2
DM
276static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
277
278static u32 rt6_peer_genid(void)
279{
280 return atomic_read(&__rt6_peer_genid);
281}
282
b3419363
DM
283void rt6_bind_peer(struct rt6_info *rt, int create)
284{
285 struct inet_peer *peer;
286
b3419363
DM
287 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
288 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
289 inet_putpeer(peer);
6431cbc2
DM
290 else
291 rt->rt6i_peer_genid = rt6_peer_genid();
1da177e4
LT
292}
293
294static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
295 int how)
296{
297 struct rt6_info *rt = (struct rt6_info *)dst;
298 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 299 struct net_device *loopback_dev =
c346dca1 300 dev_net(dev)->loopback_dev;
1da177e4 301
38308473 302 if (dev != loopback_dev && idev && idev->dev == dev) {
5a3e55d6
DL
303 struct inet6_dev *loopback_idev =
304 in6_dev_get(loopback_dev);
38308473 305 if (loopback_idev) {
1da177e4
LT
306 rt->rt6i_idev = loopback_idev;
307 in6_dev_put(idev);
308 }
309 }
310}
311
312static __inline__ int rt6_check_expired(const struct rt6_info *rt)
313{
a02cec21
ED
314 return (rt->rt6i_flags & RTF_EXPIRES) &&
315 time_after(jiffies, rt->rt6i_expires);
1da177e4
LT
316}
317
b71d1d42 318static inline int rt6_need_strict(const struct in6_addr *daddr)
c71099ac 319{
a02cec21
ED
320 return ipv6_addr_type(daddr) &
321 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
322}
323
1da177e4 324/*
c71099ac 325 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
326 */
327
8ed67789
DL
328static inline struct rt6_info *rt6_device_match(struct net *net,
329 struct rt6_info *rt,
b71d1d42 330 const struct in6_addr *saddr,
1da177e4 331 int oif,
d420895e 332 int flags)
1da177e4
LT
333{
334 struct rt6_info *local = NULL;
335 struct rt6_info *sprt;
336
dd3abc4e
YH
337 if (!oif && ipv6_addr_any(saddr))
338 goto out;
339
d8d1f30b 340 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
dd3abc4e
YH
341 struct net_device *dev = sprt->rt6i_dev;
342
343 if (oif) {
1da177e4
LT
344 if (dev->ifindex == oif)
345 return sprt;
346 if (dev->flags & IFF_LOOPBACK) {
38308473 347 if (!sprt->rt6i_idev ||
1da177e4 348 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 349 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 350 continue;
1ab1457c 351 if (local && (!oif ||
1da177e4
LT
352 local->rt6i_idev->dev->ifindex == oif))
353 continue;
354 }
355 local = sprt;
356 }
dd3abc4e
YH
357 } else {
358 if (ipv6_chk_addr(net, saddr, dev,
359 flags & RT6_LOOKUP_F_IFACE))
360 return sprt;
1da177e4 361 }
dd3abc4e 362 }
1da177e4 363
dd3abc4e 364 if (oif) {
1da177e4
LT
365 if (local)
366 return local;
367
d420895e 368 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 369 return net->ipv6.ip6_null_entry;
1da177e4 370 }
dd3abc4e 371out:
1da177e4
LT
372 return rt;
373}
374
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: send a Neighbour Solicitation towards the
 * neighbour attached to @rt when that neighbour is not in a NUD_VALID
 * state and the per-interface rtr_probe_interval has elapsed since
 * neigh->updated.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one per
 * minute.
 *
 * @rt: route to probe; NULL is accepted and ignored.
 *
 * neigh->updated is used as the rate-limit timestamp and is modified here,
 * so the neighbour's lock is taken for writing: the check and the update
 * must be atomic with respect to other probers, otherwise concurrent
 * callers could each pass the interval test and each send a probe.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	struct in6_addr mcaddr;
	struct in6_addr *target;

	rcu_read_lock();
	neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;

	/* Cheap unlocked test first; re-checked under the lock below. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		goto out;

	write_lock_bh(&neigh->lock);
	if ((neigh->nud_state & NUD_VALID) ||
	    !time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		write_unlock_bh(&neigh->lock);
		goto out;
	}
	neigh->updated = jiffies;
	write_unlock_bh(&neigh->lock);

	/* Solicit the neighbour via its solicited-node multicast address. */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
out:
	rcu_read_unlock();
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
414
1da177e4 415/*
554cfb7e 416 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 417 */
b6f99a21 418static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e
YH
419{
420 struct net_device *dev = rt->rt6i_dev;
161980f4 421 if (!oif || dev->ifindex == oif)
554cfb7e 422 return 2;
161980f4
DM
423 if ((dev->flags & IFF_LOOPBACK) &&
424 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
425 return 1;
426 return 0;
554cfb7e 427}
1da177e4 428
b6f99a21 429static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 430{
f2c31e32 431 struct neighbour *neigh;
398bcbeb 432 int m;
f2c31e32
ED
433
434 rcu_read_lock();
27217455 435 neigh = dst_get_neighbour_noref(&rt->dst);
4d0c5911
YH
436 if (rt->rt6i_flags & RTF_NONEXTHOP ||
437 !(rt->rt6i_flags & RTF_GATEWAY))
438 m = 1;
439 else if (neigh) {
554cfb7e
YH
440 read_lock_bh(&neigh->lock);
441 if (neigh->nud_state & NUD_VALID)
4d0c5911 442 m = 2;
398bcbeb
YH
443#ifdef CONFIG_IPV6_ROUTER_PREF
444 else if (neigh->nud_state & NUD_FAILED)
445 m = 0;
446#endif
447 else
ea73ee23 448 m = 1;
554cfb7e 449 read_unlock_bh(&neigh->lock);
398bcbeb
YH
450 } else
451 m = 0;
f2c31e32 452 rcu_read_unlock();
554cfb7e 453 return m;
1da177e4
LT
454}
455
554cfb7e
YH
456static int rt6_score_route(struct rt6_info *rt, int oif,
457 int strict)
1da177e4 458{
4d0c5911 459 int m, n;
1ab1457c 460
4d0c5911 461 m = rt6_check_dev(rt, oif);
77d16f45 462 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 463 return -1;
ebacaaa0
YH
464#ifdef CONFIG_IPV6_ROUTER_PREF
465 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
466#endif
4d0c5911 467 n = rt6_check_neigh(rt);
557e92ef 468 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
469 return -1;
470 return m;
471}
472
f11e6659
DM
473static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
474 int *mpri, struct rt6_info *match)
554cfb7e 475{
f11e6659
DM
476 int m;
477
478 if (rt6_check_expired(rt))
479 goto out;
480
481 m = rt6_score_route(rt, oif, strict);
482 if (m < 0)
483 goto out;
484
485 if (m > *mpri) {
486 if (strict & RT6_LOOKUP_F_REACHABLE)
487 rt6_probe(match);
488 *mpri = m;
489 match = rt;
490 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
491 rt6_probe(rt);
492 }
493
494out:
495 return match;
496}
497
498static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
499 struct rt6_info *rr_head,
500 u32 metric, int oif, int strict)
501{
502 struct rt6_info *rt, *match;
554cfb7e 503 int mpri = -1;
1da177e4 504
f11e6659
DM
505 match = NULL;
506 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 507 rt = rt->dst.rt6_next)
f11e6659
DM
508 match = find_match(rt, oif, strict, &mpri, match);
509 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 510 rt = rt->dst.rt6_next)
f11e6659 511 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 512
f11e6659
DM
513 return match;
514}
1da177e4 515
f11e6659
DM
516static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
517{
518 struct rt6_info *match, *rt0;
8ed67789 519 struct net *net;
1da177e4 520
f11e6659 521 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
0dc47877 522 __func__, fn->leaf, oif);
554cfb7e 523
f11e6659
DM
524 rt0 = fn->rr_ptr;
525 if (!rt0)
526 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 527
f11e6659 528 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 529
554cfb7e 530 if (!match &&
f11e6659 531 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 532 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 533
554cfb7e 534 /* no entries matched; do round-robin */
f11e6659
DM
535 if (!next || next->rt6i_metric != rt0->rt6i_metric)
536 next = fn->leaf;
537
538 if (next != rt0)
539 fn->rr_ptr = next;
1da177e4 540 }
1da177e4 541
f11e6659 542 RT6_TRACE("%s() => %p\n",
0dc47877 543 __func__, match);
1da177e4 544
c346dca1 545 net = dev_net(rt0->rt6i_dev);
a02cec21 546 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
547}
548
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a route information option.
 *
 * @dev:    device the option was received on
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    length of the option in bytes
 * @gwaddr: address of the advertising router
 *
 * Returns -EINVAL when the option is too short, when its length and
 * prefix_len fields are inconsistent, or when the preference is
 * ICMPV6_ROUTER_PREF_INVALID; returns 0 otherwise. A zero lifetime deletes
 * an existing route; a non-zero lifetime adds a missing one or refreshes
 * the flags of an existing one, and then updates its expiry.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3)
		return -EINVAL;
	if (rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		/* The option carries a full 128-bit prefix: use it in place. */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* A zero lifetime withdraws the route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (rt) {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
	} else if (lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev->ifindex, pref);
	}

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime)) {
		rt->rt6i_expires = jiffies + HZ * lifetime;
		rt->rt6i_flags |= RTF_EXPIRES;
	} else {
		rt->rt6i_flags &= ~RTF_EXPIRES;
	}
	dst_release(&rt->dst);

	return 0;
}
#endif
622
/*
 * BACKTRACK - climb the fib6 tree after a lookup produced the null entry.
 *
 * Relies on the expanding function providing the locals `rt` and `fn` and
 * the labels `out` and `restart`. When `rt` is the namespace's null entry:
 * jumps to `out` once a node flagged RTN_TL_ROOT is reached; otherwise moves
 * `fn` to its parent (or to a lookup in the parent's subtree keyed by @saddr
 * when the parent has a subtree other than `fn`) and jumps to `restart` as
 * soon as the new node is flagged RTN_RTINFO.
 */
#define BACKTRACK(__net, saddr) \
do { \
	if (rt == __net->ipv6.ip6_null_entry) { \
		struct fib6_node *pn; \
		for (;;) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (!FIB6_SUBTREE(pn) || FIB6_SUBTREE(pn) == fn) \
				fn = pn; \
			else \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
c71099ac 640
8ed67789
DL
641static struct rt6_info *ip6_pol_route_lookup(struct net *net,
642 struct fib6_table *table,
4c9483b2 643 struct flowi6 *fl6, int flags)
1da177e4
LT
644{
645 struct fib6_node *fn;
646 struct rt6_info *rt;
647
c71099ac 648 read_lock_bh(&table->tb6_lock);
4c9483b2 649 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
650restart:
651 rt = fn->leaf;
4c9483b2
DM
652 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
653 BACKTRACK(net, &fl6->saddr);
c71099ac 654out:
d8d1f30b 655 dst_use(&rt->dst, jiffies);
c71099ac 656 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
657 return rt;
658
659}
660
9acd9f3a
YH
661struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
662 const struct in6_addr *saddr, int oif, int strict)
c71099ac 663{
4c9483b2
DM
664 struct flowi6 fl6 = {
665 .flowi6_oif = oif,
666 .daddr = *daddr,
c71099ac
TG
667 };
668 struct dst_entry *dst;
77d16f45 669 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 670
adaa70bb 671 if (saddr) {
4c9483b2 672 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
673 flags |= RT6_LOOKUP_F_HAS_SADDR;
674 }
675
4c9483b2 676 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
677 if (dst->error == 0)
678 return (struct rt6_info *) dst;
679
680 dst_release(dst);
681
1da177e4
LT
682 return NULL;
683}
684
7159039a
YH
685EXPORT_SYMBOL(rt6_lookup);
686
/*
 * ip6_ins_rt is called with table->tb6_lock NOT held.
 * It takes a new route entry; if the addition fails for any reason, the
 * route is freed. In any case, if the caller does not hold a reference
 * to it, it may be destroyed.
 */
692
86872cb5 693static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
694{
695 int err;
c71099ac 696 struct fib6_table *table;
1da177e4 697
c71099ac
TG
698 table = rt->rt6i_table;
699 write_lock_bh(&table->tb6_lock);
86872cb5 700 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 701 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
702
703 return err;
704}
705
40e22e8f
TG
706int ip6_ins_rt(struct rt6_info *rt)
707{
4d1169c1 708 struct nl_info info = {
c346dca1 709 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 710 };
528c4ceb 711 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
712}
713
21efcfa0
ED
714static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
715 const struct in6_addr *daddr,
b71d1d42 716 const struct in6_addr *saddr)
1da177e4 717{
1da177e4
LT
718 struct rt6_info *rt;
719
720 /*
721 * Clone the route.
722 */
723
21efcfa0 724 rt = ip6_rt_copy(ort, daddr);
1da177e4
LT
725
726 if (rt) {
14deae41
DM
727 struct neighbour *neigh;
728 int attempts = !in_softirq();
729
38308473 730 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
58c4fb86 731 if (rt->rt6i_dst.plen != 128 &&
21efcfa0 732 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
58c4fb86 733 rt->rt6i_flags |= RTF_ANYCAST;
4e3fd7a0 734 rt->rt6i_gateway = *daddr;
58c4fb86 735 }
1da177e4 736
1da177e4 737 rt->rt6i_flags |= RTF_CACHE;
1da177e4
LT
738
739#ifdef CONFIG_IPV6_SUBTREES
740 if (rt->rt6i_src.plen && saddr) {
4e3fd7a0 741 rt->rt6i_src.addr = *saddr;
1da177e4
LT
742 rt->rt6i_src.plen = 128;
743 }
744#endif
745
14deae41 746 retry:
04a6f441
DM
747 neigh = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway,
748 rt->rt6i_dev);
14deae41
DM
749 if (IS_ERR(neigh)) {
750 struct net *net = dev_net(rt->rt6i_dev);
751 int saved_rt_min_interval =
752 net->ipv6.sysctl.ip6_rt_gc_min_interval;
753 int saved_rt_elasticity =
754 net->ipv6.sysctl.ip6_rt_gc_elasticity;
755
756 if (attempts-- > 0) {
757 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
758 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
759
86393e52 760 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
761
762 net->ipv6.sysctl.ip6_rt_gc_elasticity =
763 saved_rt_elasticity;
764 net->ipv6.sysctl.ip6_rt_gc_min_interval =
765 saved_rt_min_interval;
766 goto retry;
767 }
768
769 if (net_ratelimit())
770 printk(KERN_WARNING
7e1b33e5 771 "ipv6: Neighbour table overflow.\n");
d8d1f30b 772 dst_free(&rt->dst);
14deae41
DM
773 return NULL;
774 }
69cce1d1 775 dst_set_neighbour(&rt->dst, neigh);
1da177e4 776
95a9a5ba 777 }
1da177e4 778
95a9a5ba
YH
779 return rt;
780}
1da177e4 781
21efcfa0
ED
782static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
783 const struct in6_addr *daddr)
299d9939 784{
21efcfa0
ED
785 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
786
299d9939 787 if (rt) {
299d9939 788 rt->rt6i_flags |= RTF_CACHE;
27217455 789 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
299d9939
YH
790 }
791 return rt;
792}
793
8ed67789 794static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 795 struct flowi6 *fl6, int flags)
1da177e4
LT
796{
797 struct fib6_node *fn;
519fbd87 798 struct rt6_info *rt, *nrt;
c71099ac 799 int strict = 0;
1da177e4 800 int attempts = 3;
519fbd87 801 int err;
53b7997f 802 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 803
77d16f45 804 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
805
806relookup:
c71099ac 807 read_lock_bh(&table->tb6_lock);
1da177e4 808
8238dd06 809restart_2:
4c9483b2 810 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1da177e4
LT
811
812restart:
4acad72d 813 rt = rt6_select(fn, oif, strict | reachable);
8ed67789 814
4c9483b2 815 BACKTRACK(net, &fl6->saddr);
8ed67789 816 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 817 rt->rt6i_flags & RTF_CACHE)
1ddef044 818 goto out;
1da177e4 819
d8d1f30b 820 dst_hold(&rt->dst);
c71099ac 821 read_unlock_bh(&table->tb6_lock);
fb9de91e 822
27217455 823 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
4c9483b2 824 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
7343ff31 825 else if (!(rt->dst.flags & DST_HOST))
4c9483b2 826 nrt = rt6_alloc_clone(rt, &fl6->daddr);
7343ff31
DM
827 else
828 goto out2;
e40cf353 829
d8d1f30b 830 dst_release(&rt->dst);
8ed67789 831 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 832
d8d1f30b 833 dst_hold(&rt->dst);
519fbd87 834 if (nrt) {
40e22e8f 835 err = ip6_ins_rt(nrt);
519fbd87 836 if (!err)
1da177e4 837 goto out2;
1da177e4 838 }
1da177e4 839
519fbd87
YH
840 if (--attempts <= 0)
841 goto out2;
842
843 /*
c71099ac 844 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
845 * released someone could insert this route. Relookup.
846 */
d8d1f30b 847 dst_release(&rt->dst);
519fbd87
YH
848 goto relookup;
849
850out:
8238dd06
YH
851 if (reachable) {
852 reachable = 0;
853 goto restart_2;
854 }
d8d1f30b 855 dst_hold(&rt->dst);
c71099ac 856 read_unlock_bh(&table->tb6_lock);
1da177e4 857out2:
d8d1f30b
CG
858 rt->dst.lastuse = jiffies;
859 rt->dst.__use++;
c71099ac
TG
860
861 return rt;
1da177e4
LT
862}
863
8ed67789 864static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 865 struct flowi6 *fl6, int flags)
4acad72d 866{
4c9483b2 867 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
868}
869
c71099ac
TG
870void ip6_route_input(struct sk_buff *skb)
871{
b71d1d42 872 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 873 struct net *net = dev_net(skb->dev);
adaa70bb 874 int flags = RT6_LOOKUP_F_HAS_SADDR;
4c9483b2
DM
875 struct flowi6 fl6 = {
876 .flowi6_iif = skb->dev->ifindex,
877 .daddr = iph->daddr,
878 .saddr = iph->saddr,
38308473 879 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
4c9483b2
DM
880 .flowi6_mark = skb->mark,
881 .flowi6_proto = iph->nexthdr,
c71099ac 882 };
adaa70bb 883
1d6e55f1 884 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
adaa70bb 885 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 886
4c9483b2 887 skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
c71099ac
TG
888}
889
8ed67789 890static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 891 struct flowi6 *fl6, int flags)
1da177e4 892{
4c9483b2 893 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
894}
895
9c7a4f9c 896struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 897 struct flowi6 *fl6)
c71099ac
TG
898{
899 int flags = 0;
900
4c9483b2 901 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
77d16f45 902 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 903
4c9483b2 904 if (!ipv6_addr_any(&fl6->saddr))
adaa70bb 905 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
906 else if (sk)
907 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 908
4c9483b2 909 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4
LT
910}
911
7159039a 912EXPORT_SYMBOL(ip6_route_output);
1da177e4 913
2774c131 914struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 915{
5c1e6aa3 916 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
917 struct dst_entry *new = NULL;
918
5c1e6aa3 919 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
14e50e57 920 if (rt) {
cf911662
DM
921 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
922
d8d1f30b 923 new = &rt->dst;
14e50e57 924
14e50e57 925 new->__use = 1;
352e512c
HX
926 new->input = dst_discard;
927 new->output = dst_discard;
14e50e57 928
21efcfa0
ED
929 if (dst_metrics_read_only(&ort->dst))
930 new->_metrics = ort->dst._metrics;
931 else
932 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
933 rt->rt6i_idev = ort->rt6i_idev;
934 if (rt->rt6i_idev)
935 in6_dev_hold(rt->rt6i_idev);
936 rt->rt6i_expires = 0;
937
4e3fd7a0 938 rt->rt6i_gateway = ort->rt6i_gateway;
14e50e57
DM
939 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
940 rt->rt6i_metric = 0;
941
942 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
943#ifdef CONFIG_IPV6_SUBTREES
944 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
945#endif
946
947 dst_free(new);
948 }
949
69ead7af
DM
950 dst_release(dst_orig);
951 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 952}
14e50e57 953
1da177e4
LT
954/*
955 * Destination cache support functions
956 */
957
958static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
959{
960 struct rt6_info *rt;
961
962 rt = (struct rt6_info *) dst;
963
6431cbc2
DM
964 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
965 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
966 if (!rt->rt6i_peer)
967 rt6_bind_peer(rt, 0);
968 rt->rt6i_peer_genid = rt6_peer_genid();
969 }
1da177e4 970 return dst;
6431cbc2 971 }
1da177e4
LT
972 return NULL;
973}
974
975static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
976{
977 struct rt6_info *rt = (struct rt6_info *) dst;
978
979 if (rt) {
54c1a859
YH
980 if (rt->rt6i_flags & RTF_CACHE) {
981 if (rt6_check_expired(rt)) {
982 ip6_del_rt(rt);
983 dst = NULL;
984 }
985 } else {
1da177e4 986 dst_release(dst);
54c1a859
YH
987 dst = NULL;
988 }
1da177e4 989 }
54c1a859 990 return dst;
1da177e4
LT
991}
992
993static void ip6_link_failure(struct sk_buff *skb)
994{
995 struct rt6_info *rt;
996
3ffe533c 997 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 998
adf30907 999 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1000 if (rt) {
38308473 1001 if (rt->rt6i_flags & RTF_CACHE) {
d8d1f30b 1002 dst_set_expires(&rt->dst, 0);
1da177e4
LT
1003 rt->rt6i_flags |= RTF_EXPIRES;
1004 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1005 rt->rt6i_node->fn_sernum = -1;
1006 }
1007}
1008
1009static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1010{
1011 struct rt6_info *rt6 = (struct rt6_info*)dst;
1012
1013 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1014 rt6->rt6i_flags |= RTF_MODIFIED;
1015 if (mtu < IPV6_MIN_MTU) {
defb3519 1016 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 1017 mtu = IPV6_MIN_MTU;
defb3519
DM
1018 features |= RTAX_FEATURE_ALLFRAG;
1019 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 1020 }
defb3519 1021 dst_metric_set(dst, RTAX_MTU, mtu);
1da177e4
LT
1022 }
1023}
1024
0dbaee3b 1025static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1026{
0dbaee3b
DM
1027 struct net_device *dev = dst->dev;
1028 unsigned int mtu = dst_mtu(dst);
1029 struct net *net = dev_net(dev);
1030
1da177e4
LT
1031 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1032
5578689a
DL
1033 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1034 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1035
1036 /*
1ab1457c
YH
1037 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1038 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1039 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1040 * rely only on pmtu discovery"
1041 */
1042 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1043 mtu = IPV6_MAXPLEN;
1044 return mtu;
1045}
1046
ebb762f2 1047static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1048{
d33e4553 1049 struct inet6_dev *idev;
618f9bc7
SK
1050 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1051
1052 if (mtu)
1053 return mtu;
1054
1055 mtu = IPV6_MIN_MTU;
d33e4553
DM
1056
1057 rcu_read_lock();
1058 idev = __in6_dev_get(dst->dev);
1059 if (idev)
1060 mtu = idev->cnf.mtu6;
1061 rcu_read_unlock();
1062
1063 return mtu;
1064}
1065
3b00944c
YH
1066static struct dst_entry *icmp6_dst_gc_list;
1067static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1068
3b00944c 1069struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 1070 struct neighbour *neigh,
87a11578 1071 struct flowi6 *fl6)
1da177e4 1072{
87a11578 1073 struct dst_entry *dst;
1da177e4
LT
1074 struct rt6_info *rt;
1075 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1076 struct net *net = dev_net(dev);
1da177e4 1077
38308473 1078 if (unlikely(!idev))
1da177e4
LT
1079 return NULL;
1080
957c665f 1081 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
38308473 1082 if (unlikely(!rt)) {
1da177e4 1083 in6_dev_put(idev);
87a11578 1084 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1085 goto out;
1086 }
1087
1da177e4
LT
1088 if (neigh)
1089 neigh_hold(neigh);
14deae41 1090 else {
87a11578 1091 neigh = __neigh_lookup_errno(&nd_tbl, &fl6->daddr, dev);
14deae41
DM
1092 if (IS_ERR(neigh))
1093 neigh = NULL;
1094 }
1da177e4 1095
8e2ec639
YZ
1096 rt->dst.flags |= DST_HOST;
1097 rt->dst.output = ip6_output;
69cce1d1 1098 dst_set_neighbour(&rt->dst, neigh);
d8d1f30b 1099 atomic_set(&rt->dst.__refcnt, 1);
87a11578 1100 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1101 rt->rt6i_dst.plen = 128;
1102 rt->rt6i_idev = idev;
7011687f 1103 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1da177e4 1104
3b00944c 1105 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1106 rt->dst.next = icmp6_dst_gc_list;
1107 icmp6_dst_gc_list = &rt->dst;
3b00944c 1108 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1109
5578689a 1110 fib6_force_start_gc(net);
1da177e4 1111
87a11578
DM
1112 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1113
1da177e4 1114out:
87a11578 1115 return dst;
1da177e4
LT
1116}
1117
3d0f24a7 1118int icmp6_dst_gc(void)
1da177e4 1119{
e9476e95 1120 struct dst_entry *dst, **pprev;
3d0f24a7 1121 int more = 0;
1da177e4 1122
3b00944c
YH
1123 spin_lock_bh(&icmp6_dst_lock);
1124 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1125
1da177e4
LT
1126 while ((dst = *pprev) != NULL) {
1127 if (!atomic_read(&dst->__refcnt)) {
1128 *pprev = dst->next;
1129 dst_free(dst);
1da177e4
LT
1130 } else {
1131 pprev = &dst->next;
3d0f24a7 1132 ++more;
1da177e4
LT
1133 }
1134 }
1135
3b00944c 1136 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1137
3d0f24a7 1138 return more;
1da177e4
LT
1139}
1140
1e493d19
DM
1141static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1142 void *arg)
1143{
1144 struct dst_entry *dst, **pprev;
1145
1146 spin_lock_bh(&icmp6_dst_lock);
1147 pprev = &icmp6_dst_gc_list;
1148 while ((dst = *pprev) != NULL) {
1149 struct rt6_info *rt = (struct rt6_info *) dst;
1150 if (func(rt, arg)) {
1151 *pprev = dst->next;
1152 dst_free(dst);
1153 } else {
1154 pprev = &dst->next;
1155 }
1156 }
1157 spin_unlock_bh(&icmp6_dst_lock);
1158}
1159
569d3645 1160static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1161{
1da177e4 1162 unsigned long now = jiffies;
86393e52 1163 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1164 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1165 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1166 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1167 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1168 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1169 int entries;
7019b78e 1170
fc66f95c 1171 entries = dst_entries_get_fast(ops);
7019b78e 1172 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1173 entries <= rt_max_size)
1da177e4
LT
1174 goto out;
1175
6891a346
BT
1176 net->ipv6.ip6_rt_gc_expire++;
1177 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1178 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1179 entries = dst_entries_get_slow(ops);
1180 if (entries < ops->gc_thresh)
7019b78e 1181 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1182out:
7019b78e 1183 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1184 return entries > rt_max_size;
1da177e4
LT
1185}
1186
1187/* Clean host part of a prefix. Not necessary in radix tree,
1188 but results in cleaner routing tables.
1189
1190 Remove it only when all the things will work!
1191 */
1192
6b75d090 1193int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1194{
5170ae82 1195 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1196 if (hoplimit == 0) {
6b75d090 1197 struct net_device *dev = dst->dev;
c68f24cc
ED
1198 struct inet6_dev *idev;
1199
1200 rcu_read_lock();
1201 idev = __in6_dev_get(dev);
1202 if (idev)
6b75d090 1203 hoplimit = idev->cnf.hop_limit;
c68f24cc 1204 else
53b7997f 1205 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1206 rcu_read_unlock();
1da177e4
LT
1207 }
1208 return hoplimit;
1209}
abbf46ae 1210EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1211
1212/*
1213 *
1214 */
1215
86872cb5 1216int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1217{
1218 int err;
5578689a 1219 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1220 struct rt6_info *rt = NULL;
1221 struct net_device *dev = NULL;
1222 struct inet6_dev *idev = NULL;
c71099ac 1223 struct fib6_table *table;
1da177e4
LT
1224 int addr_type;
1225
86872cb5 1226 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1227 return -EINVAL;
1228#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1229 if (cfg->fc_src_len)
1da177e4
LT
1230 return -EINVAL;
1231#endif
86872cb5 1232 if (cfg->fc_ifindex) {
1da177e4 1233 err = -ENODEV;
5578689a 1234 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1235 if (!dev)
1236 goto out;
1237 idev = in6_dev_get(dev);
1238 if (!idev)
1239 goto out;
1240 }
1241
86872cb5
TG
1242 if (cfg->fc_metric == 0)
1243 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1244
d71314b4 1245 err = -ENOBUFS;
38308473
DM
1246 if (cfg->fc_nlinfo.nlh &&
1247 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1248 table = fib6_get_table(net, cfg->fc_table);
38308473 1249 if (!table) {
d71314b4
MV
1250 printk(KERN_WARNING "IPv6: NLM_F_CREATE should be specified when creating new route\n");
1251 table = fib6_new_table(net, cfg->fc_table);
1252 }
1253 } else {
1254 table = fib6_new_table(net, cfg->fc_table);
1255 }
38308473
DM
1256
1257 if (!table)
c71099ac 1258 goto out;
c71099ac 1259
957c665f 1260 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
1da177e4 1261
38308473 1262 if (!rt) {
1da177e4
LT
1263 err = -ENOMEM;
1264 goto out;
1265 }
1266
d8d1f30b 1267 rt->dst.obsolete = -1;
6f704992
YH
1268 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1269 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1270 0;
1da177e4 1271
86872cb5
TG
1272 if (cfg->fc_protocol == RTPROT_UNSPEC)
1273 cfg->fc_protocol = RTPROT_BOOT;
1274 rt->rt6i_protocol = cfg->fc_protocol;
1275
1276 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1277
1278 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1279 rt->dst.input = ip6_mc_input;
ab79ad14
1280 else if (cfg->fc_flags & RTF_LOCAL)
1281 rt->dst.input = ip6_input;
1da177e4 1282 else
d8d1f30b 1283 rt->dst.input = ip6_forward;
1da177e4 1284
d8d1f30b 1285 rt->dst.output = ip6_output;
1da177e4 1286
86872cb5
TG
1287 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1288 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1289 if (rt->rt6i_dst.plen == 128)
11d53b49 1290 rt->dst.flags |= DST_HOST;
1da177e4 1291
8e2ec639
YZ
1292 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1293 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1294 if (!metrics) {
1295 err = -ENOMEM;
1296 goto out;
1297 }
1298 dst_init_metrics(&rt->dst, metrics, 0);
1299 }
1da177e4 1300#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1301 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1302 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1303#endif
1304
86872cb5 1305 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1306
1307 /* We cannot add true routes via loopback here,
1308 they would result in kernel looping; promote them to reject routes
1309 */
86872cb5 1310 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1311 (dev && (dev->flags & IFF_LOOPBACK) &&
1312 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1313 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1314 /* hold loopback dev/idev if we haven't done so. */
5578689a 1315 if (dev != net->loopback_dev) {
1da177e4
LT
1316 if (dev) {
1317 dev_put(dev);
1318 in6_dev_put(idev);
1319 }
5578689a 1320 dev = net->loopback_dev;
1da177e4
LT
1321 dev_hold(dev);
1322 idev = in6_dev_get(dev);
1323 if (!idev) {
1324 err = -ENODEV;
1325 goto out;
1326 }
1327 }
d8d1f30b
CG
1328 rt->dst.output = ip6_pkt_discard_out;
1329 rt->dst.input = ip6_pkt_discard;
1330 rt->dst.error = -ENETUNREACH;
1da177e4
LT
1331 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1332 goto install_route;
1333 }
1334
86872cb5 1335 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1336 const struct in6_addr *gw_addr;
1da177e4
LT
1337 int gwa_type;
1338
86872cb5 1339 gw_addr = &cfg->fc_gateway;
4e3fd7a0 1340 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
1341 gwa_type = ipv6_addr_type(gw_addr);
1342
1343 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1344 struct rt6_info *grt;
1345
1346 /* IPv6 strictly inhibits using not link-local
1347 addresses as nexthop address.
1348 Otherwise, router will not able to send redirects.
1349 It is very good, but in some (rare!) circumstances
1350 (SIT, PtP, NBMA NOARP links) it is handy to allow
1351 some exceptions. --ANK
1352 */
1353 err = -EINVAL;
38308473 1354 if (!(gwa_type & IPV6_ADDR_UNICAST))
1da177e4
LT
1355 goto out;
1356
5578689a 1357 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1358
1359 err = -EHOSTUNREACH;
38308473 1360 if (!grt)
1da177e4
LT
1361 goto out;
1362 if (dev) {
1363 if (dev != grt->rt6i_dev) {
d8d1f30b 1364 dst_release(&grt->dst);
1da177e4
LT
1365 goto out;
1366 }
1367 } else {
1368 dev = grt->rt6i_dev;
1369 idev = grt->rt6i_idev;
1370 dev_hold(dev);
1371 in6_dev_hold(grt->rt6i_idev);
1372 }
38308473 1373 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 1374 err = 0;
d8d1f30b 1375 dst_release(&grt->dst);
1da177e4
LT
1376
1377 if (err)
1378 goto out;
1379 }
1380 err = -EINVAL;
38308473 1381 if (!dev || (dev->flags & IFF_LOOPBACK))
1da177e4
LT
1382 goto out;
1383 }
1384
1385 err = -ENODEV;
38308473 1386 if (!dev)
1da177e4
LT
1387 goto out;
1388
c3968a85
DW
1389 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1390 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1391 err = -EINVAL;
1392 goto out;
1393 }
4e3fd7a0 1394 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
1395 rt->rt6i_prefsrc.plen = 128;
1396 } else
1397 rt->rt6i_prefsrc.plen = 0;
1398
86872cb5 1399 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
69cce1d1
DM
1400 struct neighbour *n = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1401 if (IS_ERR(n)) {
1402 err = PTR_ERR(n);
1da177e4
LT
1403 goto out;
1404 }
69cce1d1 1405 dst_set_neighbour(&rt->dst, n);
1da177e4
LT
1406 }
1407
86872cb5 1408 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1409
1410install_route:
86872cb5
TG
1411 if (cfg->fc_mx) {
1412 struct nlattr *nla;
1413 int remaining;
1414
1415 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1416 int type = nla_type(nla);
86872cb5
TG
1417
1418 if (type) {
1419 if (type > RTAX_MAX) {
1da177e4
LT
1420 err = -EINVAL;
1421 goto out;
1422 }
86872cb5 1423
defb3519 1424 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1425 }
1da177e4
LT
1426 }
1427 }
1428
d8d1f30b 1429 rt->dst.dev = dev;
1da177e4 1430 rt->rt6i_idev = idev;
c71099ac 1431 rt->rt6i_table = table;
63152fc0 1432
c346dca1 1433 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1434
86872cb5 1435 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1436
1437out:
1438 if (dev)
1439 dev_put(dev);
1440 if (idev)
1441 in6_dev_put(idev);
1442 if (rt)
d8d1f30b 1443 dst_free(&rt->dst);
1da177e4
LT
1444 return err;
1445}
1446
86872cb5 1447static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1448{
1449 int err;
c71099ac 1450 struct fib6_table *table;
c346dca1 1451 struct net *net = dev_net(rt->rt6i_dev);
1da177e4 1452
8ed67789 1453 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1454 return -ENOENT;
1455
c71099ac
TG
1456 table = rt->rt6i_table;
1457 write_lock_bh(&table->tb6_lock);
1da177e4 1458
86872cb5 1459 err = fib6_del(rt, info);
d8d1f30b 1460 dst_release(&rt->dst);
1da177e4 1461
c71099ac 1462 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1463
1464 return err;
1465}
1466
e0a1ad73
TG
1467int ip6_del_rt(struct rt6_info *rt)
1468{
4d1169c1 1469 struct nl_info info = {
c346dca1 1470 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 1471 };
528c4ceb 1472 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1473}
1474
86872cb5 1475static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1476{
c71099ac 1477 struct fib6_table *table;
1da177e4
LT
1478 struct fib6_node *fn;
1479 struct rt6_info *rt;
1480 int err = -ESRCH;
1481
5578689a 1482 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 1483 if (!table)
c71099ac
TG
1484 return err;
1485
1486 read_lock_bh(&table->tb6_lock);
1da177e4 1487
c71099ac 1488 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1489 &cfg->fc_dst, cfg->fc_dst_len,
1490 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1491
1da177e4 1492 if (fn) {
d8d1f30b 1493 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1494 if (cfg->fc_ifindex &&
38308473 1495 (!rt->rt6i_dev ||
86872cb5 1496 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1da177e4 1497 continue;
86872cb5
TG
1498 if (cfg->fc_flags & RTF_GATEWAY &&
1499 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1500 continue;
86872cb5 1501 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1502 continue;
d8d1f30b 1503 dst_hold(&rt->dst);
c71099ac 1504 read_unlock_bh(&table->tb6_lock);
1da177e4 1505
86872cb5 1506 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1507 }
1508 }
c71099ac 1509 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1510
1511 return err;
1512}
1513
1514/*
1515 * Handle redirects
1516 */
a6279458 1517struct ip6rd_flowi {
4c9483b2 1518 struct flowi6 fl6;
a6279458
YH
1519 struct in6_addr gateway;
1520};
1521
8ed67789
DL
1522static struct rt6_info *__ip6_route_redirect(struct net *net,
1523 struct fib6_table *table,
4c9483b2 1524 struct flowi6 *fl6,
a6279458 1525 int flags)
1da177e4 1526{
4c9483b2 1527 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
a6279458 1528 struct rt6_info *rt;
e843b9e1 1529 struct fib6_node *fn;
c71099ac 1530
1da177e4 1531 /*
e843b9e1
YH
1532 * Get the "current" route for this destination and
1533 * check if the redirect has come from approriate router.
1534 *
1535 * RFC 2461 specifies that redirects should only be
1536 * accepted if they come from the nexthop to the target.
1537 * Due to the way the routes are chosen, this notion
1538 * is a bit fuzzy and one might need to check all possible
1539 * routes.
1da177e4 1540 */
1da177e4 1541
c71099ac 1542 read_lock_bh(&table->tb6_lock);
4c9483b2 1543 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
e843b9e1 1544restart:
d8d1f30b 1545 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
e843b9e1
YH
1546 /*
1547 * Current route is on-link; redirect is always invalid.
1548 *
1549 * Seems, previous statement is not true. It could
1550 * be node, which looks for us as on-link (f.e. proxy ndisc)
1551 * But then router serving it might decide, that we should
1552 * know truth 8)8) --ANK (980726).
1553 */
1554 if (rt6_check_expired(rt))
1555 continue;
1556 if (!(rt->rt6i_flags & RTF_GATEWAY))
1557 continue;
4c9483b2 1558 if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
e843b9e1 1559 continue;
a6279458 1560 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1561 continue;
1562 break;
1563 }
a6279458 1564
cb15d9c2 1565 if (!rt)
8ed67789 1566 rt = net->ipv6.ip6_null_entry;
4c9483b2 1567 BACKTRACK(net, &fl6->saddr);
cb15d9c2 1568out:
d8d1f30b 1569 dst_hold(&rt->dst);
a6279458 1570
c71099ac 1571 read_unlock_bh(&table->tb6_lock);
e843b9e1 1572
a6279458
YH
1573 return rt;
1574};
1575
b71d1d42
ED
1576static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1577 const struct in6_addr *src,
1578 const struct in6_addr *gateway,
a6279458
YH
1579 struct net_device *dev)
1580{
adaa70bb 1581 int flags = RT6_LOOKUP_F_HAS_SADDR;
c346dca1 1582 struct net *net = dev_net(dev);
a6279458 1583 struct ip6rd_flowi rdfl = {
4c9483b2
DM
1584 .fl6 = {
1585 .flowi6_oif = dev->ifindex,
1586 .daddr = *dest,
1587 .saddr = *src,
a6279458 1588 },
a6279458 1589 };
adaa70bb 1590
4e3fd7a0 1591 rdfl.gateway = *gateway;
86c36ce4 1592
adaa70bb
TG
1593 if (rt6_need_strict(dest))
1594 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1595
4c9483b2 1596 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
58f09b78 1597 flags, __ip6_route_redirect);
a6279458
YH
1598}
1599
b71d1d42
ED
1600void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1601 const struct in6_addr *saddr,
a6279458
YH
1602 struct neighbour *neigh, u8 *lladdr, int on_link)
1603{
1604 struct rt6_info *rt, *nrt = NULL;
1605 struct netevent_redirect netevent;
c346dca1 1606 struct net *net = dev_net(neigh->dev);
a6279458
YH
1607
1608 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1609
8ed67789 1610 if (rt == net->ipv6.ip6_null_entry) {
1da177e4
LT
1611 if (net_ratelimit())
1612 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1613 "for redirect target\n");
a6279458 1614 goto out;
1da177e4
LT
1615 }
1616
1da177e4
LT
1617 /*
1618 * We have finally decided to accept it.
1619 */
1620
1ab1457c 1621 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1622 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1623 NEIGH_UPDATE_F_OVERRIDE|
1624 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1625 NEIGH_UPDATE_F_ISROUTER))
1626 );
1627
1628 /*
1629 * Redirect received -> path was valid.
1630 * Look, redirects are sent only in response to data packets,
1631 * so that this nexthop apparently is reachable. --ANK
1632 */
d8d1f30b 1633 dst_confirm(&rt->dst);
1da177e4
LT
1634
1635 /* Duplicate redirect: silently ignore. */
27217455 1636 if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
1da177e4
LT
1637 goto out;
1638
21efcfa0 1639 nrt = ip6_rt_copy(rt, dest);
38308473 1640 if (!nrt)
1da177e4
LT
1641 goto out;
1642
1643 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1644 if (on_link)
1645 nrt->rt6i_flags &= ~RTF_GATEWAY;
1646
4e3fd7a0 1647 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
69cce1d1 1648 dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1da177e4 1649
40e22e8f 1650 if (ip6_ins_rt(nrt))
1da177e4
LT
1651 goto out;
1652
d8d1f30b
CG
1653 netevent.old = &rt->dst;
1654 netevent.new = &nrt->dst;
8d71740c
TT
1655 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1656
38308473 1657 if (rt->rt6i_flags & RTF_CACHE) {
e0a1ad73 1658 ip6_del_rt(rt);
1da177e4
LT
1659 return;
1660 }
1661
1662out:
d8d1f30b 1663 dst_release(&rt->dst);
1da177e4
LT
1664}
1665
1666/*
1667 * Handle ICMP "packet too big" messages
1668 * i.e. Path MTU discovery
1669 */
1670
b71d1d42 1671static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
ae878ae2 1672 struct net *net, u32 pmtu, int ifindex)
1da177e4
LT
1673{
1674 struct rt6_info *rt, *nrt;
1675 int allfrag = 0;
d3052b55 1676again:
ae878ae2 1677 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
38308473 1678 if (!rt)
1da177e4
LT
1679 return;
1680
d3052b55
AV
1681 if (rt6_check_expired(rt)) {
1682 ip6_del_rt(rt);
1683 goto again;
1684 }
1685
d8d1f30b 1686 if (pmtu >= dst_mtu(&rt->dst))
1da177e4
LT
1687 goto out;
1688
1689 if (pmtu < IPV6_MIN_MTU) {
1690 /*
1ab1457c 1691 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1692 * MTU (1280) and a fragment header should always be included
1693 * after a node receiving Too Big message reporting PMTU is
1694 * less than the IPv6 Minimum Link MTU.
1695 */
1696 pmtu = IPV6_MIN_MTU;
1697 allfrag = 1;
1698 }
1699
1700 /* New mtu received -> path was valid.
1701 They are sent only in response to data packets,
1702 so that this nexthop apparently is reachable. --ANK
1703 */
d8d1f30b 1704 dst_confirm(&rt->dst);
1da177e4
LT
1705
1706 /* Host route. If it is static, it would be better
1707 not to override it, but add new one, so that
1708 when cache entry will expire old pmtu
1709 would return automatically.
1710 */
1711 if (rt->rt6i_flags & RTF_CACHE) {
defb3519
DM
1712 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1713 if (allfrag) {
1714 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1715 features |= RTAX_FEATURE_ALLFRAG;
1716 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1717 }
d8d1f30b 1718 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1719 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1720 goto out;
1721 }
1722
1723 /* Network route.
1724 Two cases are possible:
1725 1. It is connected route. Action: COW
1726 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1727 */
27217455 1728 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1729 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1730 else
1731 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1732
d5315b50 1733 if (nrt) {
defb3519
DM
1734 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1735 if (allfrag) {
1736 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1737 features |= RTAX_FEATURE_ALLFRAG;
1738 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1739 }
a1e78363
YH
1740
1741 /* According to RFC 1981, detecting PMTU increase shouldn't be
1742 * happened within 5 mins, the recommended timer is 10 mins.
1743 * Here this route expiration time is set to ip6_rt_mtu_expires
1744 * which is 10 mins. After 10 mins the decreased pmtu is expired
1745 * and detecting PMTU increase will be automatically happened.
1746 */
d8d1f30b 1747 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
a1e78363
YH
1748 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1749
40e22e8f 1750 ip6_ins_rt(nrt);
1da177e4 1751 }
1da177e4 1752out:
d8d1f30b 1753 dst_release(&rt->dst);
1da177e4
LT
1754}
1755
b71d1d42 1756void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
ae878ae2
1757 struct net_device *dev, u32 pmtu)
1758{
1759 struct net *net = dev_net(dev);
1760
1761 /*
1762 * RFC 1981 states that a node "MUST reduce the size of the packets it
1763 * is sending along the path" that caused the Packet Too Big message.
1764 * Since it's not possible in the general case to determine which
1765 * interface was used to send the original packet, we update the MTU
1766 * on the interface that will be used to send future packets. We also
1767 * update the MTU on the interface that received the Packet Too Big in
1768 * case the original packet was forced out that interface with
1769 * SO_BINDTODEVICE or similar. This is the next best thing to the
1770 * correct behaviour, which would be to update the MTU on all
1771 * interfaces.
1772 */
1773 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1774 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1775}
1776
1da177e4
LT
1777/*
1778 * Misc support functions
1779 */
1780
21efcfa0
ED
1781static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
1782 const struct in6_addr *dest)
1da177e4 1783{
c346dca1 1784 struct net *net = dev_net(ort->rt6i_dev);
5c1e6aa3 1785 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
957c665f 1786 ort->dst.dev, 0);
1da177e4
LT
1787
1788 if (rt) {
d8d1f30b
CG
1789 rt->dst.input = ort->dst.input;
1790 rt->dst.output = ort->dst.output;
8e2ec639 1791 rt->dst.flags |= DST_HOST;
d8d1f30b 1792
4e3fd7a0 1793 rt->rt6i_dst.addr = *dest;
8e2ec639 1794 rt->rt6i_dst.plen = 128;
defb3519 1795 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b 1796 rt->dst.error = ort->dst.error;
1da177e4
LT
1797 rt->rt6i_idev = ort->rt6i_idev;
1798 if (rt->rt6i_idev)
1799 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1800 rt->dst.lastuse = jiffies;
1da177e4
LT
1801 rt->rt6i_expires = 0;
1802
4e3fd7a0 1803 rt->rt6i_gateway = ort->rt6i_gateway;
1da177e4
LT
1804 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1805 rt->rt6i_metric = 0;
1806
1da177e4
LT
1807#ifdef CONFIG_IPV6_SUBTREES
1808 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1809#endif
0f6c6392 1810 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
c71099ac 1811 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1812 }
1813 return rt;
1814}
1815
70ceb4f5 1816#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 1817static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
1818 const struct in6_addr *prefix, int prefixlen,
1819 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
1820{
1821 struct fib6_node *fn;
1822 struct rt6_info *rt = NULL;
c71099ac
TG
1823 struct fib6_table *table;
1824
efa2cea0 1825 table = fib6_get_table(net, RT6_TABLE_INFO);
38308473 1826 if (!table)
c71099ac 1827 return NULL;
70ceb4f5 1828
c71099ac
TG
1829 write_lock_bh(&table->tb6_lock);
1830 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1831 if (!fn)
1832 goto out;
1833
d8d1f30b 1834 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
70ceb4f5
YH
1835 if (rt->rt6i_dev->ifindex != ifindex)
1836 continue;
1837 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1838 continue;
1839 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1840 continue;
d8d1f30b 1841 dst_hold(&rt->dst);
70ceb4f5
YH
1842 break;
1843 }
1844out:
c71099ac 1845 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1846 return rt;
1847}
1848
efa2cea0 1849static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
1850 const struct in6_addr *prefix, int prefixlen,
1851 const struct in6_addr *gwaddr, int ifindex,
70ceb4f5
YH
1852 unsigned pref)
1853{
86872cb5
TG
1854 struct fib6_config cfg = {
1855 .fc_table = RT6_TABLE_INFO,
238fc7ea 1856 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1857 .fc_ifindex = ifindex,
1858 .fc_dst_len = prefixlen,
1859 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1860 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1861 .fc_nlinfo.pid = 0,
1862 .fc_nlinfo.nlh = NULL,
1863 .fc_nlinfo.nl_net = net,
86872cb5
TG
1864 };
1865
4e3fd7a0
AD
1866 cfg.fc_dst = *prefix;
1867 cfg.fc_gateway = *gwaddr;
70ceb4f5 1868
e317da96
YH
1869 /* We should treat it as a default route if prefix length is 0. */
1870 if (!prefixlen)
86872cb5 1871 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1872
86872cb5 1873 ip6_route_add(&cfg);
70ceb4f5 1874
efa2cea0 1875 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1876}
1877#endif
1878
b71d1d42 1879struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 1880{
1da177e4 1881 struct rt6_info *rt;
c71099ac 1882 struct fib6_table *table;
1da177e4 1883
c346dca1 1884 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
38308473 1885 if (!table)
c71099ac 1886 return NULL;
1da177e4 1887
c71099ac 1888 write_lock_bh(&table->tb6_lock);
d8d1f30b 1889 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1da177e4 1890 if (dev == rt->rt6i_dev &&
045927ff 1891 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1892 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1893 break;
1894 }
1895 if (rt)
d8d1f30b 1896 dst_hold(&rt->dst);
c71099ac 1897 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1898 return rt;
1899}
1900
b71d1d42 1901struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
1902 struct net_device *dev,
1903 unsigned int pref)
1da177e4 1904{
86872cb5
TG
1905 struct fib6_config cfg = {
1906 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1907 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1908 .fc_ifindex = dev->ifindex,
1909 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1910 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1911 .fc_nlinfo.pid = 0,
1912 .fc_nlinfo.nlh = NULL,
c346dca1 1913 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1914 };
1da177e4 1915
4e3fd7a0 1916 cfg.fc_gateway = *gwaddr;
1da177e4 1917
86872cb5 1918 ip6_route_add(&cfg);
1da177e4 1919
1da177e4
LT
1920 return rt6_get_dflt_router(gwaddr, dev);
1921}
1922
7b4da532 1923void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1924{
1925 struct rt6_info *rt;
c71099ac
TG
1926 struct fib6_table *table;
1927
1928 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1929 table = fib6_get_table(net, RT6_TABLE_DFLT);
38308473 1930 if (!table)
c71099ac 1931 return;
1da177e4
LT
1932
1933restart:
c71099ac 1934 read_lock_bh(&table->tb6_lock);
d8d1f30b 1935 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1936 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1937 dst_hold(&rt->dst);
c71099ac 1938 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1939 ip6_del_rt(rt);
1da177e4
LT
1940 goto restart;
1941 }
1942 }
c71099ac 1943 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1944}
1945
5578689a
DL
1946static void rtmsg_to_fib6_config(struct net *net,
1947 struct in6_rtmsg *rtmsg,
86872cb5
TG
1948 struct fib6_config *cfg)
1949{
1950 memset(cfg, 0, sizeof(*cfg));
1951
1952 cfg->fc_table = RT6_TABLE_MAIN;
1953 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1954 cfg->fc_metric = rtmsg->rtmsg_metric;
1955 cfg->fc_expires = rtmsg->rtmsg_info;
1956 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1957 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1958 cfg->fc_flags = rtmsg->rtmsg_flags;
1959
5578689a 1960 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1961
4e3fd7a0
AD
1962 cfg->fc_dst = rtmsg->rtmsg_dst;
1963 cfg->fc_src = rtmsg->rtmsg_src;
1964 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
1965}
1966
5578689a 1967int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1968{
86872cb5 1969 struct fib6_config cfg;
1da177e4
LT
1970 struct in6_rtmsg rtmsg;
1971 int err;
1972
1973 switch(cmd) {
1974 case SIOCADDRT: /* Add a route */
1975 case SIOCDELRT: /* Delete a route */
1976 if (!capable(CAP_NET_ADMIN))
1977 return -EPERM;
1978 err = copy_from_user(&rtmsg, arg,
1979 sizeof(struct in6_rtmsg));
1980 if (err)
1981 return -EFAULT;
86872cb5 1982
5578689a 1983 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1984
1da177e4
LT
1985 rtnl_lock();
1986 switch (cmd) {
1987 case SIOCADDRT:
86872cb5 1988 err = ip6_route_add(&cfg);
1da177e4
LT
1989 break;
1990 case SIOCDELRT:
86872cb5 1991 err = ip6_route_del(&cfg);
1da177e4
LT
1992 break;
1993 default:
1994 err = -EINVAL;
1995 }
1996 rtnl_unlock();
1997
1998 return err;
3ff50b79 1999 }
1da177e4
LT
2000
2001 return -EINVAL;
2002}
2003
2004/*
2005 * Drop the packet on the floor
2006 */
2007
d5fdd6ba 2008static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2009{
612f09e8 2010 int type;
adf30907 2011 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2012 switch (ipstats_mib_noroutes) {
2013 case IPSTATS_MIB_INNOROUTES:
0660e03f 2014 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2015 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2016 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2017 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2018 break;
2019 }
2020 /* FALLTHROUGH */
2021 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2022 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2023 ipstats_mib_noroutes);
612f09e8
YH
2024 break;
2025 }
3ffe533c 2026 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2027 kfree_skb(skb);
2028 return 0;
2029}
2030
9ce8ade0
TG
2031static int ip6_pkt_discard(struct sk_buff *skb)
2032{
612f09e8 2033 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2034}
2035
20380731 2036static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 2037{
adf30907 2038 skb->dev = skb_dst(skb)->dev;
612f09e8 2039 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2040}
2041
6723ab54
DM
2042#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2043
9ce8ade0
TG
2044static int ip6_pkt_prohibit(struct sk_buff *skb)
2045{
612f09e8 2046 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2047}
2048
2049static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2050{
adf30907 2051 skb->dev = skb_dst(skb)->dev;
612f09e8 2052 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2053}
2054
6723ab54
DM
2055#endif
2056
1da177e4
LT
2057/*
2058 * Allocate a dst for local (unicast / anycast) address.
2059 */
2060
2061struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2062 const struct in6_addr *addr,
8f031519 2063 bool anycast)
1da177e4 2064{
c346dca1 2065 struct net *net = dev_net(idev->dev);
5c1e6aa3 2066 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
957c665f 2067 net->loopback_dev, 0);
14deae41 2068 struct neighbour *neigh;
1da177e4 2069
38308473 2070 if (!rt) {
40385653
BG
2071 if (net_ratelimit())
2072 pr_warning("IPv6: Maximum number of routes reached,"
2073 " consider increasing route/max_size.\n");
1da177e4 2074 return ERR_PTR(-ENOMEM);
40385653 2075 }
1da177e4 2076
1da177e4
LT
2077 in6_dev_hold(idev);
2078
11d53b49 2079 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2080 rt->dst.input = ip6_input;
2081 rt->dst.output = ip6_output;
1da177e4 2082 rt->rt6i_idev = idev;
d8d1f30b 2083 rt->dst.obsolete = -1;
1da177e4
LT
2084
2085 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2086 if (anycast)
2087 rt->rt6i_flags |= RTF_ANYCAST;
2088 else
1da177e4 2089 rt->rt6i_flags |= RTF_LOCAL;
04a6f441 2090 neigh = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, rt->rt6i_dev);
14deae41 2091 if (IS_ERR(neigh)) {
d8d1f30b 2092 dst_free(&rt->dst);
14deae41 2093
29546a64 2094 return ERR_CAST(neigh);
1da177e4 2095 }
69cce1d1 2096 dst_set_neighbour(&rt->dst, neigh);
1da177e4 2097
4e3fd7a0 2098 rt->rt6i_dst.addr = *addr;
1da177e4 2099 rt->rt6i_dst.plen = 128;
5578689a 2100 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2101
d8d1f30b 2102 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2103
2104 return rt;
2105}
2106
c3968a85
DW
2107int ip6_route_get_saddr(struct net *net,
2108 struct rt6_info *rt,
b71d1d42 2109 const struct in6_addr *daddr,
c3968a85
DW
2110 unsigned int prefs,
2111 struct in6_addr *saddr)
2112{
2113 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2114 int err = 0;
2115 if (rt->rt6i_prefsrc.plen)
4e3fd7a0 2116 *saddr = rt->rt6i_prefsrc.addr;
c3968a85
DW
2117 else
2118 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2119 daddr, prefs, saddr);
2120 return err;
2121}
2122
2123/* remove deleted ip from prefsrc entries */
/*
 * Context handed to fib6_remove_prefsrc() through fib6_clean_all():
 * the device and namespace being walked plus the address to clear.
 */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace, for its null entry */
	struct in6_addr *addr;		/* preferred source being removed */
};
2129
2130static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2131{
2132 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2133 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2134 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2135
38308473 2136 if (((void *)rt->rt6i_dev == dev || !dev) &&
c3968a85
DW
2137 rt != net->ipv6.ip6_null_entry &&
2138 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2139 /* remove prefsrc entry */
2140 rt->rt6i_prefsrc.plen = 0;
2141 }
2142 return 0;
2143}
2144
2145void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2146{
2147 struct net *net = dev_net(ifp->idev->dev);
2148 struct arg_dev_net_ip adni = {
2149 .dev = ifp->idev->dev,
2150 .net = net,
2151 .addr = &ifp->addr,
2152 };
2153 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2154}
2155
8ed67789
DL
/* Context handed to fib6_ifdown(): the device going down and its namespace. */
struct arg_dev_net {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace, for its null entry */
};
2160
1da177e4
LT
2161static int fib6_ifdown(struct rt6_info *rt, void *arg)
2162{
bc3ef660 2163 const struct arg_dev_net *adn = arg;
2164 const struct net_device *dev = adn->dev;
8ed67789 2165
38308473 2166 if ((rt->rt6i_dev == dev || !dev) &&
bc3ef660 2167 rt != adn->net->ipv6.ip6_null_entry) {
1da177e4
LT
2168 RT6_TRACE("deleted by ifdown %p\n", rt);
2169 return -1;
2170 }
2171 return 0;
2172}
2173
f3db4851 2174void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2175{
8ed67789
DL
2176 struct arg_dev_net adn = {
2177 .dev = dev,
2178 .net = net,
2179 };
2180
2181 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2182 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2183}
2184
/* Context handed to rt6_mtu_change_route(): the device and its new MTU. */
struct rt6_mtu_change_arg
{
	struct net_device *dev;		/* device whose MTU changed */
	unsigned mtu;			/* the new MTU */
};
2190
2191static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2192{
2193 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2194 struct inet6_dev *idev;
2195
2196 /* In IPv6 pmtu discovery is not optional,
2197 so that RTAX_MTU lock cannot disable it.
2198 We still use this lock to block changes
2199 caused by addrconf/ndisc.
2200 */
2201
2202 idev = __in6_dev_get(arg->dev);
38308473 2203 if (!idev)
1da177e4
LT
2204 return 0;
2205
2206 /* For administrative MTU increase, there is no way to discover
2207 IPv6 PMTU increase, so PMTU increase should be updated here.
2208 Since RFC 1981 doesn't include administrative MTU increase
2209 update PMTU increase is a MUST. (i.e. jumbo frame)
2210 */
2211 /*
2212 If new MTU is less than route PMTU, this new MTU will be the
2213 lowest MTU in the path, update the route PMTU to reflect PMTU
2214 decreases; if new MTU is greater than route PMTU, and the
2215 old MTU is the lowest MTU in the path, update the route PMTU
2216 to reflect the increase. In this case if the other nodes' MTU
2217 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2218 PMTU discouvery.
2219 */
2220 if (rt->rt6i_dev == arg->dev &&
d8d1f30b
CG
2221 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2222 (dst_mtu(&rt->dst) >= arg->mtu ||
2223 (dst_mtu(&rt->dst) < arg->mtu &&
2224 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2225 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2226 }
1da177e4
LT
2227 return 0;
2228}
2229
2230void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2231{
c71099ac
TG
2232 struct rt6_mtu_change_arg arg = {
2233 .dev = dev,
2234 .mtu = mtu,
2235 };
1da177e4 2236
c346dca1 2237 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2238}
2239
ef7c79ed 2240static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2241 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2242 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2243 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2244 [RTA_PRIORITY] = { .type = NLA_U32 },
2245 [RTA_METRICS] = { .type = NLA_NESTED },
2246};
2247
2248static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2249 struct fib6_config *cfg)
1da177e4 2250{
86872cb5
TG
2251 struct rtmsg *rtm;
2252 struct nlattr *tb[RTA_MAX+1];
2253 int err;
1da177e4 2254
86872cb5
TG
2255 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2256 if (err < 0)
2257 goto errout;
1da177e4 2258
86872cb5
TG
2259 err = -EINVAL;
2260 rtm = nlmsg_data(nlh);
2261 memset(cfg, 0, sizeof(*cfg));
2262
2263 cfg->fc_table = rtm->rtm_table;
2264 cfg->fc_dst_len = rtm->rtm_dst_len;
2265 cfg->fc_src_len = rtm->rtm_src_len;
2266 cfg->fc_flags = RTF_UP;
2267 cfg->fc_protocol = rtm->rtm_protocol;
2268
2269 if (rtm->rtm_type == RTN_UNREACHABLE)
2270 cfg->fc_flags |= RTF_REJECT;
2271
ab79ad14
2272 if (rtm->rtm_type == RTN_LOCAL)
2273 cfg->fc_flags |= RTF_LOCAL;
2274
86872cb5
TG
2275 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2276 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2277 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2278
2279 if (tb[RTA_GATEWAY]) {
2280 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2281 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2282 }
86872cb5
TG
2283
2284 if (tb[RTA_DST]) {
2285 int plen = (rtm->rtm_dst_len + 7) >> 3;
2286
2287 if (nla_len(tb[RTA_DST]) < plen)
2288 goto errout;
2289
2290 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2291 }
86872cb5
TG
2292
2293 if (tb[RTA_SRC]) {
2294 int plen = (rtm->rtm_src_len + 7) >> 3;
2295
2296 if (nla_len(tb[RTA_SRC]) < plen)
2297 goto errout;
2298
2299 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2300 }
86872cb5 2301
c3968a85
DW
2302 if (tb[RTA_PREFSRC])
2303 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2304
86872cb5
TG
2305 if (tb[RTA_OIF])
2306 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2307
2308 if (tb[RTA_PRIORITY])
2309 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2310
2311 if (tb[RTA_METRICS]) {
2312 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2313 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2314 }
86872cb5
TG
2315
2316 if (tb[RTA_TABLE])
2317 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2318
2319 err = 0;
2320errout:
2321 return err;
1da177e4
LT
2322}
2323
c127ea2c 2324static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2325{
86872cb5
TG
2326 struct fib6_config cfg;
2327 int err;
1da177e4 2328
86872cb5
TG
2329 err = rtm_to_fib6_config(skb, nlh, &cfg);
2330 if (err < 0)
2331 return err;
2332
2333 return ip6_route_del(&cfg);
1da177e4
LT
2334}
2335
c127ea2c 2336static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2337{
86872cb5
TG
2338 struct fib6_config cfg;
2339 int err;
1da177e4 2340
86872cb5
TG
2341 err = rtm_to_fib6_config(skb, nlh, &cfg);
2342 if (err < 0)
2343 return err;
2344
2345 return ip6_route_add(&cfg);
1da177e4
LT
2346}
2347
339bf98f
TG
2348static inline size_t rt6_nlmsg_size(void)
2349{
2350 return NLMSG_ALIGN(sizeof(struct rtmsg))
2351 + nla_total_size(16) /* RTA_SRC */
2352 + nla_total_size(16) /* RTA_DST */
2353 + nla_total_size(16) /* RTA_GATEWAY */
2354 + nla_total_size(16) /* RTA_PREFSRC */
2355 + nla_total_size(4) /* RTA_TABLE */
2356 + nla_total_size(4) /* RTA_IIF */
2357 + nla_total_size(4) /* RTA_OIF */
2358 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2359 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2360 + nla_total_size(sizeof(struct rta_cacheinfo));
2361}
2362
191cd582
BH
2363static int rt6_fill_node(struct net *net,
2364 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2365 struct in6_addr *dst, struct in6_addr *src,
2366 int iif, int type, u32 pid, u32 seq,
7bc570c8 2367 int prefix, int nowait, unsigned int flags)
1da177e4
LT
2368{
2369 struct rtmsg *rtm;
2d7202bf 2370 struct nlmsghdr *nlh;
e3703b3d 2371 long expires;
9e762a4a 2372 u32 table;
f2c31e32 2373 struct neighbour *n;
1da177e4
LT
2374
2375 if (prefix) { /* user wants prefix routes only */
2376 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2377 /* success since this is not a prefix route */
2378 return 1;
2379 }
2380 }
2381
2d7202bf 2382 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
38308473 2383 if (!nlh)
26932566 2384 return -EMSGSIZE;
2d7202bf
TG
2385
2386 rtm = nlmsg_data(nlh);
1da177e4
LT
2387 rtm->rtm_family = AF_INET6;
2388 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2389 rtm->rtm_src_len = rt->rt6i_src.plen;
2390 rtm->rtm_tos = 0;
c71099ac 2391 if (rt->rt6i_table)
9e762a4a 2392 table = rt->rt6i_table->tb6_id;
c71099ac 2393 else
9e762a4a
PM
2394 table = RT6_TABLE_UNSPEC;
2395 rtm->rtm_table = table;
2d7202bf 2396 NLA_PUT_U32(skb, RTA_TABLE, table);
38308473 2397 if (rt->rt6i_flags & RTF_REJECT)
1da177e4 2398 rtm->rtm_type = RTN_UNREACHABLE;
38308473 2399 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 2400 rtm->rtm_type = RTN_LOCAL;
38308473 2401 else if (rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
1da177e4
LT
2402 rtm->rtm_type = RTN_LOCAL;
2403 else
2404 rtm->rtm_type = RTN_UNICAST;
2405 rtm->rtm_flags = 0;
2406 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2407 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 2408 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4
LT
2409 rtm->rtm_protocol = RTPROT_REDIRECT;
2410 else if (rt->rt6i_flags & RTF_ADDRCONF)
2411 rtm->rtm_protocol = RTPROT_KERNEL;
38308473 2412 else if (rt->rt6i_flags & RTF_DEFAULT)
1da177e4
LT
2413 rtm->rtm_protocol = RTPROT_RA;
2414
38308473 2415 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
2416 rtm->rtm_flags |= RTM_F_CLONED;
2417
2418 if (dst) {
2d7202bf 2419 NLA_PUT(skb, RTA_DST, 16, dst);
1ab1457c 2420 rtm->rtm_dst_len = 128;
1da177e4 2421 } else if (rtm->rtm_dst_len)
2d7202bf 2422 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2423#ifdef CONFIG_IPV6_SUBTREES
2424 if (src) {
2d7202bf 2425 NLA_PUT(skb, RTA_SRC, 16, src);
1ab1457c 2426 rtm->rtm_src_len = 128;
1da177e4 2427 } else if (rtm->rtm_src_len)
2d7202bf 2428 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4 2429#endif
7bc570c8
YH
2430 if (iif) {
2431#ifdef CONFIG_IPV6_MROUTE
2432 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2433 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2434 if (err <= 0) {
2435 if (!nowait) {
2436 if (err == 0)
2437 return 0;
2438 goto nla_put_failure;
2439 } else {
2440 if (err == -EMSGSIZE)
2441 goto nla_put_failure;
2442 }
2443 }
2444 } else
2445#endif
2446 NLA_PUT_U32(skb, RTA_IIF, iif);
2447 } else if (dst) {
1da177e4 2448 struct in6_addr saddr_buf;
c3968a85 2449 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
2d7202bf 2450 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2451 }
2d7202bf 2452
c3968a85
DW
2453 if (rt->rt6i_prefsrc.plen) {
2454 struct in6_addr saddr_buf;
4e3fd7a0 2455 saddr_buf = rt->rt6i_prefsrc.addr;
c3968a85
DW
2456 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2457 }
2458
defb3519 2459 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2460 goto nla_put_failure;
2461
f2c31e32 2462 rcu_read_lock();
27217455 2463 n = dst_get_neighbour_noref(&rt->dst);
f2c31e32
ED
2464 if (n)
2465 NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key);
2466 rcu_read_unlock();
2d7202bf 2467
d8d1f30b 2468 if (rt->dst.dev)
2d7202bf
TG
2469 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2470
2471 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
e3703b3d 2472
36e3deae
YH
2473 if (!(rt->rt6i_flags & RTF_EXPIRES))
2474 expires = 0;
2475 else if (rt->rt6i_expires - jiffies < INT_MAX)
2476 expires = rt->rt6i_expires - jiffies;
2477 else
2478 expires = INT_MAX;
69cdf8f9 2479
d8d1f30b
CG
2480 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2481 expires, rt->dst.error) < 0)
e3703b3d 2482 goto nla_put_failure;
2d7202bf
TG
2483
2484 return nlmsg_end(skb, nlh);
2485
2486nla_put_failure:
26932566
PM
2487 nlmsg_cancel(skb, nlh);
2488 return -EMSGSIZE;
1da177e4
LT
2489}
2490
1b43af54 2491int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2492{
2493 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2494 int prefix;
2495
2d7202bf
TG
2496 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2497 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2498 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2499 } else
2500 prefix = 0;
2501
191cd582
BH
2502 return rt6_fill_node(arg->net,
2503 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2504 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2505 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2506}
2507
c127ea2c 2508static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2509{
3b1e0a65 2510 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2511 struct nlattr *tb[RTA_MAX+1];
2512 struct rt6_info *rt;
1da177e4 2513 struct sk_buff *skb;
ab364a6f 2514 struct rtmsg *rtm;
4c9483b2 2515 struct flowi6 fl6;
ab364a6f 2516 int err, iif = 0;
1da177e4 2517
ab364a6f
TG
2518 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2519 if (err < 0)
2520 goto errout;
1da177e4 2521
ab364a6f 2522 err = -EINVAL;
4c9483b2 2523 memset(&fl6, 0, sizeof(fl6));
1da177e4 2524
ab364a6f
TG
2525 if (tb[RTA_SRC]) {
2526 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2527 goto errout;
2528
4e3fd7a0 2529 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
2530 }
2531
2532 if (tb[RTA_DST]) {
2533 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2534 goto errout;
2535
4e3fd7a0 2536 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
2537 }
2538
2539 if (tb[RTA_IIF])
2540 iif = nla_get_u32(tb[RTA_IIF]);
2541
2542 if (tb[RTA_OIF])
4c9483b2 2543 fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2544
2545 if (iif) {
2546 struct net_device *dev;
5578689a 2547 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2548 if (!dev) {
2549 err = -ENODEV;
ab364a6f 2550 goto errout;
1da177e4
LT
2551 }
2552 }
2553
ab364a6f 2554 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 2555 if (!skb) {
ab364a6f
TG
2556 err = -ENOBUFS;
2557 goto errout;
2558 }
1da177e4 2559
ab364a6f
TG
2560 /* Reserve room for dummy headers, this skb can pass
2561 through good chunk of routing engine.
2562 */
459a98ed 2563 skb_reset_mac_header(skb);
ab364a6f 2564 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2565
4c9483b2 2566 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
d8d1f30b 2567 skb_dst_set(skb, &rt->dst);
1da177e4 2568
4c9483b2 2569 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
1da177e4 2570 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2571 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2572 if (err < 0) {
ab364a6f
TG
2573 kfree_skb(skb);
2574 goto errout;
1da177e4
LT
2575 }
2576
5578689a 2577 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2578errout:
1da177e4 2579 return err;
1da177e4
LT
2580}
2581
86872cb5 2582void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2583{
2584 struct sk_buff *skb;
5578689a 2585 struct net *net = info->nl_net;
528c4ceb
DL
2586 u32 seq;
2587 int err;
2588
2589 err = -ENOBUFS;
38308473 2590 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 2591
339bf98f 2592 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
38308473 2593 if (!skb)
21713ebc
TG
2594 goto errout;
2595
191cd582 2596 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2597 event, info->pid, seq, 0, 0, 0);
26932566
PM
2598 if (err < 0) {
2599 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2600 WARN_ON(err == -EMSGSIZE);
2601 kfree_skb(skb);
2602 goto errout;
2603 }
1ce85fe4
PNA
2604 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2605 info->nlh, gfp_any());
2606 return;
21713ebc
TG
2607errout:
2608 if (err < 0)
5578689a 2609 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2610}
2611
8ed67789
DL
2612static int ip6_route_dev_notify(struct notifier_block *this,
2613 unsigned long event, void *data)
2614{
2615 struct net_device *dev = (struct net_device *)data;
c346dca1 2616 struct net *net = dev_net(dev);
8ed67789
DL
2617
2618 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2619 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2620 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2621#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2622 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2623 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2624 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2625 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2626#endif
2627 }
2628
2629 return NOTIFY_OK;
2630}
2631
1da177e4
LT
2632/*
2633 * /proc
2634 */
2635
2636#ifdef CONFIG_PROC_FS
2637
1da177e4
LT
/*
 * Buffer bookkeeping for a /proc read.
 * NOTE(review): nothing visible in this part of the file references this
 * structure any more (the readers below use seq_file) - confirm before
 * relying on it.
 */
struct rt6_proc_arg
{
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2646
2647static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2648{
33120b30 2649 struct seq_file *m = p_arg;
69cce1d1 2650 struct neighbour *n;
1da177e4 2651
4b7a4274 2652 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2653
2654#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2655 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2656#else
33120b30 2657 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4 2658#endif
f2c31e32 2659 rcu_read_lock();
27217455 2660 n = dst_get_neighbour_noref(&rt->dst);
69cce1d1
DM
2661 if (n) {
2662 seq_printf(m, "%pi6", n->primary_key);
1da177e4 2663 } else {
33120b30 2664 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2665 }
f2c31e32 2666 rcu_read_unlock();
33120b30 2667 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2668 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2669 rt->dst.__use, rt->rt6i_flags,
33120b30 2670 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1da177e4
LT
2671 return 0;
2672}
2673
33120b30 2674static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2675{
f3db4851
DL
2676 struct net *net = (struct net *)m->private;
2677 fib6_clean_all(net, rt6_info_route, 0, m);
33120b30
AD
2678 return 0;
2679}
1da177e4 2680
33120b30
AD
2681static int ipv6_route_open(struct inode *inode, struct file *file)
2682{
de05c557 2683 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2684}
2685
33120b30
AD
2686static const struct file_operations ipv6_route_proc_fops = {
2687 .owner = THIS_MODULE,
2688 .open = ipv6_route_open,
2689 .read = seq_read,
2690 .llseek = seq_lseek,
b6fcbdb4 2691 .release = single_release_net,
33120b30
AD
2692};
2693
1da177e4
LT
2694static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2695{
69ddb805 2696 struct net *net = (struct net *)seq->private;
1da177e4 2697 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2698 net->ipv6.rt6_stats->fib_nodes,
2699 net->ipv6.rt6_stats->fib_route_nodes,
2700 net->ipv6.rt6_stats->fib_rt_alloc,
2701 net->ipv6.rt6_stats->fib_rt_entries,
2702 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2703 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2704 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2705
2706 return 0;
2707}
2708
2709static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2710{
de05c557 2711 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2712}
2713
9a32144e 2714static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2715 .owner = THIS_MODULE,
2716 .open = rt6_stats_seq_open,
2717 .read = seq_read,
2718 .llseek = seq_lseek,
b6fcbdb4 2719 .release = single_release_net,
1da177e4
LT
2720};
2721#endif /* CONFIG_PROC_FS */
2722
2723#ifdef CONFIG_SYSCTL
2724
1da177e4 2725static
8d65af78 2726int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2727 void __user *buffer, size_t *lenp, loff_t *ppos)
2728{
c486da34
LAG
2729 struct net *net;
2730 int delay;
2731 if (!write)
1da177e4 2732 return -EINVAL;
c486da34
LAG
2733
2734 net = (struct net *)ctl->extra1;
2735 delay = net->ipv6.sysctl.flush_delay;
2736 proc_dointvec(ctl, write, buffer, lenp, ppos);
2737 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2738 return 0;
1da177e4
LT
2739}
2740
760f2d01 2741ctl_table ipv6_route_table_template[] = {
1ab1457c 2742 {
1da177e4 2743 .procname = "flush",
4990509f 2744 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2745 .maxlen = sizeof(int),
89c8b3a1 2746 .mode = 0200,
6d9f239a 2747 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2748 },
2749 {
1da177e4 2750 .procname = "gc_thresh",
9a7ec3a9 2751 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2752 .maxlen = sizeof(int),
2753 .mode = 0644,
6d9f239a 2754 .proc_handler = proc_dointvec,
1da177e4
LT
2755 },
2756 {
1da177e4 2757 .procname = "max_size",
4990509f 2758 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2759 .maxlen = sizeof(int),
2760 .mode = 0644,
6d9f239a 2761 .proc_handler = proc_dointvec,
1da177e4
LT
2762 },
2763 {
1da177e4 2764 .procname = "gc_min_interval",
4990509f 2765 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2766 .maxlen = sizeof(int),
2767 .mode = 0644,
6d9f239a 2768 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2769 },
2770 {
1da177e4 2771 .procname = "gc_timeout",
4990509f 2772 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2773 .maxlen = sizeof(int),
2774 .mode = 0644,
6d9f239a 2775 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2776 },
2777 {
1da177e4 2778 .procname = "gc_interval",
4990509f 2779 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2780 .maxlen = sizeof(int),
2781 .mode = 0644,
6d9f239a 2782 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2783 },
2784 {
1da177e4 2785 .procname = "gc_elasticity",
4990509f 2786 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2787 .maxlen = sizeof(int),
2788 .mode = 0644,
f3d3f616 2789 .proc_handler = proc_dointvec,
1da177e4
LT
2790 },
2791 {
1da177e4 2792 .procname = "mtu_expires",
4990509f 2793 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2794 .maxlen = sizeof(int),
2795 .mode = 0644,
6d9f239a 2796 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2797 },
2798 {
1da177e4 2799 .procname = "min_adv_mss",
4990509f 2800 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2801 .maxlen = sizeof(int),
2802 .mode = 0644,
f3d3f616 2803 .proc_handler = proc_dointvec,
1da177e4
LT
2804 },
2805 {
1da177e4 2806 .procname = "gc_min_interval_ms",
4990509f 2807 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2808 .maxlen = sizeof(int),
2809 .mode = 0644,
6d9f239a 2810 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2811 },
f8572d8f 2812 { }
1da177e4
LT
2813};
2814
2c8c1e72 2815struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2816{
2817 struct ctl_table *table;
2818
2819 table = kmemdup(ipv6_route_table_template,
2820 sizeof(ipv6_route_table_template),
2821 GFP_KERNEL);
5ee09105
YH
2822
2823 if (table) {
2824 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 2825 table[0].extra1 = net;
86393e52 2826 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2827 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2828 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2829 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2830 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2831 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2832 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2833 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2834 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2835 }
2836
760f2d01
DL
2837 return table;
2838}
1da177e4
LT
2839#endif
2840
2c8c1e72 2841static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2842{
633d424b 2843 int ret = -ENOMEM;
8ed67789 2844
86393e52
AD
2845 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2846 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2847
fc66f95c
ED
2848 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2849 goto out_ip6_dst_ops;
2850
8ed67789
DL
2851 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2852 sizeof(*net->ipv6.ip6_null_entry),
2853 GFP_KERNEL);
2854 if (!net->ipv6.ip6_null_entry)
fc66f95c 2855 goto out_ip6_dst_entries;
d8d1f30b 2856 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2857 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2858 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2859 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2860 ip6_template_metrics, true);
8ed67789
DL
2861
2862#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2863 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2864 sizeof(*net->ipv6.ip6_prohibit_entry),
2865 GFP_KERNEL);
68fffc67
PZ
2866 if (!net->ipv6.ip6_prohibit_entry)
2867 goto out_ip6_null_entry;
d8d1f30b 2868 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2869 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2870 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2871 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2872 ip6_template_metrics, true);
8ed67789
DL
2873
2874 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2875 sizeof(*net->ipv6.ip6_blk_hole_entry),
2876 GFP_KERNEL);
68fffc67
PZ
2877 if (!net->ipv6.ip6_blk_hole_entry)
2878 goto out_ip6_prohibit_entry;
d8d1f30b 2879 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2880 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2881 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2882 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2883 ip6_template_metrics, true);
8ed67789
DL
2884#endif
2885
b339a47c
PZ
2886 net->ipv6.sysctl.flush_delay = 0;
2887 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2888 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2889 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2890 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2891 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2892 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2893 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2894
cdb18761
DL
2895#ifdef CONFIG_PROC_FS
2896 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2897 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2898#endif
6891a346
BT
2899 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2900
8ed67789
DL
2901 ret = 0;
2902out:
2903 return ret;
f2fc6a54 2904
68fffc67
PZ
2905#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2906out_ip6_prohibit_entry:
2907 kfree(net->ipv6.ip6_prohibit_entry);
2908out_ip6_null_entry:
2909 kfree(net->ipv6.ip6_null_entry);
2910#endif
fc66f95c
ED
2911out_ip6_dst_entries:
2912 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2913out_ip6_dst_ops:
f2fc6a54 2914 goto out;
cdb18761
DL
2915}
2916
2c8c1e72 2917static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761
DL
2918{
2919#ifdef CONFIG_PROC_FS
2920 proc_net_remove(net, "ipv6_route");
2921 proc_net_remove(net, "rt6_stats");
2922#endif
8ed67789
DL
2923 kfree(net->ipv6.ip6_null_entry);
2924#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2925 kfree(net->ipv6.ip6_prohibit_entry);
2926 kfree(net->ipv6.ip6_blk_hole_entry);
2927#endif
41bb78b4 2928 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
2929}
2930
2931static struct pernet_operations ip6_route_net_ops = {
2932 .init = ip6_route_net_init,
2933 .exit = ip6_route_net_exit,
2934};
2935
8ed67789
DL
2936static struct notifier_block ip6_route_dev_notifier = {
2937 .notifier_call = ip6_route_dev_notify,
2938 .priority = 0,
2939};
2940
433d49c3 2941int __init ip6_route_init(void)
1da177e4 2942{
433d49c3
DL
2943 int ret;
2944
9a7ec3a9
DL
2945 ret = -ENOMEM;
2946 ip6_dst_ops_template.kmem_cachep =
e5d679f3 2947 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 2948 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 2949 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 2950 goto out;
14e50e57 2951
fc66f95c 2952 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 2953 if (ret)
bdb3289f 2954 goto out_kmem_cache;
bdb3289f 2955
fc66f95c
ED
2956 ret = register_pernet_subsys(&ip6_route_net_ops);
2957 if (ret)
2958 goto out_dst_entries;
2959
5dc121e9
AE
2960 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2961
8ed67789
DL
2962 /* Registering of the loopback is done before this portion of code,
2963 * the loopback reference in rt6_info will not be taken, do it
2964 * manually for init_net */
d8d1f30b 2965 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2966 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2967 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2968 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 2969 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 2970 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2971 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2972 #endif
433d49c3
DL
2973 ret = fib6_init();
2974 if (ret)
8ed67789 2975 goto out_register_subsys;
433d49c3 2976
433d49c3
DL
2977 ret = xfrm6_init();
2978 if (ret)
cdb18761 2979 goto out_fib6_init;
c35b7e72 2980
433d49c3
DL
2981 ret = fib6_rules_init();
2982 if (ret)
2983 goto xfrm6_init;
7e5449c2 2984
433d49c3 2985 ret = -ENOBUFS;
c7ac8679
GR
2986 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
2987 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
2988 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
433d49c3 2989 goto fib6_rules_init;
c127ea2c 2990
8ed67789 2991 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761
DL
2992 if (ret)
2993 goto fib6_rules_init;
8ed67789 2994
433d49c3
DL
2995out:
2996 return ret;
2997
2998fib6_rules_init:
433d49c3
DL
2999 fib6_rules_cleanup();
3000xfrm6_init:
433d49c3 3001 xfrm6_fini();
433d49c3 3002out_fib6_init:
433d49c3 3003 fib6_gc_cleanup();
8ed67789
DL
3004out_register_subsys:
3005 unregister_pernet_subsys(&ip6_route_net_ops);
fc66f95c
ED
3006out_dst_entries:
3007 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 3008out_kmem_cache:
f2fc6a54 3009 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 3010 goto out;
1da177e4
LT
3011}
3012
3013void ip6_route_cleanup(void)
3014{
8ed67789 3015 unregister_netdevice_notifier(&ip6_route_dev_notifier);
101367c2 3016 fib6_rules_cleanup();
1da177e4 3017 xfrm6_fini();
1da177e4 3018 fib6_gc_cleanup();
8ed67789 3019 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3020 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3021 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3022}
This page took 4.052049 seconds and 5 git commands to generate.