mlx4_core: limiting VF port options
[deliverable/linux.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4 28#include <linux/errno.h>
bc3b2d7f 29#include <linux/export.h>
1da177e4
LT
30#include <linux/types.h>
31#include <linux/times.h>
32#include <linux/socket.h>
33#include <linux/sockios.h>
34#include <linux/net.h>
35#include <linux/route.h>
36#include <linux/netdevice.h>
37#include <linux/in6.h>
7bc570c8 38#include <linux/mroute6.h>
1da177e4 39#include <linux/init.h>
1da177e4 40#include <linux/if_arp.h>
1da177e4
LT
41#include <linux/proc_fs.h>
42#include <linux/seq_file.h>
5b7c931d 43#include <linux/nsproxy.h>
5a0e3ad6 44#include <linux/slab.h>
457c4cbc 45#include <net/net_namespace.h>
1da177e4
LT
46#include <net/snmp.h>
47#include <net/ipv6.h>
48#include <net/ip6_fib.h>
49#include <net/ip6_route.h>
50#include <net/ndisc.h>
51#include <net/addrconf.h>
52#include <net/tcp.h>
53#include <linux/rtnetlink.h>
54#include <net/dst.h>
55#include <net/xfrm.h>
8d71740c 56#include <net/netevent.h>
21713ebc 57#include <net/netlink.h>
1da177e4
LT
58
59#include <asm/uaccess.h>
60
61#ifdef CONFIG_SYSCTL
62#include <linux/sysctl.h>
63#endif
64
21efcfa0
ED
65static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
66 const struct in6_addr *dest);
1da177e4 67static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 68static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 69static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
70static struct dst_entry *ip6_negative_advice(struct dst_entry *);
71static void ip6_dst_destroy(struct dst_entry *);
72static void ip6_dst_ifdown(struct dst_entry *,
73 struct net_device *dev, int how);
569d3645 74static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
75
76static int ip6_pkt_discard(struct sk_buff *skb);
77static int ip6_pkt_discard_out(struct sk_buff *skb);
78static void ip6_link_failure(struct sk_buff *skb);
79static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
80
70ceb4f5 81#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 82static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
83 const struct in6_addr *prefix, int prefixlen,
84 const struct in6_addr *gwaddr, int ifindex,
70ceb4f5 85 unsigned pref);
efa2cea0 86static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
87 const struct in6_addr *prefix, int prefixlen,
88 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
89#endif
90
06582540
DM
91static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
92{
93 struct rt6_info *rt = (struct rt6_info *) dst;
94 struct inet_peer *peer;
95 u32 *p = NULL;
96
8e2ec639
YZ
97 if (!(rt->dst.flags & DST_HOST))
98 return NULL;
99
06582540
DM
100 if (!rt->rt6i_peer)
101 rt6_bind_peer(rt, 1);
102
103 peer = rt->rt6i_peer;
104 if (peer) {
105 u32 *old_p = __DST_METRICS_PTR(old);
106 unsigned long prev, new;
107
108 p = peer->metrics;
109 if (inet_metrics_new(peer))
110 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
111
112 new = (unsigned long) p;
113 prev = cmpxchg(&dst->_metrics, old, new);
114
115 if (prev != old) {
116 p = __DST_METRICS_PTR(prev);
117 if (prev & DST_METRICS_READ_ONLY)
118 p = NULL;
119 }
120 }
121 return p;
122}
123
d3aaeb38
DM
124static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
125{
f83c7790
DM
126 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
127 if (n)
128 return n;
129 return neigh_create(&nd_tbl, daddr, dst->dev);
130}
131
132static int rt6_bind_neighbour(struct rt6_info *rt)
133{
134 struct neighbour *n = ip6_neigh_lookup(&rt->dst, &rt->rt6i_gateway);
135 if (IS_ERR(n))
136 return PTR_ERR(n);
137 dst_set_neighbour(&rt->dst, n);
138
139 return 0;
d3aaeb38
DM
140}
141
9a7ec3a9 142static struct dst_ops ip6_dst_ops_template = {
1da177e4 143 .family = AF_INET6,
09640e63 144 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
145 .gc = ip6_dst_gc,
146 .gc_thresh = 1024,
147 .check = ip6_dst_check,
0dbaee3b 148 .default_advmss = ip6_default_advmss,
ebb762f2 149 .mtu = ip6_mtu,
06582540 150 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
151 .destroy = ip6_dst_destroy,
152 .ifdown = ip6_dst_ifdown,
153 .negative_advice = ip6_negative_advice,
154 .link_failure = ip6_link_failure,
155 .update_pmtu = ip6_rt_update_pmtu,
1ac06e03 156 .local_out = __ip6_local_out,
d3aaeb38 157 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
158};
159
ebb762f2 160static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 161{
618f9bc7
SK
162 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
163
164 return mtu ? : dst->dev->mtu;
ec831ea7
RD
165}
166
14e50e57
DM
167static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
168{
169}
170
0972ddb2
HB
171static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
172 unsigned long old)
173{
174 return NULL;
175}
176
14e50e57
DM
177static struct dst_ops ip6_dst_blackhole_ops = {
178 .family = AF_INET6,
09640e63 179 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
180 .destroy = ip6_dst_destroy,
181 .check = ip6_dst_check,
ebb762f2 182 .mtu = ip6_blackhole_mtu,
214f45c9 183 .default_advmss = ip6_default_advmss,
14e50e57 184 .update_pmtu = ip6_rt_blackhole_update_pmtu,
0972ddb2 185 .cow_metrics = ip6_rt_blackhole_cow_metrics,
d3aaeb38 186 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
187};
188
62fa8a84
DM
189static const u32 ip6_template_metrics[RTAX_MAX] = {
190 [RTAX_HOPLIMIT - 1] = 255,
191};
192
bdb3289f 193static struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
194 .dst = {
195 .__refcnt = ATOMIC_INIT(1),
196 .__use = 1,
197 .obsolete = -1,
198 .error = -ENETUNREACH,
d8d1f30b
CG
199 .input = ip6_pkt_discard,
200 .output = ip6_pkt_discard_out,
1da177e4
LT
201 },
202 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 203 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
204 .rt6i_metric = ~(u32) 0,
205 .rt6i_ref = ATOMIC_INIT(1),
206};
207
101367c2
TG
208#ifdef CONFIG_IPV6_MULTIPLE_TABLES
209
6723ab54
DM
210static int ip6_pkt_prohibit(struct sk_buff *skb);
211static int ip6_pkt_prohibit_out(struct sk_buff *skb);
6723ab54 212
280a34c8 213static struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
214 .dst = {
215 .__refcnt = ATOMIC_INIT(1),
216 .__use = 1,
217 .obsolete = -1,
218 .error = -EACCES,
d8d1f30b
CG
219 .input = ip6_pkt_prohibit,
220 .output = ip6_pkt_prohibit_out,
101367c2
TG
221 },
222 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 223 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
224 .rt6i_metric = ~(u32) 0,
225 .rt6i_ref = ATOMIC_INIT(1),
226};
227
bdb3289f 228static struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
229 .dst = {
230 .__refcnt = ATOMIC_INIT(1),
231 .__use = 1,
232 .obsolete = -1,
233 .error = -EINVAL,
d8d1f30b
CG
234 .input = dst_discard,
235 .output = dst_discard,
101367c2
TG
236 },
237 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 238 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
239 .rt6i_metric = ~(u32) 0,
240 .rt6i_ref = ATOMIC_INIT(1),
241};
242
243#endif
244
1da177e4 245/* allocate dst with ip6_dst_ops */
5c1e6aa3 246static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
957c665f
DM
247 struct net_device *dev,
248 int flags)
1da177e4 249{
957c665f 250 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
cf911662 251
38308473 252 if (rt)
fbe58186 253 memset(&rt->rt6i_table, 0,
38308473 254 sizeof(*rt) - sizeof(struct dst_entry));
cf911662
DM
255
256 return rt;
1da177e4
LT
257}
258
259static void ip6_dst_destroy(struct dst_entry *dst)
260{
261 struct rt6_info *rt = (struct rt6_info *)dst;
262 struct inet6_dev *idev = rt->rt6i_idev;
b3419363 263 struct inet_peer *peer = rt->rt6i_peer;
1da177e4 264
8e2ec639
YZ
265 if (!(rt->dst.flags & DST_HOST))
266 dst_destroy_metrics_generic(dst);
267
38308473 268 if (idev) {
1da177e4
LT
269 rt->rt6i_idev = NULL;
270 in6_dev_put(idev);
1ab1457c 271 }
b3419363 272 if (peer) {
b3419363
DM
273 rt->rt6i_peer = NULL;
274 inet_putpeer(peer);
275 }
276}
277
6431cbc2
DM
278static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
279
280static u32 rt6_peer_genid(void)
281{
282 return atomic_read(&__rt6_peer_genid);
283}
284
b3419363
DM
285void rt6_bind_peer(struct rt6_info *rt, int create)
286{
287 struct inet_peer *peer;
288
b3419363
DM
289 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
290 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
291 inet_putpeer(peer);
6431cbc2
DM
292 else
293 rt->rt6i_peer_genid = rt6_peer_genid();
1da177e4
LT
294}
295
296static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
297 int how)
298{
299 struct rt6_info *rt = (struct rt6_info *)dst;
300 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 301 struct net_device *loopback_dev =
c346dca1 302 dev_net(dev)->loopback_dev;
1da177e4 303
38308473 304 if (dev != loopback_dev && idev && idev->dev == dev) {
5a3e55d6
DL
305 struct inet6_dev *loopback_idev =
306 in6_dev_get(loopback_dev);
38308473 307 if (loopback_idev) {
1da177e4
LT
308 rt->rt6i_idev = loopback_idev;
309 in6_dev_put(idev);
310 }
311 }
312}
313
314static __inline__ int rt6_check_expired(const struct rt6_info *rt)
315{
a02cec21 316 return (rt->rt6i_flags & RTF_EXPIRES) &&
d1918542 317 time_after(jiffies, rt->dst.expires);
1da177e4
LT
318}
319
b71d1d42 320static inline int rt6_need_strict(const struct in6_addr *daddr)
c71099ac 321{
a02cec21
ED
322 return ipv6_addr_type(daddr) &
323 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
324}
325
1da177e4 326/*
c71099ac 327 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
328 */
329
8ed67789
DL
330static inline struct rt6_info *rt6_device_match(struct net *net,
331 struct rt6_info *rt,
b71d1d42 332 const struct in6_addr *saddr,
1da177e4 333 int oif,
d420895e 334 int flags)
1da177e4
LT
335{
336 struct rt6_info *local = NULL;
337 struct rt6_info *sprt;
338
dd3abc4e
YH
339 if (!oif && ipv6_addr_any(saddr))
340 goto out;
341
d8d1f30b 342 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 343 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
344
345 if (oif) {
1da177e4
LT
346 if (dev->ifindex == oif)
347 return sprt;
348 if (dev->flags & IFF_LOOPBACK) {
38308473 349 if (!sprt->rt6i_idev ||
1da177e4 350 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 351 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 352 continue;
1ab1457c 353 if (local && (!oif ||
1da177e4
LT
354 local->rt6i_idev->dev->ifindex == oif))
355 continue;
356 }
357 local = sprt;
358 }
dd3abc4e
YH
359 } else {
360 if (ipv6_chk_addr(net, saddr, dev,
361 flags & RT6_LOOKUP_F_IFACE))
362 return sprt;
1da177e4 363 }
dd3abc4e 364 }
1da177e4 365
dd3abc4e 366 if (oif) {
1da177e4
LT
367 if (local)
368 return local;
369
d420895e 370 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 371 return net->ipv6.ip6_null_entry;
1da177e4 372 }
dd3abc4e 373out:
1da177e4
LT
374 return rt;
375}
376
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If the neighbour attached to @rt is not in a NUD_VALID state and the
 * last update of the neighbour entry is older than the interface's
 * rtr_probe_interval, send a Neighbour Solicitation to the neighbour's
 * solicited-node multicast address.
 *
 * Router Reachability Probes MUST be rate-limited (to no more than one per
 * minute); the rate limit is implemented by stamping neigh->updated.
 * Because that stamp is a write to the neighbour entry, the check and the
 * update are done under the *write* side of neigh->lock: with only the
 * read side held, several CPUs could pass the time check concurrently and
 * each send a probe, and the store itself would race with other readers.
 *
 * @rt may be NULL, in which case nothing is done.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;

	rcu_read_lock();
	neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;
	if (!neigh || (neigh->nud_state & NUD_VALID))
		goto out;

	write_lock_bh(&neigh->lock);
	/* Re-check the state now that the lock is held. */
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		neigh->updated = jiffies;
		write_unlock_bh(&neigh->lock);

		/* The solicitation itself is sent without the lock held. */
		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
	} else {
		write_unlock_bh(&neigh->lock);
	}
out:
	rcu_read_unlock();
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
416
1da177e4 417/*
554cfb7e 418 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 419 */
b6f99a21 420static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 421{
d1918542 422 struct net_device *dev = rt->dst.dev;
161980f4 423 if (!oif || dev->ifindex == oif)
554cfb7e 424 return 2;
161980f4
DM
425 if ((dev->flags & IFF_LOOPBACK) &&
426 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
427 return 1;
428 return 0;
554cfb7e 429}
1da177e4 430
b6f99a21 431static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 432{
f2c31e32 433 struct neighbour *neigh;
398bcbeb 434 int m;
f2c31e32
ED
435
436 rcu_read_lock();
27217455 437 neigh = dst_get_neighbour_noref(&rt->dst);
4d0c5911
YH
438 if (rt->rt6i_flags & RTF_NONEXTHOP ||
439 !(rt->rt6i_flags & RTF_GATEWAY))
440 m = 1;
441 else if (neigh) {
554cfb7e
YH
442 read_lock_bh(&neigh->lock);
443 if (neigh->nud_state & NUD_VALID)
4d0c5911 444 m = 2;
398bcbeb
YH
445#ifdef CONFIG_IPV6_ROUTER_PREF
446 else if (neigh->nud_state & NUD_FAILED)
447 m = 0;
448#endif
449 else
ea73ee23 450 m = 1;
554cfb7e 451 read_unlock_bh(&neigh->lock);
398bcbeb
YH
452 } else
453 m = 0;
f2c31e32 454 rcu_read_unlock();
554cfb7e 455 return m;
1da177e4
LT
456}
457
554cfb7e
YH
458static int rt6_score_route(struct rt6_info *rt, int oif,
459 int strict)
1da177e4 460{
4d0c5911 461 int m, n;
1ab1457c 462
4d0c5911 463 m = rt6_check_dev(rt, oif);
77d16f45 464 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 465 return -1;
ebacaaa0
YH
466#ifdef CONFIG_IPV6_ROUTER_PREF
467 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
468#endif
4d0c5911 469 n = rt6_check_neigh(rt);
557e92ef 470 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
471 return -1;
472 return m;
473}
474
f11e6659
DM
475static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
476 int *mpri, struct rt6_info *match)
554cfb7e 477{
f11e6659
DM
478 int m;
479
480 if (rt6_check_expired(rt))
481 goto out;
482
483 m = rt6_score_route(rt, oif, strict);
484 if (m < 0)
485 goto out;
486
487 if (m > *mpri) {
488 if (strict & RT6_LOOKUP_F_REACHABLE)
489 rt6_probe(match);
490 *mpri = m;
491 match = rt;
492 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
493 rt6_probe(rt);
494 }
495
496out:
497 return match;
498}
499
500static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
501 struct rt6_info *rr_head,
502 u32 metric, int oif, int strict)
503{
504 struct rt6_info *rt, *match;
554cfb7e 505 int mpri = -1;
1da177e4 506
f11e6659
DM
507 match = NULL;
508 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 509 rt = rt->dst.rt6_next)
f11e6659
DM
510 match = find_match(rt, oif, strict, &mpri, match);
511 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 512 rt = rt->dst.rt6_next)
f11e6659 513 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 514
f11e6659
DM
515 return match;
516}
1da177e4 517
f11e6659
DM
518static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
519{
520 struct rt6_info *match, *rt0;
8ed67789 521 struct net *net;
1da177e4 522
f11e6659
DM
523 rt0 = fn->rr_ptr;
524 if (!rt0)
525 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 526
f11e6659 527 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 528
554cfb7e 529 if (!match &&
f11e6659 530 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 531 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 532
554cfb7e 533 /* no entries matched; do round-robin */
f11e6659
DM
534 if (!next || next->rt6i_metric != rt0->rt6i_metric)
535 next = fn->leaf;
536
537 if (next != rt0)
538 fn->rr_ptr = next;
1da177e4 539 }
1da177e4 540
d1918542 541 net = dev_net(rt0->dst.dev);
a02cec21 542 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
543}
544
70ceb4f5
YH
545#ifdef CONFIG_IPV6_ROUTE_INFO
546int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 547 const struct in6_addr *gwaddr)
70ceb4f5 548{
c346dca1 549 struct net *net = dev_net(dev);
70ceb4f5
YH
550 struct route_info *rinfo = (struct route_info *) opt;
551 struct in6_addr prefix_buf, *prefix;
552 unsigned int pref;
4bed72e4 553 unsigned long lifetime;
70ceb4f5
YH
554 struct rt6_info *rt;
555
556 if (len < sizeof(struct route_info)) {
557 return -EINVAL;
558 }
559
560 /* Sanity check for prefix_len and length */
561 if (rinfo->length > 3) {
562 return -EINVAL;
563 } else if (rinfo->prefix_len > 128) {
564 return -EINVAL;
565 } else if (rinfo->prefix_len > 64) {
566 if (rinfo->length < 2) {
567 return -EINVAL;
568 }
569 } else if (rinfo->prefix_len > 0) {
570 if (rinfo->length < 1) {
571 return -EINVAL;
572 }
573 }
574
575 pref = rinfo->route_pref;
576 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 577 return -EINVAL;
70ceb4f5 578
4bed72e4 579 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
580
581 if (rinfo->length == 3)
582 prefix = (struct in6_addr *)rinfo->prefix;
583 else {
584 /* this function is safe */
585 ipv6_addr_prefix(&prefix_buf,
586 (struct in6_addr *)rinfo->prefix,
587 rinfo->prefix_len);
588 prefix = &prefix_buf;
589 }
590
efa2cea0
DL
591 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
592 dev->ifindex);
70ceb4f5
YH
593
594 if (rt && !lifetime) {
e0a1ad73 595 ip6_del_rt(rt);
70ceb4f5
YH
596 rt = NULL;
597 }
598
599 if (!rt && lifetime)
efa2cea0 600 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
70ceb4f5
YH
601 pref);
602 else if (rt)
603 rt->rt6i_flags = RTF_ROUTEINFO |
604 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
605
606 if (rt) {
4bed72e4 607 if (!addrconf_finite_timeout(lifetime)) {
70ceb4f5
YH
608 rt->rt6i_flags &= ~RTF_EXPIRES;
609 } else {
d1918542 610 rt->dst.expires = jiffies + HZ * lifetime;
70ceb4f5
YH
611 rt->rt6i_flags |= RTF_EXPIRES;
612 }
d8d1f30b 613 dst_release(&rt->dst);
70ceb4f5
YH
614 }
615 return 0;
616}
617#endif
618
/*
 * BACKTRACK - climb the fib6 tree after a lookup produced the null entry.
 *
 * Expects the enclosing function to provide the locals `rt` and `fn` and
 * the labels `out` and `restart`.  When `rt` is the namespace's null entry
 * the macro walks towards the root: it jumps to `out` once the top-level
 * root is reached, descends into a parent's source subtree (looking up
 * @saddr there) when there is one that we did not just come from, and
 * jumps to `restart` as soon as a node carrying route information
 * (RTN_RTINFO) is found.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == __net->ipv6.ip6_null_entry) {				\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
c71099ac 636
8ed67789
DL
637static struct rt6_info *ip6_pol_route_lookup(struct net *net,
638 struct fib6_table *table,
4c9483b2 639 struct flowi6 *fl6, int flags)
1da177e4
LT
640{
641 struct fib6_node *fn;
642 struct rt6_info *rt;
643
c71099ac 644 read_lock_bh(&table->tb6_lock);
4c9483b2 645 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
646restart:
647 rt = fn->leaf;
4c9483b2
DM
648 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
649 BACKTRACK(net, &fl6->saddr);
c71099ac 650out:
d8d1f30b 651 dst_use(&rt->dst, jiffies);
c71099ac 652 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
653 return rt;
654
655}
656
ea6e574e
FW
657struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
658 int flags)
659{
660 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
661}
662EXPORT_SYMBOL_GPL(ip6_route_lookup);
663
9acd9f3a
YH
664struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
665 const struct in6_addr *saddr, int oif, int strict)
c71099ac 666{
4c9483b2
DM
667 struct flowi6 fl6 = {
668 .flowi6_oif = oif,
669 .daddr = *daddr,
c71099ac
TG
670 };
671 struct dst_entry *dst;
77d16f45 672 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 673
adaa70bb 674 if (saddr) {
4c9483b2 675 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
676 flags |= RT6_LOOKUP_F_HAS_SADDR;
677 }
678
4c9483b2 679 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
680 if (dst->error == 0)
681 return (struct rt6_info *) dst;
682
683 dst_release(dst);
684
1da177e4
LT
685 return NULL;
686}
687
7159039a
YH
688EXPORT_SYMBOL(rt6_lookup);
689
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In any case, if the caller does not hold a reference
   to the route, it may be destroyed.
 */
695
86872cb5 696static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
697{
698 int err;
c71099ac 699 struct fib6_table *table;
1da177e4 700
c71099ac
TG
701 table = rt->rt6i_table;
702 write_lock_bh(&table->tb6_lock);
86872cb5 703 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 704 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
705
706 return err;
707}
708
40e22e8f
TG
709int ip6_ins_rt(struct rt6_info *rt)
710{
4d1169c1 711 struct nl_info info = {
d1918542 712 .nl_net = dev_net(rt->dst.dev),
4d1169c1 713 };
528c4ceb 714 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
715}
716
21efcfa0
ED
717static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
718 const struct in6_addr *daddr,
b71d1d42 719 const struct in6_addr *saddr)
1da177e4 720{
1da177e4
LT
721 struct rt6_info *rt;
722
723 /*
724 * Clone the route.
725 */
726
21efcfa0 727 rt = ip6_rt_copy(ort, daddr);
1da177e4
LT
728
729 if (rt) {
14deae41
DM
730 int attempts = !in_softirq();
731
38308473 732 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
bb3c3686 733 if (ort->rt6i_dst.plen != 128 &&
21efcfa0 734 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
58c4fb86 735 rt->rt6i_flags |= RTF_ANYCAST;
4e3fd7a0 736 rt->rt6i_gateway = *daddr;
58c4fb86 737 }
1da177e4 738
1da177e4 739 rt->rt6i_flags |= RTF_CACHE;
1da177e4
LT
740
741#ifdef CONFIG_IPV6_SUBTREES
742 if (rt->rt6i_src.plen && saddr) {
4e3fd7a0 743 rt->rt6i_src.addr = *saddr;
1da177e4
LT
744 rt->rt6i_src.plen = 128;
745 }
746#endif
747
14deae41 748 retry:
f83c7790 749 if (rt6_bind_neighbour(rt)) {
d1918542 750 struct net *net = dev_net(rt->dst.dev);
14deae41
DM
751 int saved_rt_min_interval =
752 net->ipv6.sysctl.ip6_rt_gc_min_interval;
753 int saved_rt_elasticity =
754 net->ipv6.sysctl.ip6_rt_gc_elasticity;
755
756 if (attempts-- > 0) {
757 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
758 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
759
86393e52 760 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
761
762 net->ipv6.sysctl.ip6_rt_gc_elasticity =
763 saved_rt_elasticity;
764 net->ipv6.sysctl.ip6_rt_gc_min_interval =
765 saved_rt_min_interval;
766 goto retry;
767 }
768
769 if (net_ratelimit())
770 printk(KERN_WARNING
7e1b33e5 771 "ipv6: Neighbour table overflow.\n");
d8d1f30b 772 dst_free(&rt->dst);
14deae41
DM
773 return NULL;
774 }
95a9a5ba 775 }
1da177e4 776
95a9a5ba
YH
777 return rt;
778}
1da177e4 779
21efcfa0
ED
780static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
781 const struct in6_addr *daddr)
299d9939 782{
21efcfa0
ED
783 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
784
299d9939 785 if (rt) {
299d9939 786 rt->rt6i_flags |= RTF_CACHE;
27217455 787 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
299d9939
YH
788 }
789 return rt;
790}
791
8ed67789 792static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 793 struct flowi6 *fl6, int flags)
1da177e4
LT
794{
795 struct fib6_node *fn;
519fbd87 796 struct rt6_info *rt, *nrt;
c71099ac 797 int strict = 0;
1da177e4 798 int attempts = 3;
519fbd87 799 int err;
53b7997f 800 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 801
77d16f45 802 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
803
804relookup:
c71099ac 805 read_lock_bh(&table->tb6_lock);
1da177e4 806
8238dd06 807restart_2:
4c9483b2 808 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1da177e4
LT
809
810restart:
4acad72d 811 rt = rt6_select(fn, oif, strict | reachable);
8ed67789 812
4c9483b2 813 BACKTRACK(net, &fl6->saddr);
8ed67789 814 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 815 rt->rt6i_flags & RTF_CACHE)
1ddef044 816 goto out;
1da177e4 817
d8d1f30b 818 dst_hold(&rt->dst);
c71099ac 819 read_unlock_bh(&table->tb6_lock);
fb9de91e 820
27217455 821 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
4c9483b2 822 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
7343ff31 823 else if (!(rt->dst.flags & DST_HOST))
4c9483b2 824 nrt = rt6_alloc_clone(rt, &fl6->daddr);
7343ff31
DM
825 else
826 goto out2;
e40cf353 827
d8d1f30b 828 dst_release(&rt->dst);
8ed67789 829 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 830
d8d1f30b 831 dst_hold(&rt->dst);
519fbd87 832 if (nrt) {
40e22e8f 833 err = ip6_ins_rt(nrt);
519fbd87 834 if (!err)
1da177e4 835 goto out2;
1da177e4 836 }
1da177e4 837
519fbd87
YH
838 if (--attempts <= 0)
839 goto out2;
840
841 /*
c71099ac 842 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
843 * released someone could insert this route. Relookup.
844 */
d8d1f30b 845 dst_release(&rt->dst);
519fbd87
YH
846 goto relookup;
847
848out:
8238dd06
YH
849 if (reachable) {
850 reachable = 0;
851 goto restart_2;
852 }
d8d1f30b 853 dst_hold(&rt->dst);
c71099ac 854 read_unlock_bh(&table->tb6_lock);
1da177e4 855out2:
d8d1f30b
CG
856 rt->dst.lastuse = jiffies;
857 rt->dst.__use++;
c71099ac
TG
858
859 return rt;
1da177e4
LT
860}
861
8ed67789 862static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 863 struct flowi6 *fl6, int flags)
4acad72d 864{
4c9483b2 865 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
866}
867
c71099ac
TG
868void ip6_route_input(struct sk_buff *skb)
869{
b71d1d42 870 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 871 struct net *net = dev_net(skb->dev);
adaa70bb 872 int flags = RT6_LOOKUP_F_HAS_SADDR;
4c9483b2
DM
873 struct flowi6 fl6 = {
874 .flowi6_iif = skb->dev->ifindex,
875 .daddr = iph->daddr,
876 .saddr = iph->saddr,
38308473 877 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
4c9483b2
DM
878 .flowi6_mark = skb->mark,
879 .flowi6_proto = iph->nexthdr,
c71099ac 880 };
adaa70bb 881
1d6e55f1 882 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
adaa70bb 883 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 884
4c9483b2 885 skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
c71099ac
TG
886}
887
8ed67789 888static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 889 struct flowi6 *fl6, int flags)
1da177e4 890{
4c9483b2 891 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
892}
893
9c7a4f9c 894struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 895 struct flowi6 *fl6)
c71099ac
TG
896{
897 int flags = 0;
898
4c9483b2 899 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
77d16f45 900 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 901
4c9483b2 902 if (!ipv6_addr_any(&fl6->saddr))
adaa70bb 903 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
904 else if (sk)
905 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 906
4c9483b2 907 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4
LT
908}
909
7159039a 910EXPORT_SYMBOL(ip6_route_output);
1da177e4 911
2774c131 912struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 913{
5c1e6aa3 914 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
915 struct dst_entry *new = NULL;
916
5c1e6aa3 917 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
14e50e57 918 if (rt) {
cf911662
DM
919 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
920
d8d1f30b 921 new = &rt->dst;
14e50e57 922
14e50e57 923 new->__use = 1;
352e512c
HX
924 new->input = dst_discard;
925 new->output = dst_discard;
14e50e57 926
21efcfa0
ED
927 if (dst_metrics_read_only(&ort->dst))
928 new->_metrics = ort->dst._metrics;
929 else
930 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
931 rt->rt6i_idev = ort->rt6i_idev;
932 if (rt->rt6i_idev)
933 in6_dev_hold(rt->rt6i_idev);
d1918542 934 rt->dst.expires = 0;
14e50e57 935
4e3fd7a0 936 rt->rt6i_gateway = ort->rt6i_gateway;
14e50e57
DM
937 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
938 rt->rt6i_metric = 0;
939
940 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
941#ifdef CONFIG_IPV6_SUBTREES
942 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
943#endif
944
945 dst_free(new);
946 }
947
69ead7af
DM
948 dst_release(dst_orig);
949 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 950}
14e50e57 951
1da177e4
LT
952/*
953 * Destination cache support functions
954 */
955
956static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
957{
958 struct rt6_info *rt;
959
960 rt = (struct rt6_info *) dst;
961
6431cbc2
DM
962 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
963 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
964 if (!rt->rt6i_peer)
965 rt6_bind_peer(rt, 0);
966 rt->rt6i_peer_genid = rt6_peer_genid();
967 }
1da177e4 968 return dst;
6431cbc2 969 }
1da177e4
LT
970 return NULL;
971}
972
973static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
974{
975 struct rt6_info *rt = (struct rt6_info *) dst;
976
977 if (rt) {
54c1a859
YH
978 if (rt->rt6i_flags & RTF_CACHE) {
979 if (rt6_check_expired(rt)) {
980 ip6_del_rt(rt);
981 dst = NULL;
982 }
983 } else {
1da177e4 984 dst_release(dst);
54c1a859
YH
985 dst = NULL;
986 }
1da177e4 987 }
54c1a859 988 return dst;
1da177e4
LT
989}
990
991static void ip6_link_failure(struct sk_buff *skb)
992{
993 struct rt6_info *rt;
994
3ffe533c 995 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 996
adf30907 997 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 998 if (rt) {
38308473 999 if (rt->rt6i_flags & RTF_CACHE) {
d8d1f30b 1000 dst_set_expires(&rt->dst, 0);
1da177e4
LT
1001 rt->rt6i_flags |= RTF_EXPIRES;
1002 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1003 rt->rt6i_node->fn_sernum = -1;
1004 }
1005}
1006
1007static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1008{
1009 struct rt6_info *rt6 = (struct rt6_info*)dst;
1010
1011 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1012 rt6->rt6i_flags |= RTF_MODIFIED;
1013 if (mtu < IPV6_MIN_MTU) {
defb3519 1014 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 1015 mtu = IPV6_MIN_MTU;
defb3519
DM
1016 features |= RTAX_FEATURE_ALLFRAG;
1017 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 1018 }
defb3519 1019 dst_metric_set(dst, RTAX_MTU, mtu);
1da177e4
LT
1020 }
1021}
1022
0dbaee3b 1023static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1024{
0dbaee3b
DM
1025 struct net_device *dev = dst->dev;
1026 unsigned int mtu = dst_mtu(dst);
1027 struct net *net = dev_net(dev);
1028
1da177e4
LT
1029 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1030
5578689a
DL
1031 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1032 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1033
1034 /*
1ab1457c
YH
1035 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1036 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1037 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1038 * rely only on pmtu discovery"
1039 */
1040 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1041 mtu = IPV6_MAXPLEN;
1042 return mtu;
1043}
1044
ebb762f2 1045static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1046{
d33e4553 1047 struct inet6_dev *idev;
618f9bc7
SK
1048 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1049
1050 if (mtu)
1051 return mtu;
1052
1053 mtu = IPV6_MIN_MTU;
d33e4553
DM
1054
1055 rcu_read_lock();
1056 idev = __in6_dev_get(dst->dev);
1057 if (idev)
1058 mtu = idev->cnf.mtu6;
1059 rcu_read_unlock();
1060
1061 return mtu;
1062}
1063
3b00944c
YH
/*
 * Singly linked list (chained through dst->next) of the dst entries created
 * by icmp6_dst_alloc().  icmp6_dst_lock is taken around every access to the
 * list head and links in icmp6_dst_alloc(), icmp6_dst_gc() and
 * icmp6_clean_all().
 */
1064static struct dst_entry *icmp6_dst_gc_list;
1065static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1066
3b00944c 1067struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 1068 struct neighbour *neigh,
87a11578 1069 struct flowi6 *fl6)
1da177e4 1070{
87a11578 1071 struct dst_entry *dst;
1da177e4
LT
1072 struct rt6_info *rt;
1073 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1074 struct net *net = dev_net(dev);
1da177e4 1075
38308473 1076 if (unlikely(!idev))
1da177e4
LT
1077 return NULL;
1078
957c665f 1079 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
38308473 1080 if (unlikely(!rt)) {
1da177e4 1081 in6_dev_put(idev);
87a11578 1082 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1083 goto out;
1084 }
1085
1da177e4
LT
1086 if (neigh)
1087 neigh_hold(neigh);
14deae41 1088 else {
f83c7790 1089 neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr);
b43faac6
DM
1090 if (IS_ERR(neigh)) {
1091 dst_free(&rt->dst);
1092 return ERR_CAST(neigh);
1093 }
14deae41 1094 }
1da177e4 1095
8e2ec639
YZ
1096 rt->dst.flags |= DST_HOST;
1097 rt->dst.output = ip6_output;
69cce1d1 1098 dst_set_neighbour(&rt->dst, neigh);
d8d1f30b 1099 atomic_set(&rt->dst.__refcnt, 1);
87a11578 1100 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1101 rt->rt6i_dst.plen = 128;
1102 rt->rt6i_idev = idev;
7011687f 1103 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1da177e4 1104
3b00944c 1105 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1106 rt->dst.next = icmp6_dst_gc_list;
1107 icmp6_dst_gc_list = &rt->dst;
3b00944c 1108 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1109
5578689a 1110 fib6_force_start_gc(net);
1da177e4 1111
87a11578
DM
1112 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1113
1da177e4 1114out:
87a11578 1115 return dst;
1da177e4
LT
1116}
1117
3d0f24a7 1118int icmp6_dst_gc(void)
1da177e4 1119{
e9476e95 1120 struct dst_entry *dst, **pprev;
3d0f24a7 1121 int more = 0;
1da177e4 1122
3b00944c
YH
1123 spin_lock_bh(&icmp6_dst_lock);
1124 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1125
1da177e4
LT
1126 while ((dst = *pprev) != NULL) {
1127 if (!atomic_read(&dst->__refcnt)) {
1128 *pprev = dst->next;
1129 dst_free(dst);
1da177e4
LT
1130 } else {
1131 pprev = &dst->next;
3d0f24a7 1132 ++more;
1da177e4
LT
1133 }
1134 }
1135
3b00944c 1136 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1137
3d0f24a7 1138 return more;
1da177e4
LT
1139}
1140
1e493d19
DM
1141static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1142 void *arg)
1143{
1144 struct dst_entry *dst, **pprev;
1145
1146 spin_lock_bh(&icmp6_dst_lock);
1147 pprev = &icmp6_dst_gc_list;
1148 while ((dst = *pprev) != NULL) {
1149 struct rt6_info *rt = (struct rt6_info *) dst;
1150 if (func(rt, arg)) {
1151 *pprev = dst->next;
1152 dst_free(dst);
1153 } else {
1154 pprev = &dst->next;
1155 }
1156 }
1157 spin_unlock_bh(&icmp6_dst_lock);
1158}
1159
569d3645 1160static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1161{
1da177e4 1162 unsigned long now = jiffies;
86393e52 1163 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1164 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1165 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1166 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1167 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1168 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1169 int entries;
7019b78e 1170
fc66f95c 1171 entries = dst_entries_get_fast(ops);
7019b78e 1172 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1173 entries <= rt_max_size)
1da177e4
LT
1174 goto out;
1175
6891a346
BT
1176 net->ipv6.ip6_rt_gc_expire++;
1177 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1178 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1179 entries = dst_entries_get_slow(ops);
1180 if (entries < ops->gc_thresh)
7019b78e 1181 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1182out:
7019b78e 1183 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1184 return entries > rt_max_size;
1da177e4
LT
1185}
1186
1187/* Clean host part of a prefix. Not necessary in radix tree,
1188 but results in cleaner routing tables.
1189
1190 Remove it only when all the things will work!
1191 */
1192
6b75d090 1193int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1194{
5170ae82 1195 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1196 if (hoplimit == 0) {
6b75d090 1197 struct net_device *dev = dst->dev;
c68f24cc
ED
1198 struct inet6_dev *idev;
1199
1200 rcu_read_lock();
1201 idev = __in6_dev_get(dev);
1202 if (idev)
6b75d090 1203 hoplimit = idev->cnf.hop_limit;
c68f24cc 1204 else
53b7997f 1205 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1206 rcu_read_unlock();
1da177e4
LT
1207 }
1208 return hoplimit;
1209}
abbf46ae 1210EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1211
1212/*
1213 *
1214 */
1215
/*
 * Build a route from the request in @cfg and insert it into the FIB.
 *
 * @cfg may be modified: a zero fc_metric is replaced by IP6_RT_PRIO_USER,
 * RTPROT_UNSPEC is replaced by RTPROT_BOOT, and fc_nlinfo.nl_net is
 * overwritten with the namespace of the device finally chosen.
 *
 * On the success path the device and inet6_dev references taken here are
 * stored in the route and the return value is whatever __ip6_ins_rt()
 * returns.  On every failure path control reaches the 'out' label, which
 * drops the device/inet6_dev references, frees the half-built route and
 * returns a negative errno.
 */
86872cb5 1216int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1217{
1218 int err;
5578689a 1219 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1220 struct rt6_info *rt = NULL;
1221 struct net_device *dev = NULL;
1222 struct inet6_dev *idev = NULL;
c71099ac 1223 struct fib6_table *table;
1da177e4
LT
1224 int addr_type;
1225
86872cb5 1226 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1227 return -EINVAL;
1228#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1229 if (cfg->fc_src_len)
1da177e4
LT
1230 return -EINVAL;
1231#endif
/* An explicit interface index pins both the device and its inet6_dev. */
86872cb5 1232 if (cfg->fc_ifindex) {
1da177e4 1233 err = -ENODEV;
5578689a 1234 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1235 if (!dev)
1236 goto out;
1237 idev = in6_dev_get(dev);
1238 if (!idev)
1239 goto out;
1240 }
1241
86872cb5
TG
1242 if (cfg->fc_metric == 0)
1243 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1244
/*
 * Table selection: a netlink request without NLM_F_CREATE first looks for an
 * existing table and only warns (then still creates one) when none exists;
 * every other request goes straight to fib6_new_table().
 */
d71314b4 1245 err = -ENOBUFS;
38308473
DM
1246 if (cfg->fc_nlinfo.nlh &&
1247 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1248 table = fib6_get_table(net, cfg->fc_table);
38308473 1249 if (!table) {
d71314b4
MV
1250 printk(KERN_WARNING "IPv6: NLM_F_CREATE should be specified when creating new route\n");
1251 table = fib6_new_table(net, cfg->fc_table);
1252 }
1253 } else {
1254 table = fib6_new_table(net, cfg->fc_table);
1255 }
38308473
DM
1256
1257 if (!table)
c71099ac 1258 goto out;
c71099ac 1259
957c665f 1260 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
1da177e4 1261
38308473 1262 if (!rt) {
1da177e4
LT
1263 err = -ENOMEM;
1264 goto out;
1265 }
1266
d8d1f30b 1267 rt->dst.obsolete = -1;
d1918542 1268 rt->dst.expires = (cfg->fc_flags & RTF_EXPIRES) ?
6f704992
YH
1269 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1270 0;
1da177e4 1271
86872cb5
TG
1272 if (cfg->fc_protocol == RTPROT_UNSPEC)
1273 cfg->fc_protocol = RTPROT_BOOT;
1274 rt->rt6i_protocol = cfg->fc_protocol;
1275
1276 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1277
/* Input handler depends on the destination type and on RTF_LOCAL. */
1278 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1279 rt->dst.input = ip6_mc_input;
ab79ad14
1280 else if (cfg->fc_flags & RTF_LOCAL)
1281 rt->dst.input = ip6_input;
1da177e4 1282 else
d8d1f30b 1283 rt->dst.input = ip6_forward;
1da177e4 1284
d8d1f30b 1285 rt->dst.output = ip6_output;
1da177e4 1286
86872cb5
TG
1287 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1288 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1289 if (rt->rt6i_dst.plen == 128)
11d53b49 1290 rt->dst.flags |= DST_HOST;
1da177e4 1291
/* Non-host routes with user supplied metrics get a zeroed metrics array. */
8e2ec639
YZ
1292 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1293 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1294 if (!metrics) {
1295 err = -ENOMEM;
1296 goto out;
1297 }
1298 dst_init_metrics(&rt->dst, metrics, 0);
1299 }
1da177e4 1300#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1301 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1302 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1303#endif
1304
86872cb5 1305 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1306
1307 /* We cannot add true routes via loopback here,
1308 they would result in kernel looping; promote them to reject routes
1309 */
86872cb5 1310 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1311 (dev && (dev->flags & IFF_LOOPBACK) &&
1312 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1313 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1314 /* hold loopback dev/idev if we haven't done so. */
5578689a 1315 if (dev != net->loopback_dev) {
1da177e4
LT
1316 if (dev) {
1317 dev_put(dev);
1318 in6_dev_put(idev);
1319 }
5578689a 1320 dev = net->loopback_dev;
1da177e4
LT
1321 dev_hold(dev);
1322 idev = in6_dev_get(dev);
1323 if (!idev) {
1324 err = -ENODEV;
1325 goto out;
1326 }
1327 }
d8d1f30b
CG
1328 rt->dst.output = ip6_pkt_discard_out;
1329 rt->dst.input = ip6_pkt_discard;
1330 rt->dst.error = -ENETUNREACH;
1da177e4
LT
1331 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1332 goto install_route;
1333 }
1334
86872cb5 1335 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1336 const struct in6_addr *gw_addr;
1da177e4
LT
1337 int gwa_type;
1338
86872cb5 1339 gw_addr = &cfg->fc_gateway;
4e3fd7a0 1340 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
1341 gwa_type = ipv6_addr_type(gw_addr);
1342
1343 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1344 struct rt6_info *grt;
1345
1346 /* IPv6 strictly inhibits using not link-local
1347 addresses as nexthop address.
1348 Otherwise, router will not able to send redirects.
1349 It is very good, but in some (rare!) circumstances
1350 (SIT, PtP, NBMA NOARP links) it is handy to allow
1351 some exceptions. --ANK
1352 */
1353 err = -EINVAL;
38308473 1354 if (!(gwa_type & IPV6_ADDR_UNICAST))
1da177e4
LT
1355 goto out;
1356
5578689a 1357 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1358
1359 err = -EHOSTUNREACH;
38308473 1360 if (!grt)
1da177e4
LT
1361 goto out;
1362 if (dev) {
d1918542 1363 if (dev != grt->dst.dev) {
d8d1f30b 1364 dst_release(&grt->dst);
1da177e4
LT
1365 goto out;
1366 }
1367 } else {
/* No device requested: adopt the one the gateway route uses. */
d1918542 1368 dev = grt->dst.dev;
1da177e4
LT
1369 idev = grt->rt6i_idev;
1370 dev_hold(dev);
1371 in6_dev_hold(grt->rt6i_idev);
1372 }
/* err stays -EHOSTUNREACH when the gateway itself is behind a gateway. */
38308473 1373 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 1374 err = 0;
d8d1f30b 1375 dst_release(&grt->dst);
1da177e4
LT
1376
1377 if (err)
1378 goto out;
1379 }
1380 err = -EINVAL;
38308473 1381 if (!dev || (dev->flags & IFF_LOOPBACK))
1da177e4
LT
1382 goto out;
1383 }
1384
1385 err = -ENODEV;
38308473 1386 if (!dev)
1da177e4
LT
1387 goto out;
1388
/* A preferred source address must be configured on the chosen device. */
c3968a85
DW
1389 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1390 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1391 err = -EINVAL;
1392 goto out;
1393 }
4e3fd7a0 1394 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
1395 rt->rt6i_prefsrc.plen = 128;
1396 } else
1397 rt->rt6i_prefsrc.plen = 0;
1398
86872cb5 1399 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
f83c7790
DM
1400 err = rt6_bind_neighbour(rt);
1401 if (err)
1da177e4 1402 goto out;
1da177e4
LT
1403 }
1404
86872cb5 1405 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1406
/*
 * Common tail, also reached directly by reject routes: copy the user
 * supplied metric attributes (type 0 is skipped, anything above RTAX_MAX is
 * rejected), hand the device/inet6_dev references to the route and insert.
 */
1407install_route:
86872cb5
TG
1408 if (cfg->fc_mx) {
1409 struct nlattr *nla;
1410 int remaining;
1411
1412 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1413 int type = nla_type(nla);
86872cb5
TG
1414
1415 if (type) {
1416 if (type > RTAX_MAX) {
1da177e4
LT
1417 err = -EINVAL;
1418 goto out;
1419 }
86872cb5 1420
defb3519 1421 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1422 }
1da177e4
LT
1423 }
1424 }
1425
d8d1f30b 1426 rt->dst.dev = dev;
1da177e4 1427 rt->rt6i_idev = idev;
c71099ac 1428 rt->rt6i_table = table;
63152fc0 1429
c346dca1 1430 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1431
86872cb5 1432 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1433
/* Failure exit: undo the references taken above and free the route. */
1434out:
1435 if (dev)
1436 dev_put(dev);
1437 if (idev)
1438 in6_dev_put(idev);
1439 if (rt)
d8d1f30b 1440 dst_free(&rt->dst);
1da177e4
LT
1441 return err;
1442}
1443
86872cb5 1444static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1445{
1446 int err;
c71099ac 1447 struct fib6_table *table;
d1918542 1448 struct net *net = dev_net(rt->dst.dev);
1da177e4 1449
8ed67789 1450 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1451 return -ENOENT;
1452
c71099ac
TG
1453 table = rt->rt6i_table;
1454 write_lock_bh(&table->tb6_lock);
1da177e4 1455
86872cb5 1456 err = fib6_del(rt, info);
d8d1f30b 1457 dst_release(&rt->dst);
1da177e4 1458
c71099ac 1459 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1460
1461 return err;
1462}
1463
e0a1ad73
TG
1464int ip6_del_rt(struct rt6_info *rt)
1465{
4d1169c1 1466 struct nl_info info = {
d1918542 1467 .nl_net = dev_net(rt->dst.dev),
4d1169c1 1468 };
528c4ceb 1469 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1470}
1471
86872cb5 1472static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1473{
c71099ac 1474 struct fib6_table *table;
1da177e4
LT
1475 struct fib6_node *fn;
1476 struct rt6_info *rt;
1477 int err = -ESRCH;
1478
5578689a 1479 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 1480 if (!table)
c71099ac
TG
1481 return err;
1482
1483 read_lock_bh(&table->tb6_lock);
1da177e4 1484
c71099ac 1485 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1486 &cfg->fc_dst, cfg->fc_dst_len,
1487 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1488
1da177e4 1489 if (fn) {
d8d1f30b 1490 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1491 if (cfg->fc_ifindex &&
d1918542
DM
1492 (!rt->dst.dev ||
1493 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 1494 continue;
86872cb5
TG
1495 if (cfg->fc_flags & RTF_GATEWAY &&
1496 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1497 continue;
86872cb5 1498 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1499 continue;
d8d1f30b 1500 dst_hold(&rt->dst);
c71099ac 1501 read_unlock_bh(&table->tb6_lock);
1da177e4 1502
86872cb5 1503 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1504 }
1505 }
c71099ac 1506 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1507
1508 return err;
1509}
1510
1511/*
1512 * Handle redirects
1513 */
/*
 * Lookup key used when validating a redirect: the ordinary flow plus the
 * address of the router that sent the redirect.
 *
 * fl6 must stay the first member: __ip6_route_redirect() is handed a
 * struct flowi6 pointer and casts it back to this structure to reach
 * 'gateway'.
 */
a6279458 1514struct ip6rd_flowi {
4c9483b2 1515 struct flowi6 fl6;
a6279458
YH
1516 struct in6_addr gateway;
1517};
1518
8ed67789
DL
/*
 * Per-table lookup callback used by ip6_route_redirect().
 *
 * @fl6 is really the fl6 member of a struct ip6rd_flowi, which is how the
 * redirecting router's address is recovered.  The routes of the node found
 * for the flow are scanned for an unexpired gateway route on the flow's
 * outgoing interface whose gateway equals that address.
 *
 * Returns the matching route, or the namespace's ip6_null_entry when there
 * is none, with a reference held in either case.
 *
 * NOTE(review): BACKTRACK is a macro defined outside this block; it appears
 * to rely on the local names 'rt' and 'fn' and on the 'restart' and 'out'
 * labels, so confirm against its definition before renaming any of them.
 */
8ed67789
DL
1519static struct rt6_info *__ip6_route_redirect(struct net *net,
1520 struct fib6_table *table,
4c9483b2 1521 struct flowi6 *fl6,
a6279458 1522 int flags)
1da177e4 1523{
4c9483b2 1524 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
a6279458 1525 struct rt6_info *rt;
e843b9e1 1526 struct fib6_node *fn;
c71099ac 1527
1da177e4 1528 /*
e843b9e1
YH
1529 * Get the "current" route for this destination and
1530 * check if the redirect has come from appropriate router.
1531 *
1532 * RFC 2461 specifies that redirects should only be
1533 * accepted if they come from the nexthop to the target.
1534 * Due to the way the routes are chosen, this notion
1535 * is a bit fuzzy and one might need to check all possible
1536 * routes.
1da177e4 1537 */
1da177e4 1538
c71099ac 1539 read_lock_bh(&table->tb6_lock);
4c9483b2 1540 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
e843b9e1 1541restart:
d8d1f30b 1542 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
e843b9e1
YH
1543 /*
1544 * Current route is on-link; redirect is always invalid.
1545 *
1546 * Seems, previous statement is not true. It could
1547 * be node, which looks for us as on-link (f.e. proxy ndisc)
1548 * But then router serving it might decide, that we should
1549 * know truth 8)8) --ANK (980726).
1550 */
1551 if (rt6_check_expired(rt))
1552 continue;
1553 if (!(rt->rt6i_flags & RTF_GATEWAY))
1554 continue;
d1918542 1555 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
e843b9e1 1556 continue;
a6279458 1557 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1558 continue;
1559 break;
1560 }
a6279458 1561
/* No candidate in this node: fall back to the null entry. */
cb15d9c2 1562 if (!rt)
8ed67789 1563 rt = net->ipv6.ip6_null_entry;
4c9483b2 1564 BACKTRACK(net, &fl6->saddr);
cb15d9c2 1565out:
/* Take the reference while the table lock is still held. */
d8d1f30b 1566 dst_hold(&rt->dst);
a6279458 1567
c71099ac 1568 read_unlock_bh(&table->tb6_lock);
e843b9e1 1569
a6279458
YH
1570 return rt;
1571};
1572
b71d1d42
ED
1573static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1574 const struct in6_addr *src,
1575 const struct in6_addr *gateway,
a6279458
YH
1576 struct net_device *dev)
1577{
adaa70bb 1578 int flags = RT6_LOOKUP_F_HAS_SADDR;
c346dca1 1579 struct net *net = dev_net(dev);
a6279458 1580 struct ip6rd_flowi rdfl = {
4c9483b2
DM
1581 .fl6 = {
1582 .flowi6_oif = dev->ifindex,
1583 .daddr = *dest,
1584 .saddr = *src,
a6279458 1585 },
a6279458 1586 };
adaa70bb 1587
4e3fd7a0 1588 rdfl.gateway = *gateway;
86c36ce4 1589
adaa70bb
TG
1590 if (rt6_need_strict(dest))
1591 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1592
4c9483b2 1593 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
58f09b78 1594 flags, __ip6_route_redirect);
a6279458
YH
1595}
1596
b71d1d42
ED
1597void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1598 const struct in6_addr *saddr,
a6279458
YH
1599 struct neighbour *neigh, u8 *lladdr, int on_link)
1600{
1601 struct rt6_info *rt, *nrt = NULL;
1602 struct netevent_redirect netevent;
c346dca1 1603 struct net *net = dev_net(neigh->dev);
a6279458
YH
1604
1605 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1606
8ed67789 1607 if (rt == net->ipv6.ip6_null_entry) {
1da177e4
LT
1608 if (net_ratelimit())
1609 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1610 "for redirect target\n");
a6279458 1611 goto out;
1da177e4
LT
1612 }
1613
1da177e4
LT
1614 /*
1615 * We have finally decided to accept it.
1616 */
1617
1ab1457c 1618 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1619 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1620 NEIGH_UPDATE_F_OVERRIDE|
1621 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1622 NEIGH_UPDATE_F_ISROUTER))
1623 );
1624
1625 /*
1626 * Redirect received -> path was valid.
1627 * Look, redirects are sent only in response to data packets,
1628 * so that this nexthop apparently is reachable. --ANK
1629 */
d8d1f30b 1630 dst_confirm(&rt->dst);
1da177e4
LT
1631
1632 /* Duplicate redirect: silently ignore. */
27217455 1633 if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
1da177e4
LT
1634 goto out;
1635
21efcfa0 1636 nrt = ip6_rt_copy(rt, dest);
38308473 1637 if (!nrt)
1da177e4
LT
1638 goto out;
1639
1640 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1641 if (on_link)
1642 nrt->rt6i_flags &= ~RTF_GATEWAY;
1643
4e3fd7a0 1644 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
69cce1d1 1645 dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1da177e4 1646
40e22e8f 1647 if (ip6_ins_rt(nrt))
1da177e4
LT
1648 goto out;
1649
d8d1f30b
CG
1650 netevent.old = &rt->dst;
1651 netevent.new = &nrt->dst;
8d71740c
TT
1652 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1653
38308473 1654 if (rt->rt6i_flags & RTF_CACHE) {
e0a1ad73 1655 ip6_del_rt(rt);
1da177e4
LT
1656 return;
1657 }
1658
1659out:
d8d1f30b 1660 dst_release(&rt->dst);
1da177e4
LT
1661}
1662
1663/*
1664 * Handle ICMP "packet too big" messages
1665 * i.e. Path MTU discovery
1666 */
1667
b71d1d42 1668static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
ae878ae2 1669 struct net *net, u32 pmtu, int ifindex)
1da177e4
LT
1670{
1671 struct rt6_info *rt, *nrt;
1672 int allfrag = 0;
d3052b55 1673again:
ae878ae2 1674 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
38308473 1675 if (!rt)
1da177e4
LT
1676 return;
1677
d3052b55
AV
1678 if (rt6_check_expired(rt)) {
1679 ip6_del_rt(rt);
1680 goto again;
1681 }
1682
d8d1f30b 1683 if (pmtu >= dst_mtu(&rt->dst))
1da177e4
LT
1684 goto out;
1685
1686 if (pmtu < IPV6_MIN_MTU) {
1687 /*
1ab1457c 1688 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1689 * MTU (1280) and a fragment header should always be included
1690 * after a node receiving Too Big message reporting PMTU is
1691 * less than the IPv6 Minimum Link MTU.
1692 */
1693 pmtu = IPV6_MIN_MTU;
1694 allfrag = 1;
1695 }
1696
1697 /* New mtu received -> path was valid.
1698 They are sent only in response to data packets,
1699 so that this nexthop apparently is reachable. --ANK
1700 */
d8d1f30b 1701 dst_confirm(&rt->dst);
1da177e4
LT
1702
1703 /* Host route. If it is static, it would be better
1704 not to override it, but add new one, so that
1705 when cache entry will expire old pmtu
1706 would return automatically.
1707 */
1708 if (rt->rt6i_flags & RTF_CACHE) {
defb3519
DM
1709 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1710 if (allfrag) {
1711 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1712 features |= RTAX_FEATURE_ALLFRAG;
1713 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1714 }
d8d1f30b 1715 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1716 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1717 goto out;
1718 }
1719
1720 /* Network route.
1721 Two cases are possible:
1722 1. It is connected route. Action: COW
1723 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1724 */
27217455 1725 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1726 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1727 else
1728 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1729
d5315b50 1730 if (nrt) {
defb3519
DM
1731 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1732 if (allfrag) {
1733 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1734 features |= RTAX_FEATURE_ALLFRAG;
1735 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1736 }
a1e78363
YH
1737
1738 /* According to RFC 1981, detecting PMTU increase shouldn't be
1739 * happened within 5 mins, the recommended timer is 10 mins.
1740 * Here this route expiration time is set to ip6_rt_mtu_expires
1741 * which is 10 mins. After 10 mins the decreased pmtu is expired
1742 * and detecting PMTU increase will be automatically happened.
1743 */
d8d1f30b 1744 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
a1e78363
YH
1745 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1746
40e22e8f 1747 ip6_ins_rt(nrt);
1da177e4 1748 }
1da177e4 1749out:
d8d1f30b 1750 dst_release(&rt->dst);
1da177e4
LT
1751}
1752
b71d1d42 1753void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
ae878ae2
1754 struct net_device *dev, u32 pmtu)
1755{
1756 struct net *net = dev_net(dev);
1757
1758 /*
1759 * RFC 1981 states that a node "MUST reduce the size of the packets it
1760 * is sending along the path" that caused the Packet Too Big message.
1761 * Since it's not possible in the general case to determine which
1762 * interface was used to send the original packet, we update the MTU
1763 * on the interface that will be used to send future packets. We also
1764 * update the MTU on the interface that received the Packet Too Big in
1765 * case the original packet was forced out that interface with
1766 * SO_BINDTODEVICE or similar. This is the next best thing to the
1767 * correct behaviour, which would be to update the MTU on all
1768 * interfaces.
1769 */
1770 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1771 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1772}
1773
1da177e4
LT
1774/*
1775 * Misc support functions
1776 */
1777
21efcfa0
ED
1778static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
1779 const struct in6_addr *dest)
1da177e4 1780{
d1918542 1781 struct net *net = dev_net(ort->dst.dev);
5c1e6aa3 1782 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
957c665f 1783 ort->dst.dev, 0);
1da177e4
LT
1784
1785 if (rt) {
d8d1f30b
CG
1786 rt->dst.input = ort->dst.input;
1787 rt->dst.output = ort->dst.output;
8e2ec639 1788 rt->dst.flags |= DST_HOST;
d8d1f30b 1789
4e3fd7a0 1790 rt->rt6i_dst.addr = *dest;
8e2ec639 1791 rt->rt6i_dst.plen = 128;
defb3519 1792 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b 1793 rt->dst.error = ort->dst.error;
1da177e4
LT
1794 rt->rt6i_idev = ort->rt6i_idev;
1795 if (rt->rt6i_idev)
1796 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1797 rt->dst.lastuse = jiffies;
d1918542 1798 rt->dst.expires = 0;
1da177e4 1799
4e3fd7a0 1800 rt->rt6i_gateway = ort->rt6i_gateway;
1da177e4
LT
1801 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1802 rt->rt6i_metric = 0;
1803
1da177e4
LT
1804#ifdef CONFIG_IPV6_SUBTREES
1805 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1806#endif
0f6c6392 1807 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
c71099ac 1808 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1809 }
1810 return rt;
1811}
1812
70ceb4f5 1813#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 1814static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
1815 const struct in6_addr *prefix, int prefixlen,
1816 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
1817{
1818 struct fib6_node *fn;
1819 struct rt6_info *rt = NULL;
c71099ac
TG
1820 struct fib6_table *table;
1821
efa2cea0 1822 table = fib6_get_table(net, RT6_TABLE_INFO);
38308473 1823 if (!table)
c71099ac 1824 return NULL;
70ceb4f5 1825
c71099ac
TG
1826 write_lock_bh(&table->tb6_lock);
1827 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1828 if (!fn)
1829 goto out;
1830
d8d1f30b 1831 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 1832 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
1833 continue;
1834 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1835 continue;
1836 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1837 continue;
d8d1f30b 1838 dst_hold(&rt->dst);
70ceb4f5
YH
1839 break;
1840 }
1841out:
c71099ac 1842 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1843 return rt;
1844}
1845
efa2cea0 1846static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
1847 const struct in6_addr *prefix, int prefixlen,
1848 const struct in6_addr *gwaddr, int ifindex,
70ceb4f5
YH
1849 unsigned pref)
1850{
86872cb5
TG
1851 struct fib6_config cfg = {
1852 .fc_table = RT6_TABLE_INFO,
238fc7ea 1853 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1854 .fc_ifindex = ifindex,
1855 .fc_dst_len = prefixlen,
1856 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1857 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1858 .fc_nlinfo.pid = 0,
1859 .fc_nlinfo.nlh = NULL,
1860 .fc_nlinfo.nl_net = net,
86872cb5
TG
1861 };
1862
4e3fd7a0
AD
1863 cfg.fc_dst = *prefix;
1864 cfg.fc_gateway = *gwaddr;
70ceb4f5 1865
e317da96
YH
1866 /* We should treat it as a default route if prefix length is 0. */
1867 if (!prefixlen)
86872cb5 1868 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1869
86872cb5 1870 ip6_route_add(&cfg);
70ceb4f5 1871
efa2cea0 1872 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1873}
1874#endif
1875
b71d1d42 1876struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 1877{
1da177e4 1878 struct rt6_info *rt;
c71099ac 1879 struct fib6_table *table;
1da177e4 1880
c346dca1 1881 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
38308473 1882 if (!table)
c71099ac 1883 return NULL;
1da177e4 1884
c71099ac 1885 write_lock_bh(&table->tb6_lock);
d8d1f30b 1886 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
d1918542 1887 if (dev == rt->dst.dev &&
045927ff 1888 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1889 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1890 break;
1891 }
1892 if (rt)
d8d1f30b 1893 dst_hold(&rt->dst);
c71099ac 1894 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1895 return rt;
1896}
1897
b71d1d42 1898struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
1899 struct net_device *dev,
1900 unsigned int pref)
1da177e4 1901{
86872cb5
TG
1902 struct fib6_config cfg = {
1903 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1904 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1905 .fc_ifindex = dev->ifindex,
1906 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1907 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1908 .fc_nlinfo.pid = 0,
1909 .fc_nlinfo.nlh = NULL,
c346dca1 1910 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1911 };
1da177e4 1912
4e3fd7a0 1913 cfg.fc_gateway = *gwaddr;
1da177e4 1914
86872cb5 1915 ip6_route_add(&cfg);
1da177e4 1916
1da177e4
LT
1917 return rt6_get_dflt_router(gwaddr, dev);
1918}
1919
7b4da532 1920void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1921{
1922 struct rt6_info *rt;
c71099ac
TG
1923 struct fib6_table *table;
1924
1925 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1926 table = fib6_get_table(net, RT6_TABLE_DFLT);
38308473 1927 if (!table)
c71099ac 1928 return;
1da177e4
LT
1929
1930restart:
c71099ac 1931 read_lock_bh(&table->tb6_lock);
d8d1f30b 1932 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1933 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1934 dst_hold(&rt->dst);
c71099ac 1935 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1936 ip6_del_rt(rt);
1da177e4
LT
1937 goto restart;
1938 }
1939 }
c71099ac 1940 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1941}
1942
5578689a
DL
1943static void rtmsg_to_fib6_config(struct net *net,
1944 struct in6_rtmsg *rtmsg,
86872cb5
TG
1945 struct fib6_config *cfg)
1946{
1947 memset(cfg, 0, sizeof(*cfg));
1948
1949 cfg->fc_table = RT6_TABLE_MAIN;
1950 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1951 cfg->fc_metric = rtmsg->rtmsg_metric;
1952 cfg->fc_expires = rtmsg->rtmsg_info;
1953 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1954 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1955 cfg->fc_flags = rtmsg->rtmsg_flags;
1956
5578689a 1957 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1958
4e3fd7a0
AD
1959 cfg->fc_dst = rtmsg->rtmsg_dst;
1960 cfg->fc_src = rtmsg->rtmsg_src;
1961 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
1962}
1963
5578689a 1964int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1965{
86872cb5 1966 struct fib6_config cfg;
1da177e4
LT
1967 struct in6_rtmsg rtmsg;
1968 int err;
1969
1970 switch(cmd) {
1971 case SIOCADDRT: /* Add a route */
1972 case SIOCDELRT: /* Delete a route */
1973 if (!capable(CAP_NET_ADMIN))
1974 return -EPERM;
1975 err = copy_from_user(&rtmsg, arg,
1976 sizeof(struct in6_rtmsg));
1977 if (err)
1978 return -EFAULT;
86872cb5 1979
5578689a 1980 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1981
1da177e4
LT
1982 rtnl_lock();
1983 switch (cmd) {
1984 case SIOCADDRT:
86872cb5 1985 err = ip6_route_add(&cfg);
1da177e4
LT
1986 break;
1987 case SIOCDELRT:
86872cb5 1988 err = ip6_route_del(&cfg);
1da177e4
LT
1989 break;
1990 default:
1991 err = -EINVAL;
1992 }
1993 rtnl_unlock();
1994
1995 return err;
3ff50b79 1996 }
1da177e4
LT
1997
1998 return -EINVAL;
1999}
2000
2001/*
2002 * Drop the packet on the floor
2003 */
2004
d5fdd6ba 2005static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2006{
612f09e8 2007 int type;
adf30907 2008 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2009 switch (ipstats_mib_noroutes) {
2010 case IPSTATS_MIB_INNOROUTES:
0660e03f 2011 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2012 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2013 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2014 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2015 break;
2016 }
2017 /* FALLTHROUGH */
2018 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2019 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2020 ipstats_mib_noroutes);
612f09e8
YH
2021 break;
2022 }
3ffe533c 2023 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2024 kfree_skb(skb);
2025 return 0;
2026}
2027
9ce8ade0
TG
2028static int ip6_pkt_discard(struct sk_buff *skb)
2029{
612f09e8 2030 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2031}
2032
20380731 2033static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 2034{
adf30907 2035 skb->dev = skb_dst(skb)->dev;
612f09e8 2036 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2037}
2038
6723ab54
DM
2039#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2040
9ce8ade0
TG
2041static int ip6_pkt_prohibit(struct sk_buff *skb)
2042{
612f09e8 2043 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2044}
2045
2046static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2047{
adf30907 2048 skb->dev = skb_dst(skb)->dev;
612f09e8 2049 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2050}
2051
6723ab54
DM
2052#endif
2053
1da177e4
LT
2054/*
2055 * Allocate a dst for local (unicast / anycast) address.
2056 */
2057
2058struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2059 const struct in6_addr *addr,
8f031519 2060 bool anycast)
1da177e4 2061{
c346dca1 2062 struct net *net = dev_net(idev->dev);
5c1e6aa3 2063 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
957c665f 2064 net->loopback_dev, 0);
f83c7790 2065 int err;
1da177e4 2066
38308473 2067 if (!rt) {
40385653
BG
2068 if (net_ratelimit())
2069 pr_warning("IPv6: Maximum number of routes reached,"
2070 " consider increasing route/max_size.\n");
1da177e4 2071 return ERR_PTR(-ENOMEM);
40385653 2072 }
1da177e4 2073
1da177e4
LT
2074 in6_dev_hold(idev);
2075
11d53b49 2076 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2077 rt->dst.input = ip6_input;
2078 rt->dst.output = ip6_output;
1da177e4 2079 rt->rt6i_idev = idev;
d8d1f30b 2080 rt->dst.obsolete = -1;
1da177e4
LT
2081
2082 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2083 if (anycast)
2084 rt->rt6i_flags |= RTF_ANYCAST;
2085 else
1da177e4 2086 rt->rt6i_flags |= RTF_LOCAL;
f83c7790
DM
2087 err = rt6_bind_neighbour(rt);
2088 if (err) {
d8d1f30b 2089 dst_free(&rt->dst);
f83c7790 2090 return ERR_PTR(err);
1da177e4
LT
2091 }
2092
4e3fd7a0 2093 rt->rt6i_dst.addr = *addr;
1da177e4 2094 rt->rt6i_dst.plen = 128;
5578689a 2095 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2096
d8d1f30b 2097 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2098
2099 return rt;
2100}
2101
c3968a85
DW
2102int ip6_route_get_saddr(struct net *net,
2103 struct rt6_info *rt,
b71d1d42 2104 const struct in6_addr *daddr,
c3968a85
DW
2105 unsigned int prefs,
2106 struct in6_addr *saddr)
2107{
2108 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2109 int err = 0;
2110 if (rt->rt6i_prefsrc.plen)
4e3fd7a0 2111 *saddr = rt->rt6i_prefsrc.addr;
c3968a85
DW
2112 else
2113 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2114 daddr, prefs, saddr);
2115 return err;
2116}
2117
2118/* remove deleted ip from prefsrc entries */
/* Walker argument for fib6_remove_prefsrc(). */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace owning the null entry */
	struct in6_addr *addr;		/* preferred source being removed */
};
2124
2125static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2126{
2127 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2128 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2129 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2130
d1918542 2131 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
2132 rt != net->ipv6.ip6_null_entry &&
2133 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2134 /* remove prefsrc entry */
2135 rt->rt6i_prefsrc.plen = 0;
2136 }
2137 return 0;
2138}
2139
2140void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2141{
2142 struct net *net = dev_net(ifp->idev->dev);
2143 struct arg_dev_net_ip adni = {
2144 .dev = ifp->idev->dev,
2145 .net = net,
2146 .addr = &ifp->addr,
2147 };
2148 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2149}
2150
/* Walker argument for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device going down; NULL matches any */
	struct net *net;		/* namespace owning the null entry */
};
2155
1da177e4
LT
2156static int fib6_ifdown(struct rt6_info *rt, void *arg)
2157{
bc3ef660 2158 const struct arg_dev_net *adn = arg;
2159 const struct net_device *dev = adn->dev;
8ed67789 2160
d1918542 2161 if ((rt->dst.dev == dev || !dev) &&
c159d30c 2162 rt != adn->net->ipv6.ip6_null_entry)
1da177e4 2163 return -1;
c159d30c 2164
1da177e4
LT
2165 return 0;
2166}
2167
f3db4851 2168void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2169{
8ed67789
DL
2170 struct arg_dev_net adn = {
2171 .dev = dev,
2172 .net = net,
2173 };
2174
2175 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2176 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2177}
2178
/* Walker argument for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned mtu;			/* the new MTU */
};
2184
2185static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2186{
2187 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2188 struct inet6_dev *idev;
2189
2190 /* In IPv6 pmtu discovery is not optional,
2191 so that RTAX_MTU lock cannot disable it.
2192 We still use this lock to block changes
2193 caused by addrconf/ndisc.
2194 */
2195
2196 idev = __in6_dev_get(arg->dev);
38308473 2197 if (!idev)
1da177e4
LT
2198 return 0;
2199
2200 /* For administrative MTU increase, there is no way to discover
2201 IPv6 PMTU increase, so PMTU increase should be updated here.
2202 Since RFC 1981 doesn't include administrative MTU increase
2203 update PMTU increase is a MUST. (i.e. jumbo frame)
2204 */
2205 /*
2206 If new MTU is less than route PMTU, this new MTU will be the
2207 lowest MTU in the path, update the route PMTU to reflect PMTU
2208 decreases; if new MTU is greater than route PMTU, and the
2209 old MTU is the lowest MTU in the path, update the route PMTU
2210 to reflect the increase. In this case if the other nodes' MTU
2211 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2212 PMTU discouvery.
2213 */
d1918542 2214 if (rt->dst.dev == arg->dev &&
d8d1f30b
CG
2215 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2216 (dst_mtu(&rt->dst) >= arg->mtu ||
2217 (dst_mtu(&rt->dst) < arg->mtu &&
2218 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2219 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2220 }
1da177e4
LT
2221 return 0;
2222}
2223
2224void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2225{
c71099ac
TG
2226 struct rt6_mtu_change_arg arg = {
2227 .dev = dev,
2228 .mtu = mtu,
2229 };
1da177e4 2230
c346dca1 2231 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2232}
2233
ef7c79ed 2234static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2235 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2236 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2237 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2238 [RTA_PRIORITY] = { .type = NLA_U32 },
2239 [RTA_METRICS] = { .type = NLA_NESTED },
2240};
2241
2242static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2243 struct fib6_config *cfg)
1da177e4 2244{
86872cb5
TG
2245 struct rtmsg *rtm;
2246 struct nlattr *tb[RTA_MAX+1];
2247 int err;
1da177e4 2248
86872cb5
TG
2249 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2250 if (err < 0)
2251 goto errout;
1da177e4 2252
86872cb5
TG
2253 err = -EINVAL;
2254 rtm = nlmsg_data(nlh);
2255 memset(cfg, 0, sizeof(*cfg));
2256
2257 cfg->fc_table = rtm->rtm_table;
2258 cfg->fc_dst_len = rtm->rtm_dst_len;
2259 cfg->fc_src_len = rtm->rtm_src_len;
2260 cfg->fc_flags = RTF_UP;
2261 cfg->fc_protocol = rtm->rtm_protocol;
2262
2263 if (rtm->rtm_type == RTN_UNREACHABLE)
2264 cfg->fc_flags |= RTF_REJECT;
2265
ab79ad14
2266 if (rtm->rtm_type == RTN_LOCAL)
2267 cfg->fc_flags |= RTF_LOCAL;
2268
86872cb5
TG
2269 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2270 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2271 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2272
2273 if (tb[RTA_GATEWAY]) {
2274 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2275 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2276 }
86872cb5
TG
2277
2278 if (tb[RTA_DST]) {
2279 int plen = (rtm->rtm_dst_len + 7) >> 3;
2280
2281 if (nla_len(tb[RTA_DST]) < plen)
2282 goto errout;
2283
2284 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2285 }
86872cb5
TG
2286
2287 if (tb[RTA_SRC]) {
2288 int plen = (rtm->rtm_src_len + 7) >> 3;
2289
2290 if (nla_len(tb[RTA_SRC]) < plen)
2291 goto errout;
2292
2293 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2294 }
86872cb5 2295
c3968a85
DW
2296 if (tb[RTA_PREFSRC])
2297 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2298
86872cb5
TG
2299 if (tb[RTA_OIF])
2300 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2301
2302 if (tb[RTA_PRIORITY])
2303 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2304
2305 if (tb[RTA_METRICS]) {
2306 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2307 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2308 }
86872cb5
TG
2309
2310 if (tb[RTA_TABLE])
2311 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2312
2313 err = 0;
2314errout:
2315 return err;
1da177e4
LT
2316}
2317
c127ea2c 2318static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2319{
86872cb5
TG
2320 struct fib6_config cfg;
2321 int err;
1da177e4 2322
86872cb5
TG
2323 err = rtm_to_fib6_config(skb, nlh, &cfg);
2324 if (err < 0)
2325 return err;
2326
2327 return ip6_route_del(&cfg);
1da177e4
LT
2328}
2329
c127ea2c 2330static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2331{
86872cb5
TG
2332 struct fib6_config cfg;
2333 int err;
1da177e4 2334
86872cb5
TG
2335 err = rtm_to_fib6_config(skb, nlh, &cfg);
2336 if (err < 0)
2337 return err;
2338
2339 return ip6_route_add(&cfg);
1da177e4
LT
2340}
2341
339bf98f
TG
2342static inline size_t rt6_nlmsg_size(void)
2343{
2344 return NLMSG_ALIGN(sizeof(struct rtmsg))
2345 + nla_total_size(16) /* RTA_SRC */
2346 + nla_total_size(16) /* RTA_DST */
2347 + nla_total_size(16) /* RTA_GATEWAY */
2348 + nla_total_size(16) /* RTA_PREFSRC */
2349 + nla_total_size(4) /* RTA_TABLE */
2350 + nla_total_size(4) /* RTA_IIF */
2351 + nla_total_size(4) /* RTA_OIF */
2352 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2353 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2354 + nla_total_size(sizeof(struct rta_cacheinfo));
2355}
2356
191cd582
BH
2357static int rt6_fill_node(struct net *net,
2358 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2359 struct in6_addr *dst, struct in6_addr *src,
2360 int iif, int type, u32 pid, u32 seq,
7bc570c8 2361 int prefix, int nowait, unsigned int flags)
1da177e4
LT
2362{
2363 struct rtmsg *rtm;
2d7202bf 2364 struct nlmsghdr *nlh;
e3703b3d 2365 long expires;
9e762a4a 2366 u32 table;
f2c31e32 2367 struct neighbour *n;
1da177e4
LT
2368
2369 if (prefix) { /* user wants prefix routes only */
2370 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2371 /* success since this is not a prefix route */
2372 return 1;
2373 }
2374 }
2375
2d7202bf 2376 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
38308473 2377 if (!nlh)
26932566 2378 return -EMSGSIZE;
2d7202bf
TG
2379
2380 rtm = nlmsg_data(nlh);
1da177e4
LT
2381 rtm->rtm_family = AF_INET6;
2382 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2383 rtm->rtm_src_len = rt->rt6i_src.plen;
2384 rtm->rtm_tos = 0;
c71099ac 2385 if (rt->rt6i_table)
9e762a4a 2386 table = rt->rt6i_table->tb6_id;
c71099ac 2387 else
9e762a4a
PM
2388 table = RT6_TABLE_UNSPEC;
2389 rtm->rtm_table = table;
2d7202bf 2390 NLA_PUT_U32(skb, RTA_TABLE, table);
38308473 2391 if (rt->rt6i_flags & RTF_REJECT)
1da177e4 2392 rtm->rtm_type = RTN_UNREACHABLE;
38308473 2393 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 2394 rtm->rtm_type = RTN_LOCAL;
d1918542 2395 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
2396 rtm->rtm_type = RTN_LOCAL;
2397 else
2398 rtm->rtm_type = RTN_UNICAST;
2399 rtm->rtm_flags = 0;
2400 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2401 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 2402 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4
LT
2403 rtm->rtm_protocol = RTPROT_REDIRECT;
2404 else if (rt->rt6i_flags & RTF_ADDRCONF)
2405 rtm->rtm_protocol = RTPROT_KERNEL;
38308473 2406 else if (rt->rt6i_flags & RTF_DEFAULT)
1da177e4
LT
2407 rtm->rtm_protocol = RTPROT_RA;
2408
38308473 2409 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
2410 rtm->rtm_flags |= RTM_F_CLONED;
2411
2412 if (dst) {
2d7202bf 2413 NLA_PUT(skb, RTA_DST, 16, dst);
1ab1457c 2414 rtm->rtm_dst_len = 128;
1da177e4 2415 } else if (rtm->rtm_dst_len)
2d7202bf 2416 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2417#ifdef CONFIG_IPV6_SUBTREES
2418 if (src) {
2d7202bf 2419 NLA_PUT(skb, RTA_SRC, 16, src);
1ab1457c 2420 rtm->rtm_src_len = 128;
1da177e4 2421 } else if (rtm->rtm_src_len)
2d7202bf 2422 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4 2423#endif
7bc570c8
YH
2424 if (iif) {
2425#ifdef CONFIG_IPV6_MROUTE
2426 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2427 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2428 if (err <= 0) {
2429 if (!nowait) {
2430 if (err == 0)
2431 return 0;
2432 goto nla_put_failure;
2433 } else {
2434 if (err == -EMSGSIZE)
2435 goto nla_put_failure;
2436 }
2437 }
2438 } else
2439#endif
2440 NLA_PUT_U32(skb, RTA_IIF, iif);
2441 } else if (dst) {
1da177e4 2442 struct in6_addr saddr_buf;
c3968a85 2443 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
2d7202bf 2444 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2445 }
2d7202bf 2446
c3968a85
DW
2447 if (rt->rt6i_prefsrc.plen) {
2448 struct in6_addr saddr_buf;
4e3fd7a0 2449 saddr_buf = rt->rt6i_prefsrc.addr;
c3968a85
DW
2450 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2451 }
2452
defb3519 2453 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2454 goto nla_put_failure;
2455
f2c31e32 2456 rcu_read_lock();
27217455 2457 n = dst_get_neighbour_noref(&rt->dst);
f2c31e32
ED
2458 if (n)
2459 NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key);
2460 rcu_read_unlock();
2d7202bf 2461
d8d1f30b 2462 if (rt->dst.dev)
d1918542 2463 NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex);
2d7202bf
TG
2464
2465 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
e3703b3d 2466
36e3deae
YH
2467 if (!(rt->rt6i_flags & RTF_EXPIRES))
2468 expires = 0;
d1918542
DM
2469 else if (rt->dst.expires - jiffies < INT_MAX)
2470 expires = rt->dst.expires - jiffies;
36e3deae
YH
2471 else
2472 expires = INT_MAX;
69cdf8f9 2473
d8d1f30b
CG
2474 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2475 expires, rt->dst.error) < 0)
e3703b3d 2476 goto nla_put_failure;
2d7202bf
TG
2477
2478 return nlmsg_end(skb, nlh);
2479
2480nla_put_failure:
26932566
PM
2481 nlmsg_cancel(skb, nlh);
2482 return -EMSGSIZE;
1da177e4
LT
2483}
2484
1b43af54 2485int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2486{
2487 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2488 int prefix;
2489
2d7202bf
TG
2490 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2491 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2492 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2493 } else
2494 prefix = 0;
2495
191cd582
BH
2496 return rt6_fill_node(arg->net,
2497 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2498 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2499 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2500}
2501
c127ea2c 2502static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2503{
3b1e0a65 2504 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2505 struct nlattr *tb[RTA_MAX+1];
2506 struct rt6_info *rt;
1da177e4 2507 struct sk_buff *skb;
ab364a6f 2508 struct rtmsg *rtm;
4c9483b2 2509 struct flowi6 fl6;
ab364a6f 2510 int err, iif = 0;
1da177e4 2511
ab364a6f
TG
2512 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2513 if (err < 0)
2514 goto errout;
1da177e4 2515
ab364a6f 2516 err = -EINVAL;
4c9483b2 2517 memset(&fl6, 0, sizeof(fl6));
1da177e4 2518
ab364a6f
TG
2519 if (tb[RTA_SRC]) {
2520 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2521 goto errout;
2522
4e3fd7a0 2523 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
2524 }
2525
2526 if (tb[RTA_DST]) {
2527 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2528 goto errout;
2529
4e3fd7a0 2530 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
2531 }
2532
2533 if (tb[RTA_IIF])
2534 iif = nla_get_u32(tb[RTA_IIF]);
2535
2536 if (tb[RTA_OIF])
4c9483b2 2537 fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2538
2539 if (iif) {
2540 struct net_device *dev;
5578689a 2541 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2542 if (!dev) {
2543 err = -ENODEV;
ab364a6f 2544 goto errout;
1da177e4
LT
2545 }
2546 }
2547
ab364a6f 2548 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 2549 if (!skb) {
ab364a6f
TG
2550 err = -ENOBUFS;
2551 goto errout;
2552 }
1da177e4 2553
ab364a6f
TG
2554 /* Reserve room for dummy headers, this skb can pass
2555 through good chunk of routing engine.
2556 */
459a98ed 2557 skb_reset_mac_header(skb);
ab364a6f 2558 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2559
4c9483b2 2560 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
d8d1f30b 2561 skb_dst_set(skb, &rt->dst);
1da177e4 2562
4c9483b2 2563 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
1da177e4 2564 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2565 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2566 if (err < 0) {
ab364a6f
TG
2567 kfree_skb(skb);
2568 goto errout;
1da177e4
LT
2569 }
2570
5578689a 2571 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2572errout:
1da177e4 2573 return err;
1da177e4
LT
2574}
2575
86872cb5 2576void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2577{
2578 struct sk_buff *skb;
5578689a 2579 struct net *net = info->nl_net;
528c4ceb
DL
2580 u32 seq;
2581 int err;
2582
2583 err = -ENOBUFS;
38308473 2584 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 2585
339bf98f 2586 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
38308473 2587 if (!skb)
21713ebc
TG
2588 goto errout;
2589
191cd582 2590 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2591 event, info->pid, seq, 0, 0, 0);
26932566
PM
2592 if (err < 0) {
2593 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2594 WARN_ON(err == -EMSGSIZE);
2595 kfree_skb(skb);
2596 goto errout;
2597 }
1ce85fe4
PNA
2598 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2599 info->nlh, gfp_any());
2600 return;
21713ebc
TG
2601errout:
2602 if (err < 0)
5578689a 2603 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2604}
2605
8ed67789
DL
2606static int ip6_route_dev_notify(struct notifier_block *this,
2607 unsigned long event, void *data)
2608{
2609 struct net_device *dev = (struct net_device *)data;
c346dca1 2610 struct net *net = dev_net(dev);
8ed67789
DL
2611
2612 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2613 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2614 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2615#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2616 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2617 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2618 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2619 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2620#endif
2621 }
2622
2623 return NOTIFY_OK;
2624}
2625
1da177e4
LT
2626/*
2627 * /proc
2628 */
2629
2630#ifdef CONFIG_PROC_FS
2631
/*
 * NOTE(review): nothing in the visible part of this file uses this
 * structure; it looks like a leftover of an older buffer-based /proc
 * reader - confirm before removing.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2640
2641static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2642{
33120b30 2643 struct seq_file *m = p_arg;
69cce1d1 2644 struct neighbour *n;
1da177e4 2645
4b7a4274 2646 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2647
2648#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2649 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2650#else
33120b30 2651 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4 2652#endif
f2c31e32 2653 rcu_read_lock();
27217455 2654 n = dst_get_neighbour_noref(&rt->dst);
69cce1d1
DM
2655 if (n) {
2656 seq_printf(m, "%pi6", n->primary_key);
1da177e4 2657 } else {
33120b30 2658 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2659 }
f2c31e32 2660 rcu_read_unlock();
33120b30 2661 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2662 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2663 rt->dst.__use, rt->rt6i_flags,
d1918542 2664 rt->dst.dev ? rt->dst.dev->name : "");
1da177e4
LT
2665 return 0;
2666}
2667
33120b30 2668static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2669{
f3db4851
DL
2670 struct net *net = (struct net *)m->private;
2671 fib6_clean_all(net, rt6_info_route, 0, m);
33120b30
AD
2672 return 0;
2673}
1da177e4 2674
33120b30
AD
2675static int ipv6_route_open(struct inode *inode, struct file *file)
2676{
de05c557 2677 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2678}
2679
33120b30
AD
2680static const struct file_operations ipv6_route_proc_fops = {
2681 .owner = THIS_MODULE,
2682 .open = ipv6_route_open,
2683 .read = seq_read,
2684 .llseek = seq_lseek,
b6fcbdb4 2685 .release = single_release_net,
33120b30
AD
2686};
2687
1da177e4
LT
2688static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2689{
69ddb805 2690 struct net *net = (struct net *)seq->private;
1da177e4 2691 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2692 net->ipv6.rt6_stats->fib_nodes,
2693 net->ipv6.rt6_stats->fib_route_nodes,
2694 net->ipv6.rt6_stats->fib_rt_alloc,
2695 net->ipv6.rt6_stats->fib_rt_entries,
2696 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2697 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2698 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2699
2700 return 0;
2701}
2702
2703static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2704{
de05c557 2705 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2706}
2707
9a32144e 2708static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2709 .owner = THIS_MODULE,
2710 .open = rt6_stats_seq_open,
2711 .read = seq_read,
2712 .llseek = seq_lseek,
b6fcbdb4 2713 .release = single_release_net,
1da177e4
LT
2714};
2715#endif /* CONFIG_PROC_FS */
2716
2717#ifdef CONFIG_SYSCTL
2718
1da177e4 2719static
8d65af78 2720int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2721 void __user *buffer, size_t *lenp, loff_t *ppos)
2722{
c486da34
LAG
2723 struct net *net;
2724 int delay;
2725 if (!write)
1da177e4 2726 return -EINVAL;
c486da34
LAG
2727
2728 net = (struct net *)ctl->extra1;
2729 delay = net->ipv6.sysctl.flush_delay;
2730 proc_dointvec(ctl, write, buffer, lenp, ppos);
2731 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2732 return 0;
1da177e4
LT
2733}
2734
760f2d01 2735ctl_table ipv6_route_table_template[] = {
1ab1457c 2736 {
1da177e4 2737 .procname = "flush",
4990509f 2738 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2739 .maxlen = sizeof(int),
89c8b3a1 2740 .mode = 0200,
6d9f239a 2741 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2742 },
2743 {
1da177e4 2744 .procname = "gc_thresh",
9a7ec3a9 2745 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2746 .maxlen = sizeof(int),
2747 .mode = 0644,
6d9f239a 2748 .proc_handler = proc_dointvec,
1da177e4
LT
2749 },
2750 {
1da177e4 2751 .procname = "max_size",
4990509f 2752 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2753 .maxlen = sizeof(int),
2754 .mode = 0644,
6d9f239a 2755 .proc_handler = proc_dointvec,
1da177e4
LT
2756 },
2757 {
1da177e4 2758 .procname = "gc_min_interval",
4990509f 2759 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2760 .maxlen = sizeof(int),
2761 .mode = 0644,
6d9f239a 2762 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2763 },
2764 {
1da177e4 2765 .procname = "gc_timeout",
4990509f 2766 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2767 .maxlen = sizeof(int),
2768 .mode = 0644,
6d9f239a 2769 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2770 },
2771 {
1da177e4 2772 .procname = "gc_interval",
4990509f 2773 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2774 .maxlen = sizeof(int),
2775 .mode = 0644,
6d9f239a 2776 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2777 },
2778 {
1da177e4 2779 .procname = "gc_elasticity",
4990509f 2780 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2781 .maxlen = sizeof(int),
2782 .mode = 0644,
f3d3f616 2783 .proc_handler = proc_dointvec,
1da177e4
LT
2784 },
2785 {
1da177e4 2786 .procname = "mtu_expires",
4990509f 2787 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2788 .maxlen = sizeof(int),
2789 .mode = 0644,
6d9f239a 2790 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2791 },
2792 {
1da177e4 2793 .procname = "min_adv_mss",
4990509f 2794 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2795 .maxlen = sizeof(int),
2796 .mode = 0644,
f3d3f616 2797 .proc_handler = proc_dointvec,
1da177e4
LT
2798 },
2799 {
1da177e4 2800 .procname = "gc_min_interval_ms",
4990509f 2801 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2802 .maxlen = sizeof(int),
2803 .mode = 0644,
6d9f239a 2804 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2805 },
f8572d8f 2806 { }
1da177e4
LT
2807};
2808
2c8c1e72 2809struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2810{
2811 struct ctl_table *table;
2812
2813 table = kmemdup(ipv6_route_table_template,
2814 sizeof(ipv6_route_table_template),
2815 GFP_KERNEL);
5ee09105
YH
2816
2817 if (table) {
2818 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 2819 table[0].extra1 = net;
86393e52 2820 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2821 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2822 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2823 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2824 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2825 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2826 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2827 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2828 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2829 }
2830
760f2d01
DL
2831 return table;
2832}
1da177e4
LT
2833#endif
2834
2c8c1e72 2835static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2836{
633d424b 2837 int ret = -ENOMEM;
8ed67789 2838
86393e52
AD
2839 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2840 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2841
fc66f95c
ED
2842 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2843 goto out_ip6_dst_ops;
2844
8ed67789
DL
2845 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2846 sizeof(*net->ipv6.ip6_null_entry),
2847 GFP_KERNEL);
2848 if (!net->ipv6.ip6_null_entry)
fc66f95c 2849 goto out_ip6_dst_entries;
d8d1f30b 2850 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2851 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2852 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2853 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2854 ip6_template_metrics, true);
8ed67789
DL
2855
2856#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2857 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2858 sizeof(*net->ipv6.ip6_prohibit_entry),
2859 GFP_KERNEL);
68fffc67
PZ
2860 if (!net->ipv6.ip6_prohibit_entry)
2861 goto out_ip6_null_entry;
d8d1f30b 2862 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2863 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2864 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2865 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2866 ip6_template_metrics, true);
8ed67789
DL
2867
2868 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2869 sizeof(*net->ipv6.ip6_blk_hole_entry),
2870 GFP_KERNEL);
68fffc67
PZ
2871 if (!net->ipv6.ip6_blk_hole_entry)
2872 goto out_ip6_prohibit_entry;
d8d1f30b 2873 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2874 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2875 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2876 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2877 ip6_template_metrics, true);
8ed67789
DL
2878#endif
2879
b339a47c
PZ
2880 net->ipv6.sysctl.flush_delay = 0;
2881 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2882 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2883 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2884 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2885 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2886 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2887 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2888
cdb18761
DL
2889#ifdef CONFIG_PROC_FS
2890 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2891 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2892#endif
6891a346
BT
2893 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2894
8ed67789
DL
2895 ret = 0;
2896out:
2897 return ret;
f2fc6a54 2898
68fffc67
PZ
2899#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2900out_ip6_prohibit_entry:
2901 kfree(net->ipv6.ip6_prohibit_entry);
2902out_ip6_null_entry:
2903 kfree(net->ipv6.ip6_null_entry);
2904#endif
fc66f95c
ED
2905out_ip6_dst_entries:
2906 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2907out_ip6_dst_ops:
f2fc6a54 2908 goto out;
cdb18761
DL
2909}
2910
2c8c1e72 2911static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761
DL
2912{
2913#ifdef CONFIG_PROC_FS
2914 proc_net_remove(net, "ipv6_route");
2915 proc_net_remove(net, "rt6_stats");
2916#endif
8ed67789
DL
2917 kfree(net->ipv6.ip6_null_entry);
2918#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2919 kfree(net->ipv6.ip6_prohibit_entry);
2920 kfree(net->ipv6.ip6_blk_hole_entry);
2921#endif
41bb78b4 2922 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
2923}
2924
2925static struct pernet_operations ip6_route_net_ops = {
2926 .init = ip6_route_net_init,
2927 .exit = ip6_route_net_exit,
2928};
2929
8ed67789
DL
2930static struct notifier_block ip6_route_dev_notifier = {
2931 .notifier_call = ip6_route_dev_notify,
2932 .priority = 0,
2933};
2934
433d49c3 2935int __init ip6_route_init(void)
1da177e4 2936{
433d49c3
DL
2937 int ret;
2938
9a7ec3a9
DL
2939 ret = -ENOMEM;
2940 ip6_dst_ops_template.kmem_cachep =
e5d679f3 2941 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 2942 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 2943 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 2944 goto out;
14e50e57 2945
fc66f95c 2946 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 2947 if (ret)
bdb3289f 2948 goto out_kmem_cache;
bdb3289f 2949
fc66f95c
ED
2950 ret = register_pernet_subsys(&ip6_route_net_ops);
2951 if (ret)
2952 goto out_dst_entries;
2953
5dc121e9
AE
2954 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2955
8ed67789
DL
2956 /* Registering of the loopback is done before this portion of code,
2957 * the loopback reference in rt6_info will not be taken, do it
2958 * manually for init_net */
d8d1f30b 2959 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2960 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2961 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2962 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 2963 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 2964 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2965 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2966 #endif
433d49c3
DL
2967 ret = fib6_init();
2968 if (ret)
8ed67789 2969 goto out_register_subsys;
433d49c3 2970
433d49c3
DL
2971 ret = xfrm6_init();
2972 if (ret)
cdb18761 2973 goto out_fib6_init;
c35b7e72 2974
433d49c3
DL
2975 ret = fib6_rules_init();
2976 if (ret)
2977 goto xfrm6_init;
7e5449c2 2978
433d49c3 2979 ret = -ENOBUFS;
c7ac8679
GR
2980 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
2981 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
2982 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
433d49c3 2983 goto fib6_rules_init;
c127ea2c 2984
8ed67789 2985 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761
DL
2986 if (ret)
2987 goto fib6_rules_init;
8ed67789 2988
433d49c3
DL
2989out:
2990 return ret;
2991
2992fib6_rules_init:
433d49c3
DL
2993 fib6_rules_cleanup();
2994xfrm6_init:
433d49c3 2995 xfrm6_fini();
433d49c3 2996out_fib6_init:
433d49c3 2997 fib6_gc_cleanup();
8ed67789
DL
2998out_register_subsys:
2999 unregister_pernet_subsys(&ip6_route_net_ops);
fc66f95c
ED
3000out_dst_entries:
3001 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 3002out_kmem_cache:
f2fc6a54 3003 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 3004 goto out;
1da177e4
LT
3005}
3006
3007void ip6_route_cleanup(void)
3008{
8ed67789 3009 unregister_netdevice_notifier(&ip6_route_dev_notifier);
101367c2 3010 fib6_rules_cleanup();
1da177e4 3011 xfrm6_fini();
1da177e4 3012 fib6_gc_cleanup();
8ed67789 3013 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3014 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3015 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3016}
This page took 1.008283 seconds and 5 git commands to generate.