net: Implement read-only protection and COW'ing of metrics.
[deliverable/linux.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4
LT
28#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
7bc570c8 37#include <linux/mroute6.h>
1da177e4 38#include <linux/init.h>
1da177e4 39#include <linux/if_arp.h>
1da177e4
LT
40#include <linux/proc_fs.h>
41#include <linux/seq_file.h>
5b7c931d 42#include <linux/nsproxy.h>
5a0e3ad6 43#include <linux/slab.h>
457c4cbc 44#include <net/net_namespace.h>
1da177e4
LT
45#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
54#include <net/xfrm.h>
8d71740c 55#include <net/netevent.h>
21713ebc 56#include <net/netlink.h>
1da177e4
LT
57
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
/*
 * Debug verbosity for this file.  With RT6_DEBUG below 3 both RDBG()
 * and RT6_TRACE() expand to nothing; set it to 3 (or more) to turn
 * them into printk() calls.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
74
1da177e4
LT
75static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
76static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 77static unsigned int ip6_default_advmss(const struct dst_entry *dst);
d33e4553 78static unsigned int ip6_default_mtu(const struct dst_entry *dst);
1da177e4
LT
79static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80static void ip6_dst_destroy(struct dst_entry *);
81static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
569d3645 83static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
84
85static int ip6_pkt_discard(struct sk_buff *skb);
86static int ip6_pkt_discard_out(struct sk_buff *skb);
87static void ip6_link_failure(struct sk_buff *skb);
88static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89
70ceb4f5 90#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0
DL
91static struct rt6_info *rt6_add_route_info(struct net *net,
92 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
93 struct in6_addr *gwaddr, int ifindex,
94 unsigned pref);
efa2cea0
DL
95static struct rt6_info *rt6_get_route_info(struct net *net,
96 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
97 struct in6_addr *gwaddr, int ifindex);
98#endif
99
9a7ec3a9 100static struct dst_ops ip6_dst_ops_template = {
1da177e4 101 .family = AF_INET6,
09640e63 102 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
103 .gc = ip6_dst_gc,
104 .gc_thresh = 1024,
105 .check = ip6_dst_check,
0dbaee3b 106 .default_advmss = ip6_default_advmss,
d33e4553 107 .default_mtu = ip6_default_mtu,
62fa8a84 108 .cow_metrics = dst_cow_metrics_generic,
1da177e4
LT
109 .destroy = ip6_dst_destroy,
110 .ifdown = ip6_dst_ifdown,
111 .negative_advice = ip6_negative_advice,
112 .link_failure = ip6_link_failure,
113 .update_pmtu = ip6_rt_update_pmtu,
1ac06e03 114 .local_out = __ip6_local_out,
1da177e4
LT
115};
116
14e50e57
DM
117static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
118{
119}
120
121static struct dst_ops ip6_dst_blackhole_ops = {
122 .family = AF_INET6,
09640e63 123 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
124 .destroy = ip6_dst_destroy,
125 .check = ip6_dst_check,
126 .update_pmtu = ip6_rt_blackhole_update_pmtu,
14e50e57
DM
127};
128
62fa8a84
DM
129static const u32 ip6_template_metrics[RTAX_MAX] = {
130 [RTAX_HOPLIMIT - 1] = 255,
131};
132
bdb3289f 133static struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
134 .dst = {
135 .__refcnt = ATOMIC_INIT(1),
136 .__use = 1,
137 .obsolete = -1,
138 .error = -ENETUNREACH,
d8d1f30b
CG
139 .input = ip6_pkt_discard,
140 .output = ip6_pkt_discard_out,
1da177e4
LT
141 },
142 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 143 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
144 .rt6i_metric = ~(u32) 0,
145 .rt6i_ref = ATOMIC_INIT(1),
146};
147
101367c2
TG
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Template for the "prohibit" route entry: a reject route that reports
 * -EACCES and hands packets to the ip6_pkt_prohibit handlers.
 */
static struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
		.error		= -EACCES,
		.obsolete	= -1,
		.__use		= 1,
		.__refcnt	= ATOMIC_INIT(1),
	},
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Template for the "blackhole" route entry: a reject route that reports
 * -EINVAL and passes packets to dst_discard in both directions.
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.input		= dst_discard,
		.output		= dst_discard,
		.error		= -EINVAL,
		.obsolete	= -1,
		.__use		= 1,
		.__refcnt	= ATOMIC_INIT(1),
	},
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
184
/*
 * Allocate a dst through @ops and hand it back typed as an rt6_info.
 * The result is whatever dst_alloc() returned, merely cast.
 */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	struct dst_entry *entry = dst_alloc(ops);

	return (struct rt6_info *)entry;
}
190
191static void ip6_dst_destroy(struct dst_entry *dst)
192{
193 struct rt6_info *rt = (struct rt6_info *)dst;
194 struct inet6_dev *idev = rt->rt6i_idev;
b3419363 195 struct inet_peer *peer = rt->rt6i_peer;
1da177e4
LT
196
197 if (idev != NULL) {
198 rt->rt6i_idev = NULL;
199 in6_dev_put(idev);
1ab1457c 200 }
62fa8a84 201 dst_destroy_metrics_generic(dst);
b3419363
DM
202 if (peer) {
203 BUG_ON(!(rt->rt6i_flags & RTF_CACHE));
204 rt->rt6i_peer = NULL;
205 inet_putpeer(peer);
206 }
207}
208
209void rt6_bind_peer(struct rt6_info *rt, int create)
210{
211 struct inet_peer *peer;
212
213 if (WARN_ON(!(rt->rt6i_flags & RTF_CACHE)))
214 return;
215
216 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
217 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
218 inet_putpeer(peer);
1da177e4
LT
219}
220
221static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
222 int how)
223{
224 struct rt6_info *rt = (struct rt6_info *)dst;
225 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 226 struct net_device *loopback_dev =
c346dca1 227 dev_net(dev)->loopback_dev;
1da177e4 228
5a3e55d6
DL
229 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
230 struct inet6_dev *loopback_idev =
231 in6_dev_get(loopback_dev);
1da177e4
LT
232 if (loopback_idev != NULL) {
233 rt->rt6i_idev = loopback_idev;
234 in6_dev_put(idev);
235 }
236 }
237}
238
239static __inline__ int rt6_check_expired(const struct rt6_info *rt)
240{
a02cec21
ED
241 return (rt->rt6i_flags & RTF_EXPIRES) &&
242 time_after(jiffies, rt->rt6i_expires);
1da177e4
LT
243}
244
c71099ac
TG
245static inline int rt6_need_strict(struct in6_addr *daddr)
246{
a02cec21
ED
247 return ipv6_addr_type(daddr) &
248 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
249}
250
1da177e4 251/*
c71099ac 252 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
253 */
254
8ed67789
DL
255static inline struct rt6_info *rt6_device_match(struct net *net,
256 struct rt6_info *rt,
dd3abc4e 257 struct in6_addr *saddr,
1da177e4 258 int oif,
d420895e 259 int flags)
1da177e4
LT
260{
261 struct rt6_info *local = NULL;
262 struct rt6_info *sprt;
263
dd3abc4e
YH
264 if (!oif && ipv6_addr_any(saddr))
265 goto out;
266
d8d1f30b 267 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
dd3abc4e
YH
268 struct net_device *dev = sprt->rt6i_dev;
269
270 if (oif) {
1da177e4
LT
271 if (dev->ifindex == oif)
272 return sprt;
273 if (dev->flags & IFF_LOOPBACK) {
274 if (sprt->rt6i_idev == NULL ||
275 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 276 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 277 continue;
1ab1457c 278 if (local && (!oif ||
1da177e4
LT
279 local->rt6i_idev->dev->ifindex == oif))
280 continue;
281 }
282 local = sprt;
283 }
dd3abc4e
YH
284 } else {
285 if (ipv6_chk_addr(net, saddr, dev,
286 flags & RT6_LOOKUP_F_IFACE))
287 return sprt;
1da177e4 288 }
dd3abc4e 289 }
1da177e4 290
dd3abc4e 291 if (oif) {
1da177e4
LT
292 if (local)
293 return local;
294
d420895e 295 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 296 return net->ipv6.ip6_null_entry;
1da177e4 297 }
dd3abc4e 298out:
1da177e4
LT
299 return rt;
300}
301
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: send a Neighbour Solicitation towards
 * the next hop of @rt when its neighbour entry is not in a NUD_VALID
 * state.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one
 * per minute; here the limit is the idev's rtr_probe_interval, measured
 * from neigh->updated.
 *
 * @rt may be NULL, in which case nothing happens.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;

	/* cheap unlocked pre-check; re-validated under the lock below */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	/*
	 * neigh->updated is modified below, so the write side of the lock
	 * is required: with only the read side held, two CPUs could both
	 * pass the rate-limit test and both send a probe.
	 */
	write_lock_bh(&neigh->lock);
	if ((neigh->nud_state & NUD_VALID) ||
	    !time_after(jiffies,
			neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		write_unlock_bh(&neigh->lock);
		return;
	}
	neigh->updated = jiffies;
	write_unlock_bh(&neigh->lock);

	/* the solicitation itself is sent without holding neigh->lock */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF probing is compiled out. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
336
1da177e4 337/*
554cfb7e 338 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 339 */
b6f99a21 340static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e
YH
341{
342 struct net_device *dev = rt->rt6i_dev;
161980f4 343 if (!oif || dev->ifindex == oif)
554cfb7e 344 return 2;
161980f4
DM
345 if ((dev->flags & IFF_LOOPBACK) &&
346 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
347 return 1;
348 return 0;
554cfb7e 349}
1da177e4 350
b6f99a21 351static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 352{
554cfb7e 353 struct neighbour *neigh = rt->rt6i_nexthop;
398bcbeb 354 int m;
4d0c5911
YH
355 if (rt->rt6i_flags & RTF_NONEXTHOP ||
356 !(rt->rt6i_flags & RTF_GATEWAY))
357 m = 1;
358 else if (neigh) {
554cfb7e
YH
359 read_lock_bh(&neigh->lock);
360 if (neigh->nud_state & NUD_VALID)
4d0c5911 361 m = 2;
398bcbeb
YH
362#ifdef CONFIG_IPV6_ROUTER_PREF
363 else if (neigh->nud_state & NUD_FAILED)
364 m = 0;
365#endif
366 else
ea73ee23 367 m = 1;
554cfb7e 368 read_unlock_bh(&neigh->lock);
398bcbeb
YH
369 } else
370 m = 0;
554cfb7e 371 return m;
1da177e4
LT
372}
373
554cfb7e
YH
374static int rt6_score_route(struct rt6_info *rt, int oif,
375 int strict)
1da177e4 376{
4d0c5911 377 int m, n;
1ab1457c 378
4d0c5911 379 m = rt6_check_dev(rt, oif);
77d16f45 380 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 381 return -1;
ebacaaa0
YH
382#ifdef CONFIG_IPV6_ROUTER_PREF
383 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
384#endif
4d0c5911 385 n = rt6_check_neigh(rt);
557e92ef 386 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
387 return -1;
388 return m;
389}
390
f11e6659
DM
391static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
392 int *mpri, struct rt6_info *match)
554cfb7e 393{
f11e6659
DM
394 int m;
395
396 if (rt6_check_expired(rt))
397 goto out;
398
399 m = rt6_score_route(rt, oif, strict);
400 if (m < 0)
401 goto out;
402
403 if (m > *mpri) {
404 if (strict & RT6_LOOKUP_F_REACHABLE)
405 rt6_probe(match);
406 *mpri = m;
407 match = rt;
408 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
409 rt6_probe(rt);
410 }
411
412out:
413 return match;
414}
415
416static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
417 struct rt6_info *rr_head,
418 u32 metric, int oif, int strict)
419{
420 struct rt6_info *rt, *match;
554cfb7e 421 int mpri = -1;
1da177e4 422
f11e6659
DM
423 match = NULL;
424 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 425 rt = rt->dst.rt6_next)
f11e6659
DM
426 match = find_match(rt, oif, strict, &mpri, match);
427 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 428 rt = rt->dst.rt6_next)
f11e6659 429 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 430
f11e6659
DM
431 return match;
432}
1da177e4 433
f11e6659
DM
434static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
435{
436 struct rt6_info *match, *rt0;
8ed67789 437 struct net *net;
1da177e4 438
f11e6659 439 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
0dc47877 440 __func__, fn->leaf, oif);
554cfb7e 441
f11e6659
DM
442 rt0 = fn->rr_ptr;
443 if (!rt0)
444 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 445
f11e6659 446 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 447
554cfb7e 448 if (!match &&
f11e6659 449 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 450 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 451
554cfb7e 452 /* no entries matched; do round-robin */
f11e6659
DM
453 if (!next || next->rt6i_metric != rt0->rt6i_metric)
454 next = fn->leaf;
455
456 if (next != rt0)
457 fn->rr_ptr = next;
1da177e4 458 }
1da177e4 459
f11e6659 460 RT6_TRACE("%s() => %p\n",
0dc47877 461 __func__, match);
1da177e4 462
c346dca1 463 net = dev_net(rt0->rt6i_dev);
a02cec21 464 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
465}
466
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a received route information option.
 *
 * @dev:    device the option arrived on.
 * @opt:    start of the option, interpreted as struct route_info.
 * @len:    number of bytes available at @opt.
 * @gwaddr: gateway address to use for the route.
 *
 * Depending on the advertised lifetime the matching route is deleted,
 * created, or has its preference and expiry refreshed.
 *
 * Returns 0 on success, -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		/* option carries the full prefix: use it in place */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* zero lifetime withdraws an existing route */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
	else if (lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev->ifindex, pref);

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime)) {
		rt->rt6i_expires = jiffies + HZ * lifetime;
		rt->rt6i_flags |= RTF_EXPIRES;
	} else {
		rt->rt6i_flags &= ~RTF_EXPIRES;
	}
	dst_release(&rt->dst);

	return 0;
}
#endif
540
/*
 * BACKTRACK(__net, saddr) - climb the fib6 tree after a failed match.
 *
 * Only acts when the caller's `rt` is __net's ip6_null_entry.  It then
 * walks from the caller's `fn` towards the root: at each step it moves
 * to the parent, or - when the parent has a subtree that is not the
 * node we came from - looks @saddr up in that subtree.  As soon as a
 * node with RTN_RTINFO is reached it jumps to the caller's `restart`
 * label; when a node with RTN_TL_ROOT is hit it jumps to `out`.
 *
 * The macro therefore requires the enclosing function to provide:
 *   - locals   `struct rt6_info *rt` and `struct fib6_node *fn`;
 *   - labels   `restart` and `out`.
 *
 * Both arguments are parenthesized in the expansion so that arbitrary
 * expressions can be passed safely.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == (__net)->ipv6.ip6_null_entry) {			\
		struct fib6_node *pn;					\
		while (1) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn),	\
						 NULL, (saddr));	\
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
c71099ac 558
8ed67789
DL
559static struct rt6_info *ip6_pol_route_lookup(struct net *net,
560 struct fib6_table *table,
c71099ac 561 struct flowi *fl, int flags)
1da177e4
LT
562{
563 struct fib6_node *fn;
564 struct rt6_info *rt;
565
c71099ac
TG
566 read_lock_bh(&table->tb6_lock);
567 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
568restart:
569 rt = fn->leaf;
dd3abc4e 570 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
8ed67789 571 BACKTRACK(net, &fl->fl6_src);
c71099ac 572out:
d8d1f30b 573 dst_use(&rt->dst, jiffies);
c71099ac 574 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
575 return rt;
576
577}
578
9acd9f3a
YH
579struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
580 const struct in6_addr *saddr, int oif, int strict)
c71099ac
TG
581{
582 struct flowi fl = {
583 .oif = oif,
5811662b 584 .fl6_dst = *daddr,
c71099ac
TG
585 };
586 struct dst_entry *dst;
77d16f45 587 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 588
adaa70bb
TG
589 if (saddr) {
590 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
591 flags |= RT6_LOOKUP_F_HAS_SADDR;
592 }
593
606a2b48 594 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
c71099ac
TG
595 if (dst->error == 0)
596 return (struct rt6_info *) dst;
597
598 dst_release(dst);
599
1da177e4
LT
600 return NULL;
601}
602
7159039a
YH
603EXPORT_SYMBOL(rt6_lookup);
604
c71099ac 605/* ip6_ins_rt is called with FREE table->tb6_lock.
1da177e4
LT
606 It takes new route entry, the addition fails by any reason the
607 route is freed. In any case, if caller does not hold it, it may
608 be destroyed.
609 */
610
86872cb5 611static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
612{
613 int err;
c71099ac 614 struct fib6_table *table;
1da177e4 615
c71099ac
TG
616 table = rt->rt6i_table;
617 write_lock_bh(&table->tb6_lock);
86872cb5 618 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 619 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
620
621 return err;
622}
623
40e22e8f
TG
624int ip6_ins_rt(struct rt6_info *rt)
625{
4d1169c1 626 struct nl_info info = {
c346dca1 627 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 628 };
528c4ceb 629 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
630}
631
95a9a5ba
YH
632static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
633 struct in6_addr *saddr)
1da177e4 634{
1da177e4
LT
635 struct rt6_info *rt;
636
637 /*
638 * Clone the route.
639 */
640
641 rt = ip6_rt_copy(ort);
642
643 if (rt) {
14deae41
DM
644 struct neighbour *neigh;
645 int attempts = !in_softirq();
646
58c4fb86
YH
647 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
648 if (rt->rt6i_dst.plen != 128 &&
649 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
650 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 651 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
58c4fb86 652 }
1da177e4 653
58c4fb86 654 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
1da177e4
LT
655 rt->rt6i_dst.plen = 128;
656 rt->rt6i_flags |= RTF_CACHE;
d8d1f30b 657 rt->dst.flags |= DST_HOST;
1da177e4
LT
658
659#ifdef CONFIG_IPV6_SUBTREES
660 if (rt->rt6i_src.plen && saddr) {
661 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
662 rt->rt6i_src.plen = 128;
663 }
664#endif
665
14deae41
DM
666 retry:
667 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
668 if (IS_ERR(neigh)) {
669 struct net *net = dev_net(rt->rt6i_dev);
670 int saved_rt_min_interval =
671 net->ipv6.sysctl.ip6_rt_gc_min_interval;
672 int saved_rt_elasticity =
673 net->ipv6.sysctl.ip6_rt_gc_elasticity;
674
675 if (attempts-- > 0) {
676 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
677 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
678
86393e52 679 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
680
681 net->ipv6.sysctl.ip6_rt_gc_elasticity =
682 saved_rt_elasticity;
683 net->ipv6.sysctl.ip6_rt_gc_min_interval =
684 saved_rt_min_interval;
685 goto retry;
686 }
687
688 if (net_ratelimit())
689 printk(KERN_WARNING
7e1b33e5 690 "ipv6: Neighbour table overflow.\n");
d8d1f30b 691 dst_free(&rt->dst);
14deae41
DM
692 return NULL;
693 }
694 rt->rt6i_nexthop = neigh;
1da177e4 695
95a9a5ba 696 }
1da177e4 697
95a9a5ba
YH
698 return rt;
699}
1da177e4 700
299d9939
YH
701static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
702{
703 struct rt6_info *rt = ip6_rt_copy(ort);
704 if (rt) {
705 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
706 rt->rt6i_dst.plen = 128;
707 rt->rt6i_flags |= RTF_CACHE;
d8d1f30b 708 rt->dst.flags |= DST_HOST;
299d9939
YH
709 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
710 }
711 return rt;
712}
713
8ed67789
DL
714static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
715 struct flowi *fl, int flags)
1da177e4
LT
716{
717 struct fib6_node *fn;
519fbd87 718 struct rt6_info *rt, *nrt;
c71099ac 719 int strict = 0;
1da177e4 720 int attempts = 3;
519fbd87 721 int err;
53b7997f 722 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 723
77d16f45 724 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
725
726relookup:
c71099ac 727 read_lock_bh(&table->tb6_lock);
1da177e4 728
8238dd06 729restart_2:
c71099ac 730 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1da177e4
LT
731
732restart:
4acad72d 733 rt = rt6_select(fn, oif, strict | reachable);
8ed67789
DL
734
735 BACKTRACK(net, &fl->fl6_src);
736 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 737 rt->rt6i_flags & RTF_CACHE)
1ddef044 738 goto out;
1da177e4 739
d8d1f30b 740 dst_hold(&rt->dst);
c71099ac 741 read_unlock_bh(&table->tb6_lock);
fb9de91e 742
519fbd87 743 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
c71099ac 744 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
d80bc0fd 745 else
c71099ac 746 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
e40cf353 747
d8d1f30b 748 dst_release(&rt->dst);
8ed67789 749 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 750
d8d1f30b 751 dst_hold(&rt->dst);
519fbd87 752 if (nrt) {
40e22e8f 753 err = ip6_ins_rt(nrt);
519fbd87 754 if (!err)
1da177e4 755 goto out2;
1da177e4 756 }
1da177e4 757
519fbd87
YH
758 if (--attempts <= 0)
759 goto out2;
760
761 /*
c71099ac 762 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
763 * released someone could insert this route. Relookup.
764 */
d8d1f30b 765 dst_release(&rt->dst);
519fbd87
YH
766 goto relookup;
767
768out:
8238dd06
YH
769 if (reachable) {
770 reachable = 0;
771 goto restart_2;
772 }
d8d1f30b 773 dst_hold(&rt->dst);
c71099ac 774 read_unlock_bh(&table->tb6_lock);
1da177e4 775out2:
d8d1f30b
CG
776 rt->dst.lastuse = jiffies;
777 rt->dst.__use++;
c71099ac
TG
778
779 return rt;
1da177e4
LT
780}
781
8ed67789 782static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4acad72d
PE
783 struct flowi *fl, int flags)
784{
8ed67789 785 return ip6_pol_route(net, table, fl->iif, fl, flags);
4acad72d
PE
786}
787
c71099ac
TG
788void ip6_route_input(struct sk_buff *skb)
789{
0660e03f 790 struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 791 struct net *net = dev_net(skb->dev);
adaa70bb 792 int flags = RT6_LOOKUP_F_HAS_SADDR;
c71099ac
TG
793 struct flowi fl = {
794 .iif = skb->dev->ifindex,
5811662b
CG
795 .fl6_dst = iph->daddr,
796 .fl6_src = iph->saddr,
797 .fl6_flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
1ab1457c 798 .mark = skb->mark,
c71099ac
TG
799 .proto = iph->nexthdr,
800 };
adaa70bb 801
1d6e55f1 802 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
adaa70bb 803 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 804
adf30907 805 skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input));
c71099ac
TG
806}
807
8ed67789 808static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
c71099ac 809 struct flowi *fl, int flags)
1da177e4 810{
8ed67789 811 return ip6_pol_route(net, table, fl->oif, fl, flags);
c71099ac
TG
812}
813
4591db4f
DL
814struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
815 struct flowi *fl)
c71099ac
TG
816{
817 int flags = 0;
818
6057fd78 819 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst))
77d16f45 820 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 821
adaa70bb
TG
822 if (!ipv6_addr_any(&fl->fl6_src))
823 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
824 else if (sk)
825 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 826
4591db4f 827 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
1da177e4
LT
828}
829
7159039a 830EXPORT_SYMBOL(ip6_route_output);
1da177e4 831
14e50e57
DM
832int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
833{
834 struct rt6_info *ort = (struct rt6_info *) *dstp;
835 struct rt6_info *rt = (struct rt6_info *)
836 dst_alloc(&ip6_dst_blackhole_ops);
837 struct dst_entry *new = NULL;
838
839 if (rt) {
d8d1f30b 840 new = &rt->dst;
14e50e57
DM
841
842 atomic_set(&new->__refcnt, 1);
843 new->__use = 1;
352e512c
HX
844 new->input = dst_discard;
845 new->output = dst_discard;
14e50e57 846
defb3519 847 dst_copy_metrics(new, &ort->dst);
d8d1f30b 848 new->dev = ort->dst.dev;
14e50e57
DM
849 if (new->dev)
850 dev_hold(new->dev);
851 rt->rt6i_idev = ort->rt6i_idev;
852 if (rt->rt6i_idev)
853 in6_dev_hold(rt->rt6i_idev);
854 rt->rt6i_expires = 0;
855
856 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
857 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
858 rt->rt6i_metric = 0;
859
860 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
861#ifdef CONFIG_IPV6_SUBTREES
862 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
863#endif
864
865 dst_free(new);
866 }
867
868 dst_release(*dstp);
869 *dstp = new;
a02cec21 870 return new ? 0 : -ENOMEM;
14e50e57
DM
871}
872EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
873
1da177e4
LT
874/*
875 * Destination cache support functions
876 */
877
878static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
879{
880 struct rt6_info *rt;
881
882 rt = (struct rt6_info *) dst;
883
10414444 884 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
1da177e4
LT
885 return dst;
886
887 return NULL;
888}
889
890static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
891{
892 struct rt6_info *rt = (struct rt6_info *) dst;
893
894 if (rt) {
54c1a859
YH
895 if (rt->rt6i_flags & RTF_CACHE) {
896 if (rt6_check_expired(rt)) {
897 ip6_del_rt(rt);
898 dst = NULL;
899 }
900 } else {
1da177e4 901 dst_release(dst);
54c1a859
YH
902 dst = NULL;
903 }
1da177e4 904 }
54c1a859 905 return dst;
1da177e4
LT
906}
907
908static void ip6_link_failure(struct sk_buff *skb)
909{
910 struct rt6_info *rt;
911
3ffe533c 912 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 913
adf30907 914 rt = (struct rt6_info *) skb_dst(skb);
1da177e4
LT
915 if (rt) {
916 if (rt->rt6i_flags&RTF_CACHE) {
d8d1f30b 917 dst_set_expires(&rt->dst, 0);
1da177e4
LT
918 rt->rt6i_flags |= RTF_EXPIRES;
919 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
920 rt->rt6i_node->fn_sernum = -1;
921 }
922}
923
924static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
925{
926 struct rt6_info *rt6 = (struct rt6_info*)dst;
927
928 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
929 rt6->rt6i_flags |= RTF_MODIFIED;
930 if (mtu < IPV6_MIN_MTU) {
defb3519 931 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 932 mtu = IPV6_MIN_MTU;
defb3519
DM
933 features |= RTAX_FEATURE_ALLFRAG;
934 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 935 }
defb3519 936 dst_metric_set(dst, RTAX_MTU, mtu);
8d71740c 937 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
1da177e4
LT
938 }
939}
940
0dbaee3b 941static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 942{
0dbaee3b
DM
943 struct net_device *dev = dst->dev;
944 unsigned int mtu = dst_mtu(dst);
945 struct net *net = dev_net(dev);
946
1da177e4
LT
947 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
948
5578689a
DL
949 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
950 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
951
952 /*
1ab1457c
YH
953 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
954 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
955 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
956 * rely only on pmtu discovery"
957 */
958 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
959 mtu = IPV6_MAXPLEN;
960 return mtu;
961}
962
d33e4553
DM
963static unsigned int ip6_default_mtu(const struct dst_entry *dst)
964{
965 unsigned int mtu = IPV6_MIN_MTU;
966 struct inet6_dev *idev;
967
968 rcu_read_lock();
969 idev = __in6_dev_get(dst->dev);
970 if (idev)
971 mtu = idev->cnf.mtu6;
972 rcu_read_unlock();
973
974 return mtu;
975}
976
3b00944c
YH
/*
 * Singly linked list (via dst->next) of the dsts handed out by
 * icmp6_dst_alloc(); all accesses in this file are made with
 * icmp6_dst_lock held.
 */
static DEFINE_SPINLOCK(icmp6_dst_lock);
static struct dst_entry *icmp6_dst_gc_list;
5d0bbeeb 979
3b00944c 980struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 981 struct neighbour *neigh,
9acd9f3a 982 const struct in6_addr *addr)
1da177e4
LT
983{
984 struct rt6_info *rt;
985 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 986 struct net *net = dev_net(dev);
1da177e4
LT
987
988 if (unlikely(idev == NULL))
989 return NULL;
990
86393e52 991 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
992 if (unlikely(rt == NULL)) {
993 in6_dev_put(idev);
994 goto out;
995 }
996
997 dev_hold(dev);
998 if (neigh)
999 neigh_hold(neigh);
14deae41 1000 else {
1da177e4 1001 neigh = ndisc_get_neigh(dev, addr);
14deae41
DM
1002 if (IS_ERR(neigh))
1003 neigh = NULL;
1004 }
1da177e4
LT
1005
1006 rt->rt6i_dev = dev;
1007 rt->rt6i_idev = idev;
1008 rt->rt6i_nexthop = neigh;
d8d1f30b 1009 atomic_set(&rt->dst.__refcnt, 1);
defb3519 1010 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
d8d1f30b 1011 rt->dst.output = ip6_output;
1da177e4
LT
1012
1013#if 0 /* there's no chance to use these for ndisc */
d8d1f30b 1014 rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
1ab1457c 1015 ? DST_HOST
1da177e4
LT
1016 : 0;
1017 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1018 rt->rt6i_dst.plen = 128;
1019#endif
1020
3b00944c 1021 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1022 rt->dst.next = icmp6_dst_gc_list;
1023 icmp6_dst_gc_list = &rt->dst;
3b00944c 1024 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1025
5578689a 1026 fib6_force_start_gc(net);
1da177e4
LT
1027
1028out:
d8d1f30b 1029 return &rt->dst;
1da177e4
LT
1030}
1031
3d0f24a7 1032int icmp6_dst_gc(void)
1da177e4
LT
1033{
1034 struct dst_entry *dst, *next, **pprev;
3d0f24a7 1035 int more = 0;
1da177e4
LT
1036
1037 next = NULL;
5d0bbeeb 1038
3b00944c
YH
1039 spin_lock_bh(&icmp6_dst_lock);
1040 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1041
1da177e4
LT
1042 while ((dst = *pprev) != NULL) {
1043 if (!atomic_read(&dst->__refcnt)) {
1044 *pprev = dst->next;
1045 dst_free(dst);
1da177e4
LT
1046 } else {
1047 pprev = &dst->next;
3d0f24a7 1048 ++more;
1da177e4
LT
1049 }
1050 }
1051
3b00944c 1052 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1053
3d0f24a7 1054 return more;
1da177e4
LT
1055}
1056
1e493d19
DM
1057static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1058 void *arg)
1059{
1060 struct dst_entry *dst, **pprev;
1061
1062 spin_lock_bh(&icmp6_dst_lock);
1063 pprev = &icmp6_dst_gc_list;
1064 while ((dst = *pprev) != NULL) {
1065 struct rt6_info *rt = (struct rt6_info *) dst;
1066 if (func(rt, arg)) {
1067 *pprev = dst->next;
1068 dst_free(dst);
1069 } else {
1070 pprev = &dst->next;
1071 }
1072 }
1073 spin_unlock_bh(&icmp6_dst_lock);
1074}
1075
569d3645 1076static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1077{
1da177e4 1078 unsigned long now = jiffies;
86393e52 1079 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1080 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1081 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1082 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1083 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1084 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1085 int entries;
7019b78e 1086
fc66f95c 1087 entries = dst_entries_get_fast(ops);
7019b78e 1088 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1089 entries <= rt_max_size)
1da177e4
LT
1090 goto out;
1091
6891a346
BT
1092 net->ipv6.ip6_rt_gc_expire++;
1093 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1094 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1095 entries = dst_entries_get_slow(ops);
1096 if (entries < ops->gc_thresh)
7019b78e 1097 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1098out:
7019b78e 1099 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1100 return entries > rt_max_size;
1da177e4
LT
1101}
1102
1103/* Clean host part of a prefix. Not necessary in radix tree,
1104 but results in cleaner routing tables.
1105
1106 Remove it only when all the things will work!
1107 */
1108
6b75d090 1109int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1110{
5170ae82 1111 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1112 if (hoplimit == 0) {
6b75d090 1113 struct net_device *dev = dst->dev;
c68f24cc
ED
1114 struct inet6_dev *idev;
1115
1116 rcu_read_lock();
1117 idev = __in6_dev_get(dev);
1118 if (idev)
6b75d090 1119 hoplimit = idev->cnf.hop_limit;
c68f24cc 1120 else
53b7997f 1121 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1122 rcu_read_unlock();
1da177e4
LT
1123 }
1124 return hoplimit;
1125}
abbf46ae 1126EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1127
1128/*
1129 *
1130 */
1131
86872cb5 1132int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1133{
1134 int err;
5578689a 1135 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1136 struct rt6_info *rt = NULL;
1137 struct net_device *dev = NULL;
1138 struct inet6_dev *idev = NULL;
c71099ac 1139 struct fib6_table *table;
1da177e4
LT
1140 int addr_type;
1141
86872cb5 1142 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1143 return -EINVAL;
1144#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1145 if (cfg->fc_src_len)
1da177e4
LT
1146 return -EINVAL;
1147#endif
86872cb5 1148 if (cfg->fc_ifindex) {
1da177e4 1149 err = -ENODEV;
5578689a 1150 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1151 if (!dev)
1152 goto out;
1153 idev = in6_dev_get(dev);
1154 if (!idev)
1155 goto out;
1156 }
1157
86872cb5
TG
1158 if (cfg->fc_metric == 0)
1159 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1160
5578689a 1161 table = fib6_new_table(net, cfg->fc_table);
c71099ac
TG
1162 if (table == NULL) {
1163 err = -ENOBUFS;
1164 goto out;
1165 }
1166
86393e52 1167 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
1168
1169 if (rt == NULL) {
1170 err = -ENOMEM;
1171 goto out;
1172 }
1173
d8d1f30b 1174 rt->dst.obsolete = -1;
6f704992
YH
1175 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1176 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1177 0;
1da177e4 1178
86872cb5
TG
1179 if (cfg->fc_protocol == RTPROT_UNSPEC)
1180 cfg->fc_protocol = RTPROT_BOOT;
1181 rt->rt6i_protocol = cfg->fc_protocol;
1182
1183 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1184
1185 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1186 rt->dst.input = ip6_mc_input;
ab79ad14
1187 else if (cfg->fc_flags & RTF_LOCAL)
1188 rt->dst.input = ip6_input;
1da177e4 1189 else
d8d1f30b 1190 rt->dst.input = ip6_forward;
1da177e4 1191
d8d1f30b 1192 rt->dst.output = ip6_output;
1da177e4 1193
86872cb5
TG
1194 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1195 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1196 if (rt->rt6i_dst.plen == 128)
d8d1f30b 1197 rt->dst.flags = DST_HOST;
1da177e4
LT
1198
1199#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1200 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1201 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1202#endif
1203
86872cb5 1204 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1205
1206 /* We cannot add true routes via loopback here,
1207 they would result in kernel looping; promote them to reject routes
1208 */
86872cb5 1209 if ((cfg->fc_flags & RTF_REJECT) ||
ab79ad14
1210 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1211 && !(cfg->fc_flags&RTF_LOCAL))) {
1da177e4 1212 /* hold loopback dev/idev if we haven't done so. */
5578689a 1213 if (dev != net->loopback_dev) {
1da177e4
LT
1214 if (dev) {
1215 dev_put(dev);
1216 in6_dev_put(idev);
1217 }
5578689a 1218 dev = net->loopback_dev;
1da177e4
LT
1219 dev_hold(dev);
1220 idev = in6_dev_get(dev);
1221 if (!idev) {
1222 err = -ENODEV;
1223 goto out;
1224 }
1225 }
d8d1f30b
CG
1226 rt->dst.output = ip6_pkt_discard_out;
1227 rt->dst.input = ip6_pkt_discard;
1228 rt->dst.error = -ENETUNREACH;
1da177e4
LT
1229 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1230 goto install_route;
1231 }
1232
86872cb5 1233 if (cfg->fc_flags & RTF_GATEWAY) {
1da177e4
LT
1234 struct in6_addr *gw_addr;
1235 int gwa_type;
1236
86872cb5
TG
1237 gw_addr = &cfg->fc_gateway;
1238 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1da177e4
LT
1239 gwa_type = ipv6_addr_type(gw_addr);
1240
1241 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1242 struct rt6_info *grt;
1243
1244 /* IPv6 strictly inhibits using not link-local
1245 addresses as nexthop address.
1246 Otherwise, router will not able to send redirects.
1247 It is very good, but in some (rare!) circumstances
1248 (SIT, PtP, NBMA NOARP links) it is handy to allow
1249 some exceptions. --ANK
1250 */
1251 err = -EINVAL;
1252 if (!(gwa_type&IPV6_ADDR_UNICAST))
1253 goto out;
1254
5578689a 1255 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1256
1257 err = -EHOSTUNREACH;
1258 if (grt == NULL)
1259 goto out;
1260 if (dev) {
1261 if (dev != grt->rt6i_dev) {
d8d1f30b 1262 dst_release(&grt->dst);
1da177e4
LT
1263 goto out;
1264 }
1265 } else {
1266 dev = grt->rt6i_dev;
1267 idev = grt->rt6i_idev;
1268 dev_hold(dev);
1269 in6_dev_hold(grt->rt6i_idev);
1270 }
1271 if (!(grt->rt6i_flags&RTF_GATEWAY))
1272 err = 0;
d8d1f30b 1273 dst_release(&grt->dst);
1da177e4
LT
1274
1275 if (err)
1276 goto out;
1277 }
1278 err = -EINVAL;
1279 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1280 goto out;
1281 }
1282
1283 err = -ENODEV;
1284 if (dev == NULL)
1285 goto out;
1286
86872cb5 1287 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1da177e4
LT
1288 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1289 if (IS_ERR(rt->rt6i_nexthop)) {
1290 err = PTR_ERR(rt->rt6i_nexthop);
1291 rt->rt6i_nexthop = NULL;
1292 goto out;
1293 }
1294 }
1295
86872cb5 1296 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1297
1298install_route:
86872cb5
TG
1299 if (cfg->fc_mx) {
1300 struct nlattr *nla;
1301 int remaining;
1302
1303 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1304 int type = nla_type(nla);
86872cb5
TG
1305
1306 if (type) {
1307 if (type > RTAX_MAX) {
1da177e4
LT
1308 err = -EINVAL;
1309 goto out;
1310 }
86872cb5 1311
defb3519 1312 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1313 }
1da177e4
LT
1314 }
1315 }
1316
d8d1f30b 1317 rt->dst.dev = dev;
1da177e4 1318 rt->rt6i_idev = idev;
c71099ac 1319 rt->rt6i_table = table;
63152fc0 1320
c346dca1 1321 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1322
86872cb5 1323 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1324
1325out:
1326 if (dev)
1327 dev_put(dev);
1328 if (idev)
1329 in6_dev_put(idev);
1330 if (rt)
d8d1f30b 1331 dst_free(&rt->dst);
1da177e4
LT
1332 return err;
1333}
1334
86872cb5 1335static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1336{
1337 int err;
c71099ac 1338 struct fib6_table *table;
c346dca1 1339 struct net *net = dev_net(rt->rt6i_dev);
1da177e4 1340
8ed67789 1341 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1342 return -ENOENT;
1343
c71099ac
TG
1344 table = rt->rt6i_table;
1345 write_lock_bh(&table->tb6_lock);
1da177e4 1346
86872cb5 1347 err = fib6_del(rt, info);
d8d1f30b 1348 dst_release(&rt->dst);
1da177e4 1349
c71099ac 1350 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1351
1352 return err;
1353}
1354
e0a1ad73
TG
1355int ip6_del_rt(struct rt6_info *rt)
1356{
4d1169c1 1357 struct nl_info info = {
c346dca1 1358 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 1359 };
528c4ceb 1360 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1361}
1362
86872cb5 1363static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1364{
c71099ac 1365 struct fib6_table *table;
1da177e4
LT
1366 struct fib6_node *fn;
1367 struct rt6_info *rt;
1368 int err = -ESRCH;
1369
5578689a 1370 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
c71099ac
TG
1371 if (table == NULL)
1372 return err;
1373
1374 read_lock_bh(&table->tb6_lock);
1da177e4 1375
c71099ac 1376 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1377 &cfg->fc_dst, cfg->fc_dst_len,
1378 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1379
1da177e4 1380 if (fn) {
d8d1f30b 1381 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1382 if (cfg->fc_ifindex &&
1da177e4 1383 (rt->rt6i_dev == NULL ||
86872cb5 1384 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1da177e4 1385 continue;
86872cb5
TG
1386 if (cfg->fc_flags & RTF_GATEWAY &&
1387 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1388 continue;
86872cb5 1389 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1390 continue;
d8d1f30b 1391 dst_hold(&rt->dst);
c71099ac 1392 read_unlock_bh(&table->tb6_lock);
1da177e4 1393
86872cb5 1394 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1395 }
1396 }
c71099ac 1397 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1398
1399 return err;
1400}
1401
1402/*
1403 * Handle redirects
1404 */
a6279458
YH
/*
 * Flow key used for redirect lookups: a plain flowi followed by the
 * address of the gateway that sent the redirect.  The lookup code casts
 * between struct flowi * and struct ip6rd_flowi *, so fl must remain the
 * first member.
 */
1405struct ip6rd_flowi {
1406 struct flowi fl;
1407 struct in6_addr gateway;
1408};
1409
8ed67789
DL
1410static struct rt6_info *__ip6_route_redirect(struct net *net,
1411 struct fib6_table *table,
a6279458
YH
1412 struct flowi *fl,
1413 int flags)
1da177e4 1414{
a6279458
YH
1415 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1416 struct rt6_info *rt;
e843b9e1 1417 struct fib6_node *fn;
c71099ac 1418
1da177e4 1419 /*
e843b9e1
YH
1420 * Get the "current" route for this destination and
1421 * check if the redirect has come from approriate router.
1422 *
1423 * RFC 2461 specifies that redirects should only be
1424 * accepted if they come from the nexthop to the target.
1425 * Due to the way the routes are chosen, this notion
1426 * is a bit fuzzy and one might need to check all possible
1427 * routes.
1da177e4 1428 */
1da177e4 1429
c71099ac 1430 read_lock_bh(&table->tb6_lock);
a6279458 1431 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
e843b9e1 1432restart:
d8d1f30b 1433 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
e843b9e1
YH
1434 /*
1435 * Current route is on-link; redirect is always invalid.
1436 *
1437 * Seems, previous statement is not true. It could
1438 * be node, which looks for us as on-link (f.e. proxy ndisc)
1439 * But then router serving it might decide, that we should
1440 * know truth 8)8) --ANK (980726).
1441 */
1442 if (rt6_check_expired(rt))
1443 continue;
1444 if (!(rt->rt6i_flags & RTF_GATEWAY))
1445 continue;
a6279458 1446 if (fl->oif != rt->rt6i_dev->ifindex)
e843b9e1 1447 continue;
a6279458 1448 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1449 continue;
1450 break;
1451 }
a6279458 1452
cb15d9c2 1453 if (!rt)
8ed67789
DL
1454 rt = net->ipv6.ip6_null_entry;
1455 BACKTRACK(net, &fl->fl6_src);
cb15d9c2 1456out:
d8d1f30b 1457 dst_hold(&rt->dst);
a6279458 1458
c71099ac 1459 read_unlock_bh(&table->tb6_lock);
e843b9e1 1460
a6279458
YH
1461 return rt;
1462};
1463
1464static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1465 struct in6_addr *src,
1466 struct in6_addr *gateway,
1467 struct net_device *dev)
1468{
adaa70bb 1469 int flags = RT6_LOOKUP_F_HAS_SADDR;
c346dca1 1470 struct net *net = dev_net(dev);
a6279458
YH
1471 struct ip6rd_flowi rdfl = {
1472 .fl = {
1473 .oif = dev->ifindex,
5811662b
CG
1474 .fl6_dst = *dest,
1475 .fl6_src = *src,
a6279458 1476 },
a6279458 1477 };
adaa70bb 1478
86c36ce4
BH
1479 ipv6_addr_copy(&rdfl.gateway, gateway);
1480
adaa70bb
TG
1481 if (rt6_need_strict(dest))
1482 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1483
5578689a 1484 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
58f09b78 1485 flags, __ip6_route_redirect);
a6279458
YH
1486}
1487
1488void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1489 struct in6_addr *saddr,
1490 struct neighbour *neigh, u8 *lladdr, int on_link)
1491{
1492 struct rt6_info *rt, *nrt = NULL;
1493 struct netevent_redirect netevent;
c346dca1 1494 struct net *net = dev_net(neigh->dev);
a6279458
YH
1495
1496 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1497
8ed67789 1498 if (rt == net->ipv6.ip6_null_entry) {
1da177e4
LT
1499 if (net_ratelimit())
1500 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1501 "for redirect target\n");
a6279458 1502 goto out;
1da177e4
LT
1503 }
1504
1da177e4
LT
1505 /*
1506 * We have finally decided to accept it.
1507 */
1508
1ab1457c 1509 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1510 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1511 NEIGH_UPDATE_F_OVERRIDE|
1512 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1513 NEIGH_UPDATE_F_ISROUTER))
1514 );
1515
1516 /*
1517 * Redirect received -> path was valid.
1518 * Look, redirects are sent only in response to data packets,
1519 * so that this nexthop apparently is reachable. --ANK
1520 */
d8d1f30b 1521 dst_confirm(&rt->dst);
1da177e4
LT
1522
1523 /* Duplicate redirect: silently ignore. */
d8d1f30b 1524 if (neigh == rt->dst.neighbour)
1da177e4
LT
1525 goto out;
1526
1527 nrt = ip6_rt_copy(rt);
1528 if (nrt == NULL)
1529 goto out;
1530
1531 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1532 if (on_link)
1533 nrt->rt6i_flags &= ~RTF_GATEWAY;
1534
1535 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1536 nrt->rt6i_dst.plen = 128;
d8d1f30b 1537 nrt->dst.flags |= DST_HOST;
1da177e4
LT
1538
1539 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1540 nrt->rt6i_nexthop = neigh_clone(neigh);
1da177e4 1541
40e22e8f 1542 if (ip6_ins_rt(nrt))
1da177e4
LT
1543 goto out;
1544
d8d1f30b
CG
1545 netevent.old = &rt->dst;
1546 netevent.new = &nrt->dst;
8d71740c
TT
1547 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1548
1da177e4 1549 if (rt->rt6i_flags&RTF_CACHE) {
e0a1ad73 1550 ip6_del_rt(rt);
1da177e4
LT
1551 return;
1552 }
1553
1554out:
d8d1f30b 1555 dst_release(&rt->dst);
1da177e4
LT
1556}
1557
1558/*
1559 * Handle ICMP "packet too big" messages
1560 * i.e. Path MTU discovery
1561 */
1562
ae878ae2
1563static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
1564 struct net *net, u32 pmtu, int ifindex)
1da177e4
LT
1565{
1566 struct rt6_info *rt, *nrt;
1567 int allfrag = 0;
d3052b55 1568again:
ae878ae2 1569 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1da177e4
LT
1570 if (rt == NULL)
1571 return;
1572
d3052b55
AV
1573 if (rt6_check_expired(rt)) {
1574 ip6_del_rt(rt);
1575 goto again;
1576 }
1577
d8d1f30b 1578 if (pmtu >= dst_mtu(&rt->dst))
1da177e4
LT
1579 goto out;
1580
1581 if (pmtu < IPV6_MIN_MTU) {
1582 /*
1ab1457c 1583 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1584 * MTU (1280) and a fragment header should always be included
1585 * after a node receiving Too Big message reporting PMTU is
1586 * less than the IPv6 Minimum Link MTU.
1587 */
1588 pmtu = IPV6_MIN_MTU;
1589 allfrag = 1;
1590 }
1591
1592 /* New mtu received -> path was valid.
1593 They are sent only in response to data packets,
1594 so that this nexthop apparently is reachable. --ANK
1595 */
d8d1f30b 1596 dst_confirm(&rt->dst);
1da177e4
LT
1597
1598 /* Host route. If it is static, it would be better
1599 not to override it, but add new one, so that
1600 when cache entry will expire old pmtu
1601 would return automatically.
1602 */
1603 if (rt->rt6i_flags & RTF_CACHE) {
defb3519
DM
1604 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1605 if (allfrag) {
1606 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1607 features |= RTAX_FEATURE_ALLFRAG;
1608 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1609 }
d8d1f30b 1610 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1611 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1612 goto out;
1613 }
1614
1615 /* Network route.
1616 Two cases are possible:
1617 1. It is connected route. Action: COW
1618 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1619 */
d5315b50 1620 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1621 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1622 else
1623 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1624
d5315b50 1625 if (nrt) {
defb3519
DM
1626 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1627 if (allfrag) {
1628 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1629 features |= RTAX_FEATURE_ALLFRAG;
1630 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1631 }
a1e78363
YH
1632
1633 /* According to RFC 1981, detecting PMTU increase shouldn't be
1634 * happened within 5 mins, the recommended timer is 10 mins.
1635 * Here this route expiration time is set to ip6_rt_mtu_expires
1636 * which is 10 mins. After 10 mins the decreased pmtu is expired
1637 * and detecting PMTU increase will be automatically happened.
1638 */
d8d1f30b 1639 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
a1e78363
YH
1640 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1641
40e22e8f 1642 ip6_ins_rt(nrt);
1da177e4 1643 }
1da177e4 1644out:
d8d1f30b 1645 dst_release(&rt->dst);
1da177e4
LT
1646}
1647
ae878ae2
1648void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1649 struct net_device *dev, u32 pmtu)
1650{
1651 struct net *net = dev_net(dev);
1652
1653 /*
1654 * RFC 1981 states that a node "MUST reduce the size of the packets it
1655 * is sending along the path" that caused the Packet Too Big message.
1656 * Since it's not possible in the general case to determine which
1657 * interface was used to send the original packet, we update the MTU
1658 * on the interface that will be used to send future packets. We also
1659 * update the MTU on the interface that received the Packet Too Big in
1660 * case the original packet was forced out that interface with
1661 * SO_BINDTODEVICE or similar. This is the next best thing to the
1662 * correct behaviour, which would be to update the MTU on all
1663 * interfaces.
1664 */
1665 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1666 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1667}
1668
1da177e4
LT
1669/*
1670 * Misc support functions
1671 */
1672
1673static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1674{
c346dca1 1675 struct net *net = dev_net(ort->rt6i_dev);
86393e52 1676 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
1677
1678 if (rt) {
d8d1f30b
CG
1679 rt->dst.input = ort->dst.input;
1680 rt->dst.output = ort->dst.output;
1681
defb3519 1682 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b
CG
1683 rt->dst.error = ort->dst.error;
1684 rt->dst.dev = ort->dst.dev;
1685 if (rt->dst.dev)
1686 dev_hold(rt->dst.dev);
1da177e4
LT
1687 rt->rt6i_idev = ort->rt6i_idev;
1688 if (rt->rt6i_idev)
1689 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1690 rt->dst.lastuse = jiffies;
1da177e4
LT
1691 rt->rt6i_expires = 0;
1692
1693 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1694 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1695 rt->rt6i_metric = 0;
1696
1697 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1698#ifdef CONFIG_IPV6_SUBTREES
1699 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1700#endif
c71099ac 1701 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1702 }
1703 return rt;
1704}
1705
70ceb4f5 1706#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0
DL
1707static struct rt6_info *rt6_get_route_info(struct net *net,
1708 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1709 struct in6_addr *gwaddr, int ifindex)
1710{
1711 struct fib6_node *fn;
1712 struct rt6_info *rt = NULL;
c71099ac
TG
1713 struct fib6_table *table;
1714
efa2cea0 1715 table = fib6_get_table(net, RT6_TABLE_INFO);
c71099ac
TG
1716 if (table == NULL)
1717 return NULL;
70ceb4f5 1718
c71099ac
TG
1719 write_lock_bh(&table->tb6_lock);
1720 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1721 if (!fn)
1722 goto out;
1723
d8d1f30b 1724 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
70ceb4f5
YH
1725 if (rt->rt6i_dev->ifindex != ifindex)
1726 continue;
1727 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1728 continue;
1729 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1730 continue;
d8d1f30b 1731 dst_hold(&rt->dst);
70ceb4f5
YH
1732 break;
1733 }
1734out:
c71099ac 1735 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1736 return rt;
1737}
1738
efa2cea0
DL
1739static struct rt6_info *rt6_add_route_info(struct net *net,
1740 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1741 struct in6_addr *gwaddr, int ifindex,
1742 unsigned pref)
1743{
86872cb5
TG
1744 struct fib6_config cfg = {
1745 .fc_table = RT6_TABLE_INFO,
238fc7ea 1746 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1747 .fc_ifindex = ifindex,
1748 .fc_dst_len = prefixlen,
1749 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1750 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1751 .fc_nlinfo.pid = 0,
1752 .fc_nlinfo.nlh = NULL,
1753 .fc_nlinfo.nl_net = net,
86872cb5
TG
1754 };
1755
1756 ipv6_addr_copy(&cfg.fc_dst, prefix);
1757 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
70ceb4f5 1758
e317da96
YH
1759 /* We should treat it as a default route if prefix length is 0. */
1760 if (!prefixlen)
86872cb5 1761 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1762
86872cb5 1763 ip6_route_add(&cfg);
70ceb4f5 1764
efa2cea0 1765 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1766}
1767#endif
1768
1da177e4 1769struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1ab1457c 1770{
1da177e4 1771 struct rt6_info *rt;
c71099ac 1772 struct fib6_table *table;
1da177e4 1773
c346dca1 1774 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
c71099ac
TG
1775 if (table == NULL)
1776 return NULL;
1da177e4 1777
c71099ac 1778 write_lock_bh(&table->tb6_lock);
d8d1f30b 1779 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1da177e4 1780 if (dev == rt->rt6i_dev &&
045927ff 1781 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1782 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1783 break;
1784 }
1785 if (rt)
d8d1f30b 1786 dst_hold(&rt->dst);
c71099ac 1787 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1788 return rt;
1789}
1790
1791struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
ebacaaa0
YH
1792 struct net_device *dev,
1793 unsigned int pref)
1da177e4 1794{
86872cb5
TG
1795 struct fib6_config cfg = {
1796 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1797 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1798 .fc_ifindex = dev->ifindex,
1799 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1800 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1801 .fc_nlinfo.pid = 0,
1802 .fc_nlinfo.nlh = NULL,
c346dca1 1803 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1804 };
1da177e4 1805
86872cb5 1806 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1da177e4 1807
86872cb5 1808 ip6_route_add(&cfg);
1da177e4 1809
1da177e4
LT
1810 return rt6_get_dflt_router(gwaddr, dev);
1811}
1812
7b4da532 1813void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1814{
1815 struct rt6_info *rt;
c71099ac
TG
1816 struct fib6_table *table;
1817
1818 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1819 table = fib6_get_table(net, RT6_TABLE_DFLT);
c71099ac
TG
1820 if (table == NULL)
1821 return;
1da177e4
LT
1822
1823restart:
c71099ac 1824 read_lock_bh(&table->tb6_lock);
d8d1f30b 1825 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1826 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1827 dst_hold(&rt->dst);
c71099ac 1828 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1829 ip6_del_rt(rt);
1da177e4
LT
1830 goto restart;
1831 }
1832 }
c71099ac 1833 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1834}
1835
5578689a
DL
1836static void rtmsg_to_fib6_config(struct net *net,
1837 struct in6_rtmsg *rtmsg,
86872cb5
TG
1838 struct fib6_config *cfg)
1839{
1840 memset(cfg, 0, sizeof(*cfg));
1841
1842 cfg->fc_table = RT6_TABLE_MAIN;
1843 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1844 cfg->fc_metric = rtmsg->rtmsg_metric;
1845 cfg->fc_expires = rtmsg->rtmsg_info;
1846 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1847 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1848 cfg->fc_flags = rtmsg->rtmsg_flags;
1849
5578689a 1850 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1851
86872cb5
TG
1852 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1853 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1854 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1855}
1856
5578689a 1857int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1858{
86872cb5 1859 struct fib6_config cfg;
1da177e4
LT
1860 struct in6_rtmsg rtmsg;
1861 int err;
1862
1863 switch(cmd) {
1864 case SIOCADDRT: /* Add a route */
1865 case SIOCDELRT: /* Delete a route */
1866 if (!capable(CAP_NET_ADMIN))
1867 return -EPERM;
1868 err = copy_from_user(&rtmsg, arg,
1869 sizeof(struct in6_rtmsg));
1870 if (err)
1871 return -EFAULT;
86872cb5 1872
5578689a 1873 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1874
1da177e4
LT
1875 rtnl_lock();
1876 switch (cmd) {
1877 case SIOCADDRT:
86872cb5 1878 err = ip6_route_add(&cfg);
1da177e4
LT
1879 break;
1880 case SIOCDELRT:
86872cb5 1881 err = ip6_route_del(&cfg);
1da177e4
LT
1882 break;
1883 default:
1884 err = -EINVAL;
1885 }
1886 rtnl_unlock();
1887
1888 return err;
3ff50b79 1889 }
1da177e4
LT
1890
1891 return -EINVAL;
1892}
1893
1894/*
1895 * Drop the packet on the floor
1896 */
1897
d5fdd6ba 1898static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 1899{
612f09e8 1900 int type;
adf30907 1901 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
1902 switch (ipstats_mib_noroutes) {
1903 case IPSTATS_MIB_INNOROUTES:
0660e03f 1904 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 1905 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
1906 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1907 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
1908 break;
1909 }
1910 /* FALLTHROUGH */
1911 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
1912 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1913 ipstats_mib_noroutes);
612f09e8
YH
1914 break;
1915 }
3ffe533c 1916 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
1917 kfree_skb(skb);
1918 return 0;
1919}
1920
9ce8ade0
TG
1921static int ip6_pkt_discard(struct sk_buff *skb)
1922{
612f09e8 1923 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1924}
1925
20380731 1926static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 1927{
adf30907 1928 skb->dev = skb_dst(skb)->dev;
612f09e8 1929 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
1930}
1931
6723ab54
DM
1932#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1933
9ce8ade0
TG
1934static int ip6_pkt_prohibit(struct sk_buff *skb)
1935{
612f09e8 1936 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1937}
1938
1939static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1940{
adf30907 1941 skb->dev = skb_dst(skb)->dev;
612f09e8 1942 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
1943}
1944
6723ab54
DM
1945#endif
1946
1da177e4
LT
1947/*
1948 * Allocate a dst for local (unicast / anycast) address.
1949 */
1950
1951struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1952 const struct in6_addr *addr,
1953 int anycast)
1954{
c346dca1 1955 struct net *net = dev_net(idev->dev);
86393e52 1956 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
14deae41 1957 struct neighbour *neigh;
1da177e4 1958
40385653
BG
1959 if (rt == NULL) {
1960 if (net_ratelimit())
1961 pr_warning("IPv6: Maximum number of routes reached,"
1962 " consider increasing route/max_size.\n");
1da177e4 1963 return ERR_PTR(-ENOMEM);
40385653 1964 }
1da177e4 1965
5578689a 1966 dev_hold(net->loopback_dev);
1da177e4
LT
1967 in6_dev_hold(idev);
1968
d8d1f30b
CG
1969 rt->dst.flags = DST_HOST;
1970 rt->dst.input = ip6_input;
1971 rt->dst.output = ip6_output;
5578689a 1972 rt->rt6i_dev = net->loopback_dev;
1da177e4 1973 rt->rt6i_idev = idev;
defb3519 1974 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1);
d8d1f30b 1975 rt->dst.obsolete = -1;
1da177e4
LT
1976
1977 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
1978 if (anycast)
1979 rt->rt6i_flags |= RTF_ANYCAST;
1980 else
1da177e4 1981 rt->rt6i_flags |= RTF_LOCAL;
14deae41
DM
1982 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1983 if (IS_ERR(neigh)) {
d8d1f30b 1984 dst_free(&rt->dst);
14deae41
DM
1985
1986 /* We are casting this because that is the return
1987 * value type. But an errno encoded pointer is the
1988 * same regardless of the underlying pointer type,
1989 * and that's what we are returning. So this is OK.
1990 */
1991 return (struct rt6_info *) neigh;
1da177e4 1992 }
14deae41 1993 rt->rt6i_nexthop = neigh;
1da177e4
LT
1994
1995 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1996 rt->rt6i_dst.plen = 128;
5578689a 1997 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 1998
d8d1f30b 1999 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2000
2001 return rt;
2002}
2003
8ed67789
DL
/*
 * Argument bundle handed to fib6_ifdown(): the device being taken down
 * (NULL matches every device) and the namespace whose ip6_null_entry
 * must be spared.
 */
2004struct arg_dev_net {
2005 struct net_device *dev;
2006 struct net *net;
2007};
2008
1da177e4
LT
2009static int fib6_ifdown(struct rt6_info *rt, void *arg)
2010{
bc3ef660 2011 const struct arg_dev_net *adn = arg;
2012 const struct net_device *dev = adn->dev;
8ed67789 2013
bc3ef660 2014 if ((rt->rt6i_dev == dev || dev == NULL) &&
2015 rt != adn->net->ipv6.ip6_null_entry) {
1da177e4
LT
2016 RT6_TRACE("deleted by ifdown %p\n", rt);
2017 return -1;
2018 }
2019 return 0;
2020}
2021
f3db4851 2022void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2023{
8ed67789
DL
2024 struct arg_dev_net adn = {
2025 .dev = dev,
2026 .net = net,
2027 };
2028
2029 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2030 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2031}
2032
/*
 * Argument bundle handed to rt6_mtu_change_route(): the device whose MTU
 * changed and its new MTU.
 */
2033struct rt6_mtu_change_arg
2034{
2035 struct net_device *dev;
2036 unsigned mtu;
2037};
2038
2039static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2040{
2041 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2042 struct inet6_dev *idev;
2043
2044 /* In IPv6 pmtu discovery is not optional,
2045 so that RTAX_MTU lock cannot disable it.
2046 We still use this lock to block changes
2047 caused by addrconf/ndisc.
2048 */
2049
2050 idev = __in6_dev_get(arg->dev);
2051 if (idev == NULL)
2052 return 0;
2053
2054 /* For administrative MTU increase, there is no way to discover
2055 IPv6 PMTU increase, so PMTU increase should be updated here.
2056 Since RFC 1981 doesn't include administrative MTU increase
2057 update PMTU increase is a MUST. (i.e. jumbo frame)
2058 */
2059 /*
2060 If new MTU is less than route PMTU, this new MTU will be the
2061 lowest MTU in the path, update the route PMTU to reflect PMTU
2062 decreases; if new MTU is greater than route PMTU, and the
2063 old MTU is the lowest MTU in the path, update the route PMTU
2064 to reflect the increase. In this case if the other nodes' MTU
2065 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2066 PMTU discouvery.
2067 */
2068 if (rt->rt6i_dev == arg->dev &&
d8d1f30b
CG
2069 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2070 (dst_mtu(&rt->dst) >= arg->mtu ||
2071 (dst_mtu(&rt->dst) < arg->mtu &&
2072 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2073 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2074 }
1da177e4
LT
2075 return 0;
2076}
2077
2078void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2079{
c71099ac
TG
2080 struct rt6_mtu_change_arg arg = {
2081 .dev = dev,
2082 .mtu = mtu,
2083 };
1da177e4 2084
c346dca1 2085 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2086}
2087
ef7c79ed 2088static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2089 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2090 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2091 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2092 [RTA_PRIORITY] = { .type = NLA_U32 },
2093 [RTA_METRICS] = { .type = NLA_NESTED },
2094};
2095
2096static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2097 struct fib6_config *cfg)
1da177e4 2098{
86872cb5
TG
2099 struct rtmsg *rtm;
2100 struct nlattr *tb[RTA_MAX+1];
2101 int err;
1da177e4 2102
86872cb5
TG
2103 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2104 if (err < 0)
2105 goto errout;
1da177e4 2106
86872cb5
TG
2107 err = -EINVAL;
2108 rtm = nlmsg_data(nlh);
2109 memset(cfg, 0, sizeof(*cfg));
2110
2111 cfg->fc_table = rtm->rtm_table;
2112 cfg->fc_dst_len = rtm->rtm_dst_len;
2113 cfg->fc_src_len = rtm->rtm_src_len;
2114 cfg->fc_flags = RTF_UP;
2115 cfg->fc_protocol = rtm->rtm_protocol;
2116
2117 if (rtm->rtm_type == RTN_UNREACHABLE)
2118 cfg->fc_flags |= RTF_REJECT;
2119
ab79ad14
2120 if (rtm->rtm_type == RTN_LOCAL)
2121 cfg->fc_flags |= RTF_LOCAL;
2122
86872cb5
TG
2123 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2124 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2125 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2126
2127 if (tb[RTA_GATEWAY]) {
2128 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2129 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2130 }
86872cb5
TG
2131
2132 if (tb[RTA_DST]) {
2133 int plen = (rtm->rtm_dst_len + 7) >> 3;
2134
2135 if (nla_len(tb[RTA_DST]) < plen)
2136 goto errout;
2137
2138 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2139 }
86872cb5
TG
2140
2141 if (tb[RTA_SRC]) {
2142 int plen = (rtm->rtm_src_len + 7) >> 3;
2143
2144 if (nla_len(tb[RTA_SRC]) < plen)
2145 goto errout;
2146
2147 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2148 }
86872cb5
TG
2149
2150 if (tb[RTA_OIF])
2151 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2152
2153 if (tb[RTA_PRIORITY])
2154 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2155
2156 if (tb[RTA_METRICS]) {
2157 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2158 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2159 }
86872cb5
TG
2160
2161 if (tb[RTA_TABLE])
2162 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2163
2164 err = 0;
2165errout:
2166 return err;
1da177e4
LT
2167}
2168
c127ea2c 2169static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2170{
86872cb5
TG
2171 struct fib6_config cfg;
2172 int err;
1da177e4 2173
86872cb5
TG
2174 err = rtm_to_fib6_config(skb, nlh, &cfg);
2175 if (err < 0)
2176 return err;
2177
2178 return ip6_route_del(&cfg);
1da177e4
LT
2179}
2180
c127ea2c 2181static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2182{
86872cb5
TG
2183 struct fib6_config cfg;
2184 int err;
1da177e4 2185
86872cb5
TG
2186 err = rtm_to_fib6_config(skb, nlh, &cfg);
2187 if (err < 0)
2188 return err;
2189
2190 return ip6_route_add(&cfg);
1da177e4
LT
2191}
2192
339bf98f
TG
2193static inline size_t rt6_nlmsg_size(void)
2194{
2195 return NLMSG_ALIGN(sizeof(struct rtmsg))
2196 + nla_total_size(16) /* RTA_SRC */
2197 + nla_total_size(16) /* RTA_DST */
2198 + nla_total_size(16) /* RTA_GATEWAY */
2199 + nla_total_size(16) /* RTA_PREFSRC */
2200 + nla_total_size(4) /* RTA_TABLE */
2201 + nla_total_size(4) /* RTA_IIF */
2202 + nla_total_size(4) /* RTA_OIF */
2203 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2204 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2205 + nla_total_size(sizeof(struct rta_cacheinfo));
2206}
2207
191cd582
BH
2208static int rt6_fill_node(struct net *net,
2209 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2210 struct in6_addr *dst, struct in6_addr *src,
2211 int iif, int type, u32 pid, u32 seq,
7bc570c8 2212 int prefix, int nowait, unsigned int flags)
1da177e4
LT
2213{
2214 struct rtmsg *rtm;
2d7202bf 2215 struct nlmsghdr *nlh;
e3703b3d 2216 long expires;
9e762a4a 2217 u32 table;
1da177e4
LT
2218
2219 if (prefix) { /* user wants prefix routes only */
2220 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2221 /* success since this is not a prefix route */
2222 return 1;
2223 }
2224 }
2225
2d7202bf
TG
2226 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2227 if (nlh == NULL)
26932566 2228 return -EMSGSIZE;
2d7202bf
TG
2229
2230 rtm = nlmsg_data(nlh);
1da177e4
LT
2231 rtm->rtm_family = AF_INET6;
2232 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2233 rtm->rtm_src_len = rt->rt6i_src.plen;
2234 rtm->rtm_tos = 0;
c71099ac 2235 if (rt->rt6i_table)
9e762a4a 2236 table = rt->rt6i_table->tb6_id;
c71099ac 2237 else
9e762a4a
PM
2238 table = RT6_TABLE_UNSPEC;
2239 rtm->rtm_table = table;
2d7202bf 2240 NLA_PUT_U32(skb, RTA_TABLE, table);
1da177e4
LT
2241 if (rt->rt6i_flags&RTF_REJECT)
2242 rtm->rtm_type = RTN_UNREACHABLE;
ab79ad14
2243 else if (rt->rt6i_flags&RTF_LOCAL)
2244 rtm->rtm_type = RTN_LOCAL;
1da177e4
LT
2245 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2246 rtm->rtm_type = RTN_LOCAL;
2247 else
2248 rtm->rtm_type = RTN_UNICAST;
2249 rtm->rtm_flags = 0;
2250 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2251 rtm->rtm_protocol = rt->rt6i_protocol;
2252 if (rt->rt6i_flags&RTF_DYNAMIC)
2253 rtm->rtm_protocol = RTPROT_REDIRECT;
2254 else if (rt->rt6i_flags & RTF_ADDRCONF)
2255 rtm->rtm_protocol = RTPROT_KERNEL;
2256 else if (rt->rt6i_flags&RTF_DEFAULT)
2257 rtm->rtm_protocol = RTPROT_RA;
2258
2259 if (rt->rt6i_flags&RTF_CACHE)
2260 rtm->rtm_flags |= RTM_F_CLONED;
2261
2262 if (dst) {
2d7202bf 2263 NLA_PUT(skb, RTA_DST, 16, dst);
1ab1457c 2264 rtm->rtm_dst_len = 128;
1da177e4 2265 } else if (rtm->rtm_dst_len)
2d7202bf 2266 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2267#ifdef CONFIG_IPV6_SUBTREES
2268 if (src) {
2d7202bf 2269 NLA_PUT(skb, RTA_SRC, 16, src);
1ab1457c 2270 rtm->rtm_src_len = 128;
1da177e4 2271 } else if (rtm->rtm_src_len)
2d7202bf 2272 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4 2273#endif
7bc570c8
YH
2274 if (iif) {
2275#ifdef CONFIG_IPV6_MROUTE
2276 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2277 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2278 if (err <= 0) {
2279 if (!nowait) {
2280 if (err == 0)
2281 return 0;
2282 goto nla_put_failure;
2283 } else {
2284 if (err == -EMSGSIZE)
2285 goto nla_put_failure;
2286 }
2287 }
2288 } else
2289#endif
2290 NLA_PUT_U32(skb, RTA_IIF, iif);
2291 } else if (dst) {
d8d1f30b 2292 struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
1da177e4 2293 struct in6_addr saddr_buf;
191cd582 2294 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
7cbca67c 2295 dst, 0, &saddr_buf) == 0)
2d7202bf 2296 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2297 }
2d7202bf 2298
defb3519 2299 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2300 goto nla_put_failure;
2301
d8d1f30b
CG
2302 if (rt->dst.neighbour)
2303 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);
2d7202bf 2304
d8d1f30b 2305 if (rt->dst.dev)
2d7202bf
TG
2306 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2307
2308 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
e3703b3d 2309
36e3deae
YH
2310 if (!(rt->rt6i_flags & RTF_EXPIRES))
2311 expires = 0;
2312 else if (rt->rt6i_expires - jiffies < INT_MAX)
2313 expires = rt->rt6i_expires - jiffies;
2314 else
2315 expires = INT_MAX;
69cdf8f9 2316
d8d1f30b
CG
2317 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2318 expires, rt->dst.error) < 0)
e3703b3d 2319 goto nla_put_failure;
2d7202bf
TG
2320
2321 return nlmsg_end(skb, nlh);
2322
2323nla_put_failure:
26932566
PM
2324 nlmsg_cancel(skb, nlh);
2325 return -EMSGSIZE;
1da177e4
LT
2326}
2327
1b43af54 2328int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2329{
2330 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2331 int prefix;
2332
2d7202bf
TG
2333 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2334 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2335 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2336 } else
2337 prefix = 0;
2338
191cd582
BH
2339 return rt6_fill_node(arg->net,
2340 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2341 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2342 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2343}
2344
c127ea2c 2345static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2346{
3b1e0a65 2347 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2348 struct nlattr *tb[RTA_MAX+1];
2349 struct rt6_info *rt;
1da177e4 2350 struct sk_buff *skb;
ab364a6f 2351 struct rtmsg *rtm;
1da177e4 2352 struct flowi fl;
ab364a6f 2353 int err, iif = 0;
1da177e4 2354
ab364a6f
TG
2355 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2356 if (err < 0)
2357 goto errout;
1da177e4 2358
ab364a6f 2359 err = -EINVAL;
1da177e4 2360 memset(&fl, 0, sizeof(fl));
1da177e4 2361
ab364a6f
TG
2362 if (tb[RTA_SRC]) {
2363 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2364 goto errout;
2365
2366 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2367 }
2368
2369 if (tb[RTA_DST]) {
2370 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2371 goto errout;
2372
2373 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2374 }
2375
2376 if (tb[RTA_IIF])
2377 iif = nla_get_u32(tb[RTA_IIF]);
2378
2379 if (tb[RTA_OIF])
2380 fl.oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2381
2382 if (iif) {
2383 struct net_device *dev;
5578689a 2384 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2385 if (!dev) {
2386 err = -ENODEV;
ab364a6f 2387 goto errout;
1da177e4
LT
2388 }
2389 }
2390
ab364a6f
TG
2391 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2392 if (skb == NULL) {
2393 err = -ENOBUFS;
2394 goto errout;
2395 }
1da177e4 2396
ab364a6f
TG
2397 /* Reserve room for dummy headers, this skb can pass
2398 through good chunk of routing engine.
2399 */
459a98ed 2400 skb_reset_mac_header(skb);
ab364a6f 2401 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2402
8a3edd80 2403 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
d8d1f30b 2404 skb_dst_set(skb, &rt->dst);
1da177e4 2405
191cd582 2406 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
1da177e4 2407 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2408 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2409 if (err < 0) {
ab364a6f
TG
2410 kfree_skb(skb);
2411 goto errout;
1da177e4
LT
2412 }
2413
5578689a 2414 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2415errout:
1da177e4 2416 return err;
1da177e4
LT
2417}
2418
86872cb5 2419void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2420{
2421 struct sk_buff *skb;
5578689a 2422 struct net *net = info->nl_net;
528c4ceb
DL
2423 u32 seq;
2424 int err;
2425
2426 err = -ENOBUFS;
2427 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
86872cb5 2428
339bf98f 2429 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
21713ebc
TG
2430 if (skb == NULL)
2431 goto errout;
2432
191cd582 2433 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2434 event, info->pid, seq, 0, 0, 0);
26932566
PM
2435 if (err < 0) {
2436 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2437 WARN_ON(err == -EMSGSIZE);
2438 kfree_skb(skb);
2439 goto errout;
2440 }
1ce85fe4
PNA
2441 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2442 info->nlh, gfp_any());
2443 return;
21713ebc
TG
2444errout:
2445 if (err < 0)
5578689a 2446 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2447}
2448
8ed67789
DL
2449static int ip6_route_dev_notify(struct notifier_block *this,
2450 unsigned long event, void *data)
2451{
2452 struct net_device *dev = (struct net_device *)data;
c346dca1 2453 struct net *net = dev_net(dev);
8ed67789
DL
2454
2455 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2456 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2457 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2458#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2459 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2460 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2461 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2462 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2463#endif
2464 }
2465
2466 return NOTIFY_OK;
2467}
2468
1da177e4
LT
2469/*
2470 * /proc
2471 */
2472
2473#ifdef CONFIG_PROC_FS
2474
1da177e4
LT
/*
 * Buffer bookkeeping for a /proc reader.
 * NOTE(review): the seq_file based handlers that follow do not use this
 * struct; check for remaining users elsewhere before relying on it.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2483
2484static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2485{
33120b30 2486 struct seq_file *m = p_arg;
1da177e4 2487
4b7a4274 2488 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2489
2490#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2491 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2492#else
33120b30 2493 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4
LT
2494#endif
2495
2496 if (rt->rt6i_nexthop) {
4b7a4274 2497 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
1da177e4 2498 } else {
33120b30 2499 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2500 }
33120b30 2501 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2502 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2503 rt->dst.__use, rt->rt6i_flags,
33120b30 2504 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1da177e4
LT
2505 return 0;
2506}
2507
33120b30 2508static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2509{
f3db4851
DL
2510 struct net *net = (struct net *)m->private;
2511 fib6_clean_all(net, rt6_info_route, 0, m);
33120b30
AD
2512 return 0;
2513}
1da177e4 2514
33120b30
AD
2515static int ipv6_route_open(struct inode *inode, struct file *file)
2516{
de05c557 2517 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2518}
2519
33120b30
AD
2520static const struct file_operations ipv6_route_proc_fops = {
2521 .owner = THIS_MODULE,
2522 .open = ipv6_route_open,
2523 .read = seq_read,
2524 .llseek = seq_lseek,
b6fcbdb4 2525 .release = single_release_net,
33120b30
AD
2526};
2527
1da177e4
LT
2528static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2529{
69ddb805 2530 struct net *net = (struct net *)seq->private;
1da177e4 2531 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2532 net->ipv6.rt6_stats->fib_nodes,
2533 net->ipv6.rt6_stats->fib_route_nodes,
2534 net->ipv6.rt6_stats->fib_rt_alloc,
2535 net->ipv6.rt6_stats->fib_rt_entries,
2536 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2537 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2538 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2539
2540 return 0;
2541}
2542
2543static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2544{
de05c557 2545 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2546}
2547
9a32144e 2548static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2549 .owner = THIS_MODULE,
2550 .open = rt6_stats_seq_open,
2551 .read = seq_read,
2552 .llseek = seq_lseek,
b6fcbdb4 2553 .release = single_release_net,
1da177e4
LT
2554};
2555#endif /* CONFIG_PROC_FS */
2556
2557#ifdef CONFIG_SYSCTL
2558
1da177e4 2559static
8d65af78 2560int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2561 void __user *buffer, size_t *lenp, loff_t *ppos)
2562{
5b7c931d
DL
2563 struct net *net = current->nsproxy->net_ns;
2564 int delay = net->ipv6.sysctl.flush_delay;
1da177e4 2565 if (write) {
8d65af78 2566 proc_dointvec(ctl, write, buffer, lenp, ppos);
5b7c931d 2567 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
1da177e4
LT
2568 return 0;
2569 } else
2570 return -EINVAL;
2571}
2572
760f2d01 2573ctl_table ipv6_route_table_template[] = {
1ab1457c 2574 {
1da177e4 2575 .procname = "flush",
4990509f 2576 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2577 .maxlen = sizeof(int),
89c8b3a1 2578 .mode = 0200,
6d9f239a 2579 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2580 },
2581 {
1da177e4 2582 .procname = "gc_thresh",
9a7ec3a9 2583 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2584 .maxlen = sizeof(int),
2585 .mode = 0644,
6d9f239a 2586 .proc_handler = proc_dointvec,
1da177e4
LT
2587 },
2588 {
1da177e4 2589 .procname = "max_size",
4990509f 2590 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2591 .maxlen = sizeof(int),
2592 .mode = 0644,
6d9f239a 2593 .proc_handler = proc_dointvec,
1da177e4
LT
2594 },
2595 {
1da177e4 2596 .procname = "gc_min_interval",
4990509f 2597 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2598 .maxlen = sizeof(int),
2599 .mode = 0644,
6d9f239a 2600 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2601 },
2602 {
1da177e4 2603 .procname = "gc_timeout",
4990509f 2604 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2605 .maxlen = sizeof(int),
2606 .mode = 0644,
6d9f239a 2607 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2608 },
2609 {
1da177e4 2610 .procname = "gc_interval",
4990509f 2611 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2612 .maxlen = sizeof(int),
2613 .mode = 0644,
6d9f239a 2614 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2615 },
2616 {
1da177e4 2617 .procname = "gc_elasticity",
4990509f 2618 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2619 .maxlen = sizeof(int),
2620 .mode = 0644,
f3d3f616 2621 .proc_handler = proc_dointvec,
1da177e4
LT
2622 },
2623 {
1da177e4 2624 .procname = "mtu_expires",
4990509f 2625 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2626 .maxlen = sizeof(int),
2627 .mode = 0644,
6d9f239a 2628 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2629 },
2630 {
1da177e4 2631 .procname = "min_adv_mss",
4990509f 2632 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2633 .maxlen = sizeof(int),
2634 .mode = 0644,
f3d3f616 2635 .proc_handler = proc_dointvec,
1da177e4
LT
2636 },
2637 {
1da177e4 2638 .procname = "gc_min_interval_ms",
4990509f 2639 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2640 .maxlen = sizeof(int),
2641 .mode = 0644,
6d9f239a 2642 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2643 },
f8572d8f 2644 { }
1da177e4
LT
2645};
2646
2c8c1e72 2647struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2648{
2649 struct ctl_table *table;
2650
2651 table = kmemdup(ipv6_route_table_template,
2652 sizeof(ipv6_route_table_template),
2653 GFP_KERNEL);
5ee09105
YH
2654
2655 if (table) {
2656 table[0].data = &net->ipv6.sysctl.flush_delay;
86393e52 2657 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2658 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2659 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2660 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2661 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2662 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2663 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2664 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2665 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2666 }
2667
760f2d01
DL
2668 return table;
2669}
1da177e4
LT
2670#endif
2671
2c8c1e72 2672static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2673{
633d424b 2674 int ret = -ENOMEM;
8ed67789 2675
86393e52
AD
2676 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2677 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2678
fc66f95c
ED
2679 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2680 goto out_ip6_dst_ops;
2681
8ed67789
DL
2682 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2683 sizeof(*net->ipv6.ip6_null_entry),
2684 GFP_KERNEL);
2685 if (!net->ipv6.ip6_null_entry)
fc66f95c 2686 goto out_ip6_dst_entries;
d8d1f30b 2687 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2688 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2689 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2690 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2691 ip6_template_metrics, true);
8ed67789
DL
2692
2693#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2694 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2695 sizeof(*net->ipv6.ip6_prohibit_entry),
2696 GFP_KERNEL);
68fffc67
PZ
2697 if (!net->ipv6.ip6_prohibit_entry)
2698 goto out_ip6_null_entry;
d8d1f30b 2699 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2700 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2701 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2702 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2703 ip6_template_metrics, true);
8ed67789
DL
2704
2705 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2706 sizeof(*net->ipv6.ip6_blk_hole_entry),
2707 GFP_KERNEL);
68fffc67
PZ
2708 if (!net->ipv6.ip6_blk_hole_entry)
2709 goto out_ip6_prohibit_entry;
d8d1f30b 2710 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2711 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2712 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2713 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2714 ip6_template_metrics, true);
8ed67789
DL
2715#endif
2716
b339a47c
PZ
2717 net->ipv6.sysctl.flush_delay = 0;
2718 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2719 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2720 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2721 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2722 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2723 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2724 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2725
cdb18761
DL
2726#ifdef CONFIG_PROC_FS
2727 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2728 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2729#endif
6891a346
BT
2730 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2731
8ed67789
DL
2732 ret = 0;
2733out:
2734 return ret;
f2fc6a54 2735
68fffc67
PZ
2736#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2737out_ip6_prohibit_entry:
2738 kfree(net->ipv6.ip6_prohibit_entry);
2739out_ip6_null_entry:
2740 kfree(net->ipv6.ip6_null_entry);
2741#endif
fc66f95c
ED
2742out_ip6_dst_entries:
2743 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2744out_ip6_dst_ops:
f2fc6a54 2745 goto out;
cdb18761
DL
2746}
2747
2c8c1e72 2748static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761
DL
2749{
2750#ifdef CONFIG_PROC_FS
2751 proc_net_remove(net, "ipv6_route");
2752 proc_net_remove(net, "rt6_stats");
2753#endif
8ed67789
DL
2754 kfree(net->ipv6.ip6_null_entry);
2755#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2756 kfree(net->ipv6.ip6_prohibit_entry);
2757 kfree(net->ipv6.ip6_blk_hole_entry);
2758#endif
41bb78b4 2759 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
2760}
2761
2762static struct pernet_operations ip6_route_net_ops = {
2763 .init = ip6_route_net_init,
2764 .exit = ip6_route_net_exit,
2765};
2766
8ed67789
DL
2767static struct notifier_block ip6_route_dev_notifier = {
2768 .notifier_call = ip6_route_dev_notify,
2769 .priority = 0,
2770};
2771
433d49c3 2772int __init ip6_route_init(void)
1da177e4 2773{
433d49c3
DL
2774 int ret;
2775
9a7ec3a9
DL
2776 ret = -ENOMEM;
2777 ip6_dst_ops_template.kmem_cachep =
e5d679f3 2778 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 2779 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 2780 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 2781 goto out;
14e50e57 2782
fc66f95c 2783 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 2784 if (ret)
bdb3289f 2785 goto out_kmem_cache;
bdb3289f 2786
fc66f95c
ED
2787 ret = register_pernet_subsys(&ip6_route_net_ops);
2788 if (ret)
2789 goto out_dst_entries;
2790
5dc121e9
AE
2791 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2792
8ed67789
DL
2793 /* Registering of the loopback is done before this portion of code,
2794 * the loopback reference in rt6_info will not be taken, do it
2795 * manually for init_net */
d8d1f30b 2796 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2797 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2798 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2799 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 2800 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 2801 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2802 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2803 #endif
433d49c3
DL
2804 ret = fib6_init();
2805 if (ret)
8ed67789 2806 goto out_register_subsys;
433d49c3 2807
433d49c3
DL
2808 ret = xfrm6_init();
2809 if (ret)
cdb18761 2810 goto out_fib6_init;
c35b7e72 2811
433d49c3
DL
2812 ret = fib6_rules_init();
2813 if (ret)
2814 goto xfrm6_init;
7e5449c2 2815
433d49c3
DL
2816 ret = -ENOBUFS;
2817 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2818 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2819 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2820 goto fib6_rules_init;
c127ea2c 2821
8ed67789 2822 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761
DL
2823 if (ret)
2824 goto fib6_rules_init;
8ed67789 2825
433d49c3
DL
2826out:
2827 return ret;
2828
2829fib6_rules_init:
433d49c3
DL
2830 fib6_rules_cleanup();
2831xfrm6_init:
433d49c3 2832 xfrm6_fini();
433d49c3 2833out_fib6_init:
433d49c3 2834 fib6_gc_cleanup();
8ed67789
DL
2835out_register_subsys:
2836 unregister_pernet_subsys(&ip6_route_net_ops);
fc66f95c
ED
2837out_dst_entries:
2838 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 2839out_kmem_cache:
f2fc6a54 2840 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 2841 goto out;
1da177e4
LT
2842}
2843
2844void ip6_route_cleanup(void)
2845{
8ed67789 2846 unregister_netdevice_notifier(&ip6_route_dev_notifier);
101367c2 2847 fib6_rules_cleanup();
1da177e4 2848 xfrm6_fini();
1da177e4 2849 fib6_gc_cleanup();
8ed67789 2850 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 2851 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 2852 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 2853}
This page took 0.897046 seconds and 5 git commands to generate.