Merge branch 'omap-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[deliverable/linux.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4
LT
28#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
7bc570c8 37#include <linux/mroute6.h>
1da177e4 38#include <linux/init.h>
1da177e4 39#include <linux/if_arp.h>
1da177e4
LT
40#include <linux/proc_fs.h>
41#include <linux/seq_file.h>
5b7c931d 42#include <linux/nsproxy.h>
5a0e3ad6 43#include <linux/slab.h>
457c4cbc 44#include <net/net_namespace.h>
1da177e4
LT
45#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
54#include <net/xfrm.h>
8d71740c 55#include <net/netevent.h>
21713ebc 56#include <net/netlink.h>
1da177e4
LT
57
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
/*
 * Debug verbosity for this file.  Raising RT6_DEBUG to 3 (or more)
 * turns RDBG() and RT6_TRACE() into real printk() calls; at lower
 * levels both expand to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG >= 3
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#endif
74
1da177e4
LT
75static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
76static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 77static unsigned int ip6_default_advmss(const struct dst_entry *dst);
d33e4553 78static unsigned int ip6_default_mtu(const struct dst_entry *dst);
1da177e4
LT
79static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80static void ip6_dst_destroy(struct dst_entry *);
81static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
569d3645 83static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
84
85static int ip6_pkt_discard(struct sk_buff *skb);
86static int ip6_pkt_discard_out(struct sk_buff *skb);
87static void ip6_link_failure(struct sk_buff *skb);
88static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89
70ceb4f5 90#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0
DL
91static struct rt6_info *rt6_add_route_info(struct net *net,
92 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
93 struct in6_addr *gwaddr, int ifindex,
94 unsigned pref);
efa2cea0
DL
95static struct rt6_info *rt6_get_route_info(struct net *net,
96 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
97 struct in6_addr *gwaddr, int ifindex);
98#endif
99
9a7ec3a9 100static struct dst_ops ip6_dst_ops_template = {
1da177e4 101 .family = AF_INET6,
09640e63 102 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
103 .gc = ip6_dst_gc,
104 .gc_thresh = 1024,
105 .check = ip6_dst_check,
0dbaee3b 106 .default_advmss = ip6_default_advmss,
d33e4553 107 .default_mtu = ip6_default_mtu,
1da177e4
LT
108 .destroy = ip6_dst_destroy,
109 .ifdown = ip6_dst_ifdown,
110 .negative_advice = ip6_negative_advice,
111 .link_failure = ip6_link_failure,
112 .update_pmtu = ip6_rt_update_pmtu,
1ac06e03 113 .local_out = __ip6_local_out,
1da177e4
LT
114};
115
14e50e57
DM
116static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
117{
118}
119
120static struct dst_ops ip6_dst_blackhole_ops = {
121 .family = AF_INET6,
09640e63 122 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
123 .destroy = ip6_dst_destroy,
124 .check = ip6_dst_check,
125 .update_pmtu = ip6_rt_blackhole_update_pmtu,
14e50e57
DM
126};
127
bdb3289f 128static struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
129 .dst = {
130 .__refcnt = ATOMIC_INIT(1),
131 .__use = 1,
132 .obsolete = -1,
133 .error = -ENETUNREACH,
d8d1f30b
CG
134 .input = ip6_pkt_discard,
135 .output = ip6_pkt_discard_out,
1da177e4
LT
136 },
137 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 138 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
139 .rt6i_metric = ~(u32) 0,
140 .rt6i_ref = ATOMIC_INIT(1),
141};
142
101367c2
TG
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/* Template for the "prohibit" entry: reject route reporting -EACCES. */
static struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Template for the "blackhole" entry: reject route reporting -EINVAL
 * whose input and output handlers are both dst_discard.
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
179
/* Allocate a dst entry from @ops and return it viewed as an rt6_info. */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	struct rt6_info *rt = (struct rt6_info *)dst_alloc(ops);

	return rt;
}
185
186static void ip6_dst_destroy(struct dst_entry *dst)
187{
188 struct rt6_info *rt = (struct rt6_info *)dst;
189 struct inet6_dev *idev = rt->rt6i_idev;
b3419363 190 struct inet_peer *peer = rt->rt6i_peer;
1da177e4
LT
191
192 if (idev != NULL) {
193 rt->rt6i_idev = NULL;
194 in6_dev_put(idev);
1ab1457c 195 }
b3419363
DM
196 if (peer) {
197 BUG_ON(!(rt->rt6i_flags & RTF_CACHE));
198 rt->rt6i_peer = NULL;
199 inet_putpeer(peer);
200 }
201}
202
203void rt6_bind_peer(struct rt6_info *rt, int create)
204{
205 struct inet_peer *peer;
206
207 if (WARN_ON(!(rt->rt6i_flags & RTF_CACHE)))
208 return;
209
210 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
211 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
212 inet_putpeer(peer);
1da177e4
LT
213}
214
215static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
216 int how)
217{
218 struct rt6_info *rt = (struct rt6_info *)dst;
219 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 220 struct net_device *loopback_dev =
c346dca1 221 dev_net(dev)->loopback_dev;
1da177e4 222
5a3e55d6
DL
223 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
224 struct inet6_dev *loopback_idev =
225 in6_dev_get(loopback_dev);
1da177e4
LT
226 if (loopback_idev != NULL) {
227 rt->rt6i_idev = loopback_idev;
228 in6_dev_put(idev);
229 }
230 }
231}
232
233static __inline__ int rt6_check_expired(const struct rt6_info *rt)
234{
a02cec21
ED
235 return (rt->rt6i_flags & RTF_EXPIRES) &&
236 time_after(jiffies, rt->rt6i_expires);
1da177e4
LT
237}
238
c71099ac
TG
239static inline int rt6_need_strict(struct in6_addr *daddr)
240{
a02cec21
ED
241 return ipv6_addr_type(daddr) &
242 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
243}
244
1da177e4 245/*
c71099ac 246 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
247 */
248
8ed67789
DL
249static inline struct rt6_info *rt6_device_match(struct net *net,
250 struct rt6_info *rt,
dd3abc4e 251 struct in6_addr *saddr,
1da177e4 252 int oif,
d420895e 253 int flags)
1da177e4
LT
254{
255 struct rt6_info *local = NULL;
256 struct rt6_info *sprt;
257
dd3abc4e
YH
258 if (!oif && ipv6_addr_any(saddr))
259 goto out;
260
d8d1f30b 261 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
dd3abc4e
YH
262 struct net_device *dev = sprt->rt6i_dev;
263
264 if (oif) {
1da177e4
LT
265 if (dev->ifindex == oif)
266 return sprt;
267 if (dev->flags & IFF_LOOPBACK) {
268 if (sprt->rt6i_idev == NULL ||
269 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 270 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 271 continue;
1ab1457c 272 if (local && (!oif ||
1da177e4
LT
273 local->rt6i_idev->dev->ifindex == oif))
274 continue;
275 }
276 local = sprt;
277 }
dd3abc4e
YH
278 } else {
279 if (ipv6_chk_addr(net, saddr, dev,
280 flags & RT6_LOOKUP_F_IFACE))
281 return sprt;
1da177e4 282 }
dd3abc4e 283 }
1da177e4 284
dd3abc4e 285 if (oif) {
1da177e4
LT
286 if (local)
287 return local;
288
d420895e 289 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 290 return net->ipv6.ip6_null_entry;
1da177e4 291 }
dd3abc4e 292out:
1da177e4
LT
293 return rt;
294}
295
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If the next hop of @rt is not in a NUD_VALID state and the last
 * update of the neighbour entry is older than the interface's
 * rtr_probe_interval, send a Neighbour Solicitation to the next hop's
 * solicited-node multicast address.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one
 * per minute.  The rate limit is enforced by stamping neigh->updated;
 * because that is a write to the neighbour entry, the check-and-stamp
 * is done under the write side of neigh->lock so that concurrent
 * callers cannot all pass the time test and each send a probe.
 *
 * @rt may be NULL, in which case nothing is done.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;

	/* Cheap unlocked pre-check; re-validated under the lock below. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	write_lock_bh(&neigh->lock);
	if ((neigh->nud_state & NUD_VALID) ||
	    !time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		write_unlock_bh(&neigh->lock);
		return;
	}
	neigh->updated = jiffies;
	write_unlock_bh(&neigh->lock);

	/* The solicitation itself is sent without holding the lock. */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support disabled: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
330
1da177e4 331/*
554cfb7e 332 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 333 */
b6f99a21 334static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e
YH
335{
336 struct net_device *dev = rt->rt6i_dev;
161980f4 337 if (!oif || dev->ifindex == oif)
554cfb7e 338 return 2;
161980f4
DM
339 if ((dev->flags & IFF_LOOPBACK) &&
340 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
341 return 1;
342 return 0;
554cfb7e 343}
1da177e4 344
b6f99a21 345static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 346{
554cfb7e 347 struct neighbour *neigh = rt->rt6i_nexthop;
398bcbeb 348 int m;
4d0c5911
YH
349 if (rt->rt6i_flags & RTF_NONEXTHOP ||
350 !(rt->rt6i_flags & RTF_GATEWAY))
351 m = 1;
352 else if (neigh) {
554cfb7e
YH
353 read_lock_bh(&neigh->lock);
354 if (neigh->nud_state & NUD_VALID)
4d0c5911 355 m = 2;
398bcbeb
YH
356#ifdef CONFIG_IPV6_ROUTER_PREF
357 else if (neigh->nud_state & NUD_FAILED)
358 m = 0;
359#endif
360 else
ea73ee23 361 m = 1;
554cfb7e 362 read_unlock_bh(&neigh->lock);
398bcbeb
YH
363 } else
364 m = 0;
554cfb7e 365 return m;
1da177e4
LT
366}
367
554cfb7e
YH
368static int rt6_score_route(struct rt6_info *rt, int oif,
369 int strict)
1da177e4 370{
4d0c5911 371 int m, n;
1ab1457c 372
4d0c5911 373 m = rt6_check_dev(rt, oif);
77d16f45 374 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 375 return -1;
ebacaaa0
YH
376#ifdef CONFIG_IPV6_ROUTER_PREF
377 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
378#endif
4d0c5911 379 n = rt6_check_neigh(rt);
557e92ef 380 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
381 return -1;
382 return m;
383}
384
f11e6659
DM
385static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
386 int *mpri, struct rt6_info *match)
554cfb7e 387{
f11e6659
DM
388 int m;
389
390 if (rt6_check_expired(rt))
391 goto out;
392
393 m = rt6_score_route(rt, oif, strict);
394 if (m < 0)
395 goto out;
396
397 if (m > *mpri) {
398 if (strict & RT6_LOOKUP_F_REACHABLE)
399 rt6_probe(match);
400 *mpri = m;
401 match = rt;
402 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
403 rt6_probe(rt);
404 }
405
406out:
407 return match;
408}
409
410static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
411 struct rt6_info *rr_head,
412 u32 metric, int oif, int strict)
413{
414 struct rt6_info *rt, *match;
554cfb7e 415 int mpri = -1;
1da177e4 416
f11e6659
DM
417 match = NULL;
418 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 419 rt = rt->dst.rt6_next)
f11e6659
DM
420 match = find_match(rt, oif, strict, &mpri, match);
421 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 422 rt = rt->dst.rt6_next)
f11e6659 423 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 424
f11e6659
DM
425 return match;
426}
1da177e4 427
f11e6659
DM
428static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
429{
430 struct rt6_info *match, *rt0;
8ed67789 431 struct net *net;
1da177e4 432
f11e6659 433 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
0dc47877 434 __func__, fn->leaf, oif);
554cfb7e 435
f11e6659
DM
436 rt0 = fn->rr_ptr;
437 if (!rt0)
438 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 439
f11e6659 440 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 441
554cfb7e 442 if (!match &&
f11e6659 443 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 444 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 445
554cfb7e 446 /* no entries matched; do round-robin */
f11e6659
DM
447 if (!next || next->rt6i_metric != rt0->rt6i_metric)
448 next = fn->leaf;
449
450 if (next != rt0)
451 fn->rr_ptr = next;
1da177e4 452 }
1da177e4 453
f11e6659 454 RT6_TRACE("%s() => %p\n",
0dc47877 455 __func__, match);
1da177e4 456
c346dca1 457 net = dev_net(rt0->rt6i_dev);
a02cec21 458 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
459}
460
70ceb4f5
YH
461#ifdef CONFIG_IPV6_ROUTE_INFO
462int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
463 struct in6_addr *gwaddr)
464{
c346dca1 465 struct net *net = dev_net(dev);
70ceb4f5
YH
466 struct route_info *rinfo = (struct route_info *) opt;
467 struct in6_addr prefix_buf, *prefix;
468 unsigned int pref;
4bed72e4 469 unsigned long lifetime;
70ceb4f5
YH
470 struct rt6_info *rt;
471
472 if (len < sizeof(struct route_info)) {
473 return -EINVAL;
474 }
475
476 /* Sanity check for prefix_len and length */
477 if (rinfo->length > 3) {
478 return -EINVAL;
479 } else if (rinfo->prefix_len > 128) {
480 return -EINVAL;
481 } else if (rinfo->prefix_len > 64) {
482 if (rinfo->length < 2) {
483 return -EINVAL;
484 }
485 } else if (rinfo->prefix_len > 0) {
486 if (rinfo->length < 1) {
487 return -EINVAL;
488 }
489 }
490
491 pref = rinfo->route_pref;
492 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 493 return -EINVAL;
70ceb4f5 494
4bed72e4 495 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
496
497 if (rinfo->length == 3)
498 prefix = (struct in6_addr *)rinfo->prefix;
499 else {
500 /* this function is safe */
501 ipv6_addr_prefix(&prefix_buf,
502 (struct in6_addr *)rinfo->prefix,
503 rinfo->prefix_len);
504 prefix = &prefix_buf;
505 }
506
efa2cea0
DL
507 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
508 dev->ifindex);
70ceb4f5
YH
509
510 if (rt && !lifetime) {
e0a1ad73 511 ip6_del_rt(rt);
70ceb4f5
YH
512 rt = NULL;
513 }
514
515 if (!rt && lifetime)
efa2cea0 516 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
70ceb4f5
YH
517 pref);
518 else if (rt)
519 rt->rt6i_flags = RTF_ROUTEINFO |
520 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
521
522 if (rt) {
4bed72e4 523 if (!addrconf_finite_timeout(lifetime)) {
70ceb4f5
YH
524 rt->rt6i_flags &= ~RTF_EXPIRES;
525 } else {
526 rt->rt6i_expires = jiffies + HZ * lifetime;
527 rt->rt6i_flags |= RTF_EXPIRES;
528 }
d8d1f30b 529 dst_release(&rt->dst);
70ceb4f5
YH
530 }
531 return 0;
532}
533#endif
534
/*
 * BACKTRACK(net, saddr) - climb the fib6 tree after a failed selection.
 *
 * Must be expanded inside a function that has local variables `rt` and
 * `fn` and the labels `out` and `restart`.  When `rt` is the null
 * entry, walk towards the root: jump to `out` at the top-level root,
 * otherwise move to the parent (or look @saddr up in the parent's
 * subtree when that subtree is not where we came from) and jump to
 * `restart` at the first node carrying route info.
 */
#define BACKTRACK(__net, saddr) \
do { \
	if (rt == (__net)->ipv6.ip6_null_entry) { \
		struct fib6_node *pn; \
		while (1) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, (saddr)); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
c71099ac 552
8ed67789
DL
553static struct rt6_info *ip6_pol_route_lookup(struct net *net,
554 struct fib6_table *table,
c71099ac 555 struct flowi *fl, int flags)
1da177e4
LT
556{
557 struct fib6_node *fn;
558 struct rt6_info *rt;
559
c71099ac
TG
560 read_lock_bh(&table->tb6_lock);
561 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
562restart:
563 rt = fn->leaf;
dd3abc4e 564 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
8ed67789 565 BACKTRACK(net, &fl->fl6_src);
c71099ac 566out:
d8d1f30b 567 dst_use(&rt->dst, jiffies);
c71099ac 568 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
569 return rt;
570
571}
572
9acd9f3a
YH
573struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
574 const struct in6_addr *saddr, int oif, int strict)
c71099ac
TG
575{
576 struct flowi fl = {
577 .oif = oif,
5811662b 578 .fl6_dst = *daddr,
c71099ac
TG
579 };
580 struct dst_entry *dst;
77d16f45 581 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 582
adaa70bb
TG
583 if (saddr) {
584 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
585 flags |= RT6_LOOKUP_F_HAS_SADDR;
586 }
587
606a2b48 588 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
c71099ac
TG
589 if (dst->error == 0)
590 return (struct rt6_info *) dst;
591
592 dst_release(dst);
593
1da177e4
LT
594 return NULL;
595}
596
7159039a
YH
597EXPORT_SYMBOL(rt6_lookup);
598
c71099ac 599/* ip6_ins_rt is called with FREE table->tb6_lock.
1da177e4
LT
600 It takes new route entry, the addition fails by any reason the
601 route is freed. In any case, if caller does not hold it, it may
602 be destroyed.
603 */
604
86872cb5 605static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
606{
607 int err;
c71099ac 608 struct fib6_table *table;
1da177e4 609
c71099ac
TG
610 table = rt->rt6i_table;
611 write_lock_bh(&table->tb6_lock);
86872cb5 612 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 613 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
614
615 return err;
616}
617
40e22e8f
TG
618int ip6_ins_rt(struct rt6_info *rt)
619{
4d1169c1 620 struct nl_info info = {
c346dca1 621 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 622 };
528c4ceb 623 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
624}
625
95a9a5ba
YH
626static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
627 struct in6_addr *saddr)
1da177e4 628{
1da177e4
LT
629 struct rt6_info *rt;
630
631 /*
632 * Clone the route.
633 */
634
635 rt = ip6_rt_copy(ort);
636
637 if (rt) {
14deae41
DM
638 struct neighbour *neigh;
639 int attempts = !in_softirq();
640
58c4fb86
YH
641 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
642 if (rt->rt6i_dst.plen != 128 &&
643 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
644 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 645 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
58c4fb86 646 }
1da177e4 647
58c4fb86 648 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
1da177e4
LT
649 rt->rt6i_dst.plen = 128;
650 rt->rt6i_flags |= RTF_CACHE;
d8d1f30b 651 rt->dst.flags |= DST_HOST;
1da177e4
LT
652
653#ifdef CONFIG_IPV6_SUBTREES
654 if (rt->rt6i_src.plen && saddr) {
655 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
656 rt->rt6i_src.plen = 128;
657 }
658#endif
659
14deae41
DM
660 retry:
661 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
662 if (IS_ERR(neigh)) {
663 struct net *net = dev_net(rt->rt6i_dev);
664 int saved_rt_min_interval =
665 net->ipv6.sysctl.ip6_rt_gc_min_interval;
666 int saved_rt_elasticity =
667 net->ipv6.sysctl.ip6_rt_gc_elasticity;
668
669 if (attempts-- > 0) {
670 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
671 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
672
86393e52 673 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
674
675 net->ipv6.sysctl.ip6_rt_gc_elasticity =
676 saved_rt_elasticity;
677 net->ipv6.sysctl.ip6_rt_gc_min_interval =
678 saved_rt_min_interval;
679 goto retry;
680 }
681
682 if (net_ratelimit())
683 printk(KERN_WARNING
7e1b33e5 684 "ipv6: Neighbour table overflow.\n");
d8d1f30b 685 dst_free(&rt->dst);
14deae41
DM
686 return NULL;
687 }
688 rt->rt6i_nexthop = neigh;
1da177e4 689
95a9a5ba 690 }
1da177e4 691
95a9a5ba
YH
692 return rt;
693}
1da177e4 694
299d9939
YH
695static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
696{
697 struct rt6_info *rt = ip6_rt_copy(ort);
698 if (rt) {
699 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
700 rt->rt6i_dst.plen = 128;
701 rt->rt6i_flags |= RTF_CACHE;
d8d1f30b 702 rt->dst.flags |= DST_HOST;
299d9939
YH
703 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
704 }
705 return rt;
706}
707
8ed67789
DL
708static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
709 struct flowi *fl, int flags)
1da177e4
LT
710{
711 struct fib6_node *fn;
519fbd87 712 struct rt6_info *rt, *nrt;
c71099ac 713 int strict = 0;
1da177e4 714 int attempts = 3;
519fbd87 715 int err;
53b7997f 716 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 717
77d16f45 718 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
719
720relookup:
c71099ac 721 read_lock_bh(&table->tb6_lock);
1da177e4 722
8238dd06 723restart_2:
c71099ac 724 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1da177e4
LT
725
726restart:
4acad72d 727 rt = rt6_select(fn, oif, strict | reachable);
8ed67789
DL
728
729 BACKTRACK(net, &fl->fl6_src);
730 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 731 rt->rt6i_flags & RTF_CACHE)
1ddef044 732 goto out;
1da177e4 733
d8d1f30b 734 dst_hold(&rt->dst);
c71099ac 735 read_unlock_bh(&table->tb6_lock);
fb9de91e 736
519fbd87 737 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
c71099ac 738 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
d80bc0fd 739 else
c71099ac 740 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
e40cf353 741
d8d1f30b 742 dst_release(&rt->dst);
8ed67789 743 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 744
d8d1f30b 745 dst_hold(&rt->dst);
519fbd87 746 if (nrt) {
40e22e8f 747 err = ip6_ins_rt(nrt);
519fbd87 748 if (!err)
1da177e4 749 goto out2;
1da177e4 750 }
1da177e4 751
519fbd87
YH
752 if (--attempts <= 0)
753 goto out2;
754
755 /*
c71099ac 756 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
757 * released someone could insert this route. Relookup.
758 */
d8d1f30b 759 dst_release(&rt->dst);
519fbd87
YH
760 goto relookup;
761
762out:
8238dd06
YH
763 if (reachable) {
764 reachable = 0;
765 goto restart_2;
766 }
d8d1f30b 767 dst_hold(&rt->dst);
c71099ac 768 read_unlock_bh(&table->tb6_lock);
1da177e4 769out2:
d8d1f30b
CG
770 rt->dst.lastuse = jiffies;
771 rt->dst.__use++;
c71099ac
TG
772
773 return rt;
1da177e4
LT
774}
775
8ed67789 776static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4acad72d
PE
777 struct flowi *fl, int flags)
778{
8ed67789 779 return ip6_pol_route(net, table, fl->iif, fl, flags);
4acad72d
PE
780}
781
c71099ac
TG
782void ip6_route_input(struct sk_buff *skb)
783{
0660e03f 784 struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 785 struct net *net = dev_net(skb->dev);
adaa70bb 786 int flags = RT6_LOOKUP_F_HAS_SADDR;
c71099ac
TG
787 struct flowi fl = {
788 .iif = skb->dev->ifindex,
5811662b
CG
789 .fl6_dst = iph->daddr,
790 .fl6_src = iph->saddr,
791 .fl6_flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
1ab1457c 792 .mark = skb->mark,
c71099ac
TG
793 .proto = iph->nexthdr,
794 };
adaa70bb 795
1d6e55f1 796 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
adaa70bb 797 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 798
adf30907 799 skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input));
c71099ac
TG
800}
801
8ed67789 802static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
c71099ac 803 struct flowi *fl, int flags)
1da177e4 804{
8ed67789 805 return ip6_pol_route(net, table, fl->oif, fl, flags);
c71099ac
TG
806}
807
4591db4f
DL
808struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
809 struct flowi *fl)
c71099ac
TG
810{
811 int flags = 0;
812
6057fd78 813 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst))
77d16f45 814 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 815
adaa70bb
TG
816 if (!ipv6_addr_any(&fl->fl6_src))
817 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
818 else if (sk)
819 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 820
4591db4f 821 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
1da177e4
LT
822}
823
7159039a 824EXPORT_SYMBOL(ip6_route_output);
1da177e4 825
14e50e57
DM
826int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
827{
828 struct rt6_info *ort = (struct rt6_info *) *dstp;
829 struct rt6_info *rt = (struct rt6_info *)
830 dst_alloc(&ip6_dst_blackhole_ops);
831 struct dst_entry *new = NULL;
832
833 if (rt) {
d8d1f30b 834 new = &rt->dst;
14e50e57
DM
835
836 atomic_set(&new->__refcnt, 1);
837 new->__use = 1;
352e512c
HX
838 new->input = dst_discard;
839 new->output = dst_discard;
14e50e57 840
defb3519 841 dst_copy_metrics(new, &ort->dst);
d8d1f30b 842 new->dev = ort->dst.dev;
14e50e57
DM
843 if (new->dev)
844 dev_hold(new->dev);
845 rt->rt6i_idev = ort->rt6i_idev;
846 if (rt->rt6i_idev)
847 in6_dev_hold(rt->rt6i_idev);
848 rt->rt6i_expires = 0;
849
850 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
851 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
852 rt->rt6i_metric = 0;
853
854 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
855#ifdef CONFIG_IPV6_SUBTREES
856 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
857#endif
858
859 dst_free(new);
860 }
861
862 dst_release(*dstp);
863 *dstp = new;
a02cec21 864 return new ? 0 : -ENOMEM;
14e50e57
DM
865}
866EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
867
1da177e4
LT
868/*
869 * Destination cache support functions
870 */
871
872static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
873{
874 struct rt6_info *rt;
875
876 rt = (struct rt6_info *) dst;
877
10414444 878 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
1da177e4
LT
879 return dst;
880
881 return NULL;
882}
883
884static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
885{
886 struct rt6_info *rt = (struct rt6_info *) dst;
887
888 if (rt) {
54c1a859
YH
889 if (rt->rt6i_flags & RTF_CACHE) {
890 if (rt6_check_expired(rt)) {
891 ip6_del_rt(rt);
892 dst = NULL;
893 }
894 } else {
1da177e4 895 dst_release(dst);
54c1a859
YH
896 dst = NULL;
897 }
1da177e4 898 }
54c1a859 899 return dst;
1da177e4
LT
900}
901
902static void ip6_link_failure(struct sk_buff *skb)
903{
904 struct rt6_info *rt;
905
3ffe533c 906 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 907
adf30907 908 rt = (struct rt6_info *) skb_dst(skb);
1da177e4
LT
909 if (rt) {
910 if (rt->rt6i_flags&RTF_CACHE) {
d8d1f30b 911 dst_set_expires(&rt->dst, 0);
1da177e4
LT
912 rt->rt6i_flags |= RTF_EXPIRES;
913 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
914 rt->rt6i_node->fn_sernum = -1;
915 }
916}
917
918static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
919{
920 struct rt6_info *rt6 = (struct rt6_info*)dst;
921
922 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
923 rt6->rt6i_flags |= RTF_MODIFIED;
924 if (mtu < IPV6_MIN_MTU) {
defb3519 925 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 926 mtu = IPV6_MIN_MTU;
defb3519
DM
927 features |= RTAX_FEATURE_ALLFRAG;
928 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 929 }
defb3519 930 dst_metric_set(dst, RTAX_MTU, mtu);
8d71740c 931 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
1da177e4
LT
932 }
933}
934
0dbaee3b 935static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 936{
0dbaee3b
DM
937 struct net_device *dev = dst->dev;
938 unsigned int mtu = dst_mtu(dst);
939 struct net *net = dev_net(dev);
940
1da177e4
LT
941 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
942
5578689a
DL
943 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
944 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
945
946 /*
1ab1457c
YH
947 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
948 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
949 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
950 * rely only on pmtu discovery"
951 */
952 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
953 mtu = IPV6_MAXPLEN;
954 return mtu;
955}
956
d33e4553
DM
957static unsigned int ip6_default_mtu(const struct dst_entry *dst)
958{
959 unsigned int mtu = IPV6_MIN_MTU;
960 struct inet6_dev *idev;
961
962 rcu_read_lock();
963 idev = __in6_dev_get(dst->dev);
964 if (idev)
965 mtu = idev->cnf.mtu6;
966 rcu_read_unlock();
967
968 return mtu;
969}
970
3b00944c
YH
971static struct dst_entry *icmp6_dst_gc_list;
972static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 973
3b00944c 974struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 975 struct neighbour *neigh,
9acd9f3a 976 const struct in6_addr *addr)
1da177e4
LT
977{
978 struct rt6_info *rt;
979 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 980 struct net *net = dev_net(dev);
1da177e4
LT
981
982 if (unlikely(idev == NULL))
983 return NULL;
984
86393e52 985 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
986 if (unlikely(rt == NULL)) {
987 in6_dev_put(idev);
988 goto out;
989 }
990
991 dev_hold(dev);
992 if (neigh)
993 neigh_hold(neigh);
14deae41 994 else {
1da177e4 995 neigh = ndisc_get_neigh(dev, addr);
14deae41
DM
996 if (IS_ERR(neigh))
997 neigh = NULL;
998 }
1da177e4
LT
999
1000 rt->rt6i_dev = dev;
1001 rt->rt6i_idev = idev;
1002 rt->rt6i_nexthop = neigh;
d8d1f30b 1003 atomic_set(&rt->dst.__refcnt, 1);
defb3519 1004 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
d8d1f30b 1005 rt->dst.output = ip6_output;
1da177e4
LT
1006
1007#if 0 /* there's no chance to use these for ndisc */
d8d1f30b 1008 rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
1ab1457c 1009 ? DST_HOST
1da177e4
LT
1010 : 0;
1011 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1012 rt->rt6i_dst.plen = 128;
1013#endif
1014
3b00944c 1015 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1016 rt->dst.next = icmp6_dst_gc_list;
1017 icmp6_dst_gc_list = &rt->dst;
3b00944c 1018 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1019
5578689a 1020 fib6_force_start_gc(net);
1da177e4
LT
1021
1022out:
d8d1f30b 1023 return &rt->dst;
1da177e4
LT
1024}
1025
3d0f24a7 1026int icmp6_dst_gc(void)
1da177e4
LT
1027{
1028 struct dst_entry *dst, *next, **pprev;
3d0f24a7 1029 int more = 0;
1da177e4
LT
1030
1031 next = NULL;
5d0bbeeb 1032
3b00944c
YH
1033 spin_lock_bh(&icmp6_dst_lock);
1034 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1035
1da177e4
LT
1036 while ((dst = *pprev) != NULL) {
1037 if (!atomic_read(&dst->__refcnt)) {
1038 *pprev = dst->next;
1039 dst_free(dst);
1da177e4
LT
1040 } else {
1041 pprev = &dst->next;
3d0f24a7 1042 ++more;
1da177e4
LT
1043 }
1044 }
1045
3b00944c 1046 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1047
3d0f24a7 1048 return more;
1da177e4
LT
1049}
1050
1e493d19
DM
1051static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1052 void *arg)
1053{
1054 struct dst_entry *dst, **pprev;
1055
1056 spin_lock_bh(&icmp6_dst_lock);
1057 pprev = &icmp6_dst_gc_list;
1058 while ((dst = *pprev) != NULL) {
1059 struct rt6_info *rt = (struct rt6_info *) dst;
1060 if (func(rt, arg)) {
1061 *pprev = dst->next;
1062 dst_free(dst);
1063 } else {
1064 pprev = &dst->next;
1065 }
1066 }
1067 spin_unlock_bh(&icmp6_dst_lock);
1068}
1069
569d3645 1070static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1071{
1da177e4 1072 unsigned long now = jiffies;
86393e52 1073 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1074 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1075 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1076 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1077 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1078 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1079 int entries;
7019b78e 1080
fc66f95c 1081 entries = dst_entries_get_fast(ops);
7019b78e 1082 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1083 entries <= rt_max_size)
1da177e4
LT
1084 goto out;
1085
6891a346
BT
1086 net->ipv6.ip6_rt_gc_expire++;
1087 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1088 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1089 entries = dst_entries_get_slow(ops);
1090 if (entries < ops->gc_thresh)
7019b78e 1091 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1092out:
7019b78e 1093 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1094 return entries > rt_max_size;
1da177e4
LT
1095}
1096
1097/* Clean host part of a prefix. Not necessary in radix tree,
1098 but results in cleaner routing tables.
1099
1100 Remove it only when all the things will work!
1101 */
1102
6b75d090 1103int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1104{
5170ae82 1105 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1106 if (hoplimit == 0) {
6b75d090 1107 struct net_device *dev = dst->dev;
c68f24cc
ED
1108 struct inet6_dev *idev;
1109
1110 rcu_read_lock();
1111 idev = __in6_dev_get(dev);
1112 if (idev)
6b75d090 1113 hoplimit = idev->cnf.hop_limit;
c68f24cc 1114 else
53b7997f 1115 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1116 rcu_read_unlock();
1da177e4
LT
1117 }
1118 return hoplimit;
1119}
abbf46ae 1120EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1121
1122/*
1123 *
1124 */
1125
86872cb5 1126int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1127{
1128 int err;
5578689a 1129 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1130 struct rt6_info *rt = NULL;
1131 struct net_device *dev = NULL;
1132 struct inet6_dev *idev = NULL;
c71099ac 1133 struct fib6_table *table;
1da177e4
LT
1134 int addr_type;
1135
86872cb5 1136 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1137 return -EINVAL;
1138#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1139 if (cfg->fc_src_len)
1da177e4
LT
1140 return -EINVAL;
1141#endif
86872cb5 1142 if (cfg->fc_ifindex) {
1da177e4 1143 err = -ENODEV;
5578689a 1144 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1145 if (!dev)
1146 goto out;
1147 idev = in6_dev_get(dev);
1148 if (!idev)
1149 goto out;
1150 }
1151
86872cb5
TG
1152 if (cfg->fc_metric == 0)
1153 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1154
5578689a 1155 table = fib6_new_table(net, cfg->fc_table);
c71099ac
TG
1156 if (table == NULL) {
1157 err = -ENOBUFS;
1158 goto out;
1159 }
1160
86393e52 1161 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
1162
1163 if (rt == NULL) {
1164 err = -ENOMEM;
1165 goto out;
1166 }
1167
d8d1f30b 1168 rt->dst.obsolete = -1;
6f704992
YH
1169 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1170 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1171 0;
1da177e4 1172
86872cb5
TG
1173 if (cfg->fc_protocol == RTPROT_UNSPEC)
1174 cfg->fc_protocol = RTPROT_BOOT;
1175 rt->rt6i_protocol = cfg->fc_protocol;
1176
1177 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1178
1179 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1180 rt->dst.input = ip6_mc_input;
ab79ad14
1181 else if (cfg->fc_flags & RTF_LOCAL)
1182 rt->dst.input = ip6_input;
1da177e4 1183 else
d8d1f30b 1184 rt->dst.input = ip6_forward;
1da177e4 1185
d8d1f30b 1186 rt->dst.output = ip6_output;
1da177e4 1187
86872cb5
TG
1188 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1189 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1190 if (rt->rt6i_dst.plen == 128)
d8d1f30b 1191 rt->dst.flags = DST_HOST;
1da177e4
LT
1192
1193#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1194 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1195 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1196#endif
1197
86872cb5 1198 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1199
1200 /* We cannot add true routes via loopback here,
1201 they would result in kernel looping; promote them to reject routes
1202 */
86872cb5 1203 if ((cfg->fc_flags & RTF_REJECT) ||
ab79ad14
1204 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1205 && !(cfg->fc_flags&RTF_LOCAL))) {
1da177e4 1206 /* hold loopback dev/idev if we haven't done so. */
5578689a 1207 if (dev != net->loopback_dev) {
1da177e4
LT
1208 if (dev) {
1209 dev_put(dev);
1210 in6_dev_put(idev);
1211 }
5578689a 1212 dev = net->loopback_dev;
1da177e4
LT
1213 dev_hold(dev);
1214 idev = in6_dev_get(dev);
1215 if (!idev) {
1216 err = -ENODEV;
1217 goto out;
1218 }
1219 }
d8d1f30b
CG
1220 rt->dst.output = ip6_pkt_discard_out;
1221 rt->dst.input = ip6_pkt_discard;
1222 rt->dst.error = -ENETUNREACH;
1da177e4
LT
1223 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1224 goto install_route;
1225 }
1226
86872cb5 1227 if (cfg->fc_flags & RTF_GATEWAY) {
1da177e4
LT
1228 struct in6_addr *gw_addr;
1229 int gwa_type;
1230
86872cb5
TG
1231 gw_addr = &cfg->fc_gateway;
1232 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1da177e4
LT
1233 gwa_type = ipv6_addr_type(gw_addr);
1234
1235 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1236 struct rt6_info *grt;
1237
1238 /* IPv6 strictly inhibits using not link-local
1239 addresses as nexthop address.
1240 Otherwise, router will not able to send redirects.
1241 It is very good, but in some (rare!) circumstances
1242 (SIT, PtP, NBMA NOARP links) it is handy to allow
1243 some exceptions. --ANK
1244 */
1245 err = -EINVAL;
1246 if (!(gwa_type&IPV6_ADDR_UNICAST))
1247 goto out;
1248
5578689a 1249 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1250
1251 err = -EHOSTUNREACH;
1252 if (grt == NULL)
1253 goto out;
1254 if (dev) {
1255 if (dev != grt->rt6i_dev) {
d8d1f30b 1256 dst_release(&grt->dst);
1da177e4
LT
1257 goto out;
1258 }
1259 } else {
1260 dev = grt->rt6i_dev;
1261 idev = grt->rt6i_idev;
1262 dev_hold(dev);
1263 in6_dev_hold(grt->rt6i_idev);
1264 }
1265 if (!(grt->rt6i_flags&RTF_GATEWAY))
1266 err = 0;
d8d1f30b 1267 dst_release(&grt->dst);
1da177e4
LT
1268
1269 if (err)
1270 goto out;
1271 }
1272 err = -EINVAL;
1273 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1274 goto out;
1275 }
1276
1277 err = -ENODEV;
1278 if (dev == NULL)
1279 goto out;
1280
86872cb5 1281 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1da177e4
LT
1282 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1283 if (IS_ERR(rt->rt6i_nexthop)) {
1284 err = PTR_ERR(rt->rt6i_nexthop);
1285 rt->rt6i_nexthop = NULL;
1286 goto out;
1287 }
1288 }
1289
86872cb5 1290 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1291
1292install_route:
86872cb5
TG
1293 if (cfg->fc_mx) {
1294 struct nlattr *nla;
1295 int remaining;
1296
1297 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1298 int type = nla_type(nla);
86872cb5
TG
1299
1300 if (type) {
1301 if (type > RTAX_MAX) {
1da177e4
LT
1302 err = -EINVAL;
1303 goto out;
1304 }
86872cb5 1305
defb3519 1306 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1307 }
1da177e4
LT
1308 }
1309 }
1310
d8d1f30b 1311 rt->dst.dev = dev;
1da177e4 1312 rt->rt6i_idev = idev;
c71099ac 1313 rt->rt6i_table = table;
63152fc0 1314
c346dca1 1315 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1316
86872cb5 1317 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1318
1319out:
1320 if (dev)
1321 dev_put(dev);
1322 if (idev)
1323 in6_dev_put(idev);
1324 if (rt)
d8d1f30b 1325 dst_free(&rt->dst);
1da177e4
LT
1326 return err;
1327}
1328
86872cb5 1329static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1330{
1331 int err;
c71099ac 1332 struct fib6_table *table;
c346dca1 1333 struct net *net = dev_net(rt->rt6i_dev);
1da177e4 1334
8ed67789 1335 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1336 return -ENOENT;
1337
c71099ac
TG
1338 table = rt->rt6i_table;
1339 write_lock_bh(&table->tb6_lock);
1da177e4 1340
86872cb5 1341 err = fib6_del(rt, info);
d8d1f30b 1342 dst_release(&rt->dst);
1da177e4 1343
c71099ac 1344 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1345
1346 return err;
1347}
1348
e0a1ad73
TG
1349int ip6_del_rt(struct rt6_info *rt)
1350{
4d1169c1 1351 struct nl_info info = {
c346dca1 1352 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 1353 };
528c4ceb 1354 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1355}
1356
86872cb5 1357static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1358{
c71099ac 1359 struct fib6_table *table;
1da177e4
LT
1360 struct fib6_node *fn;
1361 struct rt6_info *rt;
1362 int err = -ESRCH;
1363
5578689a 1364 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
c71099ac
TG
1365 if (table == NULL)
1366 return err;
1367
1368 read_lock_bh(&table->tb6_lock);
1da177e4 1369
c71099ac 1370 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1371 &cfg->fc_dst, cfg->fc_dst_len,
1372 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1373
1da177e4 1374 if (fn) {
d8d1f30b 1375 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1376 if (cfg->fc_ifindex &&
1da177e4 1377 (rt->rt6i_dev == NULL ||
86872cb5 1378 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1da177e4 1379 continue;
86872cb5
TG
1380 if (cfg->fc_flags & RTF_GATEWAY &&
1381 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1382 continue;
86872cb5 1383 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1384 continue;
d8d1f30b 1385 dst_hold(&rt->dst);
c71099ac 1386 read_unlock_bh(&table->tb6_lock);
1da177e4 1387
86872cb5 1388 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1389 }
1390 }
c71099ac 1391 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1392
1393 return err;
1394}
1395
1396/*
1397 * Handle redirects
1398 */
a6279458
YH
1399struct ip6rd_flowi {
1400 struct flowi fl;
1401 struct in6_addr gateway;
1402};
1403
8ed67789
DL
1404static struct rt6_info *__ip6_route_redirect(struct net *net,
1405 struct fib6_table *table,
a6279458
YH
1406 struct flowi *fl,
1407 int flags)
1da177e4 1408{
a6279458
YH
1409 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1410 struct rt6_info *rt;
e843b9e1 1411 struct fib6_node *fn;
c71099ac 1412
1da177e4 1413 /*
e843b9e1
YH
1414 * Get the "current" route for this destination and
1415 * check if the redirect has come from approriate router.
1416 *
1417 * RFC 2461 specifies that redirects should only be
1418 * accepted if they come from the nexthop to the target.
1419 * Due to the way the routes are chosen, this notion
1420 * is a bit fuzzy and one might need to check all possible
1421 * routes.
1da177e4 1422 */
1da177e4 1423
c71099ac 1424 read_lock_bh(&table->tb6_lock);
a6279458 1425 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
e843b9e1 1426restart:
d8d1f30b 1427 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
e843b9e1
YH
1428 /*
1429 * Current route is on-link; redirect is always invalid.
1430 *
1431 * Seems, previous statement is not true. It could
1432 * be node, which looks for us as on-link (f.e. proxy ndisc)
1433 * But then router serving it might decide, that we should
1434 * know truth 8)8) --ANK (980726).
1435 */
1436 if (rt6_check_expired(rt))
1437 continue;
1438 if (!(rt->rt6i_flags & RTF_GATEWAY))
1439 continue;
a6279458 1440 if (fl->oif != rt->rt6i_dev->ifindex)
e843b9e1 1441 continue;
a6279458 1442 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1443 continue;
1444 break;
1445 }
a6279458 1446
cb15d9c2 1447 if (!rt)
8ed67789
DL
1448 rt = net->ipv6.ip6_null_entry;
1449 BACKTRACK(net, &fl->fl6_src);
cb15d9c2 1450out:
d8d1f30b 1451 dst_hold(&rt->dst);
a6279458 1452
c71099ac 1453 read_unlock_bh(&table->tb6_lock);
e843b9e1 1454
a6279458
YH
1455 return rt;
1456};
1457
1458static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1459 struct in6_addr *src,
1460 struct in6_addr *gateway,
1461 struct net_device *dev)
1462{
adaa70bb 1463 int flags = RT6_LOOKUP_F_HAS_SADDR;
c346dca1 1464 struct net *net = dev_net(dev);
a6279458
YH
1465 struct ip6rd_flowi rdfl = {
1466 .fl = {
1467 .oif = dev->ifindex,
5811662b
CG
1468 .fl6_dst = *dest,
1469 .fl6_src = *src,
a6279458 1470 },
a6279458 1471 };
adaa70bb 1472
86c36ce4
BH
1473 ipv6_addr_copy(&rdfl.gateway, gateway);
1474
adaa70bb
TG
1475 if (rt6_need_strict(dest))
1476 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1477
5578689a 1478 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
58f09b78 1479 flags, __ip6_route_redirect);
a6279458
YH
1480}
1481
1482void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1483 struct in6_addr *saddr,
1484 struct neighbour *neigh, u8 *lladdr, int on_link)
1485{
1486 struct rt6_info *rt, *nrt = NULL;
1487 struct netevent_redirect netevent;
c346dca1 1488 struct net *net = dev_net(neigh->dev);
a6279458
YH
1489
1490 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1491
8ed67789 1492 if (rt == net->ipv6.ip6_null_entry) {
1da177e4
LT
1493 if (net_ratelimit())
1494 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1495 "for redirect target\n");
a6279458 1496 goto out;
1da177e4
LT
1497 }
1498
1da177e4
LT
1499 /*
1500 * We have finally decided to accept it.
1501 */
1502
1ab1457c 1503 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1504 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1505 NEIGH_UPDATE_F_OVERRIDE|
1506 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1507 NEIGH_UPDATE_F_ISROUTER))
1508 );
1509
1510 /*
1511 * Redirect received -> path was valid.
1512 * Look, redirects are sent only in response to data packets,
1513 * so that this nexthop apparently is reachable. --ANK
1514 */
d8d1f30b 1515 dst_confirm(&rt->dst);
1da177e4
LT
1516
1517 /* Duplicate redirect: silently ignore. */
d8d1f30b 1518 if (neigh == rt->dst.neighbour)
1da177e4
LT
1519 goto out;
1520
1521 nrt = ip6_rt_copy(rt);
1522 if (nrt == NULL)
1523 goto out;
1524
1525 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1526 if (on_link)
1527 nrt->rt6i_flags &= ~RTF_GATEWAY;
1528
1529 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1530 nrt->rt6i_dst.plen = 128;
d8d1f30b 1531 nrt->dst.flags |= DST_HOST;
1da177e4
LT
1532
1533 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1534 nrt->rt6i_nexthop = neigh_clone(neigh);
1da177e4 1535
40e22e8f 1536 if (ip6_ins_rt(nrt))
1da177e4
LT
1537 goto out;
1538
d8d1f30b
CG
1539 netevent.old = &rt->dst;
1540 netevent.new = &nrt->dst;
8d71740c
TT
1541 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1542
1da177e4 1543 if (rt->rt6i_flags&RTF_CACHE) {
e0a1ad73 1544 ip6_del_rt(rt);
1da177e4
LT
1545 return;
1546 }
1547
1548out:
d8d1f30b 1549 dst_release(&rt->dst);
1da177e4
LT
1550}
1551
1552/*
1553 * Handle ICMP "packet too big" messages
1554 * i.e. Path MTU discovery
1555 */
1556
ae878ae2
1557static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
1558 struct net *net, u32 pmtu, int ifindex)
1da177e4
LT
1559{
1560 struct rt6_info *rt, *nrt;
1561 int allfrag = 0;
d3052b55 1562again:
ae878ae2 1563 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1da177e4
LT
1564 if (rt == NULL)
1565 return;
1566
d3052b55
AV
1567 if (rt6_check_expired(rt)) {
1568 ip6_del_rt(rt);
1569 goto again;
1570 }
1571
d8d1f30b 1572 if (pmtu >= dst_mtu(&rt->dst))
1da177e4
LT
1573 goto out;
1574
1575 if (pmtu < IPV6_MIN_MTU) {
1576 /*
1ab1457c 1577 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1578 * MTU (1280) and a fragment header should always be included
1579 * after a node receiving Too Big message reporting PMTU is
1580 * less than the IPv6 Minimum Link MTU.
1581 */
1582 pmtu = IPV6_MIN_MTU;
1583 allfrag = 1;
1584 }
1585
1586 /* New mtu received -> path was valid.
1587 They are sent only in response to data packets,
1588 so that this nexthop apparently is reachable. --ANK
1589 */
d8d1f30b 1590 dst_confirm(&rt->dst);
1da177e4
LT
1591
1592 /* Host route. If it is static, it would be better
1593 not to override it, but add new one, so that
1594 when cache entry will expire old pmtu
1595 would return automatically.
1596 */
1597 if (rt->rt6i_flags & RTF_CACHE) {
defb3519
DM
1598 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1599 if (allfrag) {
1600 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1601 features |= RTAX_FEATURE_ALLFRAG;
1602 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1603 }
d8d1f30b 1604 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1605 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1606 goto out;
1607 }
1608
1609 /* Network route.
1610 Two cases are possible:
1611 1. It is connected route. Action: COW
1612 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1613 */
d5315b50 1614 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1615 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1616 else
1617 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1618
d5315b50 1619 if (nrt) {
defb3519
DM
1620 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1621 if (allfrag) {
1622 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1623 features |= RTAX_FEATURE_ALLFRAG;
1624 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1625 }
a1e78363
YH
1626
1627 /* According to RFC 1981, detecting PMTU increase shouldn't be
1628 * happened within 5 mins, the recommended timer is 10 mins.
1629 * Here this route expiration time is set to ip6_rt_mtu_expires
1630 * which is 10 mins. After 10 mins the decreased pmtu is expired
1631 * and detecting PMTU increase will be automatically happened.
1632 */
d8d1f30b 1633 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
a1e78363
YH
1634 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1635
40e22e8f 1636 ip6_ins_rt(nrt);
1da177e4 1637 }
1da177e4 1638out:
d8d1f30b 1639 dst_release(&rt->dst);
1da177e4
LT
1640}
1641
ae878ae2
1642void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1643 struct net_device *dev, u32 pmtu)
1644{
1645 struct net *net = dev_net(dev);
1646
1647 /*
1648 * RFC 1981 states that a node "MUST reduce the size of the packets it
1649 * is sending along the path" that caused the Packet Too Big message.
1650 * Since it's not possible in the general case to determine which
1651 * interface was used to send the original packet, we update the MTU
1652 * on the interface that will be used to send future packets. We also
1653 * update the MTU on the interface that received the Packet Too Big in
1654 * case the original packet was forced out that interface with
1655 * SO_BINDTODEVICE or similar. This is the next best thing to the
1656 * correct behaviour, which would be to update the MTU on all
1657 * interfaces.
1658 */
1659 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1660 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1661}
1662
1da177e4
LT
1663/*
1664 * Misc support functions
1665 */
1666
1667static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1668{
c346dca1 1669 struct net *net = dev_net(ort->rt6i_dev);
86393e52 1670 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
1671
1672 if (rt) {
d8d1f30b
CG
1673 rt->dst.input = ort->dst.input;
1674 rt->dst.output = ort->dst.output;
1675
defb3519 1676 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b
CG
1677 rt->dst.error = ort->dst.error;
1678 rt->dst.dev = ort->dst.dev;
1679 if (rt->dst.dev)
1680 dev_hold(rt->dst.dev);
1da177e4
LT
1681 rt->rt6i_idev = ort->rt6i_idev;
1682 if (rt->rt6i_idev)
1683 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1684 rt->dst.lastuse = jiffies;
1da177e4
LT
1685 rt->rt6i_expires = 0;
1686
1687 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1688 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1689 rt->rt6i_metric = 0;
1690
1691 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1692#ifdef CONFIG_IPV6_SUBTREES
1693 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1694#endif
c71099ac 1695 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1696 }
1697 return rt;
1698}
1699
70ceb4f5 1700#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0
DL
1701static struct rt6_info *rt6_get_route_info(struct net *net,
1702 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1703 struct in6_addr *gwaddr, int ifindex)
1704{
1705 struct fib6_node *fn;
1706 struct rt6_info *rt = NULL;
c71099ac
TG
1707 struct fib6_table *table;
1708
efa2cea0 1709 table = fib6_get_table(net, RT6_TABLE_INFO);
c71099ac
TG
1710 if (table == NULL)
1711 return NULL;
70ceb4f5 1712
c71099ac
TG
1713 write_lock_bh(&table->tb6_lock);
1714 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1715 if (!fn)
1716 goto out;
1717
d8d1f30b 1718 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
70ceb4f5
YH
1719 if (rt->rt6i_dev->ifindex != ifindex)
1720 continue;
1721 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1722 continue;
1723 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1724 continue;
d8d1f30b 1725 dst_hold(&rt->dst);
70ceb4f5
YH
1726 break;
1727 }
1728out:
c71099ac 1729 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1730 return rt;
1731}
1732
efa2cea0
DL
1733static struct rt6_info *rt6_add_route_info(struct net *net,
1734 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1735 struct in6_addr *gwaddr, int ifindex,
1736 unsigned pref)
1737{
86872cb5
TG
1738 struct fib6_config cfg = {
1739 .fc_table = RT6_TABLE_INFO,
238fc7ea 1740 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1741 .fc_ifindex = ifindex,
1742 .fc_dst_len = prefixlen,
1743 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1744 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1745 .fc_nlinfo.pid = 0,
1746 .fc_nlinfo.nlh = NULL,
1747 .fc_nlinfo.nl_net = net,
86872cb5
TG
1748 };
1749
1750 ipv6_addr_copy(&cfg.fc_dst, prefix);
1751 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
70ceb4f5 1752
e317da96
YH
1753 /* We should treat it as a default route if prefix length is 0. */
1754 if (!prefixlen)
86872cb5 1755 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1756
86872cb5 1757 ip6_route_add(&cfg);
70ceb4f5 1758
efa2cea0 1759 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1760}
1761#endif
1762
1da177e4 1763struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1ab1457c 1764{
1da177e4 1765 struct rt6_info *rt;
c71099ac 1766 struct fib6_table *table;
1da177e4 1767
c346dca1 1768 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
c71099ac
TG
1769 if (table == NULL)
1770 return NULL;
1da177e4 1771
c71099ac 1772 write_lock_bh(&table->tb6_lock);
d8d1f30b 1773 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1da177e4 1774 if (dev == rt->rt6i_dev &&
045927ff 1775 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1776 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1777 break;
1778 }
1779 if (rt)
d8d1f30b 1780 dst_hold(&rt->dst);
c71099ac 1781 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1782 return rt;
1783}
1784
1785struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
ebacaaa0
YH
1786 struct net_device *dev,
1787 unsigned int pref)
1da177e4 1788{
86872cb5
TG
1789 struct fib6_config cfg = {
1790 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1791 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1792 .fc_ifindex = dev->ifindex,
1793 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1794 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1795 .fc_nlinfo.pid = 0,
1796 .fc_nlinfo.nlh = NULL,
c346dca1 1797 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1798 };
1da177e4 1799
86872cb5 1800 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1da177e4 1801
86872cb5 1802 ip6_route_add(&cfg);
1da177e4 1803
1da177e4
LT
1804 return rt6_get_dflt_router(gwaddr, dev);
1805}
1806
7b4da532 1807void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1808{
1809 struct rt6_info *rt;
c71099ac
TG
1810 struct fib6_table *table;
1811
1812 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1813 table = fib6_get_table(net, RT6_TABLE_DFLT);
c71099ac
TG
1814 if (table == NULL)
1815 return;
1da177e4
LT
1816
1817restart:
c71099ac 1818 read_lock_bh(&table->tb6_lock);
d8d1f30b 1819 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1820 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1821 dst_hold(&rt->dst);
c71099ac 1822 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1823 ip6_del_rt(rt);
1da177e4
LT
1824 goto restart;
1825 }
1826 }
c71099ac 1827 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1828}
1829
5578689a
DL
1830static void rtmsg_to_fib6_config(struct net *net,
1831 struct in6_rtmsg *rtmsg,
86872cb5
TG
1832 struct fib6_config *cfg)
1833{
1834 memset(cfg, 0, sizeof(*cfg));
1835
1836 cfg->fc_table = RT6_TABLE_MAIN;
1837 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1838 cfg->fc_metric = rtmsg->rtmsg_metric;
1839 cfg->fc_expires = rtmsg->rtmsg_info;
1840 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1841 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1842 cfg->fc_flags = rtmsg->rtmsg_flags;
1843
5578689a 1844 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1845
86872cb5
TG
1846 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1847 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1848 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1849}
1850
5578689a 1851int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1852{
86872cb5 1853 struct fib6_config cfg;
1da177e4
LT
1854 struct in6_rtmsg rtmsg;
1855 int err;
1856
1857 switch(cmd) {
1858 case SIOCADDRT: /* Add a route */
1859 case SIOCDELRT: /* Delete a route */
1860 if (!capable(CAP_NET_ADMIN))
1861 return -EPERM;
1862 err = copy_from_user(&rtmsg, arg,
1863 sizeof(struct in6_rtmsg));
1864 if (err)
1865 return -EFAULT;
86872cb5 1866
5578689a 1867 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1868
1da177e4
LT
1869 rtnl_lock();
1870 switch (cmd) {
1871 case SIOCADDRT:
86872cb5 1872 err = ip6_route_add(&cfg);
1da177e4
LT
1873 break;
1874 case SIOCDELRT:
86872cb5 1875 err = ip6_route_del(&cfg);
1da177e4
LT
1876 break;
1877 default:
1878 err = -EINVAL;
1879 }
1880 rtnl_unlock();
1881
1882 return err;
3ff50b79 1883 }
1da177e4
LT
1884
1885 return -EINVAL;
1886}
1887
1888/*
1889 * Drop the packet on the floor
1890 */
1891
d5fdd6ba 1892static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 1893{
612f09e8 1894 int type;
adf30907 1895 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
1896 switch (ipstats_mib_noroutes) {
1897 case IPSTATS_MIB_INNOROUTES:
0660e03f 1898 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 1899 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
1900 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1901 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
1902 break;
1903 }
1904 /* FALLTHROUGH */
1905 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
1906 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1907 ipstats_mib_noroutes);
612f09e8
YH
1908 break;
1909 }
3ffe533c 1910 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
1911 kfree_skb(skb);
1912 return 0;
1913}
1914
9ce8ade0
TG
1915static int ip6_pkt_discard(struct sk_buff *skb)
1916{
612f09e8 1917 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1918}
1919
20380731 1920static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 1921{
adf30907 1922 skb->dev = skb_dst(skb)->dev;
612f09e8 1923 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
1924}
1925
6723ab54
DM
1926#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1927
9ce8ade0
TG
1928static int ip6_pkt_prohibit(struct sk_buff *skb)
1929{
612f09e8 1930 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1931}
1932
1933static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1934{
adf30907 1935 skb->dev = skb_dst(skb)->dev;
612f09e8 1936 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
1937}
1938
6723ab54
DM
1939#endif
1940
1da177e4
LT
1941/*
1942 * Allocate a dst for local (unicast / anycast) address.
1943 */
1944
1945struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1946 const struct in6_addr *addr,
1947 int anycast)
1948{
c346dca1 1949 struct net *net = dev_net(idev->dev);
86393e52 1950 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
14deae41 1951 struct neighbour *neigh;
1da177e4 1952
40385653
BG
1953 if (rt == NULL) {
1954 if (net_ratelimit())
1955 pr_warning("IPv6: Maximum number of routes reached,"
1956 " consider increasing route/max_size.\n");
1da177e4 1957 return ERR_PTR(-ENOMEM);
40385653 1958 }
1da177e4 1959
5578689a 1960 dev_hold(net->loopback_dev);
1da177e4
LT
1961 in6_dev_hold(idev);
1962
d8d1f30b
CG
1963 rt->dst.flags = DST_HOST;
1964 rt->dst.input = ip6_input;
1965 rt->dst.output = ip6_output;
5578689a 1966 rt->rt6i_dev = net->loopback_dev;
1da177e4 1967 rt->rt6i_idev = idev;
defb3519 1968 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1);
d8d1f30b 1969 rt->dst.obsolete = -1;
1da177e4
LT
1970
1971 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
1972 if (anycast)
1973 rt->rt6i_flags |= RTF_ANYCAST;
1974 else
1da177e4 1975 rt->rt6i_flags |= RTF_LOCAL;
14deae41
DM
1976 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1977 if (IS_ERR(neigh)) {
d8d1f30b 1978 dst_free(&rt->dst);
14deae41
DM
1979
1980 /* We are casting this because that is the return
1981 * value type. But an errno encoded pointer is the
1982 * same regardless of the underlying pointer type,
1983 * and that's what we are returning. So this is OK.
1984 */
1985 return (struct rt6_info *) neigh;
1da177e4 1986 }
14deae41 1987 rt->rt6i_nexthop = neigh;
1da177e4
LT
1988
1989 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1990 rt->rt6i_dst.plen = 128;
5578689a 1991 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 1992
d8d1f30b 1993 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
1994
1995 return rt;
1996}
1997
8ed67789
DL
/* Argument bundle handed to fib6_ifdown() by rt6_ifdown(). */
struct arg_dev_net {
	struct net_device	*dev;	/* device going down; NULL matches any device */
	struct net		*net;	/* namespace whose null entry must be kept */
};
2002
1da177e4
LT
2003static int fib6_ifdown(struct rt6_info *rt, void *arg)
2004{
bc3ef660 2005 const struct arg_dev_net *adn = arg;
2006 const struct net_device *dev = adn->dev;
8ed67789 2007
bc3ef660 2008 if ((rt->rt6i_dev == dev || dev == NULL) &&
2009 rt != adn->net->ipv6.ip6_null_entry) {
1da177e4
LT
2010 RT6_TRACE("deleted by ifdown %p\n", rt);
2011 return -1;
2012 }
2013 return 0;
2014}
2015
f3db4851 2016void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2017{
8ed67789
DL
2018 struct arg_dev_net adn = {
2019 .dev = dev,
2020 .net = net,
2021 };
2022
2023 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2024 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2025}
2026
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned		mtu;	/* the new MTU */
};
2032
2033static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2034{
2035 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2036 struct inet6_dev *idev;
2037
2038 /* In IPv6 pmtu discovery is not optional,
2039 so that RTAX_MTU lock cannot disable it.
2040 We still use this lock to block changes
2041 caused by addrconf/ndisc.
2042 */
2043
2044 idev = __in6_dev_get(arg->dev);
2045 if (idev == NULL)
2046 return 0;
2047
2048 /* For administrative MTU increase, there is no way to discover
2049 IPv6 PMTU increase, so PMTU increase should be updated here.
2050 Since RFC 1981 doesn't include administrative MTU increase
2051 update PMTU increase is a MUST. (i.e. jumbo frame)
2052 */
2053 /*
2054 If new MTU is less than route PMTU, this new MTU will be the
2055 lowest MTU in the path, update the route PMTU to reflect PMTU
2056 decreases; if new MTU is greater than route PMTU, and the
2057 old MTU is the lowest MTU in the path, update the route PMTU
2058 to reflect the increase. In this case if the other nodes' MTU
2059 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2060 PMTU discouvery.
2061 */
2062 if (rt->rt6i_dev == arg->dev &&
d8d1f30b
CG
2063 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2064 (dst_mtu(&rt->dst) >= arg->mtu ||
2065 (dst_mtu(&rt->dst) < arg->mtu &&
2066 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2067 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2068 }
1da177e4
LT
2069 return 0;
2070}
2071
2072void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2073{
c71099ac
TG
2074 struct rt6_mtu_change_arg arg = {
2075 .dev = dev,
2076 .mtu = mtu,
2077 };
1da177e4 2078
c346dca1 2079 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2080}
2081
ef7c79ed 2082static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2083 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2084 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2085 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2086 [RTA_PRIORITY] = { .type = NLA_U32 },
2087 [RTA_METRICS] = { .type = NLA_NESTED },
2088};
2089
2090static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2091 struct fib6_config *cfg)
1da177e4 2092{
86872cb5
TG
2093 struct rtmsg *rtm;
2094 struct nlattr *tb[RTA_MAX+1];
2095 int err;
1da177e4 2096
86872cb5
TG
2097 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2098 if (err < 0)
2099 goto errout;
1da177e4 2100
86872cb5
TG
2101 err = -EINVAL;
2102 rtm = nlmsg_data(nlh);
2103 memset(cfg, 0, sizeof(*cfg));
2104
2105 cfg->fc_table = rtm->rtm_table;
2106 cfg->fc_dst_len = rtm->rtm_dst_len;
2107 cfg->fc_src_len = rtm->rtm_src_len;
2108 cfg->fc_flags = RTF_UP;
2109 cfg->fc_protocol = rtm->rtm_protocol;
2110
2111 if (rtm->rtm_type == RTN_UNREACHABLE)
2112 cfg->fc_flags |= RTF_REJECT;
2113
ab79ad14
2114 if (rtm->rtm_type == RTN_LOCAL)
2115 cfg->fc_flags |= RTF_LOCAL;
2116
86872cb5
TG
2117 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2118 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2119 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2120
2121 if (tb[RTA_GATEWAY]) {
2122 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2123 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2124 }
86872cb5
TG
2125
2126 if (tb[RTA_DST]) {
2127 int plen = (rtm->rtm_dst_len + 7) >> 3;
2128
2129 if (nla_len(tb[RTA_DST]) < plen)
2130 goto errout;
2131
2132 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2133 }
86872cb5
TG
2134
2135 if (tb[RTA_SRC]) {
2136 int plen = (rtm->rtm_src_len + 7) >> 3;
2137
2138 if (nla_len(tb[RTA_SRC]) < plen)
2139 goto errout;
2140
2141 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2142 }
86872cb5
TG
2143
2144 if (tb[RTA_OIF])
2145 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2146
2147 if (tb[RTA_PRIORITY])
2148 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2149
2150 if (tb[RTA_METRICS]) {
2151 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2152 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2153 }
86872cb5
TG
2154
2155 if (tb[RTA_TABLE])
2156 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2157
2158 err = 0;
2159errout:
2160 return err;
1da177e4
LT
2161}
2162
c127ea2c 2163static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2164{
86872cb5
TG
2165 struct fib6_config cfg;
2166 int err;
1da177e4 2167
86872cb5
TG
2168 err = rtm_to_fib6_config(skb, nlh, &cfg);
2169 if (err < 0)
2170 return err;
2171
2172 return ip6_route_del(&cfg);
1da177e4
LT
2173}
2174
c127ea2c 2175static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2176{
86872cb5
TG
2177 struct fib6_config cfg;
2178 int err;
1da177e4 2179
86872cb5
TG
2180 err = rtm_to_fib6_config(skb, nlh, &cfg);
2181 if (err < 0)
2182 return err;
2183
2184 return ip6_route_add(&cfg);
1da177e4
LT
2185}
2186
339bf98f
TG
2187static inline size_t rt6_nlmsg_size(void)
2188{
2189 return NLMSG_ALIGN(sizeof(struct rtmsg))
2190 + nla_total_size(16) /* RTA_SRC */
2191 + nla_total_size(16) /* RTA_DST */
2192 + nla_total_size(16) /* RTA_GATEWAY */
2193 + nla_total_size(16) /* RTA_PREFSRC */
2194 + nla_total_size(4) /* RTA_TABLE */
2195 + nla_total_size(4) /* RTA_IIF */
2196 + nla_total_size(4) /* RTA_OIF */
2197 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2198 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2199 + nla_total_size(sizeof(struct rta_cacheinfo));
2200}
2201
191cd582
BH
2202static int rt6_fill_node(struct net *net,
2203 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2204 struct in6_addr *dst, struct in6_addr *src,
2205 int iif, int type, u32 pid, u32 seq,
7bc570c8 2206 int prefix, int nowait, unsigned int flags)
1da177e4
LT
2207{
2208 struct rtmsg *rtm;
2d7202bf 2209 struct nlmsghdr *nlh;
e3703b3d 2210 long expires;
9e762a4a 2211 u32 table;
1da177e4
LT
2212
2213 if (prefix) { /* user wants prefix routes only */
2214 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2215 /* success since this is not a prefix route */
2216 return 1;
2217 }
2218 }
2219
2d7202bf
TG
2220 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2221 if (nlh == NULL)
26932566 2222 return -EMSGSIZE;
2d7202bf
TG
2223
2224 rtm = nlmsg_data(nlh);
1da177e4
LT
2225 rtm->rtm_family = AF_INET6;
2226 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2227 rtm->rtm_src_len = rt->rt6i_src.plen;
2228 rtm->rtm_tos = 0;
c71099ac 2229 if (rt->rt6i_table)
9e762a4a 2230 table = rt->rt6i_table->tb6_id;
c71099ac 2231 else
9e762a4a
PM
2232 table = RT6_TABLE_UNSPEC;
2233 rtm->rtm_table = table;
2d7202bf 2234 NLA_PUT_U32(skb, RTA_TABLE, table);
1da177e4
LT
2235 if (rt->rt6i_flags&RTF_REJECT)
2236 rtm->rtm_type = RTN_UNREACHABLE;
ab79ad14
2237 else if (rt->rt6i_flags&RTF_LOCAL)
2238 rtm->rtm_type = RTN_LOCAL;
1da177e4
LT
2239 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2240 rtm->rtm_type = RTN_LOCAL;
2241 else
2242 rtm->rtm_type = RTN_UNICAST;
2243 rtm->rtm_flags = 0;
2244 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2245 rtm->rtm_protocol = rt->rt6i_protocol;
2246 if (rt->rt6i_flags&RTF_DYNAMIC)
2247 rtm->rtm_protocol = RTPROT_REDIRECT;
2248 else if (rt->rt6i_flags & RTF_ADDRCONF)
2249 rtm->rtm_protocol = RTPROT_KERNEL;
2250 else if (rt->rt6i_flags&RTF_DEFAULT)
2251 rtm->rtm_protocol = RTPROT_RA;
2252
2253 if (rt->rt6i_flags&RTF_CACHE)
2254 rtm->rtm_flags |= RTM_F_CLONED;
2255
2256 if (dst) {
2d7202bf 2257 NLA_PUT(skb, RTA_DST, 16, dst);
1ab1457c 2258 rtm->rtm_dst_len = 128;
1da177e4 2259 } else if (rtm->rtm_dst_len)
2d7202bf 2260 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2261#ifdef CONFIG_IPV6_SUBTREES
2262 if (src) {
2d7202bf 2263 NLA_PUT(skb, RTA_SRC, 16, src);
1ab1457c 2264 rtm->rtm_src_len = 128;
1da177e4 2265 } else if (rtm->rtm_src_len)
2d7202bf 2266 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4 2267#endif
7bc570c8
YH
2268 if (iif) {
2269#ifdef CONFIG_IPV6_MROUTE
2270 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2271 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2272 if (err <= 0) {
2273 if (!nowait) {
2274 if (err == 0)
2275 return 0;
2276 goto nla_put_failure;
2277 } else {
2278 if (err == -EMSGSIZE)
2279 goto nla_put_failure;
2280 }
2281 }
2282 } else
2283#endif
2284 NLA_PUT_U32(skb, RTA_IIF, iif);
2285 } else if (dst) {
d8d1f30b 2286 struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
1da177e4 2287 struct in6_addr saddr_buf;
191cd582 2288 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
7cbca67c 2289 dst, 0, &saddr_buf) == 0)
2d7202bf 2290 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2291 }
2d7202bf 2292
defb3519 2293 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2294 goto nla_put_failure;
2295
d8d1f30b
CG
2296 if (rt->dst.neighbour)
2297 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);
2d7202bf 2298
d8d1f30b 2299 if (rt->dst.dev)
2d7202bf
TG
2300 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2301
2302 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
e3703b3d 2303
36e3deae
YH
2304 if (!(rt->rt6i_flags & RTF_EXPIRES))
2305 expires = 0;
2306 else if (rt->rt6i_expires - jiffies < INT_MAX)
2307 expires = rt->rt6i_expires - jiffies;
2308 else
2309 expires = INT_MAX;
69cdf8f9 2310
d8d1f30b
CG
2311 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2312 expires, rt->dst.error) < 0)
e3703b3d 2313 goto nla_put_failure;
2d7202bf
TG
2314
2315 return nlmsg_end(skb, nlh);
2316
2317nla_put_failure:
26932566
PM
2318 nlmsg_cancel(skb, nlh);
2319 return -EMSGSIZE;
1da177e4
LT
2320}
2321
1b43af54 2322int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2323{
2324 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2325 int prefix;
2326
2d7202bf
TG
2327 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2328 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2329 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2330 } else
2331 prefix = 0;
2332
191cd582
BH
2333 return rt6_fill_node(arg->net,
2334 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2335 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2336 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2337}
2338
c127ea2c 2339static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2340{
3b1e0a65 2341 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2342 struct nlattr *tb[RTA_MAX+1];
2343 struct rt6_info *rt;
1da177e4 2344 struct sk_buff *skb;
ab364a6f 2345 struct rtmsg *rtm;
1da177e4 2346 struct flowi fl;
ab364a6f 2347 int err, iif = 0;
1da177e4 2348
ab364a6f
TG
2349 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2350 if (err < 0)
2351 goto errout;
1da177e4 2352
ab364a6f 2353 err = -EINVAL;
1da177e4 2354 memset(&fl, 0, sizeof(fl));
1da177e4 2355
ab364a6f
TG
2356 if (tb[RTA_SRC]) {
2357 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2358 goto errout;
2359
2360 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2361 }
2362
2363 if (tb[RTA_DST]) {
2364 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2365 goto errout;
2366
2367 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2368 }
2369
2370 if (tb[RTA_IIF])
2371 iif = nla_get_u32(tb[RTA_IIF]);
2372
2373 if (tb[RTA_OIF])
2374 fl.oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2375
2376 if (iif) {
2377 struct net_device *dev;
5578689a 2378 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2379 if (!dev) {
2380 err = -ENODEV;
ab364a6f 2381 goto errout;
1da177e4
LT
2382 }
2383 }
2384
ab364a6f
TG
2385 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2386 if (skb == NULL) {
2387 err = -ENOBUFS;
2388 goto errout;
2389 }
1da177e4 2390
ab364a6f
TG
2391 /* Reserve room for dummy headers, this skb can pass
2392 through good chunk of routing engine.
2393 */
459a98ed 2394 skb_reset_mac_header(skb);
ab364a6f 2395 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2396
8a3edd80 2397 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
d8d1f30b 2398 skb_dst_set(skb, &rt->dst);
1da177e4 2399
191cd582 2400 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
1da177e4 2401 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2402 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2403 if (err < 0) {
ab364a6f
TG
2404 kfree_skb(skb);
2405 goto errout;
1da177e4
LT
2406 }
2407
5578689a 2408 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2409errout:
1da177e4 2410 return err;
1da177e4
LT
2411}
2412
86872cb5 2413void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2414{
2415 struct sk_buff *skb;
5578689a 2416 struct net *net = info->nl_net;
528c4ceb
DL
2417 u32 seq;
2418 int err;
2419
2420 err = -ENOBUFS;
2421 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
86872cb5 2422
339bf98f 2423 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
21713ebc
TG
2424 if (skb == NULL)
2425 goto errout;
2426
191cd582 2427 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2428 event, info->pid, seq, 0, 0, 0);
26932566
PM
2429 if (err < 0) {
2430 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2431 WARN_ON(err == -EMSGSIZE);
2432 kfree_skb(skb);
2433 goto errout;
2434 }
1ce85fe4
PNA
2435 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2436 info->nlh, gfp_any());
2437 return;
21713ebc
TG
2438errout:
2439 if (err < 0)
5578689a 2440 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2441}
2442
8ed67789
DL
2443static int ip6_route_dev_notify(struct notifier_block *this,
2444 unsigned long event, void *data)
2445{
2446 struct net_device *dev = (struct net_device *)data;
c346dca1 2447 struct net *net = dev_net(dev);
8ed67789
DL
2448
2449 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2450 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2451 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2452#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2453 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2454 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2455 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2456 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2457#endif
2458 }
2459
2460 return NOTIFY_OK;
2461}
2462
1da177e4
LT
2463/*
2464 * /proc
2465 */
2466
2467#ifdef CONFIG_PROC_FS
2468
1da177e4
LT
/*
 * Buffer bookkeeping for /proc output.
 * NOTE(review): nothing in the visible part of this file uses this
 * struct - confirm whether it is still needed.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2477
2478static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2479{
33120b30 2480 struct seq_file *m = p_arg;
1da177e4 2481
4b7a4274 2482 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2483
2484#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2485 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2486#else
33120b30 2487 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4
LT
2488#endif
2489
2490 if (rt->rt6i_nexthop) {
4b7a4274 2491 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
1da177e4 2492 } else {
33120b30 2493 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2494 }
33120b30 2495 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2496 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2497 rt->dst.__use, rt->rt6i_flags,
33120b30 2498 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1da177e4
LT
2499 return 0;
2500}
2501
33120b30 2502static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2503{
f3db4851
DL
2504 struct net *net = (struct net *)m->private;
2505 fib6_clean_all(net, rt6_info_route, 0, m);
33120b30
AD
2506 return 0;
2507}
1da177e4 2508
33120b30
AD
2509static int ipv6_route_open(struct inode *inode, struct file *file)
2510{
de05c557 2511 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2512}
2513
33120b30
AD
2514static const struct file_operations ipv6_route_proc_fops = {
2515 .owner = THIS_MODULE,
2516 .open = ipv6_route_open,
2517 .read = seq_read,
2518 .llseek = seq_lseek,
b6fcbdb4 2519 .release = single_release_net,
33120b30
AD
2520};
2521
1da177e4
LT
2522static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2523{
69ddb805 2524 struct net *net = (struct net *)seq->private;
1da177e4 2525 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2526 net->ipv6.rt6_stats->fib_nodes,
2527 net->ipv6.rt6_stats->fib_route_nodes,
2528 net->ipv6.rt6_stats->fib_rt_alloc,
2529 net->ipv6.rt6_stats->fib_rt_entries,
2530 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2531 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2532 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2533
2534 return 0;
2535}
2536
2537static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2538{
de05c557 2539 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2540}
2541
9a32144e 2542static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2543 .owner = THIS_MODULE,
2544 .open = rt6_stats_seq_open,
2545 .read = seq_read,
2546 .llseek = seq_lseek,
b6fcbdb4 2547 .release = single_release_net,
1da177e4
LT
2548};
2549#endif /* CONFIG_PROC_FS */
2550
2551#ifdef CONFIG_SYSCTL
2552
1da177e4 2553static
8d65af78 2554int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2555 void __user *buffer, size_t *lenp, loff_t *ppos)
2556{
5b7c931d
DL
2557 struct net *net = current->nsproxy->net_ns;
2558 int delay = net->ipv6.sysctl.flush_delay;
1da177e4 2559 if (write) {
8d65af78 2560 proc_dointvec(ctl, write, buffer, lenp, ppos);
5b7c931d 2561 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
1da177e4
LT
2562 return 0;
2563 } else
2564 return -EINVAL;
2565}
2566
760f2d01 2567ctl_table ipv6_route_table_template[] = {
1ab1457c 2568 {
1da177e4 2569 .procname = "flush",
4990509f 2570 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2571 .maxlen = sizeof(int),
89c8b3a1 2572 .mode = 0200,
6d9f239a 2573 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2574 },
2575 {
1da177e4 2576 .procname = "gc_thresh",
9a7ec3a9 2577 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2578 .maxlen = sizeof(int),
2579 .mode = 0644,
6d9f239a 2580 .proc_handler = proc_dointvec,
1da177e4
LT
2581 },
2582 {
1da177e4 2583 .procname = "max_size",
4990509f 2584 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2585 .maxlen = sizeof(int),
2586 .mode = 0644,
6d9f239a 2587 .proc_handler = proc_dointvec,
1da177e4
LT
2588 },
2589 {
1da177e4 2590 .procname = "gc_min_interval",
4990509f 2591 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2592 .maxlen = sizeof(int),
2593 .mode = 0644,
6d9f239a 2594 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2595 },
2596 {
1da177e4 2597 .procname = "gc_timeout",
4990509f 2598 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2599 .maxlen = sizeof(int),
2600 .mode = 0644,
6d9f239a 2601 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2602 },
2603 {
1da177e4 2604 .procname = "gc_interval",
4990509f 2605 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2606 .maxlen = sizeof(int),
2607 .mode = 0644,
6d9f239a 2608 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2609 },
2610 {
1da177e4 2611 .procname = "gc_elasticity",
4990509f 2612 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2613 .maxlen = sizeof(int),
2614 .mode = 0644,
f3d3f616 2615 .proc_handler = proc_dointvec,
1da177e4
LT
2616 },
2617 {
1da177e4 2618 .procname = "mtu_expires",
4990509f 2619 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2620 .maxlen = sizeof(int),
2621 .mode = 0644,
6d9f239a 2622 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2623 },
2624 {
1da177e4 2625 .procname = "min_adv_mss",
4990509f 2626 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2627 .maxlen = sizeof(int),
2628 .mode = 0644,
f3d3f616 2629 .proc_handler = proc_dointvec,
1da177e4
LT
2630 },
2631 {
1da177e4 2632 .procname = "gc_min_interval_ms",
4990509f 2633 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2634 .maxlen = sizeof(int),
2635 .mode = 0644,
6d9f239a 2636 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2637 },
f8572d8f 2638 { }
1da177e4
LT
2639};
2640
2c8c1e72 2641struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2642{
2643 struct ctl_table *table;
2644
2645 table = kmemdup(ipv6_route_table_template,
2646 sizeof(ipv6_route_table_template),
2647 GFP_KERNEL);
5ee09105
YH
2648
2649 if (table) {
2650 table[0].data = &net->ipv6.sysctl.flush_delay;
86393e52 2651 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2652 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2653 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2654 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2655 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2656 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2657 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2658 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2659 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2660 }
2661
760f2d01
DL
2662 return table;
2663}
1da177e4
LT
2664#endif
2665
2c8c1e72 2666static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2667{
633d424b 2668 int ret = -ENOMEM;
8ed67789 2669
86393e52
AD
2670 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2671 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2672
fc66f95c
ED
2673 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2674 goto out_ip6_dst_ops;
2675
8ed67789
DL
2676 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2677 sizeof(*net->ipv6.ip6_null_entry),
2678 GFP_KERNEL);
2679 if (!net->ipv6.ip6_null_entry)
fc66f95c 2680 goto out_ip6_dst_entries;
d8d1f30b 2681 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2682 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2683 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
defb3519 2684 dst_metric_set(&net->ipv6.ip6_null_entry->dst, RTAX_HOPLIMIT, 255);
8ed67789
DL
2685
2686#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2687 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2688 sizeof(*net->ipv6.ip6_prohibit_entry),
2689 GFP_KERNEL);
68fffc67
PZ
2690 if (!net->ipv6.ip6_prohibit_entry)
2691 goto out_ip6_null_entry;
d8d1f30b 2692 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2693 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2694 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
defb3519 2695 dst_metric_set(&net->ipv6.ip6_prohibit_entry->dst, RTAX_HOPLIMIT, 255);
8ed67789
DL
2696
2697 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2698 sizeof(*net->ipv6.ip6_blk_hole_entry),
2699 GFP_KERNEL);
68fffc67
PZ
2700 if (!net->ipv6.ip6_blk_hole_entry)
2701 goto out_ip6_prohibit_entry;
d8d1f30b 2702 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2703 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2704 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
defb3519 2705 dst_metric_set(&net->ipv6.ip6_blk_hole_entry->dst, RTAX_HOPLIMIT, 255);
8ed67789
DL
2706#endif
2707
b339a47c
PZ
2708 net->ipv6.sysctl.flush_delay = 0;
2709 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2710 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2711 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2712 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2713 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2714 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2715 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2716
cdb18761
DL
2717#ifdef CONFIG_PROC_FS
2718 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2719 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2720#endif
6891a346
BT
2721 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2722
8ed67789
DL
2723 ret = 0;
2724out:
2725 return ret;
f2fc6a54 2726
68fffc67
PZ
2727#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2728out_ip6_prohibit_entry:
2729 kfree(net->ipv6.ip6_prohibit_entry);
2730out_ip6_null_entry:
2731 kfree(net->ipv6.ip6_null_entry);
2732#endif
fc66f95c
ED
2733out_ip6_dst_entries:
2734 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2735out_ip6_dst_ops:
f2fc6a54 2736 goto out;
cdb18761
DL
2737}
2738
2c8c1e72 2739static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761
DL
2740{
2741#ifdef CONFIG_PROC_FS
2742 proc_net_remove(net, "ipv6_route");
2743 proc_net_remove(net, "rt6_stats");
2744#endif
8ed67789
DL
2745 kfree(net->ipv6.ip6_null_entry);
2746#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2747 kfree(net->ipv6.ip6_prohibit_entry);
2748 kfree(net->ipv6.ip6_blk_hole_entry);
2749#endif
41bb78b4 2750 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
2751}
2752
2753static struct pernet_operations ip6_route_net_ops = {
2754 .init = ip6_route_net_init,
2755 .exit = ip6_route_net_exit,
2756};
2757
8ed67789
DL
2758static struct notifier_block ip6_route_dev_notifier = {
2759 .notifier_call = ip6_route_dev_notify,
2760 .priority = 0,
2761};
2762
433d49c3 2763int __init ip6_route_init(void)
1da177e4 2764{
433d49c3
DL
2765 int ret;
2766
9a7ec3a9
DL
2767 ret = -ENOMEM;
2768 ip6_dst_ops_template.kmem_cachep =
e5d679f3 2769 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 2770 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 2771 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 2772 goto out;
14e50e57 2773
fc66f95c 2774 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 2775 if (ret)
bdb3289f 2776 goto out_kmem_cache;
bdb3289f 2777
fc66f95c
ED
2778 ret = register_pernet_subsys(&ip6_route_net_ops);
2779 if (ret)
2780 goto out_dst_entries;
2781
5dc121e9
AE
2782 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2783
8ed67789
DL
2784 /* Registering of the loopback is done before this portion of code,
2785 * the loopback reference in rt6_info will not be taken, do it
2786 * manually for init_net */
d8d1f30b 2787 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2788 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2789 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2790 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 2791 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 2792 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2793 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2794 #endif
433d49c3
DL
2795 ret = fib6_init();
2796 if (ret)
8ed67789 2797 goto out_register_subsys;
433d49c3 2798
433d49c3
DL
2799 ret = xfrm6_init();
2800 if (ret)
cdb18761 2801 goto out_fib6_init;
c35b7e72 2802
433d49c3
DL
2803 ret = fib6_rules_init();
2804 if (ret)
2805 goto xfrm6_init;
7e5449c2 2806
433d49c3
DL
2807 ret = -ENOBUFS;
2808 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2809 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2810 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2811 goto fib6_rules_init;
c127ea2c 2812
8ed67789 2813 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761
DL
2814 if (ret)
2815 goto fib6_rules_init;
8ed67789 2816
433d49c3
DL
2817out:
2818 return ret;
2819
2820fib6_rules_init:
433d49c3
DL
2821 fib6_rules_cleanup();
2822xfrm6_init:
433d49c3 2823 xfrm6_fini();
433d49c3 2824out_fib6_init:
433d49c3 2825 fib6_gc_cleanup();
8ed67789
DL
2826out_register_subsys:
2827 unregister_pernet_subsys(&ip6_route_net_ops);
fc66f95c
ED
2828out_dst_entries:
2829 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 2830out_kmem_cache:
f2fc6a54 2831 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 2832 goto out;
1da177e4
LT
2833}
2834
2835void ip6_route_cleanup(void)
2836{
8ed67789 2837 unregister_netdevice_notifier(&ip6_route_dev_notifier);
101367c2 2838 fib6_rules_cleanup();
1da177e4 2839 xfrm6_fini();
1da177e4 2840 fib6_gc_cleanup();
8ed67789 2841 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 2842 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 2843 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 2844}
This page took 0.820719 seconds and 5 git commands to generate.