Merge branch 'for-airlied' of git://people.freedesktop.org/~danvet/drm-intel into...
[deliverable/linux.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
457c4cbc 47#include <net/net_namespace.h>
1da177e4
LT
48#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
57#include <net/xfrm.h>
8d71740c 58#include <net/netevent.h>
21713ebc 59#include <net/netlink.h>
1da177e4
LT
60
61#include <asm/uaccess.h>
62
63#ifdef CONFIG_SYSCTL
64#include <linux/sysctl.h>
65#endif
66
1716a961 67static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
21efcfa0 68 const struct in6_addr *dest);
1da177e4 69static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 70static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 71static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
72static struct dst_entry *ip6_negative_advice(struct dst_entry *);
73static void ip6_dst_destroy(struct dst_entry *);
74static void ip6_dst_ifdown(struct dst_entry *,
75 struct net_device *dev, int how);
569d3645 76static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
77
78static int ip6_pkt_discard(struct sk_buff *skb);
79static int ip6_pkt_discard_out(struct sk_buff *skb);
80static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
81static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
82 struct sk_buff *skb, u32 mtu);
83static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
84 struct sk_buff *skb);
1da177e4 85
70ceb4f5 86#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 87static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
88 const struct in6_addr *prefix, int prefixlen,
89 const struct in6_addr *gwaddr, int ifindex,
95c96174 90 unsigned int pref);
efa2cea0 91static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
92 const struct in6_addr *prefix, int prefixlen,
93 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
94#endif
95
06582540
DM
96static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
97{
98 struct rt6_info *rt = (struct rt6_info *) dst;
99 struct inet_peer *peer;
100 u32 *p = NULL;
101
8e2ec639
YZ
102 if (!(rt->dst.flags & DST_HOST))
103 return NULL;
104
fbfe95a4 105 peer = rt6_get_peer_create(rt);
06582540
DM
106 if (peer) {
107 u32 *old_p = __DST_METRICS_PTR(old);
108 unsigned long prev, new;
109
110 p = peer->metrics;
111 if (inet_metrics_new(peer))
112 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
113
114 new = (unsigned long) p;
115 prev = cmpxchg(&dst->_metrics, old, new);
116
117 if (prev != old) {
118 p = __DST_METRICS_PTR(prev);
119 if (prev & DST_METRICS_READ_ONLY)
120 p = NULL;
121 }
122 }
123 return p;
124}
125
f894cbf8
DM
126static inline const void *choose_neigh_daddr(struct rt6_info *rt,
127 struct sk_buff *skb,
128 const void *daddr)
39232973
DM
129{
130 struct in6_addr *p = &rt->rt6i_gateway;
131
a7563f34 132 if (!ipv6_addr_any(p))
39232973 133 return (const void *) p;
f894cbf8
DM
134 else if (skb)
135 return &ipv6_hdr(skb)->daddr;
39232973
DM
136 return daddr;
137}
138
f894cbf8
DM
139static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
140 struct sk_buff *skb,
141 const void *daddr)
d3aaeb38 142{
39232973
DM
143 struct rt6_info *rt = (struct rt6_info *) dst;
144 struct neighbour *n;
145
f894cbf8 146 daddr = choose_neigh_daddr(rt, skb, daddr);
39232973 147 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
f83c7790
DM
148 if (n)
149 return n;
150 return neigh_create(&nd_tbl, daddr, dst->dev);
151}
152
8ade06c6 153static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
f83c7790 154{
8ade06c6
DM
155 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
156 if (!n) {
157 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
158 if (IS_ERR(n))
159 return PTR_ERR(n);
160 }
97cac082 161 rt->n = n;
f83c7790
DM
162
163 return 0;
d3aaeb38
DM
164}
165
9a7ec3a9 166static struct dst_ops ip6_dst_ops_template = {
1da177e4 167 .family = AF_INET6,
09640e63 168 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
169 .gc = ip6_dst_gc,
170 .gc_thresh = 1024,
171 .check = ip6_dst_check,
0dbaee3b 172 .default_advmss = ip6_default_advmss,
ebb762f2 173 .mtu = ip6_mtu,
06582540 174 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
175 .destroy = ip6_dst_destroy,
176 .ifdown = ip6_dst_ifdown,
177 .negative_advice = ip6_negative_advice,
178 .link_failure = ip6_link_failure,
179 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 180 .redirect = rt6_do_redirect,
1ac06e03 181 .local_out = __ip6_local_out,
d3aaeb38 182 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
183};
184
ebb762f2 185static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 186{
618f9bc7
SK
187 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
188
189 return mtu ? : dst->dev->mtu;
ec831ea7
RD
190}
191
6700c270
DM
192static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
193 struct sk_buff *skb, u32 mtu)
14e50e57
DM
194{
195}
196
6700c270
DM
/* redirect callback for blackhole dsts: intentionally a no-op. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
	/* nothing to do */
}
201
0972ddb2
HB
202static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
203 unsigned long old)
204{
205 return NULL;
206}
207
14e50e57
DM
208static struct dst_ops ip6_dst_blackhole_ops = {
209 .family = AF_INET6,
09640e63 210 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
211 .destroy = ip6_dst_destroy,
212 .check = ip6_dst_check,
ebb762f2 213 .mtu = ip6_blackhole_mtu,
214f45c9 214 .default_advmss = ip6_default_advmss,
14e50e57 215 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 216 .redirect = ip6_rt_blackhole_redirect,
0972ddb2 217 .cow_metrics = ip6_rt_blackhole_cow_metrics,
d3aaeb38 218 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
219};
220
62fa8a84
DM
221static const u32 ip6_template_metrics[RTAX_MAX] = {
222 [RTAX_HOPLIMIT - 1] = 255,
223};
224
bdb3289f 225static struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
226 .dst = {
227 .__refcnt = ATOMIC_INIT(1),
228 .__use = 1,
2c20cbd7 229 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 230 .error = -ENETUNREACH,
d8d1f30b
CG
231 .input = ip6_pkt_discard,
232 .output = ip6_pkt_discard_out,
1da177e4
LT
233 },
234 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 235 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
236 .rt6i_metric = ~(u32) 0,
237 .rt6i_ref = ATOMIC_INIT(1),
238};
239
101367c2
TG
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/* Template for the "prohibit" reject route: dst error is -EACCES. */
static struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_ref	= ATOMIC_INIT(1),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
};

/* Template for the "blackhole" reject route: packets go to dst_discard. */
static struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard,
	},
	.rt6i_ref	= ATOMIC_INIT(1),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
};

#endif
276
1da177e4 277/* allocate dst with ip6_dst_ops */
97bab73f 278static inline struct rt6_info *ip6_dst_alloc(struct net *net,
957c665f 279 struct net_device *dev,
8b96d22d
DM
280 int flags,
281 struct fib6_table *table)
1da177e4 282{
97bab73f 283 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
6f3118b5 284 0, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 285
97bab73f 286 if (rt) {
8104891b
SK
287 struct dst_entry *dst = &rt->dst;
288
289 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
8b96d22d 290 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
6f3118b5 291 rt->rt6i_genid = rt_genid(net);
97bab73f 292 }
cf911662 293 return rt;
1da177e4
LT
294}
295
296static void ip6_dst_destroy(struct dst_entry *dst)
297{
298 struct rt6_info *rt = (struct rt6_info *)dst;
299 struct inet6_dev *idev = rt->rt6i_idev;
300
97cac082
DM
301 if (rt->n)
302 neigh_release(rt->n);
303
8e2ec639
YZ
304 if (!(rt->dst.flags & DST_HOST))
305 dst_destroy_metrics_generic(dst);
306
38308473 307 if (idev) {
1da177e4
LT
308 rt->rt6i_idev = NULL;
309 in6_dev_put(idev);
1ab1457c 310 }
1716a961
G
311
312 if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
313 dst_release(dst->from);
314
97bab73f
DM
315 if (rt6_has_peer(rt)) {
316 struct inet_peer *peer = rt6_peer_ptr(rt);
b3419363
DM
317 inet_putpeer(peer);
318 }
319}
320
6431cbc2
DM
321static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
322
323static u32 rt6_peer_genid(void)
324{
325 return atomic_read(&__rt6_peer_genid);
326}
327
b3419363
DM
328void rt6_bind_peer(struct rt6_info *rt, int create)
329{
97bab73f 330 struct inet_peer_base *base;
b3419363
DM
331 struct inet_peer *peer;
332
97bab73f
DM
333 base = inetpeer_base_ptr(rt->_rt6i_peer);
334 if (!base)
335 return;
336
337 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
7b34ca2a
DM
338 if (peer) {
339 if (!rt6_set_peer(rt, peer))
340 inet_putpeer(peer);
341 else
342 rt->rt6i_peer_genid = rt6_peer_genid();
343 }
1da177e4
LT
344}
345
346static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
347 int how)
348{
349 struct rt6_info *rt = (struct rt6_info *)dst;
350 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 351 struct net_device *loopback_dev =
c346dca1 352 dev_net(dev)->loopback_dev;
1da177e4 353
97cac082
DM
354 if (dev != loopback_dev) {
355 if (idev && idev->dev == dev) {
356 struct inet6_dev *loopback_idev =
357 in6_dev_get(loopback_dev);
358 if (loopback_idev) {
359 rt->rt6i_idev = loopback_idev;
360 in6_dev_put(idev);
361 }
362 }
363 if (rt->n && rt->n->dev == dev) {
364 rt->n->dev = loopback_dev;
365 dev_hold(loopback_dev);
366 dev_put(dev);
1da177e4
LT
367 }
368 }
369}
370
a50feda5 371static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 372{
1716a961
G
373 struct rt6_info *ort = NULL;
374
375 if (rt->rt6i_flags & RTF_EXPIRES) {
376 if (time_after(jiffies, rt->dst.expires))
a50feda5 377 return true;
1716a961
G
378 } else if (rt->dst.from) {
379 ort = (struct rt6_info *) rt->dst.from;
380 return (ort->rt6i_flags & RTF_EXPIRES) &&
381 time_after(jiffies, ort->dst.expires);
382 }
a50feda5 383 return false;
1da177e4
LT
384}
385
a50feda5 386static bool rt6_need_strict(const struct in6_addr *daddr)
c71099ac 387{
a02cec21
ED
388 return ipv6_addr_type(daddr) &
389 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
390}
391
1da177e4 392/*
c71099ac 393 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
394 */
395
8ed67789
DL
396static inline struct rt6_info *rt6_device_match(struct net *net,
397 struct rt6_info *rt,
b71d1d42 398 const struct in6_addr *saddr,
1da177e4 399 int oif,
d420895e 400 int flags)
1da177e4
LT
401{
402 struct rt6_info *local = NULL;
403 struct rt6_info *sprt;
404
dd3abc4e
YH
405 if (!oif && ipv6_addr_any(saddr))
406 goto out;
407
d8d1f30b 408 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 409 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
410
411 if (oif) {
1da177e4
LT
412 if (dev->ifindex == oif)
413 return sprt;
414 if (dev->flags & IFF_LOOPBACK) {
38308473 415 if (!sprt->rt6i_idev ||
1da177e4 416 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 417 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 418 continue;
1ab1457c 419 if (local && (!oif ||
1da177e4
LT
420 local->rt6i_idev->dev->ifindex == oif))
421 continue;
422 }
423 local = sprt;
424 }
dd3abc4e
YH
425 } else {
426 if (ipv6_chk_addr(net, saddr, dev,
427 flags & RT6_LOOKUP_F_IFACE))
428 return sprt;
1da177e4 429 }
dd3abc4e 430 }
1da177e4 431
dd3abc4e 432 if (oif) {
1da177e4
LT
433 if (local)
434 return local;
435
d420895e 436 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 437 return net->ipv6.ip6_null_entry;
1da177e4 438 }
dd3abc4e 439out:
1da177e4
LT
440 return rt;
441}
442
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If the route's neighbour is not in a NUD_VALID state and more than the
 * interface's rtr_probe_interval has passed since it was last updated, send
 * a neighbour solicitation to the solicited-node multicast address of the
 * neighbour. The interval check is what rate-limits the probes.
 *
 * @rt may be NULL, in which case nothing happens.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	bool send_probe = false;

	rcu_read_lock();
	neigh = rt ? rt->n : NULL;
	if (neigh && !(neigh->nud_state & NUD_VALID)) {
		/* Re-check the state under the neighbour lock. */
		read_lock_bh(&neigh->lock);
		if (!(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
			neigh->updated = jiffies;
			send_probe = true;
		}
		read_unlock_bh(&neigh->lock);

		if (send_probe) {
			struct in6_addr *target;
			struct in6_addr mcaddr;

			target = (struct in6_addr *)&neigh->primary_key;
			addrconf_addr_solict_mult(target, &mcaddr);
			ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
		}
	}
	rcu_read_unlock();
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF, probing is compiled out. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
482
1da177e4 483/*
554cfb7e 484 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 485 */
b6f99a21 486static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 487{
d1918542 488 struct net_device *dev = rt->dst.dev;
161980f4 489 if (!oif || dev->ifindex == oif)
554cfb7e 490 return 2;
161980f4
DM
491 if ((dev->flags & IFF_LOOPBACK) &&
492 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
493 return 1;
494 return 0;
554cfb7e 495}
1da177e4 496
b6f99a21 497static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 498{
f2c31e32 499 struct neighbour *neigh;
398bcbeb 500 int m;
f2c31e32
ED
501
502 rcu_read_lock();
97cac082 503 neigh = rt->n;
4d0c5911
YH
504 if (rt->rt6i_flags & RTF_NONEXTHOP ||
505 !(rt->rt6i_flags & RTF_GATEWAY))
506 m = 1;
507 else if (neigh) {
554cfb7e
YH
508 read_lock_bh(&neigh->lock);
509 if (neigh->nud_state & NUD_VALID)
4d0c5911 510 m = 2;
398bcbeb
YH
511#ifdef CONFIG_IPV6_ROUTER_PREF
512 else if (neigh->nud_state & NUD_FAILED)
513 m = 0;
514#endif
515 else
ea73ee23 516 m = 1;
554cfb7e 517 read_unlock_bh(&neigh->lock);
398bcbeb
YH
518 } else
519 m = 0;
f2c31e32 520 rcu_read_unlock();
554cfb7e 521 return m;
1da177e4
LT
522}
523
554cfb7e
YH
524static int rt6_score_route(struct rt6_info *rt, int oif,
525 int strict)
1da177e4 526{
4d0c5911 527 int m, n;
1ab1457c 528
4d0c5911 529 m = rt6_check_dev(rt, oif);
77d16f45 530 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 531 return -1;
ebacaaa0
YH
532#ifdef CONFIG_IPV6_ROUTER_PREF
533 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
534#endif
4d0c5911 535 n = rt6_check_neigh(rt);
557e92ef 536 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
537 return -1;
538 return m;
539}
540
f11e6659
DM
541static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
542 int *mpri, struct rt6_info *match)
554cfb7e 543{
f11e6659
DM
544 int m;
545
546 if (rt6_check_expired(rt))
547 goto out;
548
549 m = rt6_score_route(rt, oif, strict);
550 if (m < 0)
551 goto out;
552
553 if (m > *mpri) {
554 if (strict & RT6_LOOKUP_F_REACHABLE)
555 rt6_probe(match);
556 *mpri = m;
557 match = rt;
558 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
559 rt6_probe(rt);
560 }
561
562out:
563 return match;
564}
565
566static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
567 struct rt6_info *rr_head,
568 u32 metric, int oif, int strict)
569{
570 struct rt6_info *rt, *match;
554cfb7e 571 int mpri = -1;
1da177e4 572
f11e6659
DM
573 match = NULL;
574 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 575 rt = rt->dst.rt6_next)
f11e6659
DM
576 match = find_match(rt, oif, strict, &mpri, match);
577 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 578 rt = rt->dst.rt6_next)
f11e6659 579 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 580
f11e6659
DM
581 return match;
582}
1da177e4 583
f11e6659
DM
584static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
585{
586 struct rt6_info *match, *rt0;
8ed67789 587 struct net *net;
1da177e4 588
f11e6659
DM
589 rt0 = fn->rr_ptr;
590 if (!rt0)
591 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 592
f11e6659 593 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 594
554cfb7e 595 if (!match &&
f11e6659 596 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 597 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 598
554cfb7e 599 /* no entries matched; do round-robin */
f11e6659
DM
600 if (!next || next->rt6i_metric != rt0->rt6i_metric)
601 next = fn->leaf;
602
603 if (next != rt0)
604 fn->rr_ptr = next;
1da177e4 605 }
1da177e4 606
d1918542 607 net = dev_net(rt0->dst.dev);
a02cec21 608 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
609}
610
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a route information option (@opt, @len bytes) received on @dev
 * from gateway @gwaddr.
 *
 * After validating the option, an existing matching route is deleted when
 * the advertised lifetime is zero, has its preference flags refreshed when
 * the lifetime is non-zero, or a new route is added when none exists. The
 * route's expiry is then set from the lifetime (or cleared when it is not
 * finite).
 *
 * Returns 0 on success, -EINVAL for a malformed option or invalid preference.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned long lifetime;
	unsigned int pref;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		/* Full 128-bit prefix present: use it in place. */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* Shorter option: copy only prefix_len bits into a buffer. */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	} else if (rt) {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
	} else if (lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev->ifindex, pref);
	}

	if (rt) {
		if (addrconf_finite_timeout(lifetime))
			rt6_set_expires(rt, jiffies + HZ * lifetime);
		else
			rt6_clean_expires(rt);

		dst_release(&rt->dst);
	}
	return 0;
}
#endif
683
/*
 * BACKTRACK - climb the fib6 tree when the current lookup hit the null entry.
 *
 * Expects the caller to provide local variables 'rt' and 'fn' and labels
 * 'restart' and 'out'. While 'rt' is the namespace null entry, it walks to
 * the parent node (or looks up @saddr in the parent's subtree when that
 * subtree is not where we came from) and jumps to 'restart' at the first
 * node carrying route info, or to 'out' on reaching the tree root.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == __net->ipv6.ip6_null_entry) {				\
		struct fib6_node *parent;				\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			parent = fn->parent;				\
			if (FIB6_SUBTREE(parent) &&			\
			    FIB6_SUBTREE(parent) != fn)			\
				fn = fib6_lookup(FIB6_SUBTREE(parent),	\
						 NULL, saddr);		\
			else						\
				fn = parent;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
c71099ac 701
8ed67789
DL
702static struct rt6_info *ip6_pol_route_lookup(struct net *net,
703 struct fib6_table *table,
4c9483b2 704 struct flowi6 *fl6, int flags)
1da177e4
LT
705{
706 struct fib6_node *fn;
707 struct rt6_info *rt;
708
c71099ac 709 read_lock_bh(&table->tb6_lock);
4c9483b2 710 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
711restart:
712 rt = fn->leaf;
4c9483b2
DM
713 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
714 BACKTRACK(net, &fl6->saddr);
c71099ac 715out:
d8d1f30b 716 dst_use(&rt->dst, jiffies);
c71099ac 717 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
718 return rt;
719
720}
721
ea6e574e
FW
722struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
723 int flags)
724{
725 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
726}
727EXPORT_SYMBOL_GPL(ip6_route_lookup);
728
9acd9f3a
YH
729struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
730 const struct in6_addr *saddr, int oif, int strict)
c71099ac 731{
4c9483b2
DM
732 struct flowi6 fl6 = {
733 .flowi6_oif = oif,
734 .daddr = *daddr,
c71099ac
TG
735 };
736 struct dst_entry *dst;
77d16f45 737 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 738
adaa70bb 739 if (saddr) {
4c9483b2 740 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
741 flags |= RT6_LOOKUP_F_HAS_SADDR;
742 }
743
4c9483b2 744 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
745 if (dst->error == 0)
746 return (struct rt6_info *) dst;
747
748 dst_release(dst);
749
1da177e4
LT
750 return NULL;
751}
752
7159039a
YH
753EXPORT_SYMBOL(rt6_lookup);
754
/*
 * ip6_ins_rt() is called without table->tb6_lock held.
 * It takes a new route entry; if the addition fails for any reason, the
 * route is freed. In any case, if the caller does not hold a reference
 * to it, it may be destroyed.
 */
760
86872cb5 761static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
762{
763 int err;
c71099ac 764 struct fib6_table *table;
1da177e4 765
c71099ac
TG
766 table = rt->rt6i_table;
767 write_lock_bh(&table->tb6_lock);
86872cb5 768 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 769 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
770
771 return err;
772}
773
40e22e8f
TG
774int ip6_ins_rt(struct rt6_info *rt)
775{
4d1169c1 776 struct nl_info info = {
d1918542 777 .nl_net = dev_net(rt->dst.dev),
4d1169c1 778 };
528c4ceb 779 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
780}
781
1716a961 782static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
21efcfa0 783 const struct in6_addr *daddr,
b71d1d42 784 const struct in6_addr *saddr)
1da177e4 785{
1da177e4
LT
786 struct rt6_info *rt;
787
788 /*
789 * Clone the route.
790 */
791
21efcfa0 792 rt = ip6_rt_copy(ort, daddr);
1da177e4
LT
793
794 if (rt) {
14deae41
DM
795 int attempts = !in_softirq();
796
38308473 797 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
bb3c3686 798 if (ort->rt6i_dst.plen != 128 &&
21efcfa0 799 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
58c4fb86 800 rt->rt6i_flags |= RTF_ANYCAST;
4e3fd7a0 801 rt->rt6i_gateway = *daddr;
58c4fb86 802 }
1da177e4 803
1da177e4 804 rt->rt6i_flags |= RTF_CACHE;
1da177e4
LT
805
806#ifdef CONFIG_IPV6_SUBTREES
807 if (rt->rt6i_src.plen && saddr) {
4e3fd7a0 808 rt->rt6i_src.addr = *saddr;
1da177e4
LT
809 rt->rt6i_src.plen = 128;
810 }
811#endif
812
14deae41 813 retry:
8ade06c6 814 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
d1918542 815 struct net *net = dev_net(rt->dst.dev);
14deae41
DM
816 int saved_rt_min_interval =
817 net->ipv6.sysctl.ip6_rt_gc_min_interval;
818 int saved_rt_elasticity =
819 net->ipv6.sysctl.ip6_rt_gc_elasticity;
820
821 if (attempts-- > 0) {
822 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
823 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
824
86393e52 825 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
826
827 net->ipv6.sysctl.ip6_rt_gc_elasticity =
828 saved_rt_elasticity;
829 net->ipv6.sysctl.ip6_rt_gc_min_interval =
830 saved_rt_min_interval;
831 goto retry;
832 }
833
f3213831 834 net_warn_ratelimited("Neighbour table overflow\n");
d8d1f30b 835 dst_free(&rt->dst);
14deae41
DM
836 return NULL;
837 }
95a9a5ba 838 }
1da177e4 839
95a9a5ba
YH
840 return rt;
841}
1da177e4 842
21efcfa0
ED
843static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
844 const struct in6_addr *daddr)
299d9939 845{
21efcfa0
ED
846 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
847
299d9939 848 if (rt) {
299d9939 849 rt->rt6i_flags |= RTF_CACHE;
97cac082 850 rt->n = neigh_clone(ort->n);
299d9939
YH
851 }
852 return rt;
853}
854
8ed67789 855static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 856 struct flowi6 *fl6, int flags)
1da177e4
LT
857{
858 struct fib6_node *fn;
519fbd87 859 struct rt6_info *rt, *nrt;
c71099ac 860 int strict = 0;
1da177e4 861 int attempts = 3;
519fbd87 862 int err;
53b7997f 863 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 864
77d16f45 865 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
866
867relookup:
c71099ac 868 read_lock_bh(&table->tb6_lock);
1da177e4 869
8238dd06 870restart_2:
4c9483b2 871 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1da177e4
LT
872
873restart:
4acad72d 874 rt = rt6_select(fn, oif, strict | reachable);
8ed67789 875
4c9483b2 876 BACKTRACK(net, &fl6->saddr);
8ed67789 877 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 878 rt->rt6i_flags & RTF_CACHE)
1ddef044 879 goto out;
1da177e4 880
d8d1f30b 881 dst_hold(&rt->dst);
c71099ac 882 read_unlock_bh(&table->tb6_lock);
fb9de91e 883
97cac082 884 if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
4c9483b2 885 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
7343ff31 886 else if (!(rt->dst.flags & DST_HOST))
4c9483b2 887 nrt = rt6_alloc_clone(rt, &fl6->daddr);
7343ff31
DM
888 else
889 goto out2;
e40cf353 890
d8d1f30b 891 dst_release(&rt->dst);
8ed67789 892 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 893
d8d1f30b 894 dst_hold(&rt->dst);
519fbd87 895 if (nrt) {
40e22e8f 896 err = ip6_ins_rt(nrt);
519fbd87 897 if (!err)
1da177e4 898 goto out2;
1da177e4 899 }
1da177e4 900
519fbd87
YH
901 if (--attempts <= 0)
902 goto out2;
903
904 /*
c71099ac 905 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
906 * released someone could insert this route. Relookup.
907 */
d8d1f30b 908 dst_release(&rt->dst);
519fbd87
YH
909 goto relookup;
910
911out:
8238dd06
YH
912 if (reachable) {
913 reachable = 0;
914 goto restart_2;
915 }
d8d1f30b 916 dst_hold(&rt->dst);
c71099ac 917 read_unlock_bh(&table->tb6_lock);
1da177e4 918out2:
d8d1f30b
CG
919 rt->dst.lastuse = jiffies;
920 rt->dst.__use++;
c71099ac
TG
921
922 return rt;
1da177e4
LT
923}
924
8ed67789 925static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 926 struct flowi6 *fl6, int flags)
4acad72d 927{
4c9483b2 928 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
929}
930
72331bc0
SL
931static struct dst_entry *ip6_route_input_lookup(struct net *net,
932 struct net_device *dev,
933 struct flowi6 *fl6, int flags)
934{
935 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
936 flags |= RT6_LOOKUP_F_IFACE;
937
938 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
939}
940
c71099ac
TG
941void ip6_route_input(struct sk_buff *skb)
942{
b71d1d42 943 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 944 struct net *net = dev_net(skb->dev);
adaa70bb 945 int flags = RT6_LOOKUP_F_HAS_SADDR;
4c9483b2
DM
946 struct flowi6 fl6 = {
947 .flowi6_iif = skb->dev->ifindex,
948 .daddr = iph->daddr,
949 .saddr = iph->saddr,
38308473 950 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
4c9483b2
DM
951 .flowi6_mark = skb->mark,
952 .flowi6_proto = iph->nexthdr,
c71099ac 953 };
adaa70bb 954
72331bc0 955 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
956}
957
8ed67789 958static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 959 struct flowi6 *fl6, int flags)
1da177e4 960{
4c9483b2 961 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
962}
963
9c7a4f9c 964struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 965 struct flowi6 *fl6)
c71099ac
TG
966{
967 int flags = 0;
968
4dc27d1c
DM
969 fl6->flowi6_iif = net->loopback_dev->ifindex;
970
4c9483b2 971 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
77d16f45 972 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 973
4c9483b2 974 if (!ipv6_addr_any(&fl6->saddr))
adaa70bb 975 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
976 else if (sk)
977 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 978
4c9483b2 979 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4
LT
980}
981
7159039a 982EXPORT_SYMBOL(ip6_route_output);
1da177e4 983
2774c131 984struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 985{
5c1e6aa3 986 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
987 struct dst_entry *new = NULL;
988
f5b0a874 989 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
14e50e57 990 if (rt) {
d8d1f30b 991 new = &rt->dst;
14e50e57 992
8104891b
SK
993 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
994 rt6_init_peer(rt, net->ipv6.peers);
995
14e50e57 996 new->__use = 1;
352e512c
HX
997 new->input = dst_discard;
998 new->output = dst_discard;
14e50e57 999
21efcfa0
ED
1000 if (dst_metrics_read_only(&ort->dst))
1001 new->_metrics = ort->dst._metrics;
1002 else
1003 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
1004 rt->rt6i_idev = ort->rt6i_idev;
1005 if (rt->rt6i_idev)
1006 in6_dev_hold(rt->rt6i_idev);
14e50e57 1007
4e3fd7a0 1008 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961
G
1009 rt->rt6i_flags = ort->rt6i_flags;
1010 rt6_clean_expires(rt);
14e50e57
DM
1011 rt->rt6i_metric = 0;
1012
1013 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1014#ifdef CONFIG_IPV6_SUBTREES
1015 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1016#endif
1017
1018 dst_free(new);
1019 }
1020
69ead7af
DM
1021 dst_release(dst_orig);
1022 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 1023}
14e50e57 1024
1da177e4
LT
1025/*
1026 * Destination cache support functions
1027 */
1028
1029static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1030{
1031 struct rt6_info *rt;
1032
1033 rt = (struct rt6_info *) dst;
1034
6f3118b5
ND
1035 /* All IPV6 dsts are created with ->obsolete set to the value
1036 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1037 * into this function always.
1038 */
1039 if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
1040 return NULL;
1041
6431cbc2
DM
1042 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
1043 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
97bab73f 1044 if (!rt6_has_peer(rt))
6431cbc2
DM
1045 rt6_bind_peer(rt, 0);
1046 rt->rt6i_peer_genid = rt6_peer_genid();
1047 }
1da177e4 1048 return dst;
6431cbc2 1049 }
1da177e4
LT
1050 return NULL;
1051}
1052
1053static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1054{
1055 struct rt6_info *rt = (struct rt6_info *) dst;
1056
1057 if (rt) {
54c1a859
YH
1058 if (rt->rt6i_flags & RTF_CACHE) {
1059 if (rt6_check_expired(rt)) {
1060 ip6_del_rt(rt);
1061 dst = NULL;
1062 }
1063 } else {
1da177e4 1064 dst_release(dst);
54c1a859
YH
1065 dst = NULL;
1066 }
1da177e4 1067 }
54c1a859 1068 return dst;
1da177e4
LT
1069}
1070
1071static void ip6_link_failure(struct sk_buff *skb)
1072{
1073 struct rt6_info *rt;
1074
3ffe533c 1075 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1076
adf30907 1077 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1078 if (rt) {
1716a961
G
1079 if (rt->rt6i_flags & RTF_CACHE)
1080 rt6_update_expires(rt, 0);
1081 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1da177e4
LT
1082 rt->rt6i_node->fn_sernum = -1;
1083 }
1084}
1085
6700c270
DM
1086static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1087 struct sk_buff *skb, u32 mtu)
1da177e4
LT
1088{
1089 struct rt6_info *rt6 = (struct rt6_info*)dst;
1090
81aded24 1091 dst_confirm(dst);
1da177e4 1092 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
81aded24
DM
1093 struct net *net = dev_net(dst->dev);
1094
1da177e4
LT
1095 rt6->rt6i_flags |= RTF_MODIFIED;
1096 if (mtu < IPV6_MIN_MTU) {
defb3519 1097 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 1098 mtu = IPV6_MIN_MTU;
defb3519
DM
1099 features |= RTAX_FEATURE_ALLFRAG;
1100 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 1101 }
defb3519 1102 dst_metric_set(dst, RTAX_MTU, mtu);
81aded24 1103 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1104 }
1105}
1106
42ae66c8
DM
1107void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1108 int oif, u32 mark)
81aded24
DM
1109{
1110 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1111 struct dst_entry *dst;
1112 struct flowi6 fl6;
1113
1114 memset(&fl6, 0, sizeof(fl6));
1115 fl6.flowi6_oif = oif;
1116 fl6.flowi6_mark = mark;
3e12939a 1117 fl6.flowi6_flags = 0;
81aded24
DM
1118 fl6.daddr = iph->daddr;
1119 fl6.saddr = iph->saddr;
1120 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1121
1122 dst = ip6_route_output(net, NULL, &fl6);
1123 if (!dst->error)
6700c270 1124 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
81aded24
DM
1125 dst_release(dst);
1126}
1127EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1128
1129void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1130{
1131 ip6_update_pmtu(skb, sock_net(sk), mtu,
1132 sk->sk_bound_dev_if, sk->sk_mark);
1133}
1134EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1135
3a5ad2ee
DM
1136void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1137{
1138 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1139 struct dst_entry *dst;
1140 struct flowi6 fl6;
1141
1142 memset(&fl6, 0, sizeof(fl6));
1143 fl6.flowi6_oif = oif;
1144 fl6.flowi6_mark = mark;
1145 fl6.flowi6_flags = 0;
1146 fl6.daddr = iph->daddr;
1147 fl6.saddr = iph->saddr;
1148 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1149
1150 dst = ip6_route_output(net, NULL, &fl6);
1151 if (!dst->error)
6700c270 1152 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
1153 dst_release(dst);
1154}
1155EXPORT_SYMBOL_GPL(ip6_redirect);
1156
1157void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1158{
1159 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1160}
1161EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1162
0dbaee3b 1163static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1164{
0dbaee3b
DM
1165 struct net_device *dev = dst->dev;
1166 unsigned int mtu = dst_mtu(dst);
1167 struct net *net = dev_net(dev);
1168
1da177e4
LT
1169 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1170
5578689a
DL
1171 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1172 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1173
1174 /*
1ab1457c
YH
1175 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1176 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1177 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1178 * rely only on pmtu discovery"
1179 */
1180 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1181 mtu = IPV6_MAXPLEN;
1182 return mtu;
1183}
1184
ebb762f2 1185static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1186{
d33e4553 1187 struct inet6_dev *idev;
618f9bc7
SK
1188 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1189
1190 if (mtu)
1191 return mtu;
1192
1193 mtu = IPV6_MIN_MTU;
d33e4553
DM
1194
1195 rcu_read_lock();
1196 idev = __in6_dev_get(dst->dev);
1197 if (idev)
1198 mtu = idev->cnf.mtu6;
1199 rcu_read_unlock();
1200
1201 return mtu;
1202}
1203
3b00944c
YH
1204static struct dst_entry *icmp6_dst_gc_list;
1205static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1206
3b00944c 1207struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 1208 struct neighbour *neigh,
87a11578 1209 struct flowi6 *fl6)
1da177e4 1210{
87a11578 1211 struct dst_entry *dst;
1da177e4
LT
1212 struct rt6_info *rt;
1213 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1214 struct net *net = dev_net(dev);
1da177e4 1215
38308473 1216 if (unlikely(!idev))
122bdf67 1217 return ERR_PTR(-ENODEV);
1da177e4 1218
8b96d22d 1219 rt = ip6_dst_alloc(net, dev, 0, NULL);
38308473 1220 if (unlikely(!rt)) {
1da177e4 1221 in6_dev_put(idev);
87a11578 1222 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1223 goto out;
1224 }
1225
1da177e4
LT
1226 if (neigh)
1227 neigh_hold(neigh);
14deae41 1228 else {
f894cbf8 1229 neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
b43faac6 1230 if (IS_ERR(neigh)) {
252c3d84 1231 in6_dev_put(idev);
b43faac6
DM
1232 dst_free(&rt->dst);
1233 return ERR_CAST(neigh);
1234 }
14deae41 1235 }
1da177e4 1236
8e2ec639
YZ
1237 rt->dst.flags |= DST_HOST;
1238 rt->dst.output = ip6_output;
97cac082 1239 rt->n = neigh;
d8d1f30b 1240 atomic_set(&rt->dst.__refcnt, 1);
87a11578 1241 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1242 rt->rt6i_dst.plen = 128;
1243 rt->rt6i_idev = idev;
7011687f 1244 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1da177e4 1245
3b00944c 1246 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1247 rt->dst.next = icmp6_dst_gc_list;
1248 icmp6_dst_gc_list = &rt->dst;
3b00944c 1249 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1250
5578689a 1251 fib6_force_start_gc(net);
1da177e4 1252
87a11578
DM
1253 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1254
1da177e4 1255out:
87a11578 1256 return dst;
1da177e4
LT
1257}
1258
3d0f24a7 1259int icmp6_dst_gc(void)
1da177e4 1260{
e9476e95 1261 struct dst_entry *dst, **pprev;
3d0f24a7 1262 int more = 0;
1da177e4 1263
3b00944c
YH
1264 spin_lock_bh(&icmp6_dst_lock);
1265 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1266
1da177e4
LT
1267 while ((dst = *pprev) != NULL) {
1268 if (!atomic_read(&dst->__refcnt)) {
1269 *pprev = dst->next;
1270 dst_free(dst);
1da177e4
LT
1271 } else {
1272 pprev = &dst->next;
3d0f24a7 1273 ++more;
1da177e4
LT
1274 }
1275 }
1276
3b00944c 1277 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1278
3d0f24a7 1279 return more;
1da177e4
LT
1280}
1281
1e493d19
DM
1282static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1283 void *arg)
1284{
1285 struct dst_entry *dst, **pprev;
1286
1287 spin_lock_bh(&icmp6_dst_lock);
1288 pprev = &icmp6_dst_gc_list;
1289 while ((dst = *pprev) != NULL) {
1290 struct rt6_info *rt = (struct rt6_info *) dst;
1291 if (func(rt, arg)) {
1292 *pprev = dst->next;
1293 dst_free(dst);
1294 } else {
1295 pprev = &dst->next;
1296 }
1297 }
1298 spin_unlock_bh(&icmp6_dst_lock);
1299}
1300
569d3645 1301static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1302{
1da177e4 1303 unsigned long now = jiffies;
86393e52 1304 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1305 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1306 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1307 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1308 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1309 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1310 int entries;
7019b78e 1311
fc66f95c 1312 entries = dst_entries_get_fast(ops);
7019b78e 1313 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1314 entries <= rt_max_size)
1da177e4
LT
1315 goto out;
1316
6891a346
BT
1317 net->ipv6.ip6_rt_gc_expire++;
1318 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1319 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1320 entries = dst_entries_get_slow(ops);
1321 if (entries < ops->gc_thresh)
7019b78e 1322 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1323out:
7019b78e 1324 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1325 return entries > rt_max_size;
1da177e4
LT
1326}
1327
1328/* Clean host part of a prefix. Not necessary in radix tree,
1329 but results in cleaner routing tables.
1330
1331 Remove it only when all the things will work!
1332 */
1333
6b75d090 1334int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1335{
5170ae82 1336 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1337 if (hoplimit == 0) {
6b75d090 1338 struct net_device *dev = dst->dev;
c68f24cc
ED
1339 struct inet6_dev *idev;
1340
1341 rcu_read_lock();
1342 idev = __in6_dev_get(dev);
1343 if (idev)
6b75d090 1344 hoplimit = idev->cnf.hop_limit;
c68f24cc 1345 else
53b7997f 1346 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1347 rcu_read_unlock();
1da177e4
LT
1348 }
1349 return hoplimit;
1350}
abbf46ae 1351EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1352
1353/*
1354 *
1355 */
1356
86872cb5 1357int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1358{
1359 int err;
5578689a 1360 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1361 struct rt6_info *rt = NULL;
1362 struct net_device *dev = NULL;
1363 struct inet6_dev *idev = NULL;
c71099ac 1364 struct fib6_table *table;
1da177e4
LT
1365 int addr_type;
1366
86872cb5 1367 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1368 return -EINVAL;
1369#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1370 if (cfg->fc_src_len)
1da177e4
LT
1371 return -EINVAL;
1372#endif
86872cb5 1373 if (cfg->fc_ifindex) {
1da177e4 1374 err = -ENODEV;
5578689a 1375 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1376 if (!dev)
1377 goto out;
1378 idev = in6_dev_get(dev);
1379 if (!idev)
1380 goto out;
1381 }
1382
86872cb5
TG
1383 if (cfg->fc_metric == 0)
1384 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1385
d71314b4 1386 err = -ENOBUFS;
38308473
DM
1387 if (cfg->fc_nlinfo.nlh &&
1388 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1389 table = fib6_get_table(net, cfg->fc_table);
38308473 1390 if (!table) {
f3213831 1391 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
1392 table = fib6_new_table(net, cfg->fc_table);
1393 }
1394 } else {
1395 table = fib6_new_table(net, cfg->fc_table);
1396 }
38308473
DM
1397
1398 if (!table)
c71099ac 1399 goto out;
c71099ac 1400
8b96d22d 1401 rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1da177e4 1402
38308473 1403 if (!rt) {
1da177e4
LT
1404 err = -ENOMEM;
1405 goto out;
1406 }
1407
1716a961
G
1408 if (cfg->fc_flags & RTF_EXPIRES)
1409 rt6_set_expires(rt, jiffies +
1410 clock_t_to_jiffies(cfg->fc_expires));
1411 else
1412 rt6_clean_expires(rt);
1da177e4 1413
86872cb5
TG
1414 if (cfg->fc_protocol == RTPROT_UNSPEC)
1415 cfg->fc_protocol = RTPROT_BOOT;
1416 rt->rt6i_protocol = cfg->fc_protocol;
1417
1418 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1419
1420 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1421 rt->dst.input = ip6_mc_input;
ab79ad14
1422 else if (cfg->fc_flags & RTF_LOCAL)
1423 rt->dst.input = ip6_input;
1da177e4 1424 else
d8d1f30b 1425 rt->dst.input = ip6_forward;
1da177e4 1426
d8d1f30b 1427 rt->dst.output = ip6_output;
1da177e4 1428
86872cb5
TG
1429 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1430 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1431 if (rt->rt6i_dst.plen == 128)
11d53b49 1432 rt->dst.flags |= DST_HOST;
1da177e4 1433
8e2ec639
YZ
1434 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1435 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1436 if (!metrics) {
1437 err = -ENOMEM;
1438 goto out;
1439 }
1440 dst_init_metrics(&rt->dst, metrics, 0);
1441 }
1da177e4 1442#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1443 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1444 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1445#endif
1446
86872cb5 1447 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1448
1449 /* We cannot add true routes via loopback here,
1450 they would result in kernel looping; promote them to reject routes
1451 */
86872cb5 1452 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1453 (dev && (dev->flags & IFF_LOOPBACK) &&
1454 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1455 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1456 /* hold loopback dev/idev if we haven't done so. */
5578689a 1457 if (dev != net->loopback_dev) {
1da177e4
LT
1458 if (dev) {
1459 dev_put(dev);
1460 in6_dev_put(idev);
1461 }
5578689a 1462 dev = net->loopback_dev;
1da177e4
LT
1463 dev_hold(dev);
1464 idev = in6_dev_get(dev);
1465 if (!idev) {
1466 err = -ENODEV;
1467 goto out;
1468 }
1469 }
d8d1f30b
CG
1470 rt->dst.output = ip6_pkt_discard_out;
1471 rt->dst.input = ip6_pkt_discard;
1472 rt->dst.error = -ENETUNREACH;
1da177e4
LT
1473 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1474 goto install_route;
1475 }
1476
86872cb5 1477 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1478 const struct in6_addr *gw_addr;
1da177e4
LT
1479 int gwa_type;
1480
86872cb5 1481 gw_addr = &cfg->fc_gateway;
4e3fd7a0 1482 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
1483 gwa_type = ipv6_addr_type(gw_addr);
1484
1485 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1486 struct rt6_info *grt;
1487
1488 /* IPv6 strictly inhibits using not link-local
1489 addresses as nexthop address.
1490 Otherwise, router will not able to send redirects.
1491 It is very good, but in some (rare!) circumstances
1492 (SIT, PtP, NBMA NOARP links) it is handy to allow
1493 some exceptions. --ANK
1494 */
1495 err = -EINVAL;
38308473 1496 if (!(gwa_type & IPV6_ADDR_UNICAST))
1da177e4
LT
1497 goto out;
1498
5578689a 1499 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1500
1501 err = -EHOSTUNREACH;
38308473 1502 if (!grt)
1da177e4
LT
1503 goto out;
1504 if (dev) {
d1918542 1505 if (dev != grt->dst.dev) {
d8d1f30b 1506 dst_release(&grt->dst);
1da177e4
LT
1507 goto out;
1508 }
1509 } else {
d1918542 1510 dev = grt->dst.dev;
1da177e4
LT
1511 idev = grt->rt6i_idev;
1512 dev_hold(dev);
1513 in6_dev_hold(grt->rt6i_idev);
1514 }
38308473 1515 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 1516 err = 0;
d8d1f30b 1517 dst_release(&grt->dst);
1da177e4
LT
1518
1519 if (err)
1520 goto out;
1521 }
1522 err = -EINVAL;
38308473 1523 if (!dev || (dev->flags & IFF_LOOPBACK))
1da177e4
LT
1524 goto out;
1525 }
1526
1527 err = -ENODEV;
38308473 1528 if (!dev)
1da177e4
LT
1529 goto out;
1530
c3968a85
DW
1531 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1532 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1533 err = -EINVAL;
1534 goto out;
1535 }
4e3fd7a0 1536 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
1537 rt->rt6i_prefsrc.plen = 128;
1538 } else
1539 rt->rt6i_prefsrc.plen = 0;
1540
86872cb5 1541 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
8ade06c6 1542 err = rt6_bind_neighbour(rt, dev);
f83c7790 1543 if (err)
1da177e4 1544 goto out;
1da177e4
LT
1545 }
1546
86872cb5 1547 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1548
1549install_route:
86872cb5
TG
1550 if (cfg->fc_mx) {
1551 struct nlattr *nla;
1552 int remaining;
1553
1554 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1555 int type = nla_type(nla);
86872cb5
TG
1556
1557 if (type) {
1558 if (type > RTAX_MAX) {
1da177e4
LT
1559 err = -EINVAL;
1560 goto out;
1561 }
86872cb5 1562
defb3519 1563 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1564 }
1da177e4
LT
1565 }
1566 }
1567
d8d1f30b 1568 rt->dst.dev = dev;
1da177e4 1569 rt->rt6i_idev = idev;
c71099ac 1570 rt->rt6i_table = table;
63152fc0 1571
c346dca1 1572 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1573
86872cb5 1574 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1575
1576out:
1577 if (dev)
1578 dev_put(dev);
1579 if (idev)
1580 in6_dev_put(idev);
1581 if (rt)
d8d1f30b 1582 dst_free(&rt->dst);
1da177e4
LT
1583 return err;
1584}
1585
86872cb5 1586static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1587{
1588 int err;
c71099ac 1589 struct fib6_table *table;
d1918542 1590 struct net *net = dev_net(rt->dst.dev);
1da177e4 1591
8ed67789 1592 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1593 return -ENOENT;
1594
c71099ac
TG
1595 table = rt->rt6i_table;
1596 write_lock_bh(&table->tb6_lock);
1da177e4 1597
86872cb5 1598 err = fib6_del(rt, info);
d8d1f30b 1599 dst_release(&rt->dst);
1da177e4 1600
c71099ac 1601 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1602
1603 return err;
1604}
1605
e0a1ad73
TG
1606int ip6_del_rt(struct rt6_info *rt)
1607{
4d1169c1 1608 struct nl_info info = {
d1918542 1609 .nl_net = dev_net(rt->dst.dev),
4d1169c1 1610 };
528c4ceb 1611 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1612}
1613
86872cb5 1614static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1615{
c71099ac 1616 struct fib6_table *table;
1da177e4
LT
1617 struct fib6_node *fn;
1618 struct rt6_info *rt;
1619 int err = -ESRCH;
1620
5578689a 1621 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 1622 if (!table)
c71099ac
TG
1623 return err;
1624
1625 read_lock_bh(&table->tb6_lock);
1da177e4 1626
c71099ac 1627 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1628 &cfg->fc_dst, cfg->fc_dst_len,
1629 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1630
1da177e4 1631 if (fn) {
d8d1f30b 1632 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1633 if (cfg->fc_ifindex &&
d1918542
DM
1634 (!rt->dst.dev ||
1635 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 1636 continue;
86872cb5
TG
1637 if (cfg->fc_flags & RTF_GATEWAY &&
1638 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1639 continue;
86872cb5 1640 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1641 continue;
d8d1f30b 1642 dst_hold(&rt->dst);
c71099ac 1643 read_unlock_bh(&table->tb6_lock);
1da177e4 1644
86872cb5 1645 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1646 }
1647 }
c71099ac 1648 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1649
1650 return err;
1651}
1652
6700c270 1653static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 1654{
e8599ff4 1655 struct net *net = dev_net(skb->dev);
a6279458 1656 struct netevent_redirect netevent;
e8599ff4
DM
1657 struct rt6_info *rt, *nrt = NULL;
1658 const struct in6_addr *target;
e8599ff4 1659 struct ndisc_options ndopts;
6e157b6a
DM
1660 const struct in6_addr *dest;
1661 struct neighbour *old_neigh;
e8599ff4
DM
1662 struct inet6_dev *in6_dev;
1663 struct neighbour *neigh;
1664 struct icmp6hdr *icmph;
6e157b6a
DM
1665 int optlen, on_link;
1666 u8 *lladdr;
e8599ff4
DM
1667
1668 optlen = skb->tail - skb->transport_header;
1669 optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1670
1671 if (optlen < 0) {
6e157b6a 1672 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
1673 return;
1674 }
1675
1676 icmph = icmp6_hdr(skb);
1677 target = (const struct in6_addr *) (icmph + 1);
1678 dest = target + 1;
1679
1680 if (ipv6_addr_is_multicast(dest)) {
6e157b6a 1681 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
1682 return;
1683 }
1684
6e157b6a 1685 on_link = 0;
e8599ff4
DM
1686 if (ipv6_addr_equal(dest, target)) {
1687 on_link = 1;
1688 } else if (ipv6_addr_type(target) !=
1689 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 1690 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
1691 return;
1692 }
1693
1694 in6_dev = __in6_dev_get(skb->dev);
1695 if (!in6_dev)
1696 return;
1697 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1698 return;
1699
1700 /* RFC2461 8.1:
1701 * The IP source address of the Redirect MUST be the same as the current
1702 * first-hop router for the specified ICMP Destination Address.
1703 */
1704
1705 if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
1706 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1707 return;
1708 }
6e157b6a
DM
1709
1710 lladdr = NULL;
e8599ff4
DM
1711 if (ndopts.nd_opts_tgt_lladdr) {
1712 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1713 skb->dev);
1714 if (!lladdr) {
1715 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1716 return;
1717 }
1718 }
1719
6e157b6a
DM
1720 rt = (struct rt6_info *) dst;
1721 if (rt == net->ipv6.ip6_null_entry) {
1722 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 1723 return;
6e157b6a 1724 }
e8599ff4 1725
6e157b6a
DM
1726 /* Redirect received -> path was valid.
1727 * Look, redirects are sent only in response to data packets,
1728 * so that this nexthop apparently is reachable. --ANK
1729 */
1730 dst_confirm(&rt->dst);
a6279458 1731
6e157b6a
DM
1732 neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
1733 if (!neigh)
1734 return;
a6279458 1735
6e157b6a
DM
1736 /* Duplicate redirect: silently ignore. */
1737 old_neigh = rt->n;
1738 if (neigh == old_neigh)
a6279458 1739 goto out;
1da177e4 1740
1da177e4
LT
1741 /*
1742 * We have finally decided to accept it.
1743 */
1744
1ab1457c 1745 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1746 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1747 NEIGH_UPDATE_F_OVERRIDE|
1748 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1749 NEIGH_UPDATE_F_ISROUTER))
1750 );
1751
21efcfa0 1752 nrt = ip6_rt_copy(rt, dest);
38308473 1753 if (!nrt)
1da177e4
LT
1754 goto out;
1755
1756 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1757 if (on_link)
1758 nrt->rt6i_flags &= ~RTF_GATEWAY;
1759
4e3fd7a0 1760 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
97cac082 1761 nrt->n = neigh_clone(neigh);
1da177e4 1762
40e22e8f 1763 if (ip6_ins_rt(nrt))
1da177e4
LT
1764 goto out;
1765
d8d1f30b 1766 netevent.old = &rt->dst;
1d248b1c 1767 netevent.old_neigh = old_neigh;
d8d1f30b 1768 netevent.new = &nrt->dst;
1d248b1c
DM
1769 netevent.new_neigh = neigh;
1770 netevent.daddr = dest;
8d71740c
TT
1771 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1772
38308473 1773 if (rt->rt6i_flags & RTF_CACHE) {
6e157b6a 1774 rt = (struct rt6_info *) dst_clone(&rt->dst);
e0a1ad73 1775 ip6_del_rt(rt);
1da177e4
LT
1776 }
1777
1778out:
e8599ff4 1779 neigh_release(neigh);
6e157b6a
DM
1780}
1781
1da177e4
LT
1782/*
1783 * Misc support functions
1784 */
1785
1716a961 1786static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
21efcfa0 1787 const struct in6_addr *dest)
1da177e4 1788{
d1918542 1789 struct net *net = dev_net(ort->dst.dev);
8b96d22d
DM
1790 struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1791 ort->rt6i_table);
1da177e4
LT
1792
1793 if (rt) {
d8d1f30b
CG
1794 rt->dst.input = ort->dst.input;
1795 rt->dst.output = ort->dst.output;
8e2ec639 1796 rt->dst.flags |= DST_HOST;
d8d1f30b 1797
4e3fd7a0 1798 rt->rt6i_dst.addr = *dest;
8e2ec639 1799 rt->rt6i_dst.plen = 128;
defb3519 1800 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b 1801 rt->dst.error = ort->dst.error;
1da177e4
LT
1802 rt->rt6i_idev = ort->rt6i_idev;
1803 if (rt->rt6i_idev)
1804 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1805 rt->dst.lastuse = jiffies;
1da177e4 1806
4e3fd7a0 1807 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961
G
1808 rt->rt6i_flags = ort->rt6i_flags;
1809 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1810 (RTF_DEFAULT | RTF_ADDRCONF))
1811 rt6_set_from(rt, ort);
1812 else
1813 rt6_clean_expires(rt);
1da177e4
LT
1814 rt->rt6i_metric = 0;
1815
1da177e4
LT
1816#ifdef CONFIG_IPV6_SUBTREES
1817 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1818#endif
0f6c6392 1819 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
c71099ac 1820 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1821 }
1822 return rt;
1823}
1824
70ceb4f5 1825#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 1826static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
1827 const struct in6_addr *prefix, int prefixlen,
1828 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
1829{
1830 struct fib6_node *fn;
1831 struct rt6_info *rt = NULL;
c71099ac
TG
1832 struct fib6_table *table;
1833
efa2cea0 1834 table = fib6_get_table(net, RT6_TABLE_INFO);
38308473 1835 if (!table)
c71099ac 1836 return NULL;
70ceb4f5 1837
c71099ac
TG
1838 write_lock_bh(&table->tb6_lock);
1839 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1840 if (!fn)
1841 goto out;
1842
d8d1f30b 1843 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 1844 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
1845 continue;
1846 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1847 continue;
1848 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1849 continue;
d8d1f30b 1850 dst_hold(&rt->dst);
70ceb4f5
YH
1851 break;
1852 }
1853out:
c71099ac 1854 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1855 return rt;
1856}
1857
efa2cea0 1858static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
1859 const struct in6_addr *prefix, int prefixlen,
1860 const struct in6_addr *gwaddr, int ifindex,
95c96174 1861 unsigned int pref)
70ceb4f5 1862{
86872cb5
TG
1863 struct fib6_config cfg = {
1864 .fc_table = RT6_TABLE_INFO,
238fc7ea 1865 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1866 .fc_ifindex = ifindex,
1867 .fc_dst_len = prefixlen,
1868 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1869 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1870 .fc_nlinfo.pid = 0,
1871 .fc_nlinfo.nlh = NULL,
1872 .fc_nlinfo.nl_net = net,
86872cb5
TG
1873 };
1874
4e3fd7a0
AD
1875 cfg.fc_dst = *prefix;
1876 cfg.fc_gateway = *gwaddr;
70ceb4f5 1877
e317da96
YH
1878 /* We should treat it as a default route if prefix length is 0. */
1879 if (!prefixlen)
86872cb5 1880 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1881
86872cb5 1882 ip6_route_add(&cfg);
70ceb4f5 1883
efa2cea0 1884 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1885}
1886#endif
1887
b71d1d42 1888struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 1889{
1da177e4 1890 struct rt6_info *rt;
c71099ac 1891 struct fib6_table *table;
1da177e4 1892
c346dca1 1893 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
38308473 1894 if (!table)
c71099ac 1895 return NULL;
1da177e4 1896
c71099ac 1897 write_lock_bh(&table->tb6_lock);
d8d1f30b 1898 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
d1918542 1899 if (dev == rt->dst.dev &&
045927ff 1900 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1901 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1902 break;
1903 }
1904 if (rt)
d8d1f30b 1905 dst_hold(&rt->dst);
c71099ac 1906 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1907 return rt;
1908}
1909
b71d1d42 1910struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
1911 struct net_device *dev,
1912 unsigned int pref)
1da177e4 1913{
86872cb5
TG
1914 struct fib6_config cfg = {
1915 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1916 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1917 .fc_ifindex = dev->ifindex,
1918 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1919 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1920 .fc_nlinfo.pid = 0,
1921 .fc_nlinfo.nlh = NULL,
c346dca1 1922 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1923 };
1da177e4 1924
4e3fd7a0 1925 cfg.fc_gateway = *gwaddr;
1da177e4 1926
86872cb5 1927 ip6_route_add(&cfg);
1da177e4 1928
1da177e4
LT
1929 return rt6_get_dflt_router(gwaddr, dev);
1930}
1931
7b4da532 1932void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1933{
1934 struct rt6_info *rt;
c71099ac
TG
1935 struct fib6_table *table;
1936
1937 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1938 table = fib6_get_table(net, RT6_TABLE_DFLT);
38308473 1939 if (!table)
c71099ac 1940 return;
1da177e4
LT
1941
1942restart:
c71099ac 1943 read_lock_bh(&table->tb6_lock);
d8d1f30b 1944 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1945 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1946 dst_hold(&rt->dst);
c71099ac 1947 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1948 ip6_del_rt(rt);
1da177e4
LT
1949 goto restart;
1950 }
1951 }
c71099ac 1952 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1953}
1954
5578689a
DL
1955static void rtmsg_to_fib6_config(struct net *net,
1956 struct in6_rtmsg *rtmsg,
86872cb5
TG
1957 struct fib6_config *cfg)
1958{
1959 memset(cfg, 0, sizeof(*cfg));
1960
1961 cfg->fc_table = RT6_TABLE_MAIN;
1962 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1963 cfg->fc_metric = rtmsg->rtmsg_metric;
1964 cfg->fc_expires = rtmsg->rtmsg_info;
1965 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1966 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1967 cfg->fc_flags = rtmsg->rtmsg_flags;
1968
5578689a 1969 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1970
4e3fd7a0
AD
1971 cfg->fc_dst = rtmsg->rtmsg_dst;
1972 cfg->fc_src = rtmsg->rtmsg_src;
1973 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
1974}
1975
5578689a 1976int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1977{
86872cb5 1978 struct fib6_config cfg;
1da177e4
LT
1979 struct in6_rtmsg rtmsg;
1980 int err;
1981
1982 switch(cmd) {
1983 case SIOCADDRT: /* Add a route */
1984 case SIOCDELRT: /* Delete a route */
1985 if (!capable(CAP_NET_ADMIN))
1986 return -EPERM;
1987 err = copy_from_user(&rtmsg, arg,
1988 sizeof(struct in6_rtmsg));
1989 if (err)
1990 return -EFAULT;
86872cb5 1991
5578689a 1992 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1993
1da177e4
LT
1994 rtnl_lock();
1995 switch (cmd) {
1996 case SIOCADDRT:
86872cb5 1997 err = ip6_route_add(&cfg);
1da177e4
LT
1998 break;
1999 case SIOCDELRT:
86872cb5 2000 err = ip6_route_del(&cfg);
1da177e4
LT
2001 break;
2002 default:
2003 err = -EINVAL;
2004 }
2005 rtnl_unlock();
2006
2007 return err;
3ff50b79 2008 }
1da177e4
LT
2009
2010 return -EINVAL;
2011}
2012
2013/*
2014 * Drop the packet on the floor
2015 */
2016
d5fdd6ba 2017static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2018{
612f09e8 2019 int type;
adf30907 2020 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2021 switch (ipstats_mib_noroutes) {
2022 case IPSTATS_MIB_INNOROUTES:
0660e03f 2023 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2024 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2025 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2026 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2027 break;
2028 }
2029 /* FALLTHROUGH */
2030 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2031 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2032 ipstats_mib_noroutes);
612f09e8
YH
2033 break;
2034 }
3ffe533c 2035 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2036 kfree_skb(skb);
2037 return 0;
2038}
2039
9ce8ade0
TG
2040static int ip6_pkt_discard(struct sk_buff *skb)
2041{
612f09e8 2042 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2043}
2044
20380731 2045static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 2046{
adf30907 2047 skb->dev = skb_dst(skb)->dev;
612f09e8 2048 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2049}
2050
6723ab54
DM
2051#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2052
9ce8ade0
TG
2053static int ip6_pkt_prohibit(struct sk_buff *skb)
2054{
612f09e8 2055 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2056}
2057
2058static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2059{
adf30907 2060 skb->dev = skb_dst(skb)->dev;
612f09e8 2061 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2062}
2063
6723ab54
DM
2064#endif
2065
1da177e4
LT
2066/*
2067 * Allocate a dst for local (unicast / anycast) address.
2068 */
2069
2070struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2071 const struct in6_addr *addr,
8f031519 2072 bool anycast)
1da177e4 2073{
c346dca1 2074 struct net *net = dev_net(idev->dev);
8b96d22d 2075 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
f83c7790 2076 int err;
1da177e4 2077
38308473 2078 if (!rt) {
f3213831 2079 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
1da177e4 2080 return ERR_PTR(-ENOMEM);
40385653 2081 }
1da177e4 2082
1da177e4
LT
2083 in6_dev_hold(idev);
2084
11d53b49 2085 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2086 rt->dst.input = ip6_input;
2087 rt->dst.output = ip6_output;
1da177e4 2088 rt->rt6i_idev = idev;
1da177e4
LT
2089
2090 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2091 if (anycast)
2092 rt->rt6i_flags |= RTF_ANYCAST;
2093 else
1da177e4 2094 rt->rt6i_flags |= RTF_LOCAL;
8ade06c6 2095 err = rt6_bind_neighbour(rt, rt->dst.dev);
f83c7790 2096 if (err) {
d8d1f30b 2097 dst_free(&rt->dst);
f83c7790 2098 return ERR_PTR(err);
1da177e4
LT
2099 }
2100
4e3fd7a0 2101 rt->rt6i_dst.addr = *addr;
1da177e4 2102 rt->rt6i_dst.plen = 128;
5578689a 2103 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2104
d8d1f30b 2105 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2106
2107 return rt;
2108}
2109
c3968a85
DW
2110int ip6_route_get_saddr(struct net *net,
2111 struct rt6_info *rt,
b71d1d42 2112 const struct in6_addr *daddr,
c3968a85
DW
2113 unsigned int prefs,
2114 struct in6_addr *saddr)
2115{
2116 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2117 int err = 0;
2118 if (rt->rt6i_prefsrc.plen)
4e3fd7a0 2119 *saddr = rt->rt6i_prefsrc.addr;
c3968a85
DW
2120 else
2121 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2122 daddr, prefs, saddr);
2123 return err;
2124}
2125
2126/* remove deleted ip from prefsrc entries */
/* Argument bundle handed to fib6_remove_prefsrc() by the table walker. */
struct arg_dev_net_ip {
	struct net_device	*dev;	/* device to match; NULL matches any */
	struct net		*net;	/* namespace whose null entry is skipped */
	struct in6_addr		*addr;	/* address being removed */
};
2132
2133static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2134{
2135 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2136 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2137 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2138
d1918542 2139 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
2140 rt != net->ipv6.ip6_null_entry &&
2141 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2142 /* remove prefsrc entry */
2143 rt->rt6i_prefsrc.plen = 0;
2144 }
2145 return 0;
2146}
2147
2148void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2149{
2150 struct net *net = dev_net(ifp->idev->dev);
2151 struct arg_dev_net_ip adni = {
2152 .dev = ifp->idev->dev,
2153 .net = net,
2154 .addr = &ifp->addr,
2155 };
2156 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2157}
2158
8ed67789
DL
/* Argument bundle handed to fib6_ifdown() by the table walkers. */
struct arg_dev_net {
	struct net_device	*dev;	/* device going down; NULL matches any */
	struct net		*net;	/* namespace whose null entry is kept */
};
2163
1da177e4
LT
2164static int fib6_ifdown(struct rt6_info *rt, void *arg)
2165{
bc3ef660 2166 const struct arg_dev_net *adn = arg;
2167 const struct net_device *dev = adn->dev;
8ed67789 2168
d1918542 2169 if ((rt->dst.dev == dev || !dev) &&
c159d30c 2170 rt != adn->net->ipv6.ip6_null_entry)
1da177e4 2171 return -1;
c159d30c 2172
1da177e4
LT
2173 return 0;
2174}
2175
f3db4851 2176void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2177{
8ed67789
DL
2178 struct arg_dev_net adn = {
2179 .dev = dev,
2180 .net = net,
2181 };
2182
2183 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2184 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2185}
2186
/* Argument bundle handed to rt6_mtu_change_route() by the table walker. */
struct rt6_mtu_change_arg {
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned int		mtu;	/* the new MTU */
};
2191
2192static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2193{
2194 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2195 struct inet6_dev *idev;
2196
2197 /* In IPv6 pmtu discovery is not optional,
2198 so that RTAX_MTU lock cannot disable it.
2199 We still use this lock to block changes
2200 caused by addrconf/ndisc.
2201 */
2202
2203 idev = __in6_dev_get(arg->dev);
38308473 2204 if (!idev)
1da177e4
LT
2205 return 0;
2206
2207 /* For administrative MTU increase, there is no way to discover
2208 IPv6 PMTU increase, so PMTU increase should be updated here.
2209 Since RFC 1981 doesn't include administrative MTU increase
2210 update PMTU increase is a MUST. (i.e. jumbo frame)
2211 */
2212 /*
2213 If new MTU is less than route PMTU, this new MTU will be the
2214 lowest MTU in the path, update the route PMTU to reflect PMTU
2215 decreases; if new MTU is greater than route PMTU, and the
2216 old MTU is the lowest MTU in the path, update the route PMTU
2217 to reflect the increase. In this case if the other nodes' MTU
2218 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2219 PMTU discouvery.
2220 */
d1918542 2221 if (rt->dst.dev == arg->dev &&
d8d1f30b
CG
2222 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2223 (dst_mtu(&rt->dst) >= arg->mtu ||
2224 (dst_mtu(&rt->dst) < arg->mtu &&
2225 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2226 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2227 }
1da177e4
LT
2228 return 0;
2229}
2230
95c96174 2231void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 2232{
c71099ac
TG
2233 struct rt6_mtu_change_arg arg = {
2234 .dev = dev,
2235 .mtu = mtu,
2236 };
1da177e4 2237
c346dca1 2238 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2239}
2240
ef7c79ed 2241static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2242 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2243 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2244 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2245 [RTA_PRIORITY] = { .type = NLA_U32 },
2246 [RTA_METRICS] = { .type = NLA_NESTED },
2247};
2248
2249static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2250 struct fib6_config *cfg)
1da177e4 2251{
86872cb5
TG
2252 struct rtmsg *rtm;
2253 struct nlattr *tb[RTA_MAX+1];
2254 int err;
1da177e4 2255
86872cb5
TG
2256 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2257 if (err < 0)
2258 goto errout;
1da177e4 2259
86872cb5
TG
2260 err = -EINVAL;
2261 rtm = nlmsg_data(nlh);
2262 memset(cfg, 0, sizeof(*cfg));
2263
2264 cfg->fc_table = rtm->rtm_table;
2265 cfg->fc_dst_len = rtm->rtm_dst_len;
2266 cfg->fc_src_len = rtm->rtm_src_len;
2267 cfg->fc_flags = RTF_UP;
2268 cfg->fc_protocol = rtm->rtm_protocol;
2269
2270 if (rtm->rtm_type == RTN_UNREACHABLE)
2271 cfg->fc_flags |= RTF_REJECT;
2272
ab79ad14
2273 if (rtm->rtm_type == RTN_LOCAL)
2274 cfg->fc_flags |= RTF_LOCAL;
2275
86872cb5
TG
2276 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2277 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2278 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2279
2280 if (tb[RTA_GATEWAY]) {
2281 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2282 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2283 }
86872cb5
TG
2284
2285 if (tb[RTA_DST]) {
2286 int plen = (rtm->rtm_dst_len + 7) >> 3;
2287
2288 if (nla_len(tb[RTA_DST]) < plen)
2289 goto errout;
2290
2291 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2292 }
86872cb5
TG
2293
2294 if (tb[RTA_SRC]) {
2295 int plen = (rtm->rtm_src_len + 7) >> 3;
2296
2297 if (nla_len(tb[RTA_SRC]) < plen)
2298 goto errout;
2299
2300 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2301 }
86872cb5 2302
c3968a85
DW
2303 if (tb[RTA_PREFSRC])
2304 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2305
86872cb5
TG
2306 if (tb[RTA_OIF])
2307 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2308
2309 if (tb[RTA_PRIORITY])
2310 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2311
2312 if (tb[RTA_METRICS]) {
2313 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2314 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2315 }
86872cb5
TG
2316
2317 if (tb[RTA_TABLE])
2318 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2319
2320 err = 0;
2321errout:
2322 return err;
1da177e4
LT
2323}
2324
c127ea2c 2325static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2326{
86872cb5
TG
2327 struct fib6_config cfg;
2328 int err;
1da177e4 2329
86872cb5
TG
2330 err = rtm_to_fib6_config(skb, nlh, &cfg);
2331 if (err < 0)
2332 return err;
2333
2334 return ip6_route_del(&cfg);
1da177e4
LT
2335}
2336
c127ea2c 2337static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2338{
86872cb5
TG
2339 struct fib6_config cfg;
2340 int err;
1da177e4 2341
86872cb5
TG
2342 err = rtm_to_fib6_config(skb, nlh, &cfg);
2343 if (err < 0)
2344 return err;
2345
2346 return ip6_route_add(&cfg);
1da177e4
LT
2347}
2348
339bf98f
TG
2349static inline size_t rt6_nlmsg_size(void)
2350{
2351 return NLMSG_ALIGN(sizeof(struct rtmsg))
2352 + nla_total_size(16) /* RTA_SRC */
2353 + nla_total_size(16) /* RTA_DST */
2354 + nla_total_size(16) /* RTA_GATEWAY */
2355 + nla_total_size(16) /* RTA_PREFSRC */
2356 + nla_total_size(4) /* RTA_TABLE */
2357 + nla_total_size(4) /* RTA_IIF */
2358 + nla_total_size(4) /* RTA_OIF */
2359 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2360 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2361 + nla_total_size(sizeof(struct rta_cacheinfo));
2362}
2363
191cd582
BH
2364static int rt6_fill_node(struct net *net,
2365 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2366 struct in6_addr *dst, struct in6_addr *src,
2367 int iif, int type, u32 pid, u32 seq,
7bc570c8 2368 int prefix, int nowait, unsigned int flags)
1da177e4
LT
2369{
2370 struct rtmsg *rtm;
2d7202bf 2371 struct nlmsghdr *nlh;
e3703b3d 2372 long expires;
9e762a4a 2373 u32 table;
f2c31e32 2374 struct neighbour *n;
1da177e4
LT
2375
2376 if (prefix) { /* user wants prefix routes only */
2377 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2378 /* success since this is not a prefix route */
2379 return 1;
2380 }
2381 }
2382
2d7202bf 2383 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
38308473 2384 if (!nlh)
26932566 2385 return -EMSGSIZE;
2d7202bf
TG
2386
2387 rtm = nlmsg_data(nlh);
1da177e4
LT
2388 rtm->rtm_family = AF_INET6;
2389 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2390 rtm->rtm_src_len = rt->rt6i_src.plen;
2391 rtm->rtm_tos = 0;
c71099ac 2392 if (rt->rt6i_table)
9e762a4a 2393 table = rt->rt6i_table->tb6_id;
c71099ac 2394 else
9e762a4a
PM
2395 table = RT6_TABLE_UNSPEC;
2396 rtm->rtm_table = table;
c78679e8
DM
2397 if (nla_put_u32(skb, RTA_TABLE, table))
2398 goto nla_put_failure;
38308473 2399 if (rt->rt6i_flags & RTF_REJECT)
1da177e4 2400 rtm->rtm_type = RTN_UNREACHABLE;
38308473 2401 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 2402 rtm->rtm_type = RTN_LOCAL;
d1918542 2403 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
2404 rtm->rtm_type = RTN_LOCAL;
2405 else
2406 rtm->rtm_type = RTN_UNICAST;
2407 rtm->rtm_flags = 0;
2408 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2409 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 2410 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4 2411 rtm->rtm_protocol = RTPROT_REDIRECT;
f0396f60
DO
2412 else if (rt->rt6i_flags & RTF_ADDRCONF) {
2413 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2414 rtm->rtm_protocol = RTPROT_RA;
2415 else
2416 rtm->rtm_protocol = RTPROT_KERNEL;
2417 }
1da177e4 2418
38308473 2419 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
2420 rtm->rtm_flags |= RTM_F_CLONED;
2421
2422 if (dst) {
c78679e8
DM
2423 if (nla_put(skb, RTA_DST, 16, dst))
2424 goto nla_put_failure;
1ab1457c 2425 rtm->rtm_dst_len = 128;
1da177e4 2426 } else if (rtm->rtm_dst_len)
c78679e8
DM
2427 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2428 goto nla_put_failure;
1da177e4
LT
2429#ifdef CONFIG_IPV6_SUBTREES
2430 if (src) {
c78679e8
DM
2431 if (nla_put(skb, RTA_SRC, 16, src))
2432 goto nla_put_failure;
1ab1457c 2433 rtm->rtm_src_len = 128;
c78679e8
DM
2434 } else if (rtm->rtm_src_len &&
2435 nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2436 goto nla_put_failure;
1da177e4 2437#endif
7bc570c8
YH
2438 if (iif) {
2439#ifdef CONFIG_IPV6_MROUTE
2440 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2441 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2442 if (err <= 0) {
2443 if (!nowait) {
2444 if (err == 0)
2445 return 0;
2446 goto nla_put_failure;
2447 } else {
2448 if (err == -EMSGSIZE)
2449 goto nla_put_failure;
2450 }
2451 }
2452 } else
2453#endif
c78679e8
DM
2454 if (nla_put_u32(skb, RTA_IIF, iif))
2455 goto nla_put_failure;
7bc570c8 2456 } else if (dst) {
1da177e4 2457 struct in6_addr saddr_buf;
c78679e8
DM
2458 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2459 nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2460 goto nla_put_failure;
1da177e4 2461 }
2d7202bf 2462
c3968a85
DW
2463 if (rt->rt6i_prefsrc.plen) {
2464 struct in6_addr saddr_buf;
4e3fd7a0 2465 saddr_buf = rt->rt6i_prefsrc.addr;
c78679e8
DM
2466 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2467 goto nla_put_failure;
c3968a85
DW
2468 }
2469
defb3519 2470 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2471 goto nla_put_failure;
2472
f2c31e32 2473 rcu_read_lock();
97cac082 2474 n = rt->n;
94f826b8
ED
2475 if (n) {
2476 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
2477 rcu_read_unlock();
2478 goto nla_put_failure;
2479 }
2480 }
f2c31e32 2481 rcu_read_unlock();
2d7202bf 2482
c78679e8
DM
2483 if (rt->dst.dev &&
2484 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2485 goto nla_put_failure;
2486 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2487 goto nla_put_failure;
8253947e
LW
2488
2489 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
69cdf8f9 2490
87a50699 2491 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
e3703b3d 2492 goto nla_put_failure;
2d7202bf
TG
2493
2494 return nlmsg_end(skb, nlh);
2495
2496nla_put_failure:
26932566
PM
2497 nlmsg_cancel(skb, nlh);
2498 return -EMSGSIZE;
1da177e4
LT
2499}
2500
1b43af54 2501int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2502{
2503 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2504 int prefix;
2505
2d7202bf
TG
2506 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2507 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2508 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2509 } else
2510 prefix = 0;
2511
191cd582
BH
2512 return rt6_fill_node(arg->net,
2513 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2514 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2515 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2516}
2517
c127ea2c 2518static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2519{
3b1e0a65 2520 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2521 struct nlattr *tb[RTA_MAX+1];
2522 struct rt6_info *rt;
1da177e4 2523 struct sk_buff *skb;
ab364a6f 2524 struct rtmsg *rtm;
4c9483b2 2525 struct flowi6 fl6;
72331bc0 2526 int err, iif = 0, oif = 0;
1da177e4 2527
ab364a6f
TG
2528 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2529 if (err < 0)
2530 goto errout;
1da177e4 2531
ab364a6f 2532 err = -EINVAL;
4c9483b2 2533 memset(&fl6, 0, sizeof(fl6));
1da177e4 2534
ab364a6f
TG
2535 if (tb[RTA_SRC]) {
2536 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2537 goto errout;
2538
4e3fd7a0 2539 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
2540 }
2541
2542 if (tb[RTA_DST]) {
2543 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2544 goto errout;
2545
4e3fd7a0 2546 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
2547 }
2548
2549 if (tb[RTA_IIF])
2550 iif = nla_get_u32(tb[RTA_IIF]);
2551
2552 if (tb[RTA_OIF])
72331bc0 2553 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2554
2555 if (iif) {
2556 struct net_device *dev;
72331bc0
SL
2557 int flags = 0;
2558
5578689a 2559 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2560 if (!dev) {
2561 err = -ENODEV;
ab364a6f 2562 goto errout;
1da177e4 2563 }
72331bc0
SL
2564
2565 fl6.flowi6_iif = iif;
2566
2567 if (!ipv6_addr_any(&fl6.saddr))
2568 flags |= RT6_LOOKUP_F_HAS_SADDR;
2569
2570 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2571 flags);
2572 } else {
2573 fl6.flowi6_oif = oif;
2574
2575 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
1da177e4
LT
2576 }
2577
ab364a6f 2578 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 2579 if (!skb) {
2173bff5 2580 dst_release(&rt->dst);
ab364a6f
TG
2581 err = -ENOBUFS;
2582 goto errout;
2583 }
1da177e4 2584
ab364a6f
TG
2585 /* Reserve room for dummy headers, this skb can pass
2586 through good chunk of routing engine.
2587 */
459a98ed 2588 skb_reset_mac_header(skb);
ab364a6f 2589 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2590
d8d1f30b 2591 skb_dst_set(skb, &rt->dst);
1da177e4 2592
4c9483b2 2593 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
1da177e4 2594 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2595 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2596 if (err < 0) {
ab364a6f
TG
2597 kfree_skb(skb);
2598 goto errout;
1da177e4
LT
2599 }
2600
5578689a 2601 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2602errout:
1da177e4 2603 return err;
1da177e4
LT
2604}
2605
86872cb5 2606void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2607{
2608 struct sk_buff *skb;
5578689a 2609 struct net *net = info->nl_net;
528c4ceb
DL
2610 u32 seq;
2611 int err;
2612
2613 err = -ENOBUFS;
38308473 2614 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 2615
339bf98f 2616 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
38308473 2617 if (!skb)
21713ebc
TG
2618 goto errout;
2619
191cd582 2620 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2621 event, info->pid, seq, 0, 0, 0);
26932566
PM
2622 if (err < 0) {
2623 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2624 WARN_ON(err == -EMSGSIZE);
2625 kfree_skb(skb);
2626 goto errout;
2627 }
1ce85fe4
PNA
2628 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2629 info->nlh, gfp_any());
2630 return;
21713ebc
TG
2631errout:
2632 if (err < 0)
5578689a 2633 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2634}
2635
8ed67789
DL
2636static int ip6_route_dev_notify(struct notifier_block *this,
2637 unsigned long event, void *data)
2638{
2639 struct net_device *dev = (struct net_device *)data;
c346dca1 2640 struct net *net = dev_net(dev);
8ed67789
DL
2641
2642 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2643 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2644 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2645#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2646 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2647 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2648 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2649 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2650#endif
2651 }
2652
2653 return NOTIFY_OK;
2654}
2655
1da177e4
LT
2656/*
2657 * /proc
2658 */
2659
2660#ifdef CONFIG_PROC_FS
2661
1da177e4
LT
/*
 * Buffer-tracking state for a /proc route dump.
 * NOTE(review): no user of this structure is visible in this part of the
 * file -- confirm whether it is still needed.
 */
struct rt6_proc_arg {
	char	*buffer;
	int	offset;
	int	length;
	int	skip;
	int	len;
};
2670
2671static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2672{
33120b30 2673 struct seq_file *m = p_arg;
69cce1d1 2674 struct neighbour *n;
1da177e4 2675
4b7a4274 2676 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2677
2678#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2679 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2680#else
33120b30 2681 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4 2682#endif
f2c31e32 2683 rcu_read_lock();
97cac082 2684 n = rt->n;
69cce1d1
DM
2685 if (n) {
2686 seq_printf(m, "%pi6", n->primary_key);
1da177e4 2687 } else {
33120b30 2688 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2689 }
f2c31e32 2690 rcu_read_unlock();
33120b30 2691 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2692 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2693 rt->dst.__use, rt->rt6i_flags,
d1918542 2694 rt->dst.dev ? rt->dst.dev->name : "");
1da177e4
LT
2695 return 0;
2696}
2697
33120b30 2698static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2699{
f3db4851 2700 struct net *net = (struct net *)m->private;
32b293a5 2701 fib6_clean_all_ro(net, rt6_info_route, 0, m);
33120b30
AD
2702 return 0;
2703}
1da177e4 2704
33120b30
AD
2705static int ipv6_route_open(struct inode *inode, struct file *file)
2706{
de05c557 2707 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2708}
2709
33120b30
AD
2710static const struct file_operations ipv6_route_proc_fops = {
2711 .owner = THIS_MODULE,
2712 .open = ipv6_route_open,
2713 .read = seq_read,
2714 .llseek = seq_lseek,
b6fcbdb4 2715 .release = single_release_net,
33120b30
AD
2716};
2717
1da177e4
LT
2718static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2719{
69ddb805 2720 struct net *net = (struct net *)seq->private;
1da177e4 2721 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2722 net->ipv6.rt6_stats->fib_nodes,
2723 net->ipv6.rt6_stats->fib_route_nodes,
2724 net->ipv6.rt6_stats->fib_rt_alloc,
2725 net->ipv6.rt6_stats->fib_rt_entries,
2726 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2727 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2728 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2729
2730 return 0;
2731}
2732
2733static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2734{
de05c557 2735 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2736}
2737
9a32144e 2738static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2739 .owner = THIS_MODULE,
2740 .open = rt6_stats_seq_open,
2741 .read = seq_read,
2742 .llseek = seq_lseek,
b6fcbdb4 2743 .release = single_release_net,
1da177e4
LT
2744};
2745#endif /* CONFIG_PROC_FS */
2746
2747#ifdef CONFIG_SYSCTL
2748
1da177e4 2749static
8d65af78 2750int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2751 void __user *buffer, size_t *lenp, loff_t *ppos)
2752{
c486da34
LAG
2753 struct net *net;
2754 int delay;
2755 if (!write)
1da177e4 2756 return -EINVAL;
c486da34
LAG
2757
2758 net = (struct net *)ctl->extra1;
2759 delay = net->ipv6.sysctl.flush_delay;
2760 proc_dointvec(ctl, write, buffer, lenp, ppos);
2761 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2762 return 0;
1da177e4
LT
2763}
2764
760f2d01 2765ctl_table ipv6_route_table_template[] = {
1ab1457c 2766 {
1da177e4 2767 .procname = "flush",
4990509f 2768 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2769 .maxlen = sizeof(int),
89c8b3a1 2770 .mode = 0200,
6d9f239a 2771 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2772 },
2773 {
1da177e4 2774 .procname = "gc_thresh",
9a7ec3a9 2775 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2776 .maxlen = sizeof(int),
2777 .mode = 0644,
6d9f239a 2778 .proc_handler = proc_dointvec,
1da177e4
LT
2779 },
2780 {
1da177e4 2781 .procname = "max_size",
4990509f 2782 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2783 .maxlen = sizeof(int),
2784 .mode = 0644,
6d9f239a 2785 .proc_handler = proc_dointvec,
1da177e4
LT
2786 },
2787 {
1da177e4 2788 .procname = "gc_min_interval",
4990509f 2789 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2790 .maxlen = sizeof(int),
2791 .mode = 0644,
6d9f239a 2792 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2793 },
2794 {
1da177e4 2795 .procname = "gc_timeout",
4990509f 2796 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2797 .maxlen = sizeof(int),
2798 .mode = 0644,
6d9f239a 2799 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2800 },
2801 {
1da177e4 2802 .procname = "gc_interval",
4990509f 2803 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2804 .maxlen = sizeof(int),
2805 .mode = 0644,
6d9f239a 2806 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2807 },
2808 {
1da177e4 2809 .procname = "gc_elasticity",
4990509f 2810 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2811 .maxlen = sizeof(int),
2812 .mode = 0644,
f3d3f616 2813 .proc_handler = proc_dointvec,
1da177e4
LT
2814 },
2815 {
1da177e4 2816 .procname = "mtu_expires",
4990509f 2817 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2818 .maxlen = sizeof(int),
2819 .mode = 0644,
6d9f239a 2820 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2821 },
2822 {
1da177e4 2823 .procname = "min_adv_mss",
4990509f 2824 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2825 .maxlen = sizeof(int),
2826 .mode = 0644,
f3d3f616 2827 .proc_handler = proc_dointvec,
1da177e4
LT
2828 },
2829 {
1da177e4 2830 .procname = "gc_min_interval_ms",
4990509f 2831 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2832 .maxlen = sizeof(int),
2833 .mode = 0644,
6d9f239a 2834 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2835 },
f8572d8f 2836 { }
1da177e4
LT
2837};
2838
2c8c1e72 2839struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2840{
2841 struct ctl_table *table;
2842
2843 table = kmemdup(ipv6_route_table_template,
2844 sizeof(ipv6_route_table_template),
2845 GFP_KERNEL);
5ee09105
YH
2846
2847 if (table) {
2848 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 2849 table[0].extra1 = net;
86393e52 2850 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2851 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2852 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2853 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2854 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2855 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2856 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2857 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2858 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2859 }
2860
760f2d01
DL
2861 return table;
2862}
1da177e4
LT
2863#endif
2864
2c8c1e72 2865static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2866{
633d424b 2867 int ret = -ENOMEM;
8ed67789 2868
86393e52
AD
2869 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2870 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2871
fc66f95c
ED
2872 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2873 goto out_ip6_dst_ops;
2874
8ed67789
DL
2875 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2876 sizeof(*net->ipv6.ip6_null_entry),
2877 GFP_KERNEL);
2878 if (!net->ipv6.ip6_null_entry)
fc66f95c 2879 goto out_ip6_dst_entries;
d8d1f30b 2880 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2881 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2882 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2883 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2884 ip6_template_metrics, true);
8ed67789
DL
2885
2886#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2887 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2888 sizeof(*net->ipv6.ip6_prohibit_entry),
2889 GFP_KERNEL);
68fffc67
PZ
2890 if (!net->ipv6.ip6_prohibit_entry)
2891 goto out_ip6_null_entry;
d8d1f30b 2892 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2893 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2894 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2895 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2896 ip6_template_metrics, true);
8ed67789
DL
2897
2898 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2899 sizeof(*net->ipv6.ip6_blk_hole_entry),
2900 GFP_KERNEL);
68fffc67
PZ
2901 if (!net->ipv6.ip6_blk_hole_entry)
2902 goto out_ip6_prohibit_entry;
d8d1f30b 2903 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2904 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2905 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2906 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2907 ip6_template_metrics, true);
8ed67789
DL
2908#endif
2909
b339a47c
PZ
2910 net->ipv6.sysctl.flush_delay = 0;
2911 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2912 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2913 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2914 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2915 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2916 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2917 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2918
6891a346
BT
2919 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2920
8ed67789
DL
2921 ret = 0;
2922out:
2923 return ret;
f2fc6a54 2924
68fffc67
PZ
2925#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2926out_ip6_prohibit_entry:
2927 kfree(net->ipv6.ip6_prohibit_entry);
2928out_ip6_null_entry:
2929 kfree(net->ipv6.ip6_null_entry);
2930#endif
fc66f95c
ED
2931out_ip6_dst_entries:
2932 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2933out_ip6_dst_ops:
f2fc6a54 2934 goto out;
cdb18761
DL
2935}
2936
2c8c1e72 2937static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 2938{
8ed67789
DL
2939 kfree(net->ipv6.ip6_null_entry);
2940#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2941 kfree(net->ipv6.ip6_prohibit_entry);
2942 kfree(net->ipv6.ip6_blk_hole_entry);
2943#endif
41bb78b4 2944 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
2945}
2946
d189634e
TG
2947static int __net_init ip6_route_net_init_late(struct net *net)
2948{
2949#ifdef CONFIG_PROC_FS
2950 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2951 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2952#endif
2953 return 0;
2954}
2955
2956static void __net_exit ip6_route_net_exit_late(struct net *net)
2957{
2958#ifdef CONFIG_PROC_FS
2959 proc_net_remove(net, "ipv6_route");
2960 proc_net_remove(net, "rt6_stats");
2961#endif
2962}
2963
cdb18761
DL
2964static struct pernet_operations ip6_route_net_ops = {
2965 .init = ip6_route_net_init,
2966 .exit = ip6_route_net_exit,
2967};
2968
c3426b47
DM
2969static int __net_init ipv6_inetpeer_init(struct net *net)
2970{
2971 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
2972
2973 if (!bp)
2974 return -ENOMEM;
2975 inet_peer_base_init(bp);
2976 net->ipv6.peers = bp;
2977 return 0;
2978}
2979
2980static void __net_exit ipv6_inetpeer_exit(struct net *net)
2981{
2982 struct inet_peer_base *bp = net->ipv6.peers;
2983
2984 net->ipv6.peers = NULL;
56a6b248 2985 inetpeer_invalidate_tree(bp);
c3426b47
DM
2986 kfree(bp);
2987}
2988
2b823f72 2989static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
2990 .init = ipv6_inetpeer_init,
2991 .exit = ipv6_inetpeer_exit,
2992};
2993
d189634e
TG
2994static struct pernet_operations ip6_route_net_late_ops = {
2995 .init = ip6_route_net_init_late,
2996 .exit = ip6_route_net_exit_late,
2997};
2998
8ed67789
DL
2999static struct notifier_block ip6_route_dev_notifier = {
3000 .notifier_call = ip6_route_dev_notify,
3001 .priority = 0,
3002};
3003
433d49c3 3004int __init ip6_route_init(void)
1da177e4 3005{
433d49c3
DL
3006 int ret;
3007
9a7ec3a9
DL
3008 ret = -ENOMEM;
3009 ip6_dst_ops_template.kmem_cachep =
e5d679f3 3010 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 3011 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 3012 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 3013 goto out;
14e50e57 3014
fc66f95c 3015 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 3016 if (ret)
bdb3289f 3017 goto out_kmem_cache;
bdb3289f 3018
c3426b47
DM
3019 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3020 if (ret)
e8803b6c 3021 goto out_dst_entries;
2a0c451a 3022
7e52b33b
DM
3023 ret = register_pernet_subsys(&ip6_route_net_ops);
3024 if (ret)
3025 goto out_register_inetpeer;
c3426b47 3026
5dc121e9
AE
3027 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3028
8ed67789
DL
3029 /* Registering of the loopback is done before this portion of code,
3030 * the loopback reference in rt6_info will not be taken, do it
3031 * manually for init_net */
d8d1f30b 3032 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3033 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3034 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3035 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 3036 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 3037 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3038 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3039 #endif
e8803b6c 3040 ret = fib6_init();
433d49c3 3041 if (ret)
8ed67789 3042 goto out_register_subsys;
433d49c3 3043
433d49c3
DL
3044 ret = xfrm6_init();
3045 if (ret)
e8803b6c 3046 goto out_fib6_init;
c35b7e72 3047
433d49c3
DL
3048 ret = fib6_rules_init();
3049 if (ret)
3050 goto xfrm6_init;
7e5449c2 3051
d189634e
TG
3052 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3053 if (ret)
3054 goto fib6_rules_init;
3055
433d49c3 3056 ret = -ENOBUFS;
c7ac8679
GR
3057 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3058 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3059 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
d189634e 3060 goto out_register_late_subsys;
c127ea2c 3061
8ed67789 3062 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 3063 if (ret)
d189634e 3064 goto out_register_late_subsys;
8ed67789 3065
433d49c3
DL
3066out:
3067 return ret;
3068
d189634e
TG
3069out_register_late_subsys:
3070 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 3071fib6_rules_init:
433d49c3
DL
3072 fib6_rules_cleanup();
3073xfrm6_init:
433d49c3 3074 xfrm6_fini();
2a0c451a
TG
3075out_fib6_init:
3076 fib6_gc_cleanup();
8ed67789
DL
3077out_register_subsys:
3078 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
3079out_register_inetpeer:
3080 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
3081out_dst_entries:
3082 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 3083out_kmem_cache:
f2fc6a54 3084 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 3085 goto out;
1da177e4
LT
3086}
3087
3088void ip6_route_cleanup(void)
3089{
8ed67789 3090 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 3091 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 3092 fib6_rules_cleanup();
1da177e4 3093 xfrm6_fini();
1da177e4 3094 fib6_gc_cleanup();
c3426b47 3095 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 3096 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3097 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3098 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3099}
This page took 1.132111 seconds and 5 git commands to generate.