net: IPv6 fib lookup tracepoint
[deliverable/linux.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
457c4cbc 47#include <net/net_namespace.h>
1da177e4
LT
48#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
904af04d 57#include <net/dst_metadata.h>
1da177e4 58#include <net/xfrm.h>
8d71740c 59#include <net/netevent.h>
21713ebc 60#include <net/netlink.h>
51ebd318 61#include <net/nexthop.h>
19e42e45 62#include <net/lwtunnel.h>
904af04d 63#include <net/ip_tunnels.h>
ca254490 64#include <net/l3mdev.h>
b811580d 65#include <trace/events/fib6.h>
1da177e4
LT
66
67#include <asm/uaccess.h>
68
69#ifdef CONFIG_SYSCTL
70#include <linux/sysctl.h>
71#endif
72
/*
 * Result codes produced by rt6_check_neigh() and propagated through
 * rt6_score_route().  All failure codes are negative so callers can test
 * "< 0"; find_match() skips a route on RT6_NUD_FAIL_HARD and turns
 * RT6_NUD_FAIL_DO_RR into the lowest valid score plus a round-robin
 * request.  RT6_NUD_FAIL_PROBE is only assigned when
 * CONFIG_IPV6_ROUTER_PREF is enabled and the neighbour is in NUD_FAILED.
 */
afc154e9 73enum rt6_nud_state {
7e980569
JB
74 RT6_NUD_FAIL_HARD = -3,
75 RT6_NUD_FAIL_PROBE = -2,
76 RT6_NUD_FAIL_DO_RR = -1,
afc154e9
HFS
77 RT6_NUD_SUCCEED = 1
78};
79
83a09abd 80static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
1da177e4 81static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 82static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 83static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
84static struct dst_entry *ip6_negative_advice(struct dst_entry *);
85static void ip6_dst_destroy(struct dst_entry *);
86static void ip6_dst_ifdown(struct dst_entry *,
87 struct net_device *dev, int how);
569d3645 88static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
89
90static int ip6_pkt_discard(struct sk_buff *skb);
ede2059d 91static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
7150aede 92static int ip6_pkt_prohibit(struct sk_buff *skb);
ede2059d 93static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1da177e4 94static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
95static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
96 struct sk_buff *skb, u32 mtu);
97static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
98 struct sk_buff *skb);
4b32b5ad 99static void rt6_dst_from_metrics_check(struct rt6_info *rt);
52bd4c0c 100static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
1da177e4 101
70ceb4f5 102#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 103static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
104 const struct in6_addr *prefix, int prefixlen,
105 const struct in6_addr *gwaddr, int ifindex,
95c96174 106 unsigned int pref);
efa2cea0 107static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
108 const struct in6_addr *prefix, int prefixlen,
109 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
110#endif
111
8d0b94af
MKL
/*
 * Per-CPU list of rt6_info entries, linked through rt6_info::rt6i_uncached.
 * @lock protects @head; the add/del/flush helpers in this file take it with
 * bottom halves disabled.
 */
112struct uncached_list {
113 spinlock_t lock;
114 struct list_head head;
115};
116
/* One uncached list per possible CPU. */
117static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
118
119static void rt6_uncached_list_add(struct rt6_info *rt)
120{
121 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
122
123 rt->dst.flags |= DST_NOCACHE;
124 rt->rt6i_uncached_list = ul;
125
126 spin_lock_bh(&ul->lock);
127 list_add_tail(&rt->rt6i_uncached, &ul->head);
128 spin_unlock_bh(&ul->lock);
129}
130
131static void rt6_uncached_list_del(struct rt6_info *rt)
132{
133 if (!list_empty(&rt->rt6i_uncached)) {
134 struct uncached_list *ul = rt->rt6i_uncached_list;
135
136 spin_lock_bh(&ul->lock);
137 list_del(&rt->rt6i_uncached);
138 spin_unlock_bh(&ul->lock);
139 }
140}
141
142static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
143{
144 struct net_device *loopback_dev = net->loopback_dev;
145 int cpu;
146
e332bc67
EB
147 if (dev == loopback_dev)
148 return;
149
8d0b94af
MKL
150 for_each_possible_cpu(cpu) {
151 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
152 struct rt6_info *rt;
153
154 spin_lock_bh(&ul->lock);
155 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
156 struct inet6_dev *rt_idev = rt->rt6i_idev;
157 struct net_device *rt_dev = rt->dst.dev;
158
e332bc67 159 if (rt_idev->dev == dev) {
8d0b94af
MKL
160 rt->rt6i_idev = in6_dev_get(loopback_dev);
161 in6_dev_put(rt_idev);
162 }
163
e332bc67 164 if (rt_dev == dev) {
8d0b94af
MKL
165 rt->dst.dev = loopback_dev;
166 dev_hold(rt->dst.dev);
167 dev_put(rt_dev);
168 }
169 }
170 spin_unlock_bh(&ul->lock);
171 }
172}
173
d52d3997
MKL
174static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
175{
176 return dst_metrics_write_ptr(rt->dst.from);
177}
178
06582540
DM
179static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
180{
4b32b5ad 181 struct rt6_info *rt = (struct rt6_info *)dst;
06582540 182
d52d3997
MKL
183 if (rt->rt6i_flags & RTF_PCPU)
184 return rt6_pcpu_cow_metrics(rt);
185 else if (rt->rt6i_flags & RTF_CACHE)
4b32b5ad
MKL
186 return NULL;
187 else
3b471175 188 return dst_cow_metrics_generic(dst, old);
06582540
DM
189}
190
f894cbf8
DM
191static inline const void *choose_neigh_daddr(struct rt6_info *rt,
192 struct sk_buff *skb,
193 const void *daddr)
39232973
DM
194{
195 struct in6_addr *p = &rt->rt6i_gateway;
196
a7563f34 197 if (!ipv6_addr_any(p))
39232973 198 return (const void *) p;
f894cbf8
DM
199 else if (skb)
200 return &ipv6_hdr(skb)->daddr;
39232973
DM
201 return daddr;
202}
203
f894cbf8
DM
204static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
205 struct sk_buff *skb,
206 const void *daddr)
d3aaeb38 207{
39232973
DM
208 struct rt6_info *rt = (struct rt6_info *) dst;
209 struct neighbour *n;
210
f894cbf8 211 daddr = choose_neigh_daddr(rt, skb, daddr);
8e022ee6 212 n = __ipv6_neigh_lookup(dst->dev, daddr);
f83c7790
DM
213 if (n)
214 return n;
215 return neigh_create(&nd_tbl, daddr, dst->dev);
216}
217
9a7ec3a9 218static struct dst_ops ip6_dst_ops_template = {
1da177e4 219 .family = AF_INET6,
1da177e4
LT
220 .gc = ip6_dst_gc,
221 .gc_thresh = 1024,
222 .check = ip6_dst_check,
0dbaee3b 223 .default_advmss = ip6_default_advmss,
ebb762f2 224 .mtu = ip6_mtu,
06582540 225 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
226 .destroy = ip6_dst_destroy,
227 .ifdown = ip6_dst_ifdown,
228 .negative_advice = ip6_negative_advice,
229 .link_failure = ip6_link_failure,
230 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 231 .redirect = rt6_do_redirect,
9f8955cc 232 .local_out = __ip6_local_out,
d3aaeb38 233 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
234};
235
ebb762f2 236static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 237{
618f9bc7
SK
238 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
239
240 return mtu ? : dst->dev->mtu;
ec831ea7
RD
241}
242
6700c270
DM
/*
 * update_pmtu hook installed in ip6_dst_blackhole_ops: intentionally empty,
 * so PMTU updates are ignored for blackhole dsts.
 */
243static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
244 struct sk_buff *skb, u32 mtu)
14e50e57
DM
245{
246}
247
6700c270
DM
/*
 * redirect hook installed in ip6_dst_blackhole_ops: intentionally empty,
 * so redirects are ignored for blackhole dsts.
 */
248static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
249 struct sk_buff *skb)
b587ee3b
DM
250{
251}
252
14e50e57
DM
253static struct dst_ops ip6_dst_blackhole_ops = {
254 .family = AF_INET6,
14e50e57
DM
255 .destroy = ip6_dst_destroy,
256 .check = ip6_dst_check,
ebb762f2 257 .mtu = ip6_blackhole_mtu,
214f45c9 258 .default_advmss = ip6_default_advmss,
14e50e57 259 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 260 .redirect = ip6_rt_blackhole_redirect,
0a1f5962 261 .cow_metrics = dst_cow_metrics_generic,
d3aaeb38 262 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
263};
264
/*
 * Read-only metrics table; every slot is zero, with the hop-limit slot
 * spelled out explicitly.
 */
62fa8a84 265static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 266 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
267};
268
fb0af4c7 269static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
270 .dst = {
271 .__refcnt = ATOMIC_INIT(1),
272 .__use = 1,
2c20cbd7 273 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 274 .error = -ENETUNREACH,
d8d1f30b
CG
275 .input = ip6_pkt_discard,
276 .output = ip6_pkt_discard_out,
1da177e4
LT
277 },
278 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 279 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
280 .rt6i_metric = ~(u32) 0,
281 .rt6i_ref = ATOMIC_INIT(1),
282};
283
101367c2
TG
284#ifdef CONFIG_IPV6_MULTIPLE_TABLES
285
fb0af4c7 286static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
287 .dst = {
288 .__refcnt = ATOMIC_INIT(1),
289 .__use = 1,
2c20cbd7 290 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 291 .error = -EACCES,
d8d1f30b
CG
292 .input = ip6_pkt_prohibit,
293 .output = ip6_pkt_prohibit_out,
101367c2
TG
294 },
295 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 296 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
297 .rt6i_metric = ~(u32) 0,
298 .rt6i_ref = ATOMIC_INIT(1),
299};
300
fb0af4c7 301static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
302 .dst = {
303 .__refcnt = ATOMIC_INIT(1),
304 .__use = 1,
2c20cbd7 305 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 306 .error = -EINVAL,
d8d1f30b 307 .input = dst_discard,
ede2059d 308 .output = dst_discard_out,
101367c2
TG
309 },
310 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 311 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
312 .rt6i_metric = ~(u32) 0,
313 .rt6i_ref = ATOMIC_INIT(1),
314};
315
316#endif
317
ebfa45f0
MKL
318static void rt6_info_init(struct rt6_info *rt)
319{
320 struct dst_entry *dst = &rt->dst;
321
322 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
323 INIT_LIST_HEAD(&rt->rt6i_siblings);
324 INIT_LIST_HEAD(&rt->rt6i_uncached);
325}
326
1da177e4 327/* allocate dst with ip6_dst_ops */
d52d3997
MKL
328static struct rt6_info *__ip6_dst_alloc(struct net *net,
329 struct net_device *dev,
ad706862 330 int flags)
1da177e4 331{
97bab73f 332 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
6f3118b5 333 0, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 334
ebfa45f0
MKL
335 if (rt)
336 rt6_info_init(rt);
8104891b 337
cf911662 338 return rt;
1da177e4
LT
339}
340
d52d3997
MKL
341static struct rt6_info *ip6_dst_alloc(struct net *net,
342 struct net_device *dev,
ad706862 343 int flags)
d52d3997 344{
ad706862 345 struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
d52d3997
MKL
346
347 if (rt) {
348 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
349 if (rt->rt6i_pcpu) {
350 int cpu;
351
352 for_each_possible_cpu(cpu) {
353 struct rt6_info **p;
354
355 p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
356 /* no one shares rt */
357 *p = NULL;
358 }
359 } else {
360 dst_destroy((struct dst_entry *)rt);
361 return NULL;
362 }
363 }
364
365 return rt;
366}
367
1da177e4
LT
368static void ip6_dst_destroy(struct dst_entry *dst)
369{
370 struct rt6_info *rt = (struct rt6_info *)dst;
ecd98837 371 struct dst_entry *from = dst->from;
8d0b94af 372 struct inet6_dev *idev;
1da177e4 373
4b32b5ad 374 dst_destroy_metrics_generic(dst);
87775312 375 free_percpu(rt->rt6i_pcpu);
8d0b94af
MKL
376 rt6_uncached_list_del(rt);
377
378 idev = rt->rt6i_idev;
38308473 379 if (idev) {
1da177e4
LT
380 rt->rt6i_idev = NULL;
381 in6_dev_put(idev);
1ab1457c 382 }
1716a961 383
ecd98837
YH
384 dst->from = NULL;
385 dst_release(from);
b3419363
DM
386}
387
1da177e4
LT
388static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
389 int how)
390{
391 struct rt6_info *rt = (struct rt6_info *)dst;
392 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 393 struct net_device *loopback_dev =
c346dca1 394 dev_net(dev)->loopback_dev;
1da177e4 395
97cac082
DM
396 if (dev != loopback_dev) {
397 if (idev && idev->dev == dev) {
398 struct inet6_dev *loopback_idev =
399 in6_dev_get(loopback_dev);
400 if (loopback_idev) {
401 rt->rt6i_idev = loopback_idev;
402 in6_dev_put(idev);
403 }
404 }
1da177e4
LT
405 }
406}
407
5973fb1e
MKL
408static bool __rt6_check_expired(const struct rt6_info *rt)
409{
410 if (rt->rt6i_flags & RTF_EXPIRES)
411 return time_after(jiffies, rt->dst.expires);
412 else
413 return false;
414}
415
a50feda5 416static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 417{
1716a961
G
418 if (rt->rt6i_flags & RTF_EXPIRES) {
419 if (time_after(jiffies, rt->dst.expires))
a50feda5 420 return true;
1716a961 421 } else if (rt->dst.from) {
3fd91fb3 422 return rt6_check_expired((struct rt6_info *) rt->dst.from);
1716a961 423 }
a50feda5 424 return false;
1da177e4
LT
425}
426
51ebd318
ND
427/* Multipath route selection:
428 * Hash based function using packet header and flowlabel.
429 * Adapted from fib_info_hashfn()
430 */
431static int rt6_info_hash_nhsfn(unsigned int candidate_count,
432 const struct flowi6 *fl6)
433{
644d0e65 434 return get_hash_from_flowi6(fl6) % candidate_count;
51ebd318
ND
435}
436
437static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
52bd4c0c
ND
438 struct flowi6 *fl6, int oif,
439 int strict)
51ebd318
ND
440{
441 struct rt6_info *sibling, *next_sibling;
442 int route_choosen;
443
444 route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
445 /* Don't change the route, if route_choosen == 0
446 * (siblings does not include ourself)
447 */
448 if (route_choosen)
449 list_for_each_entry_safe(sibling, next_sibling,
450 &match->rt6i_siblings, rt6i_siblings) {
451 route_choosen--;
452 if (route_choosen == 0) {
52bd4c0c
ND
453 if (rt6_score_route(sibling, oif, strict) < 0)
454 break;
51ebd318
ND
455 match = sibling;
456 break;
457 }
458 }
459 return match;
460}
461
1da177e4 462/*
c71099ac 463 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
464 */
465
8ed67789
DL
466static inline struct rt6_info *rt6_device_match(struct net *net,
467 struct rt6_info *rt,
b71d1d42 468 const struct in6_addr *saddr,
1da177e4 469 int oif,
d420895e 470 int flags)
1da177e4
LT
471{
472 struct rt6_info *local = NULL;
473 struct rt6_info *sprt;
474
dd3abc4e
YH
475 if (!oif && ipv6_addr_any(saddr))
476 goto out;
477
d8d1f30b 478 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 479 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
480
481 if (oif) {
1da177e4
LT
482 if (dev->ifindex == oif)
483 return sprt;
484 if (dev->flags & IFF_LOOPBACK) {
38308473 485 if (!sprt->rt6i_idev ||
1da177e4 486 sprt->rt6i_idev->dev->ifindex != oif) {
17fb0b2b 487 if (flags & RT6_LOOKUP_F_IFACE)
1da177e4 488 continue;
17fb0b2b
DA
489 if (local &&
490 local->rt6i_idev->dev->ifindex == oif)
1da177e4
LT
491 continue;
492 }
493 local = sprt;
494 }
dd3abc4e
YH
495 } else {
496 if (ipv6_chk_addr(net, saddr, dev,
497 flags & RT6_LOOKUP_F_IFACE))
498 return sprt;
1da177e4 499 }
dd3abc4e 500 }
1da177e4 501
dd3abc4e 502 if (oif) {
1da177e4
LT
503 if (local)
504 return local;
505
d420895e 506 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 507 return net->ipv6.ip6_null_entry;
1da177e4 508 }
dd3abc4e 509out:
1da177e4
LT
510 return rt;
511}
512
27097255 513#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
/*
 * Deferred router-probe request: rt6_probe() fills one in and schedules
 * it; rt6_probe_deferred() sends the solicitation for @target on @dev,
 * drops the device reference and frees the structure.
 */
514struct __rt6_probe_work {
515 struct work_struct work;
516 struct in6_addr target;
517 struct net_device *dev;
518};
519
520static void rt6_probe_deferred(struct work_struct *w)
521{
522 struct in6_addr mcaddr;
523 struct __rt6_probe_work *work =
524 container_of(w, struct __rt6_probe_work, work);
525
526 addrconf_addr_solict_mult(&work->target, &mcaddr);
38cf595b 527 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, NULL);
c2f17e82 528 dev_put(work->dev);
662f5533 529 kfree(work);
c2f17e82
HFS
530}
531
27097255
YH
532static void rt6_probe(struct rt6_info *rt)
533{
990edb42 534 struct __rt6_probe_work *work;
f2c31e32 535 struct neighbour *neigh;
27097255
YH
536 /*
537 * Okay, this does not seem to be appropriate
538 * for now, however, we need to check if it
539 * is really so; aka Router Reachability Probing.
540 *
541 * Router Reachability Probe MUST be rate-limited
542 * to no more than one per minute.
543 */
2152caea 544 if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
7ff74a59 545 return;
2152caea
YH
546 rcu_read_lock_bh();
547 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
548 if (neigh) {
8d6c31bf
MKL
549 if (neigh->nud_state & NUD_VALID)
550 goto out;
551
990edb42 552 work = NULL;
2152caea 553 write_lock(&neigh->lock);
990edb42
MKL
554 if (!(neigh->nud_state & NUD_VALID) &&
555 time_after(jiffies,
556 neigh->updated +
557 rt->rt6i_idev->cnf.rtr_probe_interval)) {
558 work = kmalloc(sizeof(*work), GFP_ATOMIC);
559 if (work)
560 __neigh_set_probe_once(neigh);
c2f17e82 561 }
2152caea 562 write_unlock(&neigh->lock);
990edb42
MKL
563 } else {
564 work = kmalloc(sizeof(*work), GFP_ATOMIC);
f2c31e32 565 }
990edb42
MKL
566
567 if (work) {
568 INIT_WORK(&work->work, rt6_probe_deferred);
569 work->target = rt->rt6i_gateway;
570 dev_hold(rt->dst.dev);
571 work->dev = rt->dst.dev;
572 schedule_work(&work->work);
573 }
574
8d6c31bf 575out:
2152caea 576 rcu_read_unlock_bh();
27097255
YH
577}
578#else
/* Stub used when CONFIG_IPV6_ROUTER_PREF is not set: probing is a no-op. */
579static inline void rt6_probe(struct rt6_info *rt)
580{
27097255
YH
581}
582#endif
583
1da177e4 584/*
554cfb7e 585 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 586 */
b6f99a21 587static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 588{
d1918542 589 struct net_device *dev = rt->dst.dev;
161980f4 590 if (!oif || dev->ifindex == oif)
554cfb7e 591 return 2;
161980f4
DM
592 if ((dev->flags & IFF_LOOPBACK) &&
593 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
594 return 1;
595 return 0;
554cfb7e 596}
1da177e4 597
afc154e9 598static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
1da177e4 599{
f2c31e32 600 struct neighbour *neigh;
afc154e9 601 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
f2c31e32 602
4d0c5911
YH
603 if (rt->rt6i_flags & RTF_NONEXTHOP ||
604 !(rt->rt6i_flags & RTF_GATEWAY))
afc154e9 605 return RT6_NUD_SUCCEED;
145a3621
YH
606
607 rcu_read_lock_bh();
608 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
609 if (neigh) {
610 read_lock(&neigh->lock);
554cfb7e 611 if (neigh->nud_state & NUD_VALID)
afc154e9 612 ret = RT6_NUD_SUCCEED;
398bcbeb 613#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 614 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 615 ret = RT6_NUD_SUCCEED;
7e980569
JB
616 else
617 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 618#endif
145a3621 619 read_unlock(&neigh->lock);
afc154e9
HFS
620 } else {
621 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 622 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 623 }
145a3621
YH
624 rcu_read_unlock_bh();
625
a5a81f0b 626 return ret;
1da177e4
LT
627}
628
554cfb7e
YH
629static int rt6_score_route(struct rt6_info *rt, int oif,
630 int strict)
1da177e4 631{
a5a81f0b 632 int m;
1ab1457c 633
4d0c5911 634 m = rt6_check_dev(rt, oif);
77d16f45 635 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 636 return RT6_NUD_FAIL_HARD;
ebacaaa0
YH
637#ifdef CONFIG_IPV6_ROUTER_PREF
638 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
639#endif
afc154e9
HFS
640 if (strict & RT6_LOOKUP_F_REACHABLE) {
641 int n = rt6_check_neigh(rt);
642 if (n < 0)
643 return n;
644 }
554cfb7e
YH
645 return m;
646}
647
f11e6659 648static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
afc154e9
HFS
649 int *mpri, struct rt6_info *match,
650 bool *do_rr)
554cfb7e 651{
f11e6659 652 int m;
afc154e9 653 bool match_do_rr = false;
35103d11
AG
654 struct inet6_dev *idev = rt->rt6i_idev;
655 struct net_device *dev = rt->dst.dev;
656
657 if (dev && !netif_carrier_ok(dev) &&
658 idev->cnf.ignore_routes_with_linkdown)
659 goto out;
f11e6659
DM
660
661 if (rt6_check_expired(rt))
662 goto out;
663
664 m = rt6_score_route(rt, oif, strict);
7e980569 665 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
666 match_do_rr = true;
667 m = 0; /* lowest valid score */
7e980569 668 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 669 goto out;
afc154e9
HFS
670 }
671
672 if (strict & RT6_LOOKUP_F_REACHABLE)
673 rt6_probe(rt);
f11e6659 674
7e980569 675 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 676 if (m > *mpri) {
afc154e9 677 *do_rr = match_do_rr;
f11e6659
DM
678 *mpri = m;
679 match = rt;
f11e6659 680 }
f11e6659
DM
681out:
682 return match;
683}
684
685static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
686 struct rt6_info *rr_head,
afc154e9
HFS
687 u32 metric, int oif, int strict,
688 bool *do_rr)
f11e6659 689{
9fbdcfaf 690 struct rt6_info *rt, *match, *cont;
554cfb7e 691 int mpri = -1;
1da177e4 692
f11e6659 693 match = NULL;
9fbdcfaf
SK
694 cont = NULL;
695 for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
696 if (rt->rt6i_metric != metric) {
697 cont = rt;
698 break;
699 }
700
701 match = find_match(rt, oif, strict, &mpri, match, do_rr);
702 }
703
704 for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
705 if (rt->rt6i_metric != metric) {
706 cont = rt;
707 break;
708 }
709
afc154e9 710 match = find_match(rt, oif, strict, &mpri, match, do_rr);
9fbdcfaf
SK
711 }
712
713 if (match || !cont)
714 return match;
715
716 for (rt = cont; rt; rt = rt->dst.rt6_next)
afc154e9 717 match = find_match(rt, oif, strict, &mpri, match, do_rr);
1da177e4 718
f11e6659
DM
719 return match;
720}
1da177e4 721
f11e6659
DM
722static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
723{
724 struct rt6_info *match, *rt0;
8ed67789 725 struct net *net;
afc154e9 726 bool do_rr = false;
1da177e4 727
f11e6659
DM
728 rt0 = fn->rr_ptr;
729 if (!rt0)
730 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 731
afc154e9
HFS
732 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
733 &do_rr);
1da177e4 734
afc154e9 735 if (do_rr) {
d8d1f30b 736 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 737
554cfb7e 738 /* no entries matched; do round-robin */
f11e6659
DM
739 if (!next || next->rt6i_metric != rt0->rt6i_metric)
740 next = fn->leaf;
741
742 if (next != rt0)
743 fn->rr_ptr = next;
1da177e4 744 }
1da177e4 745
d1918542 746 net = dev_net(rt0->dst.dev);
a02cec21 747 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
748}
749
8b9df265
MKL
750static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
751{
752 return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
753}
754
70ceb4f5
YH
755#ifdef CONFIG_IPV6_ROUTE_INFO
756int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 757 const struct in6_addr *gwaddr)
70ceb4f5 758{
c346dca1 759 struct net *net = dev_net(dev);
70ceb4f5
YH
760 struct route_info *rinfo = (struct route_info *) opt;
761 struct in6_addr prefix_buf, *prefix;
762 unsigned int pref;
4bed72e4 763 unsigned long lifetime;
70ceb4f5
YH
764 struct rt6_info *rt;
765
766 if (len < sizeof(struct route_info)) {
767 return -EINVAL;
768 }
769
770 /* Sanity check for prefix_len and length */
771 if (rinfo->length > 3) {
772 return -EINVAL;
773 } else if (rinfo->prefix_len > 128) {
774 return -EINVAL;
775 } else if (rinfo->prefix_len > 64) {
776 if (rinfo->length < 2) {
777 return -EINVAL;
778 }
779 } else if (rinfo->prefix_len > 0) {
780 if (rinfo->length < 1) {
781 return -EINVAL;
782 }
783 }
784
785 pref = rinfo->route_pref;
786 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 787 return -EINVAL;
70ceb4f5 788
4bed72e4 789 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
790
791 if (rinfo->length == 3)
792 prefix = (struct in6_addr *)rinfo->prefix;
793 else {
794 /* this function is safe */
795 ipv6_addr_prefix(&prefix_buf,
796 (struct in6_addr *)rinfo->prefix,
797 rinfo->prefix_len);
798 prefix = &prefix_buf;
799 }
800
f104a567
DJ
801 if (rinfo->prefix_len == 0)
802 rt = rt6_get_dflt_router(gwaddr, dev);
803 else
804 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
805 gwaddr, dev->ifindex);
70ceb4f5
YH
806
807 if (rt && !lifetime) {
e0a1ad73 808 ip6_del_rt(rt);
70ceb4f5
YH
809 rt = NULL;
810 }
811
812 if (!rt && lifetime)
efa2cea0 813 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
70ceb4f5
YH
814 pref);
815 else if (rt)
816 rt->rt6i_flags = RTF_ROUTEINFO |
817 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
818
819 if (rt) {
1716a961
G
820 if (!addrconf_finite_timeout(lifetime))
821 rt6_clean_expires(rt);
822 else
823 rt6_set_expires(rt, jiffies + HZ * lifetime);
824
94e187c0 825 ip6_rt_put(rt);
70ceb4f5
YH
826 }
827 return 0;
828}
829#endif
830
a3c00e46
MKL
831static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
832 struct in6_addr *saddr)
833{
834 struct fib6_node *pn;
835 while (1) {
836 if (fn->fn_flags & RTN_TL_ROOT)
837 return NULL;
838 pn = fn->parent;
839 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
840 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
841 else
842 fn = pn;
843 if (fn->fn_flags & RTN_RTINFO)
844 return fn;
845 }
846}
c71099ac 847
8ed67789
DL
848static struct rt6_info *ip6_pol_route_lookup(struct net *net,
849 struct fib6_table *table,
4c9483b2 850 struct flowi6 *fl6, int flags)
1da177e4
LT
851{
852 struct fib6_node *fn;
853 struct rt6_info *rt;
854
c71099ac 855 read_lock_bh(&table->tb6_lock);
4c9483b2 856 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
857restart:
858 rt = fn->leaf;
4c9483b2 859 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
51ebd318 860 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
52bd4c0c 861 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
a3c00e46
MKL
862 if (rt == net->ipv6.ip6_null_entry) {
863 fn = fib6_backtrack(fn, &fl6->saddr);
864 if (fn)
865 goto restart;
866 }
d8d1f30b 867 dst_use(&rt->dst, jiffies);
c71099ac 868 read_unlock_bh(&table->tb6_lock);
b811580d
DA
869
870 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
871
c71099ac
TG
872 return rt;
873
874}
875
67ba4152 876struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
ea6e574e
FW
877 int flags)
878{
879 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
880}
881EXPORT_SYMBOL_GPL(ip6_route_lookup);
882
9acd9f3a
YH
883struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
884 const struct in6_addr *saddr, int oif, int strict)
c71099ac 885{
4c9483b2
DM
886 struct flowi6 fl6 = {
887 .flowi6_oif = oif,
888 .daddr = *daddr,
c71099ac
TG
889 };
890 struct dst_entry *dst;
77d16f45 891 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 892
adaa70bb 893 if (saddr) {
4c9483b2 894 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
895 flags |= RT6_LOOKUP_F_HAS_SADDR;
896 }
897
4c9483b2 898 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
899 if (dst->error == 0)
900 return (struct rt6_info *) dst;
901
902 dst_release(dst);
903
1da177e4
LT
904 return NULL;
905}
7159039a
YH
906EXPORT_SYMBOL(rt6_lookup);
907
c71099ac 908/* ip6_ins_rt is called with FREE table->tb6_lock.
1da177e4
LT
909 It takes a new route entry; if the addition fails for any reason, the
910 route is freed. In any case, if the caller does not hold a reference to
911 it, it may be destroyed.
912 */
913
e5fd387a 914static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
e715b6d3 915 struct mx6_config *mxc)
1da177e4
LT
916{
917 int err;
c71099ac 918 struct fib6_table *table;
1da177e4 919
c71099ac
TG
920 table = rt->rt6i_table;
921 write_lock_bh(&table->tb6_lock);
e715b6d3 922 err = fib6_add(&table->tb6_root, rt, info, mxc);
c71099ac 923 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
924
925 return err;
926}
927
40e22e8f
TG
928int ip6_ins_rt(struct rt6_info *rt)
929{
e715b6d3
FW
930 struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
931 struct mx6_config mxc = { .mx = NULL, };
932
933 return __ip6_ins_rt(rt, &info, &mxc);
40e22e8f
TG
934}
935
8b9df265
MKL
936static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
937 const struct in6_addr *daddr,
938 const struct in6_addr *saddr)
1da177e4 939{
1da177e4
LT
940 struct rt6_info *rt;
941
942 /*
943 * Clone the route.
944 */
945
d52d3997 946 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
83a09abd 947 ort = (struct rt6_info *)ort->dst.from;
1da177e4 948
ad706862 949 rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
83a09abd
MKL
950
951 if (!rt)
952 return NULL;
953
954 ip6_rt_copy_init(rt, ort);
955 rt->rt6i_flags |= RTF_CACHE;
956 rt->rt6i_metric = 0;
957 rt->dst.flags |= DST_HOST;
958 rt->rt6i_dst.addr = *daddr;
959 rt->rt6i_dst.plen = 128;
1da177e4 960
83a09abd
MKL
961 if (!rt6_is_gw_or_nonexthop(ort)) {
962 if (ort->rt6i_dst.plen != 128 &&
963 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
964 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 965#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
966 if (rt->rt6i_src.plen && saddr) {
967 rt->rt6i_src.addr = *saddr;
968 rt->rt6i_src.plen = 128;
8b9df265 969 }
83a09abd 970#endif
95a9a5ba 971 }
1da177e4 972
95a9a5ba
YH
973 return rt;
974}
1da177e4 975
d52d3997
MKL
976static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
977{
978 struct rt6_info *pcpu_rt;
979
980 pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
ad706862 981 rt->dst.dev, rt->dst.flags);
d52d3997
MKL
982
983 if (!pcpu_rt)
984 return NULL;
985 ip6_rt_copy_init(pcpu_rt, rt);
986 pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
987 pcpu_rt->rt6i_flags |= RTF_PCPU;
988 return pcpu_rt;
989}
990
991/* It should be called with read_lock_bh(&tb6_lock) acquired */
992static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
993{
a73e4195 994 struct rt6_info *pcpu_rt, **p;
d52d3997
MKL
995
996 p = this_cpu_ptr(rt->rt6i_pcpu);
997 pcpu_rt = *p;
998
a73e4195
MKL
999 if (pcpu_rt) {
1000 dst_hold(&pcpu_rt->dst);
1001 rt6_dst_from_metrics_check(pcpu_rt);
1002 }
1003 return pcpu_rt;
1004}
1005
1006static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1007{
9c7370a1 1008 struct fib6_table *table = rt->rt6i_table;
a73e4195 1009 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997
MKL
1010
1011 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1012 if (!pcpu_rt) {
1013 struct net *net = dev_net(rt->dst.dev);
1014
9c7370a1
MKL
1015 dst_hold(&net->ipv6.ip6_null_entry->dst);
1016 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1017 }
1018
9c7370a1
MKL
1019 read_lock_bh(&table->tb6_lock);
1020 if (rt->rt6i_pcpu) {
1021 p = this_cpu_ptr(rt->rt6i_pcpu);
1022 prev = cmpxchg(p, NULL, pcpu_rt);
1023 if (prev) {
1024 /* If someone did it before us, return prev instead */
1025 dst_destroy(&pcpu_rt->dst);
1026 pcpu_rt = prev;
1027 }
1028 } else {
1029 /* rt has been removed from the fib6 tree
1030 * before we have a chance to acquire the read_lock.
1031 * In this case, don't brother to create a pcpu rt
1032 * since rt is going away anyway. The next
1033 * dst_check() will trigger a re-lookup.
1034 */
d52d3997 1035 dst_destroy(&pcpu_rt->dst);
9c7370a1 1036 pcpu_rt = rt;
d52d3997 1037 }
d52d3997
MKL
1038 dst_hold(&pcpu_rt->dst);
1039 rt6_dst_from_metrics_check(pcpu_rt);
9c7370a1 1040 read_unlock_bh(&table->tb6_lock);
d52d3997
MKL
1041 return pcpu_rt;
1042}
1043
8ed67789 1044static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 1045 struct flowi6 *fl6, int flags)
1da177e4 1046{
367efcb9 1047 struct fib6_node *fn, *saved_fn;
45e4fd26 1048 struct rt6_info *rt;
c71099ac 1049 int strict = 0;
1da177e4 1050
77d16f45 1051 strict |= flags & RT6_LOOKUP_F_IFACE;
367efcb9
MKL
1052 if (net->ipv6.devconf_all->forwarding == 0)
1053 strict |= RT6_LOOKUP_F_REACHABLE;
1da177e4 1054
c71099ac 1055 read_lock_bh(&table->tb6_lock);
1da177e4 1056
4c9483b2 1057 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1058 saved_fn = fn;
1da177e4 1059
ca254490
DA
1060 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1061 oif = 0;
1062
a3c00e46 1063redo_rt6_select:
367efcb9 1064 rt = rt6_select(fn, oif, strict);
52bd4c0c 1065 if (rt->rt6i_nsiblings)
367efcb9 1066 rt = rt6_multipath_select(rt, fl6, oif, strict);
a3c00e46
MKL
1067 if (rt == net->ipv6.ip6_null_entry) {
1068 fn = fib6_backtrack(fn, &fl6->saddr);
1069 if (fn)
1070 goto redo_rt6_select;
367efcb9
MKL
1071 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1072 /* also consider unreachable route */
1073 strict &= ~RT6_LOOKUP_F_REACHABLE;
1074 fn = saved_fn;
1075 goto redo_rt6_select;
367efcb9 1076 }
a3c00e46
MKL
1077 }
1078
fb9de91e 1079
3da59bd9 1080 if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
d52d3997
MKL
1081 dst_use(&rt->dst, jiffies);
1082 read_unlock_bh(&table->tb6_lock);
1083
1084 rt6_dst_from_metrics_check(rt);
b811580d
DA
1085
1086 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
d52d3997 1087 return rt;
3da59bd9
MKL
1088 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1089 !(rt->rt6i_flags & RTF_GATEWAY))) {
1090 /* Create a RTF_CACHE clone which will not be
1091 * owned by the fib6 tree. It is for the special case where
1092 * the daddr in the skb during the neighbor look-up is different
1093 * from the fl6->daddr used to look-up route here.
1094 */
1095
1096 struct rt6_info *uncached_rt;
1097
d52d3997
MKL
1098 dst_use(&rt->dst, jiffies);
1099 read_unlock_bh(&table->tb6_lock);
1100
3da59bd9
MKL
1101 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1102 dst_release(&rt->dst);
c71099ac 1103
3da59bd9 1104 if (uncached_rt)
8d0b94af 1105 rt6_uncached_list_add(uncached_rt);
3da59bd9
MKL
1106 else
1107 uncached_rt = net->ipv6.ip6_null_entry;
d52d3997 1108
3da59bd9 1109 dst_hold(&uncached_rt->dst);
b811580d
DA
1110
1111 trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
3da59bd9 1112 return uncached_rt;
3da59bd9 1113
d52d3997
MKL
1114 } else {
1115 /* Get a percpu copy */
1116
1117 struct rt6_info *pcpu_rt;
1118
1119 rt->dst.lastuse = jiffies;
1120 rt->dst.__use++;
1121 pcpu_rt = rt6_get_pcpu_route(rt);
d52d3997 1122
9c7370a1
MKL
1123 if (pcpu_rt) {
1124 read_unlock_bh(&table->tb6_lock);
1125 } else {
1126 /* We have to do the read_unlock first
1127 * because rt6_make_pcpu_route() may trigger
1128 * ip6_dst_gc() which will take the write_lock.
1129 */
1130 dst_hold(&rt->dst);
1131 read_unlock_bh(&table->tb6_lock);
a73e4195 1132 pcpu_rt = rt6_make_pcpu_route(rt);
9c7370a1
MKL
1133 dst_release(&rt->dst);
1134 }
d52d3997 1135
b811580d 1136 trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
d52d3997 1137 return pcpu_rt;
9c7370a1 1138
d52d3997 1139 }
1da177e4
LT
1140}
1141
8ed67789 1142static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 1143 struct flowi6 *fl6, int flags)
4acad72d 1144{
4c9483b2 1145 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
1146}
1147
72331bc0
SL
1148static struct dst_entry *ip6_route_input_lookup(struct net *net,
1149 struct net_device *dev,
1150 struct flowi6 *fl6, int flags)
1151{
1152 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1153 flags |= RT6_LOOKUP_F_IFACE;
1154
1155 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1156}
1157
c71099ac
TG
1158void ip6_route_input(struct sk_buff *skb)
1159{
b71d1d42 1160 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 1161 struct net *net = dev_net(skb->dev);
adaa70bb 1162 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 1163 struct ip_tunnel_info *tun_info;
4c9483b2 1164 struct flowi6 fl6 = {
ca254490 1165 .flowi6_iif = l3mdev_fib_oif(skb->dev),
4c9483b2
DM
1166 .daddr = iph->daddr,
1167 .saddr = iph->saddr,
6502ca52 1168 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
1169 .flowi6_mark = skb->mark,
1170 .flowi6_proto = iph->nexthdr,
c71099ac 1171 };
adaa70bb 1172
904af04d 1173 tun_info = skb_tunnel_info(skb);
46fa062a 1174 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 1175 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
06e9d040 1176 skb_dst_drop(skb);
72331bc0 1177 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
1178}
1179
8ed67789 1180static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 1181 struct flowi6 *fl6, int flags)
1da177e4 1182{
4c9483b2 1183 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
1184}
1185
67ba4152 1186struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 1187 struct flowi6 *fl6)
c71099ac 1188{
ca254490 1189 struct dst_entry *dst;
c71099ac 1190 int flags = 0;
d46a9d67 1191 bool any_src;
c71099ac 1192
ca254490
DA
1193 dst = l3mdev_rt6_dst_by_oif(net, fl6);
1194 if (dst)
1195 return dst;
1196
1fb9489b 1197 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 1198
d46a9d67 1199 any_src = ipv6_addr_any(&fl6->saddr);
741a11d9 1200 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
d46a9d67 1201 (fl6->flowi6_oif && any_src))
77d16f45 1202 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 1203
d46a9d67 1204 if (!any_src)
adaa70bb 1205 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
1206 else if (sk)
1207 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 1208
4c9483b2 1209 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4 1210}
7159039a 1211EXPORT_SYMBOL(ip6_route_output);
1da177e4 1212
2774c131 1213struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 1214{
5c1e6aa3 1215 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
1216 struct dst_entry *new = NULL;
1217
f5b0a874 1218 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
14e50e57 1219 if (rt) {
0a1f5962 1220 rt6_info_init(rt);
8104891b 1221
0a1f5962 1222 new = &rt->dst;
14e50e57 1223 new->__use = 1;
352e512c 1224 new->input = dst_discard;
ede2059d 1225 new->output = dst_discard_out;
14e50e57 1226
0a1f5962 1227 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
1228 rt->rt6i_idev = ort->rt6i_idev;
1229 if (rt->rt6i_idev)
1230 in6_dev_hold(rt->rt6i_idev);
14e50e57 1231
4e3fd7a0 1232 rt->rt6i_gateway = ort->rt6i_gateway;
0a1f5962 1233 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
14e50e57
DM
1234 rt->rt6i_metric = 0;
1235
1236 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1237#ifdef CONFIG_IPV6_SUBTREES
1238 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1239#endif
1240
1241 dst_free(new);
1242 }
1243
69ead7af
DM
1244 dst_release(dst_orig);
1245 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 1246}
14e50e57 1247
1da177e4
LT
1248/*
1249 * Destination cache support functions
1250 */
1251
4b32b5ad
MKL
1252static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1253{
1254 if (rt->dst.from &&
1255 dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1256 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1257}
1258
3da59bd9
MKL
1259static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1260{
1261 if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1262 return NULL;
1263
1264 if (rt6_check_expired(rt))
1265 return NULL;
1266
1267 return &rt->dst;
1268}
1269
1270static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1271{
5973fb1e
MKL
1272 if (!__rt6_check_expired(rt) &&
1273 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
3da59bd9
MKL
1274 rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1275 return &rt->dst;
1276 else
1277 return NULL;
1278}
1279
1da177e4
LT
1280static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1281{
1282 struct rt6_info *rt;
1283
1284 rt = (struct rt6_info *) dst;
1285
6f3118b5
ND
1286 /* All IPV6 dsts are created with ->obsolete set to the value
1287 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1288 * into this function always.
1289 */
e3bc10bd 1290
4b32b5ad
MKL
1291 rt6_dst_from_metrics_check(rt);
1292
02bcf4e0
MKL
1293 if (rt->rt6i_flags & RTF_PCPU ||
1294 (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
3da59bd9
MKL
1295 return rt6_dst_from_check(rt, cookie);
1296 else
1297 return rt6_check(rt, cookie);
1da177e4
LT
1298}
1299
1300static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1301{
1302 struct rt6_info *rt = (struct rt6_info *) dst;
1303
1304 if (rt) {
54c1a859
YH
1305 if (rt->rt6i_flags & RTF_CACHE) {
1306 if (rt6_check_expired(rt)) {
1307 ip6_del_rt(rt);
1308 dst = NULL;
1309 }
1310 } else {
1da177e4 1311 dst_release(dst);
54c1a859
YH
1312 dst = NULL;
1313 }
1da177e4 1314 }
54c1a859 1315 return dst;
1da177e4
LT
1316}
1317
1318static void ip6_link_failure(struct sk_buff *skb)
1319{
1320 struct rt6_info *rt;
1321
3ffe533c 1322 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1323
adf30907 1324 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1325 if (rt) {
1eb4f758
HFS
1326 if (rt->rt6i_flags & RTF_CACHE) {
1327 dst_hold(&rt->dst);
8e3d5be7 1328 ip6_del_rt(rt);
1eb4f758 1329 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1da177e4 1330 rt->rt6i_node->fn_sernum = -1;
1eb4f758 1331 }
1da177e4
LT
1332 }
1333}
1334
45e4fd26
MKL
1335static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1336{
1337 struct net *net = dev_net(rt->dst.dev);
1338
1339 rt->rt6i_flags |= RTF_MODIFIED;
1340 rt->rt6i_pmtu = mtu;
1341 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1342}
1343
0d3f6d29
MKL
1344static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1345{
1346 return !(rt->rt6i_flags & RTF_CACHE) &&
1347 (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
1348}
1349
45e4fd26
MKL
1350static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1351 const struct ipv6hdr *iph, u32 mtu)
1da177e4 1352{
67ba4152 1353 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 1354
45e4fd26
MKL
1355 if (rt6->rt6i_flags & RTF_LOCAL)
1356 return;
81aded24 1357
45e4fd26
MKL
1358 dst_confirm(dst);
1359 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1360 if (mtu >= dst_mtu(dst))
1361 return;
9d289715 1362
0d3f6d29 1363 if (!rt6_cache_allowed_for_pmtu(rt6)) {
45e4fd26
MKL
1364 rt6_do_update_pmtu(rt6, mtu);
1365 } else {
1366 const struct in6_addr *daddr, *saddr;
1367 struct rt6_info *nrt6;
1368
1369 if (iph) {
1370 daddr = &iph->daddr;
1371 saddr = &iph->saddr;
1372 } else if (sk) {
1373 daddr = &sk->sk_v6_daddr;
1374 saddr = &inet6_sk(sk)->saddr;
1375 } else {
1376 return;
1377 }
1378 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1379 if (nrt6) {
1380 rt6_do_update_pmtu(nrt6, mtu);
1381
1382 /* ip6_ins_rt(nrt6) will bump the
1383 * rt6->rt6i_node->fn_sernum
1384 * which will fail the next rt6_check() and
1385 * invalidate the sk->sk_dst_cache.
1386 */
1387 ip6_ins_rt(nrt6);
1388 }
1da177e4
LT
1389 }
1390}
1391
45e4fd26
MKL
1392static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1393 struct sk_buff *skb, u32 mtu)
1394{
1395 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1396}
1397
42ae66c8
DM
1398void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1399 int oif, u32 mark)
81aded24
DM
1400{
1401 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1402 struct dst_entry *dst;
1403 struct flowi6 fl6;
1404
1405 memset(&fl6, 0, sizeof(fl6));
1406 fl6.flowi6_oif = oif;
1b3c61dc 1407 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
81aded24
DM
1408 fl6.daddr = iph->daddr;
1409 fl6.saddr = iph->saddr;
6502ca52 1410 fl6.flowlabel = ip6_flowinfo(iph);
81aded24
DM
1411
1412 dst = ip6_route_output(net, NULL, &fl6);
1413 if (!dst->error)
45e4fd26 1414 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
1415 dst_release(dst);
1416}
1417EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1418
1419void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1420{
1421 ip6_update_pmtu(skb, sock_net(sk), mtu,
1422 sk->sk_bound_dev_if, sk->sk_mark);
1423}
1424EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1425
b55b76b2
DJ
1426/* Handle redirects */
1427struct ip6rd_flowi {
1428 struct flowi6 fl6;
1429 struct in6_addr gateway;
1430};
1431
1432static struct rt6_info *__ip6_route_redirect(struct net *net,
1433 struct fib6_table *table,
1434 struct flowi6 *fl6,
1435 int flags)
1436{
1437 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1438 struct rt6_info *rt;
1439 struct fib6_node *fn;
1440
1441 /* Get the "current" route for this destination and
1442 * check if the redirect has come from approriate router.
1443 *
1444 * RFC 4861 specifies that redirects should only be
1445 * accepted if they come from the nexthop to the target.
1446 * Due to the way the routes are chosen, this notion
1447 * is a bit fuzzy and one might need to check all possible
1448 * routes.
1449 */
1450
1451 read_lock_bh(&table->tb6_lock);
1452 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1453restart:
1454 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1455 if (rt6_check_expired(rt))
1456 continue;
1457 if (rt->dst.error)
1458 break;
1459 if (!(rt->rt6i_flags & RTF_GATEWAY))
1460 continue;
1461 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1462 continue;
1463 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1464 continue;
1465 break;
1466 }
1467
1468 if (!rt)
1469 rt = net->ipv6.ip6_null_entry;
1470 else if (rt->dst.error) {
1471 rt = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
1472 goto out;
1473 }
1474
1475 if (rt == net->ipv6.ip6_null_entry) {
a3c00e46
MKL
1476 fn = fib6_backtrack(fn, &fl6->saddr);
1477 if (fn)
1478 goto restart;
b55b76b2 1479 }
a3c00e46 1480
b0a1ba59 1481out:
b55b76b2
DJ
1482 dst_hold(&rt->dst);
1483
1484 read_unlock_bh(&table->tb6_lock);
1485
b811580d 1486 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
b55b76b2
DJ
1487 return rt;
1488};
1489
1490static struct dst_entry *ip6_route_redirect(struct net *net,
1491 const struct flowi6 *fl6,
1492 const struct in6_addr *gateway)
1493{
1494 int flags = RT6_LOOKUP_F_HAS_SADDR;
1495 struct ip6rd_flowi rdfl;
1496
1497 rdfl.fl6 = *fl6;
1498 rdfl.gateway = *gateway;
1499
1500 return fib6_rule_lookup(net, &rdfl.fl6,
1501 flags, __ip6_route_redirect);
1502}
1503
3a5ad2ee
DM
1504void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1505{
1506 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1507 struct dst_entry *dst;
1508 struct flowi6 fl6;
1509
1510 memset(&fl6, 0, sizeof(fl6));
e374c618 1511 fl6.flowi6_iif = LOOPBACK_IFINDEX;
3a5ad2ee
DM
1512 fl6.flowi6_oif = oif;
1513 fl6.flowi6_mark = mark;
3a5ad2ee
DM
1514 fl6.daddr = iph->daddr;
1515 fl6.saddr = iph->saddr;
6502ca52 1516 fl6.flowlabel = ip6_flowinfo(iph);
3a5ad2ee 1517
b55b76b2
DJ
1518 dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1519 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
1520 dst_release(dst);
1521}
1522EXPORT_SYMBOL_GPL(ip6_redirect);
1523
c92a59ec
DJ
1524void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1525 u32 mark)
1526{
1527 const struct ipv6hdr *iph = ipv6_hdr(skb);
1528 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1529 struct dst_entry *dst;
1530 struct flowi6 fl6;
1531
1532 memset(&fl6, 0, sizeof(fl6));
e374c618 1533 fl6.flowi6_iif = LOOPBACK_IFINDEX;
c92a59ec
DJ
1534 fl6.flowi6_oif = oif;
1535 fl6.flowi6_mark = mark;
c92a59ec
DJ
1536 fl6.daddr = msg->dest;
1537 fl6.saddr = iph->daddr;
1538
b55b76b2
DJ
1539 dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1540 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
1541 dst_release(dst);
1542}
1543
3a5ad2ee
DM
1544void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1545{
1546 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1547}
1548EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1549
0dbaee3b 1550static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1551{
0dbaee3b
DM
1552 struct net_device *dev = dst->dev;
1553 unsigned int mtu = dst_mtu(dst);
1554 struct net *net = dev_net(dev);
1555
1da177e4
LT
1556 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1557
5578689a
DL
1558 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1559 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1560
1561 /*
1ab1457c
YH
1562 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1563 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1564 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1565 * rely only on pmtu discovery"
1566 */
1567 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1568 mtu = IPV6_MAXPLEN;
1569 return mtu;
1570}
1571
ebb762f2 1572static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1573{
4b32b5ad
MKL
1574 const struct rt6_info *rt = (const struct rt6_info *)dst;
1575 unsigned int mtu = rt->rt6i_pmtu;
d33e4553 1576 struct inet6_dev *idev;
618f9bc7 1577
4b32b5ad
MKL
1578 if (mtu)
1579 goto out;
1580
1581 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 1582 if (mtu)
30f78d8e 1583 goto out;
618f9bc7
SK
1584
1585 mtu = IPV6_MIN_MTU;
d33e4553
DM
1586
1587 rcu_read_lock();
1588 idev = __in6_dev_get(dst->dev);
1589 if (idev)
1590 mtu = idev->cnf.mtu6;
1591 rcu_read_unlock();
1592
30f78d8e
ED
1593out:
1594 return min_t(unsigned int, mtu, IP6_MAX_MTU);
d33e4553
DM
1595}
1596
3b00944c
YH
/*
 * Singly linked list (via dst->next) of dsts handed out by
 * icmp6_dst_alloc().  All list manipulation in this file happens under
 * icmp6_dst_lock.
 */
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1599
3b00944c 1600struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 1601 struct flowi6 *fl6)
1da177e4 1602{
87a11578 1603 struct dst_entry *dst;
1da177e4
LT
1604 struct rt6_info *rt;
1605 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1606 struct net *net = dev_net(dev);
1da177e4 1607
38308473 1608 if (unlikely(!idev))
122bdf67 1609 return ERR_PTR(-ENODEV);
1da177e4 1610
ad706862 1611 rt = ip6_dst_alloc(net, dev, 0);
38308473 1612 if (unlikely(!rt)) {
1da177e4 1613 in6_dev_put(idev);
87a11578 1614 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1615 goto out;
1616 }
1617
8e2ec639
YZ
1618 rt->dst.flags |= DST_HOST;
1619 rt->dst.output = ip6_output;
d8d1f30b 1620 atomic_set(&rt->dst.__refcnt, 1);
550bab42 1621 rt->rt6i_gateway = fl6->daddr;
87a11578 1622 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1623 rt->rt6i_dst.plen = 128;
1624 rt->rt6i_idev = idev;
14edd87d 1625 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 1626
3b00944c 1627 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1628 rt->dst.next = icmp6_dst_gc_list;
1629 icmp6_dst_gc_list = &rt->dst;
3b00944c 1630 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1631
5578689a 1632 fib6_force_start_gc(net);
1da177e4 1633
87a11578
DM
1634 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1635
1da177e4 1636out:
87a11578 1637 return dst;
1da177e4
LT
1638}
1639
3d0f24a7 1640int icmp6_dst_gc(void)
1da177e4 1641{
e9476e95 1642 struct dst_entry *dst, **pprev;
3d0f24a7 1643 int more = 0;
1da177e4 1644
3b00944c
YH
1645 spin_lock_bh(&icmp6_dst_lock);
1646 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1647
1da177e4
LT
1648 while ((dst = *pprev) != NULL) {
1649 if (!atomic_read(&dst->__refcnt)) {
1650 *pprev = dst->next;
1651 dst_free(dst);
1da177e4
LT
1652 } else {
1653 pprev = &dst->next;
3d0f24a7 1654 ++more;
1da177e4
LT
1655 }
1656 }
1657
3b00944c 1658 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1659
3d0f24a7 1660 return more;
1da177e4
LT
1661}
1662
1e493d19
DM
1663static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1664 void *arg)
1665{
1666 struct dst_entry *dst, **pprev;
1667
1668 spin_lock_bh(&icmp6_dst_lock);
1669 pprev = &icmp6_dst_gc_list;
1670 while ((dst = *pprev) != NULL) {
1671 struct rt6_info *rt = (struct rt6_info *) dst;
1672 if (func(rt, arg)) {
1673 *pprev = dst->next;
1674 dst_free(dst);
1675 } else {
1676 pprev = &dst->next;
1677 }
1678 }
1679 spin_unlock_bh(&icmp6_dst_lock);
1680}
1681
569d3645 1682static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1683{
86393e52 1684 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1685 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1686 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1687 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1688 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1689 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1690 int entries;
7019b78e 1691
fc66f95c 1692 entries = dst_entries_get_fast(ops);
49a18d86 1693 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 1694 entries <= rt_max_size)
1da177e4
LT
1695 goto out;
1696
6891a346 1697 net->ipv6.ip6_rt_gc_expire++;
14956643 1698 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
1699 entries = dst_entries_get_slow(ops);
1700 if (entries < ops->gc_thresh)
7019b78e 1701 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1702out:
7019b78e 1703 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1704 return entries > rt_max_size;
1da177e4
LT
1705}
1706
e715b6d3
FW
1707static int ip6_convert_metrics(struct mx6_config *mxc,
1708 const struct fib6_config *cfg)
1709{
c3a8d947 1710 bool ecn_ca = false;
e715b6d3
FW
1711 struct nlattr *nla;
1712 int remaining;
1713 u32 *mp;
1714
63159f29 1715 if (!cfg->fc_mx)
e715b6d3
FW
1716 return 0;
1717
1718 mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1719 if (unlikely(!mp))
1720 return -ENOMEM;
1721
1722 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1723 int type = nla_type(nla);
1bb14807 1724 u32 val;
e715b6d3 1725
1bb14807
DB
1726 if (!type)
1727 continue;
1728 if (unlikely(type > RTAX_MAX))
1729 goto err;
ea697639 1730
1bb14807
DB
1731 if (type == RTAX_CC_ALGO) {
1732 char tmp[TCP_CA_NAME_MAX];
e715b6d3 1733
1bb14807 1734 nla_strlcpy(tmp, nla, sizeof(tmp));
c3a8d947 1735 val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1bb14807
DB
1736 if (val == TCP_CA_UNSPEC)
1737 goto err;
1738 } else {
1739 val = nla_get_u32(nla);
e715b6d3 1740 }
b8d3e416
DB
1741 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1742 goto err;
1bb14807
DB
1743
1744 mp[type - 1] = val;
1745 __set_bit(type - 1, mxc->mx_valid);
e715b6d3
FW
1746 }
1747
c3a8d947
DB
1748 if (ecn_ca) {
1749 __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1750 mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1751 }
e715b6d3 1752
c3a8d947 1753 mxc->mx = mp;
e715b6d3
FW
1754 return 0;
1755 err:
1756 kfree(mp);
1757 return -EINVAL;
1758}
1da177e4 1759
8c5b83f0 1760static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
1da177e4 1761{
5578689a 1762 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1763 struct rt6_info *rt = NULL;
1764 struct net_device *dev = NULL;
1765 struct inet6_dev *idev = NULL;
c71099ac 1766 struct fib6_table *table;
1da177e4 1767 int addr_type;
8c5b83f0 1768 int err = -EINVAL;
1da177e4 1769
86872cb5 1770 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
8c5b83f0 1771 goto out;
1da177e4 1772#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1773 if (cfg->fc_src_len)
8c5b83f0 1774 goto out;
1da177e4 1775#endif
86872cb5 1776 if (cfg->fc_ifindex) {
1da177e4 1777 err = -ENODEV;
5578689a 1778 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1779 if (!dev)
1780 goto out;
1781 idev = in6_dev_get(dev);
1782 if (!idev)
1783 goto out;
1784 }
1785
86872cb5
TG
1786 if (cfg->fc_metric == 0)
1787 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1788
d71314b4 1789 err = -ENOBUFS;
38308473
DM
1790 if (cfg->fc_nlinfo.nlh &&
1791 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1792 table = fib6_get_table(net, cfg->fc_table);
38308473 1793 if (!table) {
f3213831 1794 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
1795 table = fib6_new_table(net, cfg->fc_table);
1796 }
1797 } else {
1798 table = fib6_new_table(net, cfg->fc_table);
1799 }
38308473
DM
1800
1801 if (!table)
c71099ac 1802 goto out;
c71099ac 1803
ad706862
MKL
1804 rt = ip6_dst_alloc(net, NULL,
1805 (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1da177e4 1806
38308473 1807 if (!rt) {
1da177e4
LT
1808 err = -ENOMEM;
1809 goto out;
1810 }
1811
1716a961
G
1812 if (cfg->fc_flags & RTF_EXPIRES)
1813 rt6_set_expires(rt, jiffies +
1814 clock_t_to_jiffies(cfg->fc_expires));
1815 else
1816 rt6_clean_expires(rt);
1da177e4 1817
86872cb5
TG
1818 if (cfg->fc_protocol == RTPROT_UNSPEC)
1819 cfg->fc_protocol = RTPROT_BOOT;
1820 rt->rt6i_protocol = cfg->fc_protocol;
1821
1822 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1823
1824 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1825 rt->dst.input = ip6_mc_input;
ab79ad14
1826 else if (cfg->fc_flags & RTF_LOCAL)
1827 rt->dst.input = ip6_input;
1da177e4 1828 else
d8d1f30b 1829 rt->dst.input = ip6_forward;
1da177e4 1830
d8d1f30b 1831 rt->dst.output = ip6_output;
1da177e4 1832
19e42e45
RP
1833 if (cfg->fc_encap) {
1834 struct lwtunnel_state *lwtstate;
1835
1836 err = lwtunnel_build_state(dev, cfg->fc_encap_type,
127eb7cd
TH
1837 cfg->fc_encap, AF_INET6, cfg,
1838 &lwtstate);
19e42e45
RP
1839 if (err)
1840 goto out;
61adedf3
JB
1841 rt->dst.lwtstate = lwtstate_get(lwtstate);
1842 if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
1843 rt->dst.lwtstate->orig_output = rt->dst.output;
1844 rt->dst.output = lwtunnel_output;
25368623 1845 }
61adedf3
JB
1846 if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
1847 rt->dst.lwtstate->orig_input = rt->dst.input;
1848 rt->dst.input = lwtunnel_input;
25368623 1849 }
19e42e45
RP
1850 }
1851
86872cb5
TG
1852 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1853 rt->rt6i_dst.plen = cfg->fc_dst_len;
afc4eef8 1854 if (rt->rt6i_dst.plen == 128)
e5fd387a 1855 rt->dst.flags |= DST_HOST;
e5fd387a 1856
1da177e4 1857#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1858 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1859 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1860#endif
1861
86872cb5 1862 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1863
1864 /* We cannot add true routes via loopback here,
1865 they would result in kernel looping; promote them to reject routes
1866 */
86872cb5 1867 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1868 (dev && (dev->flags & IFF_LOOPBACK) &&
1869 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1870 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1871 /* hold loopback dev/idev if we haven't done so. */
5578689a 1872 if (dev != net->loopback_dev) {
1da177e4
LT
1873 if (dev) {
1874 dev_put(dev);
1875 in6_dev_put(idev);
1876 }
5578689a 1877 dev = net->loopback_dev;
1da177e4
LT
1878 dev_hold(dev);
1879 idev = in6_dev_get(dev);
1880 if (!idev) {
1881 err = -ENODEV;
1882 goto out;
1883 }
1884 }
1da177e4 1885 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
ef2c7d7b
ND
1886 switch (cfg->fc_type) {
1887 case RTN_BLACKHOLE:
1888 rt->dst.error = -EINVAL;
ede2059d 1889 rt->dst.output = dst_discard_out;
7150aede 1890 rt->dst.input = dst_discard;
ef2c7d7b
ND
1891 break;
1892 case RTN_PROHIBIT:
1893 rt->dst.error = -EACCES;
7150aede
K
1894 rt->dst.output = ip6_pkt_prohibit_out;
1895 rt->dst.input = ip6_pkt_prohibit;
ef2c7d7b 1896 break;
b4949ab2 1897 case RTN_THROW:
0315e382 1898 case RTN_UNREACHABLE:
ef2c7d7b 1899 default:
7150aede 1900 rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
0315e382
NF
1901 : (cfg->fc_type == RTN_UNREACHABLE)
1902 ? -EHOSTUNREACH : -ENETUNREACH;
7150aede
K
1903 rt->dst.output = ip6_pkt_discard_out;
1904 rt->dst.input = ip6_pkt_discard;
ef2c7d7b
ND
1905 break;
1906 }
1da177e4
LT
1907 goto install_route;
1908 }
1909
86872cb5 1910 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1911 const struct in6_addr *gw_addr;
1da177e4
LT
1912 int gwa_type;
1913
86872cb5 1914 gw_addr = &cfg->fc_gateway;
330567b7 1915 gwa_type = ipv6_addr_type(gw_addr);
48ed7b26
FW
1916
1917 /* if gw_addr is local we will fail to detect this in case
1918 * address is still TENTATIVE (DAD in progress). rt6_lookup()
1919 * will return already-added prefix route via interface that
1920 * prefix route was assigned to, which might be non-loopback.
1921 */
1922 err = -EINVAL;
330567b7
FW
1923 if (ipv6_chk_addr_and_flags(net, gw_addr,
1924 gwa_type & IPV6_ADDR_LINKLOCAL ?
1925 dev : NULL, 0, 0))
48ed7b26
FW
1926 goto out;
1927
4e3fd7a0 1928 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
1929
1930 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1931 struct rt6_info *grt;
1932
1933 /* IPv6 strictly inhibits using not link-local
1934 addresses as nexthop address.
1935 Otherwise, router will not able to send redirects.
1936 It is very good, but in some (rare!) circumstances
1937 (SIT, PtP, NBMA NOARP links) it is handy to allow
1938 some exceptions. --ANK
1939 */
38308473 1940 if (!(gwa_type & IPV6_ADDR_UNICAST))
1da177e4
LT
1941 goto out;
1942
5578689a 1943 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1944
1945 err = -EHOSTUNREACH;
38308473 1946 if (!grt)
1da177e4
LT
1947 goto out;
1948 if (dev) {
d1918542 1949 if (dev != grt->dst.dev) {
94e187c0 1950 ip6_rt_put(grt);
1da177e4
LT
1951 goto out;
1952 }
1953 } else {
d1918542 1954 dev = grt->dst.dev;
1da177e4
LT
1955 idev = grt->rt6i_idev;
1956 dev_hold(dev);
1957 in6_dev_hold(grt->rt6i_idev);
1958 }
38308473 1959 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 1960 err = 0;
94e187c0 1961 ip6_rt_put(grt);
1da177e4
LT
1962
1963 if (err)
1964 goto out;
1965 }
1966 err = -EINVAL;
38308473 1967 if (!dev || (dev->flags & IFF_LOOPBACK))
1da177e4
LT
1968 goto out;
1969 }
1970
1971 err = -ENODEV;
38308473 1972 if (!dev)
1da177e4
LT
1973 goto out;
1974
c3968a85
DW
1975 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1976 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1977 err = -EINVAL;
1978 goto out;
1979 }
4e3fd7a0 1980 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
1981 rt->rt6i_prefsrc.plen = 128;
1982 } else
1983 rt->rt6i_prefsrc.plen = 0;
1984
86872cb5 1985 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1986
1987install_route:
d8d1f30b 1988 rt->dst.dev = dev;
1da177e4 1989 rt->rt6i_idev = idev;
c71099ac 1990 rt->rt6i_table = table;
63152fc0 1991
c346dca1 1992 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1993
8c5b83f0 1994 return rt;
6b9ea5a6
RP
1995out:
1996 if (dev)
1997 dev_put(dev);
1998 if (idev)
1999 in6_dev_put(idev);
2000 if (rt)
2001 dst_free(&rt->dst);
2002
8c5b83f0 2003 return ERR_PTR(err);
6b9ea5a6
RP
2004}
2005
2006int ip6_route_add(struct fib6_config *cfg)
2007{
2008 struct mx6_config mxc = { .mx = NULL, };
8c5b83f0 2009 struct rt6_info *rt;
6b9ea5a6
RP
2010 int err;
2011
8c5b83f0
RP
2012 rt = ip6_route_info_create(cfg);
2013 if (IS_ERR(rt)) {
2014 err = PTR_ERR(rt);
2015 rt = NULL;
6b9ea5a6 2016 goto out;
8c5b83f0 2017 }
6b9ea5a6 2018
e715b6d3
FW
2019 err = ip6_convert_metrics(&mxc, cfg);
2020 if (err)
2021 goto out;
1da177e4 2022
e715b6d3
FW
2023 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
2024
2025 kfree(mxc.mx);
6b9ea5a6 2026
e715b6d3 2027 return err;
1da177e4 2028out:
1da177e4 2029 if (rt)
d8d1f30b 2030 dst_free(&rt->dst);
6b9ea5a6 2031
1da177e4
LT
2032 return err;
2033}
2034
86872cb5 2035static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2036{
2037 int err;
c71099ac 2038 struct fib6_table *table;
d1918542 2039 struct net *net = dev_net(rt->dst.dev);
1da177e4 2040
8e3d5be7
MKL
2041 if (rt == net->ipv6.ip6_null_entry ||
2042 rt->dst.flags & DST_NOCACHE) {
6825a26c
G
2043 err = -ENOENT;
2044 goto out;
2045 }
6c813a72 2046
c71099ac
TG
2047 table = rt->rt6i_table;
2048 write_lock_bh(&table->tb6_lock);
86872cb5 2049 err = fib6_del(rt, info);
c71099ac 2050 write_unlock_bh(&table->tb6_lock);
1da177e4 2051
6825a26c 2052out:
94e187c0 2053 ip6_rt_put(rt);
1da177e4
LT
2054 return err;
2055}
2056
e0a1ad73
TG
2057int ip6_del_rt(struct rt6_info *rt)
2058{
4d1169c1 2059 struct nl_info info = {
d1918542 2060 .nl_net = dev_net(rt->dst.dev),
4d1169c1 2061 };
528c4ceb 2062 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
2063}
2064
86872cb5 2065static int ip6_route_del(struct fib6_config *cfg)
1da177e4 2066{
c71099ac 2067 struct fib6_table *table;
1da177e4
LT
2068 struct fib6_node *fn;
2069 struct rt6_info *rt;
2070 int err = -ESRCH;
2071
5578689a 2072 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 2073 if (!table)
c71099ac
TG
2074 return err;
2075
2076 read_lock_bh(&table->tb6_lock);
1da177e4 2077
c71099ac 2078 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
2079 &cfg->fc_dst, cfg->fc_dst_len,
2080 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 2081
1da177e4 2082 if (fn) {
d8d1f30b 2083 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1f56a01f
MKL
2084 if ((rt->rt6i_flags & RTF_CACHE) &&
2085 !(cfg->fc_flags & RTF_CACHE))
2086 continue;
86872cb5 2087 if (cfg->fc_ifindex &&
d1918542
DM
2088 (!rt->dst.dev ||
2089 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 2090 continue;
86872cb5
TG
2091 if (cfg->fc_flags & RTF_GATEWAY &&
2092 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 2093 continue;
86872cb5 2094 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 2095 continue;
d8d1f30b 2096 dst_hold(&rt->dst);
c71099ac 2097 read_unlock_bh(&table->tb6_lock);
1da177e4 2098
86872cb5 2099 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
2100 }
2101 }
c71099ac 2102 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2103
2104 return err;
2105}
2106
6700c270 2107static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 2108{
a6279458 2109 struct netevent_redirect netevent;
e8599ff4 2110 struct rt6_info *rt, *nrt = NULL;
e8599ff4
DM
2111 struct ndisc_options ndopts;
2112 struct inet6_dev *in6_dev;
2113 struct neighbour *neigh;
71bcdba0 2114 struct rd_msg *msg;
6e157b6a
DM
2115 int optlen, on_link;
2116 u8 *lladdr;
e8599ff4 2117
29a3cad5 2118 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 2119 optlen -= sizeof(*msg);
e8599ff4
DM
2120
2121 if (optlen < 0) {
6e157b6a 2122 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
2123 return;
2124 }
2125
71bcdba0 2126 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 2127
71bcdba0 2128 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 2129 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
2130 return;
2131 }
2132
6e157b6a 2133 on_link = 0;
71bcdba0 2134 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 2135 on_link = 1;
71bcdba0 2136 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 2137 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 2138 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
2139 return;
2140 }
2141
2142 in6_dev = __in6_dev_get(skb->dev);
2143 if (!in6_dev)
2144 return;
2145 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2146 return;
2147
2148 /* RFC2461 8.1:
2149 * The IP source address of the Redirect MUST be the same as the current
2150 * first-hop router for the specified ICMP Destination Address.
2151 */
2152
71bcdba0 2153 if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
e8599ff4
DM
2154 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2155 return;
2156 }
6e157b6a
DM
2157
2158 lladdr = NULL;
e8599ff4
DM
2159 if (ndopts.nd_opts_tgt_lladdr) {
2160 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2161 skb->dev);
2162 if (!lladdr) {
2163 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2164 return;
2165 }
2166 }
2167
6e157b6a 2168 rt = (struct rt6_info *) dst;
ec13ad1d 2169 if (rt->rt6i_flags & RTF_REJECT) {
6e157b6a 2170 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 2171 return;
6e157b6a 2172 }
e8599ff4 2173
6e157b6a
DM
2174 /* Redirect received -> path was valid.
2175 * Look, redirects are sent only in response to data packets,
2176 * so that this nexthop apparently is reachable. --ANK
2177 */
2178 dst_confirm(&rt->dst);
a6279458 2179
71bcdba0 2180 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
2181 if (!neigh)
2182 return;
a6279458 2183
1da177e4
LT
2184 /*
2185 * We have finally decided to accept it.
2186 */
2187
1ab1457c 2188 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
2189 NEIGH_UPDATE_F_WEAK_OVERRIDE|
2190 NEIGH_UPDATE_F_OVERRIDE|
2191 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
2192 NEIGH_UPDATE_F_ISROUTER))
2193 );
2194
83a09abd 2195 nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
38308473 2196 if (!nrt)
1da177e4
LT
2197 goto out;
2198
2199 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2200 if (on_link)
2201 nrt->rt6i_flags &= ~RTF_GATEWAY;
2202
4e3fd7a0 2203 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 2204
40e22e8f 2205 if (ip6_ins_rt(nrt))
1da177e4
LT
2206 goto out;
2207
d8d1f30b
CG
2208 netevent.old = &rt->dst;
2209 netevent.new = &nrt->dst;
71bcdba0 2210 netevent.daddr = &msg->dest;
60592833 2211 netevent.neigh = neigh;
8d71740c
TT
2212 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2213
38308473 2214 if (rt->rt6i_flags & RTF_CACHE) {
6e157b6a 2215 rt = (struct rt6_info *) dst_clone(&rt->dst);
e0a1ad73 2216 ip6_del_rt(rt);
1da177e4
LT
2217 }
2218
2219out:
e8599ff4 2220 neigh_release(neigh);
6e157b6a
DM
2221}
2222
1da177e4
LT
2223/*
2224 * Misc support functions
2225 */
2226
4b32b5ad
MKL
2227static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2228{
2229 BUG_ON(from->dst.from);
2230
2231 rt->rt6i_flags &= ~RTF_EXPIRES;
2232 dst_hold(&from->dst);
2233 rt->dst.from = &from->dst;
2234 dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2235}
2236
83a09abd
MKL
2237static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2238{
2239 rt->dst.input = ort->dst.input;
2240 rt->dst.output = ort->dst.output;
2241 rt->rt6i_dst = ort->rt6i_dst;
2242 rt->dst.error = ort->dst.error;
2243 rt->rt6i_idev = ort->rt6i_idev;
2244 if (rt->rt6i_idev)
2245 in6_dev_hold(rt->rt6i_idev);
2246 rt->dst.lastuse = jiffies;
2247 rt->rt6i_gateway = ort->rt6i_gateway;
2248 rt->rt6i_flags = ort->rt6i_flags;
2249 rt6_set_from(rt, ort);
2250 rt->rt6i_metric = ort->rt6i_metric;
1da177e4 2251#ifdef CONFIG_IPV6_SUBTREES
83a09abd 2252 rt->rt6i_src = ort->rt6i_src;
1da177e4 2253#endif
83a09abd
MKL
2254 rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2255 rt->rt6i_table = ort->rt6i_table;
61adedf3 2256 rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
1da177e4
LT
2257}
2258
70ceb4f5 2259#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 2260static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
2261 const struct in6_addr *prefix, int prefixlen,
2262 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
2263{
2264 struct fib6_node *fn;
2265 struct rt6_info *rt = NULL;
c71099ac
TG
2266 struct fib6_table *table;
2267
efa2cea0 2268 table = fib6_get_table(net, RT6_TABLE_INFO);
38308473 2269 if (!table)
c71099ac 2270 return NULL;
70ceb4f5 2271
5744dd9b 2272 read_lock_bh(&table->tb6_lock);
67ba4152 2273 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
70ceb4f5
YH
2274 if (!fn)
2275 goto out;
2276
d8d1f30b 2277 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 2278 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
2279 continue;
2280 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2281 continue;
2282 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2283 continue;
d8d1f30b 2284 dst_hold(&rt->dst);
70ceb4f5
YH
2285 break;
2286 }
2287out:
5744dd9b 2288 read_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
2289 return rt;
2290}
2291
efa2cea0 2292static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
2293 const struct in6_addr *prefix, int prefixlen,
2294 const struct in6_addr *gwaddr, int ifindex,
95c96174 2295 unsigned int pref)
70ceb4f5 2296{
86872cb5 2297 struct fib6_config cfg = {
238fc7ea 2298 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
2299 .fc_ifindex = ifindex,
2300 .fc_dst_len = prefixlen,
2301 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2302 RTF_UP | RTF_PREF(pref),
15e47304 2303 .fc_nlinfo.portid = 0,
efa2cea0
DL
2304 .fc_nlinfo.nlh = NULL,
2305 .fc_nlinfo.nl_net = net,
86872cb5
TG
2306 };
2307
ca254490 2308 cfg.fc_table = l3mdev_fib_table_by_index(net, ifindex) ? : RT6_TABLE_INFO;
4e3fd7a0
AD
2309 cfg.fc_dst = *prefix;
2310 cfg.fc_gateway = *gwaddr;
70ceb4f5 2311
e317da96
YH
2312 /* We should treat it as a default route if prefix length is 0. */
2313 if (!prefixlen)
86872cb5 2314 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 2315
86872cb5 2316 ip6_route_add(&cfg);
70ceb4f5 2317
efa2cea0 2318 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
2319}
2320#endif
2321
b71d1d42 2322struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 2323{
1da177e4 2324 struct rt6_info *rt;
c71099ac 2325 struct fib6_table *table;
1da177e4 2326
c346dca1 2327 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
38308473 2328 if (!table)
c71099ac 2329 return NULL;
1da177e4 2330
5744dd9b 2331 read_lock_bh(&table->tb6_lock);
67ba4152 2332 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
d1918542 2333 if (dev == rt->dst.dev &&
045927ff 2334 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
2335 ipv6_addr_equal(&rt->rt6i_gateway, addr))
2336 break;
2337 }
2338 if (rt)
d8d1f30b 2339 dst_hold(&rt->dst);
5744dd9b 2340 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2341 return rt;
2342}
2343
b71d1d42 2344struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
2345 struct net_device *dev,
2346 unsigned int pref)
1da177e4 2347{
86872cb5 2348 struct fib6_config cfg = {
ca254490 2349 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
238fc7ea 2350 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
2351 .fc_ifindex = dev->ifindex,
2352 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2353 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
15e47304 2354 .fc_nlinfo.portid = 0,
5578689a 2355 .fc_nlinfo.nlh = NULL,
c346dca1 2356 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 2357 };
1da177e4 2358
4e3fd7a0 2359 cfg.fc_gateway = *gwaddr;
1da177e4 2360
86872cb5 2361 ip6_route_add(&cfg);
1da177e4 2362
1da177e4
LT
2363 return rt6_get_dflt_router(gwaddr, dev);
2364}
2365
7b4da532 2366void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
2367{
2368 struct rt6_info *rt;
c71099ac
TG
2369 struct fib6_table *table;
2370
2371 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 2372 table = fib6_get_table(net, RT6_TABLE_DFLT);
38308473 2373 if (!table)
c71099ac 2374 return;
1da177e4
LT
2375
2376restart:
c71099ac 2377 read_lock_bh(&table->tb6_lock);
d8d1f30b 2378 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
3e8b0ac3
LC
2379 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2380 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
d8d1f30b 2381 dst_hold(&rt->dst);
c71099ac 2382 read_unlock_bh(&table->tb6_lock);
e0a1ad73 2383 ip6_del_rt(rt);
1da177e4
LT
2384 goto restart;
2385 }
2386 }
c71099ac 2387 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2388}
2389
5578689a
DL
2390static void rtmsg_to_fib6_config(struct net *net,
2391 struct in6_rtmsg *rtmsg,
86872cb5
TG
2392 struct fib6_config *cfg)
2393{
2394 memset(cfg, 0, sizeof(*cfg));
2395
ca254490
DA
2396 cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2397 : RT6_TABLE_MAIN;
86872cb5
TG
2398 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2399 cfg->fc_metric = rtmsg->rtmsg_metric;
2400 cfg->fc_expires = rtmsg->rtmsg_info;
2401 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2402 cfg->fc_src_len = rtmsg->rtmsg_src_len;
2403 cfg->fc_flags = rtmsg->rtmsg_flags;
2404
5578689a 2405 cfg->fc_nlinfo.nl_net = net;
f1243c2d 2406
4e3fd7a0
AD
2407 cfg->fc_dst = rtmsg->rtmsg_dst;
2408 cfg->fc_src = rtmsg->rtmsg_src;
2409 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
2410}
2411
5578689a 2412int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 2413{
86872cb5 2414 struct fib6_config cfg;
1da177e4
LT
2415 struct in6_rtmsg rtmsg;
2416 int err;
2417
67ba4152 2418 switch (cmd) {
1da177e4
LT
2419 case SIOCADDRT: /* Add a route */
2420 case SIOCDELRT: /* Delete a route */
af31f412 2421 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
2422 return -EPERM;
2423 err = copy_from_user(&rtmsg, arg,
2424 sizeof(struct in6_rtmsg));
2425 if (err)
2426 return -EFAULT;
86872cb5 2427
5578689a 2428 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 2429
1da177e4
LT
2430 rtnl_lock();
2431 switch (cmd) {
2432 case SIOCADDRT:
86872cb5 2433 err = ip6_route_add(&cfg);
1da177e4
LT
2434 break;
2435 case SIOCDELRT:
86872cb5 2436 err = ip6_route_del(&cfg);
1da177e4
LT
2437 break;
2438 default:
2439 err = -EINVAL;
2440 }
2441 rtnl_unlock();
2442
2443 return err;
3ff50b79 2444 }
1da177e4
LT
2445
2446 return -EINVAL;
2447}
2448
2449/*
2450 * Drop the packet on the floor
2451 */
2452
d5fdd6ba 2453static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2454{
612f09e8 2455 int type;
adf30907 2456 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2457 switch (ipstats_mib_noroutes) {
2458 case IPSTATS_MIB_INNOROUTES:
0660e03f 2459 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2460 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2461 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2462 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2463 break;
2464 }
2465 /* FALLTHROUGH */
2466 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2467 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2468 ipstats_mib_noroutes);
612f09e8
YH
2469 break;
2470 }
3ffe533c 2471 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2472 kfree_skb(skb);
2473 return 0;
2474}
2475
9ce8ade0
TG
2476static int ip6_pkt_discard(struct sk_buff *skb)
2477{
612f09e8 2478 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2479}
2480
ede2059d 2481static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 2482{
adf30907 2483 skb->dev = skb_dst(skb)->dev;
612f09e8 2484 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2485}
2486
9ce8ade0
TG
2487static int ip6_pkt_prohibit(struct sk_buff *skb)
2488{
612f09e8 2489 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2490}
2491
ede2059d 2492static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
9ce8ade0 2493{
adf30907 2494 skb->dev = skb_dst(skb)->dev;
612f09e8 2495 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2496}
2497
1da177e4
LT
2498/*
2499 * Allocate a dst for local (unicast / anycast) address.
2500 */
2501
2502struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2503 const struct in6_addr *addr,
8f031519 2504 bool anycast)
1da177e4 2505{
ca254490 2506 u32 tb_id;
c346dca1 2507 struct net *net = dev_net(idev->dev);
a3300ef4 2508 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
ad706862 2509 DST_NOCOUNT);
a3300ef4 2510 if (!rt)
1da177e4
LT
2511 return ERR_PTR(-ENOMEM);
2512
1da177e4
LT
2513 in6_dev_hold(idev);
2514
11d53b49 2515 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2516 rt->dst.input = ip6_input;
2517 rt->dst.output = ip6_output;
1da177e4 2518 rt->rt6i_idev = idev;
1da177e4
LT
2519
2520 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2521 if (anycast)
2522 rt->rt6i_flags |= RTF_ANYCAST;
2523 else
1da177e4 2524 rt->rt6i_flags |= RTF_LOCAL;
1da177e4 2525
550bab42 2526 rt->rt6i_gateway = *addr;
4e3fd7a0 2527 rt->rt6i_dst.addr = *addr;
1da177e4 2528 rt->rt6i_dst.plen = 128;
ca254490
DA
2529 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
2530 rt->rt6i_table = fib6_get_table(net, tb_id);
8e3d5be7 2531 rt->dst.flags |= DST_NOCACHE;
1da177e4 2532
d8d1f30b 2533 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2534
2535 return rt;
2536}
2537
c3968a85
DW
2538int ip6_route_get_saddr(struct net *net,
2539 struct rt6_info *rt,
b71d1d42 2540 const struct in6_addr *daddr,
c3968a85
DW
2541 unsigned int prefs,
2542 struct in6_addr *saddr)
2543{
e16e888b
MS
2544 struct inet6_dev *idev =
2545 rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
c3968a85 2546 int err = 0;
e16e888b 2547 if (rt && rt->rt6i_prefsrc.plen)
4e3fd7a0 2548 *saddr = rt->rt6i_prefsrc.addr;
c3968a85
DW
2549 else
2550 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2551 daddr, prefs, saddr);
2552 return err;
2553}
2554
2555/* remove deleted ip from prefsrc entries */
/* Argument bundle handed to fib6_remove_prefsrc() via fib6_clean_all(). */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device filter; NULL matches any */
	struct net *net;		/* namespace owning ip6_null_entry */
	struct in6_addr *addr;		/* address being removed */
};
2561
2562static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2563{
2564 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2565 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2566 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2567
d1918542 2568 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
2569 rt != net->ipv6.ip6_null_entry &&
2570 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2571 /* remove prefsrc entry */
2572 rt->rt6i_prefsrc.plen = 0;
2573 }
2574 return 0;
2575}
2576
2577void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2578{
2579 struct net *net = dev_net(ifp->idev->dev);
2580 struct arg_dev_net_ip adni = {
2581 .dev = ifp->idev->dev,
2582 .net = net,
2583 .addr = &ifp->addr,
2584 };
0c3584d5 2585 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
2586}
2587
be7a010d
DJ
2588#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2589#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
2590
2591/* Remove routers and update dst entries when gateway turn into host. */
2592static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2593{
2594 struct in6_addr *gateway = (struct in6_addr *)arg;
2595
2596 if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2597 ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2598 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2599 return -1;
2600 }
2601 return 0;
2602}
2603
2604void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2605{
2606 fib6_clean_all(net, fib6_clean_tohost, gateway);
2607}
2608
8ed67789
DL
/* Argument bundle handed to fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device filter; NULL matches any */
	struct net *net;		/* namespace owning ip6_null_entry */
};
2613
1da177e4
LT
2614static int fib6_ifdown(struct rt6_info *rt, void *arg)
2615{
bc3ef660 2616 const struct arg_dev_net *adn = arg;
2617 const struct net_device *dev = adn->dev;
8ed67789 2618
d1918542 2619 if ((rt->dst.dev == dev || !dev) &&
c159d30c 2620 rt != adn->net->ipv6.ip6_null_entry)
1da177e4 2621 return -1;
c159d30c 2622
1da177e4
LT
2623 return 0;
2624}
2625
f3db4851 2626void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2627{
8ed67789
DL
2628 struct arg_dev_net adn = {
2629 .dev = dev,
2630 .net = net,
2631 };
2632
0c3584d5 2633 fib6_clean_all(net, fib6_ifdown, &adn);
1e493d19 2634 icmp6_clean_all(fib6_ifdown, &adn);
e332bc67
EB
2635 if (dev)
2636 rt6_uncached_list_flush_dev(net, dev);
1da177e4
LT
2637}
2638
/* Argument bundle handed to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
2643
2644static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2645{
2646 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2647 struct inet6_dev *idev;
2648
2649 /* In IPv6 pmtu discovery is not optional,
2650 so that RTAX_MTU lock cannot disable it.
2651 We still use this lock to block changes
2652 caused by addrconf/ndisc.
2653 */
2654
2655 idev = __in6_dev_get(arg->dev);
38308473 2656 if (!idev)
1da177e4
LT
2657 return 0;
2658
2659 /* For administrative MTU increase, there is no way to discover
2660 IPv6 PMTU increase, so PMTU increase should be updated here.
2661 Since RFC 1981 doesn't include administrative MTU increase
2662 update PMTU increase is a MUST. (i.e. jumbo frame)
2663 */
2664 /*
2665 If new MTU is less than route PMTU, this new MTU will be the
2666 lowest MTU in the path, update the route PMTU to reflect PMTU
2667 decreases; if new MTU is greater than route PMTU, and the
2668 old MTU is the lowest MTU in the path, update the route PMTU
2669 to reflect the increase. In this case if the other nodes' MTU
2670 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2671 PMTU discouvery.
2672 */
d1918542 2673 if (rt->dst.dev == arg->dev &&
4b32b5ad
MKL
2674 !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2675 if (rt->rt6i_flags & RTF_CACHE) {
2676 /* For RTF_CACHE with rt6i_pmtu == 0
2677 * (i.e. a redirected route),
2678 * the metrics of its rt->dst.from has already
2679 * been updated.
2680 */
2681 if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2682 rt->rt6i_pmtu = arg->mtu;
2683 } else if (dst_mtu(&rt->dst) >= arg->mtu ||
2684 (dst_mtu(&rt->dst) < arg->mtu &&
2685 dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2686 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2687 }
566cfd8f 2688 }
1da177e4
LT
2689 return 0;
2690}
2691
95c96174 2692void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 2693{
c71099ac
TG
2694 struct rt6_mtu_change_arg arg = {
2695 .dev = dev,
2696 .mtu = mtu,
2697 };
1da177e4 2698
0c3584d5 2699 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
2700}
2701
ef7c79ed 2702static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2703 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2704 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2705 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2706 [RTA_PRIORITY] = { .type = NLA_U32 },
2707 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 2708 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 2709 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
2710 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
2711 [RTA_ENCAP] = { .type = NLA_NESTED },
86872cb5
TG
2712};
2713
2714static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2715 struct fib6_config *cfg)
1da177e4 2716{
86872cb5
TG
2717 struct rtmsg *rtm;
2718 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 2719 unsigned int pref;
86872cb5 2720 int err;
1da177e4 2721
86872cb5
TG
2722 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2723 if (err < 0)
2724 goto errout;
1da177e4 2725
86872cb5
TG
2726 err = -EINVAL;
2727 rtm = nlmsg_data(nlh);
2728 memset(cfg, 0, sizeof(*cfg));
2729
2730 cfg->fc_table = rtm->rtm_table;
2731 cfg->fc_dst_len = rtm->rtm_dst_len;
2732 cfg->fc_src_len = rtm->rtm_src_len;
2733 cfg->fc_flags = RTF_UP;
2734 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 2735 cfg->fc_type = rtm->rtm_type;
86872cb5 2736
ef2c7d7b
ND
2737 if (rtm->rtm_type == RTN_UNREACHABLE ||
2738 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
2739 rtm->rtm_type == RTN_PROHIBIT ||
2740 rtm->rtm_type == RTN_THROW)
86872cb5
TG
2741 cfg->fc_flags |= RTF_REJECT;
2742
ab79ad14
2743 if (rtm->rtm_type == RTN_LOCAL)
2744 cfg->fc_flags |= RTF_LOCAL;
2745
1f56a01f
MKL
2746 if (rtm->rtm_flags & RTM_F_CLONED)
2747 cfg->fc_flags |= RTF_CACHE;
2748
15e47304 2749 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 2750 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2751 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2752
2753 if (tb[RTA_GATEWAY]) {
67b61f6c 2754 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 2755 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2756 }
86872cb5
TG
2757
2758 if (tb[RTA_DST]) {
2759 int plen = (rtm->rtm_dst_len + 7) >> 3;
2760
2761 if (nla_len(tb[RTA_DST]) < plen)
2762 goto errout;
2763
2764 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2765 }
86872cb5
TG
2766
2767 if (tb[RTA_SRC]) {
2768 int plen = (rtm->rtm_src_len + 7) >> 3;
2769
2770 if (nla_len(tb[RTA_SRC]) < plen)
2771 goto errout;
2772
2773 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2774 }
86872cb5 2775
c3968a85 2776 if (tb[RTA_PREFSRC])
67b61f6c 2777 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 2778
86872cb5
TG
2779 if (tb[RTA_OIF])
2780 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2781
2782 if (tb[RTA_PRIORITY])
2783 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2784
2785 if (tb[RTA_METRICS]) {
2786 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2787 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2788 }
86872cb5
TG
2789
2790 if (tb[RTA_TABLE])
2791 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2792
51ebd318
ND
2793 if (tb[RTA_MULTIPATH]) {
2794 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2795 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2796 }
2797
c78ba6d6
LR
2798 if (tb[RTA_PREF]) {
2799 pref = nla_get_u8(tb[RTA_PREF]);
2800 if (pref != ICMPV6_ROUTER_PREF_LOW &&
2801 pref != ICMPV6_ROUTER_PREF_HIGH)
2802 pref = ICMPV6_ROUTER_PREF_MEDIUM;
2803 cfg->fc_flags |= RTF_PREF(pref);
2804 }
2805
19e42e45
RP
2806 if (tb[RTA_ENCAP])
2807 cfg->fc_encap = tb[RTA_ENCAP];
2808
2809 if (tb[RTA_ENCAP_TYPE])
2810 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
2811
86872cb5
TG
2812 err = 0;
2813errout:
2814 return err;
1da177e4
LT
2815}
2816
6b9ea5a6
RP
2817struct rt6_nh {
2818 struct rt6_info *rt6_info;
2819 struct fib6_config r_cfg;
2820 struct mx6_config mxc;
2821 struct list_head next;
2822};
2823
2824static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
2825{
2826 struct rt6_nh *nh;
2827
2828 list_for_each_entry(nh, rt6_nh_list, next) {
2829 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
2830 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
2831 nh->r_cfg.fc_ifindex);
2832 }
2833}
2834
2835static int ip6_route_info_append(struct list_head *rt6_nh_list,
2836 struct rt6_info *rt, struct fib6_config *r_cfg)
2837{
2838 struct rt6_nh *nh;
2839 struct rt6_info *rtnh;
2840 int err = -EEXIST;
2841
2842 list_for_each_entry(nh, rt6_nh_list, next) {
2843 /* check if rt6_info already exists */
2844 rtnh = nh->rt6_info;
2845
2846 if (rtnh->dst.dev == rt->dst.dev &&
2847 rtnh->rt6i_idev == rt->rt6i_idev &&
2848 ipv6_addr_equal(&rtnh->rt6i_gateway,
2849 &rt->rt6i_gateway))
2850 return err;
2851 }
2852
2853 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
2854 if (!nh)
2855 return -ENOMEM;
2856 nh->rt6_info = rt;
2857 err = ip6_convert_metrics(&nh->mxc, r_cfg);
2858 if (err) {
2859 kfree(nh);
2860 return err;
2861 }
2862 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
2863 list_add_tail(&nh->next, rt6_nh_list);
2864
2865 return 0;
2866}
2867
2868static int ip6_route_multipath_add(struct fib6_config *cfg)
51ebd318
ND
2869{
2870 struct fib6_config r_cfg;
2871 struct rtnexthop *rtnh;
6b9ea5a6
RP
2872 struct rt6_info *rt;
2873 struct rt6_nh *err_nh;
2874 struct rt6_nh *nh, *nh_safe;
51ebd318
ND
2875 int remaining;
2876 int attrlen;
6b9ea5a6
RP
2877 int err = 1;
2878 int nhn = 0;
2879 int replace = (cfg->fc_nlinfo.nlh &&
2880 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
2881 LIST_HEAD(rt6_nh_list);
51ebd318 2882
35f1b4e9 2883 remaining = cfg->fc_mp_len;
51ebd318 2884 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 2885
6b9ea5a6
RP
2886 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
2887 * rt6_info structs per nexthop
2888 */
51ebd318
ND
2889 while (rtnh_ok(rtnh, remaining)) {
2890 memcpy(&r_cfg, cfg, sizeof(*cfg));
2891 if (rtnh->rtnh_ifindex)
2892 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2893
2894 attrlen = rtnh_attrlen(rtnh);
2895 if (attrlen > 0) {
2896 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2897
2898 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2899 if (nla) {
67b61f6c 2900 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
2901 r_cfg.fc_flags |= RTF_GATEWAY;
2902 }
19e42e45
RP
2903 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
2904 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
2905 if (nla)
2906 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 2907 }
6b9ea5a6 2908
8c5b83f0
RP
2909 rt = ip6_route_info_create(&r_cfg);
2910 if (IS_ERR(rt)) {
2911 err = PTR_ERR(rt);
2912 rt = NULL;
6b9ea5a6 2913 goto cleanup;
8c5b83f0 2914 }
6b9ea5a6
RP
2915
2916 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
51ebd318 2917 if (err) {
6b9ea5a6
RP
2918 dst_free(&rt->dst);
2919 goto cleanup;
2920 }
2921
2922 rtnh = rtnh_next(rtnh, &remaining);
2923 }
2924
2925 err_nh = NULL;
2926 list_for_each_entry(nh, &rt6_nh_list, next) {
2927 err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
2928 /* nh->rt6_info is used or freed at this point, reset to NULL*/
2929 nh->rt6_info = NULL;
2930 if (err) {
2931 if (replace && nhn)
2932 ip6_print_replace_route_err(&rt6_nh_list);
2933 err_nh = nh;
2934 goto add_errout;
51ebd318 2935 }
6b9ea5a6 2936
1a72418b 2937 /* Because each route is added like a single route we remove
27596472
MK
2938 * these flags after the first nexthop: if there is a collision,
2939 * we have already failed to add the first nexthop:
2940 * fib6_add_rt2node() has rejected it; when replacing, old
2941 * nexthops have been replaced by first new, the rest should
2942 * be added to it.
1a72418b 2943 */
27596472
MK
2944 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
2945 NLM_F_REPLACE);
6b9ea5a6
RP
2946 nhn++;
2947 }
2948
2949 goto cleanup;
2950
2951add_errout:
2952 /* Delete routes that were already added */
2953 list_for_each_entry(nh, &rt6_nh_list, next) {
2954 if (err_nh == nh)
2955 break;
2956 ip6_route_del(&nh->r_cfg);
2957 }
2958
2959cleanup:
2960 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
2961 if (nh->rt6_info)
2962 dst_free(&nh->rt6_info->dst);
52fe51f8 2963 kfree(nh->mxc.mx);
6b9ea5a6
RP
2964 list_del(&nh->next);
2965 kfree(nh);
2966 }
2967
2968 return err;
2969}
2970
2971static int ip6_route_multipath_del(struct fib6_config *cfg)
2972{
2973 struct fib6_config r_cfg;
2974 struct rtnexthop *rtnh;
2975 int remaining;
2976 int attrlen;
2977 int err = 1, last_err = 0;
2978
2979 remaining = cfg->fc_mp_len;
2980 rtnh = (struct rtnexthop *)cfg->fc_mp;
2981
2982 /* Parse a Multipath Entry */
2983 while (rtnh_ok(rtnh, remaining)) {
2984 memcpy(&r_cfg, cfg, sizeof(*cfg));
2985 if (rtnh->rtnh_ifindex)
2986 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2987
2988 attrlen = rtnh_attrlen(rtnh);
2989 if (attrlen > 0) {
2990 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2991
2992 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2993 if (nla) {
2994 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2995 r_cfg.fc_flags |= RTF_GATEWAY;
2996 }
2997 }
2998 err = ip6_route_del(&r_cfg);
2999 if (err)
3000 last_err = err;
3001
51ebd318
ND
3002 rtnh = rtnh_next(rtnh, &remaining);
3003 }
3004
3005 return last_err;
3006}
3007
67ba4152 3008static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
1da177e4 3009{
86872cb5
TG
3010 struct fib6_config cfg;
3011 int err;
1da177e4 3012
86872cb5
TG
3013 err = rtm_to_fib6_config(skb, nlh, &cfg);
3014 if (err < 0)
3015 return err;
3016
51ebd318 3017 if (cfg.fc_mp)
6b9ea5a6 3018 return ip6_route_multipath_del(&cfg);
51ebd318
ND
3019 else
3020 return ip6_route_del(&cfg);
1da177e4
LT
3021}
3022
67ba4152 3023static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
1da177e4 3024{
86872cb5
TG
3025 struct fib6_config cfg;
3026 int err;
1da177e4 3027
86872cb5
TG
3028 err = rtm_to_fib6_config(skb, nlh, &cfg);
3029 if (err < 0)
3030 return err;
3031
51ebd318 3032 if (cfg.fc_mp)
6b9ea5a6 3033 return ip6_route_multipath_add(&cfg);
51ebd318
ND
3034 else
3035 return ip6_route_add(&cfg);
1da177e4
LT
3036}
3037
19e42e45 3038static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
339bf98f
TG
3039{
3040 return NLMSG_ALIGN(sizeof(struct rtmsg))
3041 + nla_total_size(16) /* RTA_SRC */
3042 + nla_total_size(16) /* RTA_DST */
3043 + nla_total_size(16) /* RTA_GATEWAY */
3044 + nla_total_size(16) /* RTA_PREFSRC */
3045 + nla_total_size(4) /* RTA_TABLE */
3046 + nla_total_size(4) /* RTA_IIF */
3047 + nla_total_size(4) /* RTA_OIF */
3048 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 3049 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 3050 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 3051 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 3052 + nla_total_size(1) /* RTA_PREF */
61adedf3 3053 + lwtunnel_get_encap_size(rt->dst.lwtstate);
339bf98f
TG
3054}
3055
191cd582
BH
3056static int rt6_fill_node(struct net *net,
3057 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80 3058 struct in6_addr *dst, struct in6_addr *src,
15e47304 3059 int iif, int type, u32 portid, u32 seq,
7bc570c8 3060 int prefix, int nowait, unsigned int flags)
1da177e4 3061{
4b32b5ad 3062 u32 metrics[RTAX_MAX];
1da177e4 3063 struct rtmsg *rtm;
2d7202bf 3064 struct nlmsghdr *nlh;
e3703b3d 3065 long expires;
9e762a4a 3066 u32 table;
1da177e4
LT
3067
3068 if (prefix) { /* user wants prefix routes only */
3069 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
3070 /* success since this is not a prefix route */
3071 return 1;
3072 }
3073 }
3074
15e47304 3075 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 3076 if (!nlh)
26932566 3077 return -EMSGSIZE;
2d7202bf
TG
3078
3079 rtm = nlmsg_data(nlh);
1da177e4
LT
3080 rtm->rtm_family = AF_INET6;
3081 rtm->rtm_dst_len = rt->rt6i_dst.plen;
3082 rtm->rtm_src_len = rt->rt6i_src.plen;
3083 rtm->rtm_tos = 0;
c71099ac 3084 if (rt->rt6i_table)
9e762a4a 3085 table = rt->rt6i_table->tb6_id;
c71099ac 3086 else
9e762a4a
PM
3087 table = RT6_TABLE_UNSPEC;
3088 rtm->rtm_table = table;
c78679e8
DM
3089 if (nla_put_u32(skb, RTA_TABLE, table))
3090 goto nla_put_failure;
ef2c7d7b
ND
3091 if (rt->rt6i_flags & RTF_REJECT) {
3092 switch (rt->dst.error) {
3093 case -EINVAL:
3094 rtm->rtm_type = RTN_BLACKHOLE;
3095 break;
3096 case -EACCES:
3097 rtm->rtm_type = RTN_PROHIBIT;
3098 break;
b4949ab2
ND
3099 case -EAGAIN:
3100 rtm->rtm_type = RTN_THROW;
3101 break;
ef2c7d7b
ND
3102 default:
3103 rtm->rtm_type = RTN_UNREACHABLE;
3104 break;
3105 }
3106 }
38308473 3107 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 3108 rtm->rtm_type = RTN_LOCAL;
d1918542 3109 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
3110 rtm->rtm_type = RTN_LOCAL;
3111 else
3112 rtm->rtm_type = RTN_UNICAST;
3113 rtm->rtm_flags = 0;
35103d11 3114 if (!netif_carrier_ok(rt->dst.dev)) {
cea45e20 3115 rtm->rtm_flags |= RTNH_F_LINKDOWN;
35103d11
AG
3116 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3117 rtm->rtm_flags |= RTNH_F_DEAD;
3118 }
1da177e4
LT
3119 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
3120 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 3121 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4 3122 rtm->rtm_protocol = RTPROT_REDIRECT;
f0396f60
DO
3123 else if (rt->rt6i_flags & RTF_ADDRCONF) {
3124 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
3125 rtm->rtm_protocol = RTPROT_RA;
3126 else
3127 rtm->rtm_protocol = RTPROT_KERNEL;
3128 }
1da177e4 3129
38308473 3130 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
3131 rtm->rtm_flags |= RTM_F_CLONED;
3132
3133 if (dst) {
930345ea 3134 if (nla_put_in6_addr(skb, RTA_DST, dst))
c78679e8 3135 goto nla_put_failure;
1ab1457c 3136 rtm->rtm_dst_len = 128;
1da177e4 3137 } else if (rtm->rtm_dst_len)
930345ea 3138 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
c78679e8 3139 goto nla_put_failure;
1da177e4
LT
3140#ifdef CONFIG_IPV6_SUBTREES
3141 if (src) {
930345ea 3142 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 3143 goto nla_put_failure;
1ab1457c 3144 rtm->rtm_src_len = 128;
c78679e8 3145 } else if (rtm->rtm_src_len &&
930345ea 3146 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
c78679e8 3147 goto nla_put_failure;
1da177e4 3148#endif
7bc570c8
YH
3149 if (iif) {
3150#ifdef CONFIG_IPV6_MROUTE
3151 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 3152 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
3153 if (err <= 0) {
3154 if (!nowait) {
3155 if (err == 0)
3156 return 0;
3157 goto nla_put_failure;
3158 } else {
3159 if (err == -EMSGSIZE)
3160 goto nla_put_failure;
3161 }
3162 }
3163 } else
3164#endif
c78679e8
DM
3165 if (nla_put_u32(skb, RTA_IIF, iif))
3166 goto nla_put_failure;
7bc570c8 3167 } else if (dst) {
1da177e4 3168 struct in6_addr saddr_buf;
c78679e8 3169 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
930345ea 3170 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 3171 goto nla_put_failure;
1da177e4 3172 }
2d7202bf 3173
c3968a85
DW
3174 if (rt->rt6i_prefsrc.plen) {
3175 struct in6_addr saddr_buf;
4e3fd7a0 3176 saddr_buf = rt->rt6i_prefsrc.addr;
930345ea 3177 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 3178 goto nla_put_failure;
c3968a85
DW
3179 }
3180
4b32b5ad
MKL
3181 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
3182 if (rt->rt6i_pmtu)
3183 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
3184 if (rtnetlink_put_metrics(skb, metrics) < 0)
2d7202bf
TG
3185 goto nla_put_failure;
3186
dd0cbf29 3187 if (rt->rt6i_flags & RTF_GATEWAY) {
930345ea 3188 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
94f826b8 3189 goto nla_put_failure;
94f826b8 3190 }
2d7202bf 3191
c78679e8
DM
3192 if (rt->dst.dev &&
3193 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3194 goto nla_put_failure;
3195 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3196 goto nla_put_failure;
8253947e
LW
3197
3198 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
69cdf8f9 3199
87a50699 3200 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
e3703b3d 3201 goto nla_put_failure;
2d7202bf 3202
c78ba6d6
LR
3203 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
3204 goto nla_put_failure;
3205
61adedf3 3206 lwtunnel_fill_encap(skb, rt->dst.lwtstate);
19e42e45 3207
053c095a
JB
3208 nlmsg_end(skb, nlh);
3209 return 0;
2d7202bf
TG
3210
3211nla_put_failure:
26932566
PM
3212 nlmsg_cancel(skb, nlh);
3213 return -EMSGSIZE;
1da177e4
LT
3214}
3215
1b43af54 3216int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
3217{
3218 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3219 int prefix;
3220
2d7202bf
TG
3221 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3222 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
3223 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3224 } else
3225 prefix = 0;
3226
191cd582
BH
3227 return rt6_fill_node(arg->net,
3228 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
15e47304 3229 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
7bc570c8 3230 prefix, 0, NLM_F_MULTI);
1da177e4
LT
3231}
3232
67ba4152 3233static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
1da177e4 3234{
3b1e0a65 3235 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
3236 struct nlattr *tb[RTA_MAX+1];
3237 struct rt6_info *rt;
1da177e4 3238 struct sk_buff *skb;
ab364a6f 3239 struct rtmsg *rtm;
4c9483b2 3240 struct flowi6 fl6;
72331bc0 3241 int err, iif = 0, oif = 0;
1da177e4 3242
ab364a6f
TG
3243 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
3244 if (err < 0)
3245 goto errout;
1da177e4 3246
ab364a6f 3247 err = -EINVAL;
4c9483b2 3248 memset(&fl6, 0, sizeof(fl6));
1da177e4 3249
ab364a6f
TG
3250 if (tb[RTA_SRC]) {
3251 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3252 goto errout;
3253
4e3fd7a0 3254 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
3255 }
3256
3257 if (tb[RTA_DST]) {
3258 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3259 goto errout;
3260
4e3fd7a0 3261 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
3262 }
3263
3264 if (tb[RTA_IIF])
3265 iif = nla_get_u32(tb[RTA_IIF]);
3266
3267 if (tb[RTA_OIF])
72331bc0 3268 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 3269
2e47b291
LC
3270 if (tb[RTA_MARK])
3271 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3272
1da177e4
LT
3273 if (iif) {
3274 struct net_device *dev;
72331bc0
SL
3275 int flags = 0;
3276
5578689a 3277 dev = __dev_get_by_index(net, iif);
1da177e4
LT
3278 if (!dev) {
3279 err = -ENODEV;
ab364a6f 3280 goto errout;
1da177e4 3281 }
72331bc0
SL
3282
3283 fl6.flowi6_iif = iif;
3284
3285 if (!ipv6_addr_any(&fl6.saddr))
3286 flags |= RT6_LOOKUP_F_HAS_SADDR;
3287
3288 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
3289 flags);
3290 } else {
3291 fl6.flowi6_oif = oif;
3292
ca254490
DA
3293 if (netif_index_is_l3_master(net, oif)) {
3294 fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC |
3295 FLOWI_FLAG_SKIP_NH_OIF;
3296 }
3297
72331bc0 3298 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
1da177e4
LT
3299 }
3300
ab364a6f 3301 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 3302 if (!skb) {
94e187c0 3303 ip6_rt_put(rt);
ab364a6f
TG
3304 err = -ENOBUFS;
3305 goto errout;
3306 }
1da177e4 3307
ab364a6f
TG
3308 /* Reserve room for dummy headers, this skb can pass
3309 through good chunk of routing engine.
3310 */
459a98ed 3311 skb_reset_mac_header(skb);
ab364a6f 3312 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 3313
d8d1f30b 3314 skb_dst_set(skb, &rt->dst);
1da177e4 3315
4c9483b2 3316 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
15e47304 3317 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
7bc570c8 3318 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 3319 if (err < 0) {
ab364a6f
TG
3320 kfree_skb(skb);
3321 goto errout;
1da177e4
LT
3322 }
3323
15e47304 3324 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 3325errout:
1da177e4 3326 return err;
1da177e4
LT
3327}
3328
37a1d361
RP
3329void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3330 unsigned int nlm_flags)
1da177e4
LT
3331{
3332 struct sk_buff *skb;
5578689a 3333 struct net *net = info->nl_net;
528c4ceb
DL
3334 u32 seq;
3335 int err;
3336
3337 err = -ENOBUFS;
38308473 3338 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 3339
19e42e45 3340 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 3341 if (!skb)
21713ebc
TG
3342 goto errout;
3343
191cd582 3344 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
37a1d361 3345 event, info->portid, seq, 0, 0, nlm_flags);
26932566
PM
3346 if (err < 0) {
3347 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3348 WARN_ON(err == -EMSGSIZE);
3349 kfree_skb(skb);
3350 goto errout;
3351 }
15e47304 3352 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
3353 info->nlh, gfp_any());
3354 return;
21713ebc
TG
3355errout:
3356 if (err < 0)
5578689a 3357 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
3358}
3359
8ed67789 3360static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 3361 unsigned long event, void *ptr)
8ed67789 3362{
351638e7 3363 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 3364 struct net *net = dev_net(dev);
8ed67789
DL
3365
3366 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 3367 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
3368 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3369#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3370 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 3371 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 3372 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
3373 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3374#endif
3375 }
3376
3377 return NOTIFY_OK;
3378}
3379
1da177e4
LT
3380/*
3381 * /proc
3382 */
3383
3384#ifdef CONFIG_PROC_FS
3385
33120b30
AD
3386static const struct file_operations ipv6_route_proc_fops = {
3387 .owner = THIS_MODULE,
3388 .open = ipv6_route_open,
3389 .read = seq_read,
3390 .llseek = seq_lseek,
8d2ca1d7 3391 .release = seq_release_net,
33120b30
AD
3392};
3393
1da177e4
LT
3394static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3395{
69ddb805 3396 struct net *net = (struct net *)seq->private;
1da177e4 3397 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
3398 net->ipv6.rt6_stats->fib_nodes,
3399 net->ipv6.rt6_stats->fib_route_nodes,
3400 net->ipv6.rt6_stats->fib_rt_alloc,
3401 net->ipv6.rt6_stats->fib_rt_entries,
3402 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 3403 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 3404 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
3405
3406 return 0;
3407}
3408
3409static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3410{
de05c557 3411 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
3412}
3413
9a32144e 3414static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
3415 .owner = THIS_MODULE,
3416 .open = rt6_stats_seq_open,
3417 .read = seq_read,
3418 .llseek = seq_lseek,
b6fcbdb4 3419 .release = single_release_net,
1da177e4
LT
3420};
3421#endif /* CONFIG_PROC_FS */
3422
3423#ifdef CONFIG_SYSCTL
3424
1da177e4 3425static
fe2c6338 3426int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
3427 void __user *buffer, size_t *lenp, loff_t *ppos)
3428{
c486da34
LAG
3429 struct net *net;
3430 int delay;
3431 if (!write)
1da177e4 3432 return -EINVAL;
c486da34
LAG
3433
3434 net = (struct net *)ctl->extra1;
3435 delay = net->ipv6.sysctl.flush_delay;
3436 proc_dointvec(ctl, write, buffer, lenp, ppos);
2ac3ac8f 3437 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 3438 return 0;
1da177e4
LT
3439}
3440
fe2c6338 3441struct ctl_table ipv6_route_table_template[] = {
1ab1457c 3442 {
1da177e4 3443 .procname = "flush",
4990509f 3444 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 3445 .maxlen = sizeof(int),
89c8b3a1 3446 .mode = 0200,
6d9f239a 3447 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
3448 },
3449 {
1da177e4 3450 .procname = "gc_thresh",
9a7ec3a9 3451 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
3452 .maxlen = sizeof(int),
3453 .mode = 0644,
6d9f239a 3454 .proc_handler = proc_dointvec,
1da177e4
LT
3455 },
3456 {
1da177e4 3457 .procname = "max_size",
4990509f 3458 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
3459 .maxlen = sizeof(int),
3460 .mode = 0644,
6d9f239a 3461 .proc_handler = proc_dointvec,
1da177e4
LT
3462 },
3463 {
1da177e4 3464 .procname = "gc_min_interval",
4990509f 3465 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
3466 .maxlen = sizeof(int),
3467 .mode = 0644,
6d9f239a 3468 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3469 },
3470 {
1da177e4 3471 .procname = "gc_timeout",
4990509f 3472 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
3473 .maxlen = sizeof(int),
3474 .mode = 0644,
6d9f239a 3475 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3476 },
3477 {
1da177e4 3478 .procname = "gc_interval",
4990509f 3479 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
3480 .maxlen = sizeof(int),
3481 .mode = 0644,
6d9f239a 3482 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3483 },
3484 {
1da177e4 3485 .procname = "gc_elasticity",
4990509f 3486 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
3487 .maxlen = sizeof(int),
3488 .mode = 0644,
f3d3f616 3489 .proc_handler = proc_dointvec,
1da177e4
LT
3490 },
3491 {
1da177e4 3492 .procname = "mtu_expires",
4990509f 3493 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
3494 .maxlen = sizeof(int),
3495 .mode = 0644,
6d9f239a 3496 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3497 },
3498 {
1da177e4 3499 .procname = "min_adv_mss",
4990509f 3500 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
3501 .maxlen = sizeof(int),
3502 .mode = 0644,
f3d3f616 3503 .proc_handler = proc_dointvec,
1da177e4
LT
3504 },
3505 {
1da177e4 3506 .procname = "gc_min_interval_ms",
4990509f 3507 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
3508 .maxlen = sizeof(int),
3509 .mode = 0644,
6d9f239a 3510 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 3511 },
f8572d8f 3512 { }
1da177e4
LT
3513};
3514
2c8c1e72 3515struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
3516{
3517 struct ctl_table *table;
3518
3519 table = kmemdup(ipv6_route_table_template,
3520 sizeof(ipv6_route_table_template),
3521 GFP_KERNEL);
5ee09105
YH
3522
3523 if (table) {
3524 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 3525 table[0].extra1 = net;
86393e52 3526 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
3527 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3528 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3529 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3530 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3531 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3532 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3533 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 3534 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
464dc801
EB
3535
3536 /* Don't export sysctls to unprivileged users */
3537 if (net->user_ns != &init_user_ns)
3538 table[0].procname = NULL;
5ee09105
YH
3539 }
3540
760f2d01
DL
3541 return table;
3542}
1da177e4
LT
3543#endif
3544
2c8c1e72 3545static int __net_init ip6_route_net_init(struct net *net)
cdb18761 3546{
633d424b 3547 int ret = -ENOMEM;
8ed67789 3548
86393e52
AD
3549 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3550 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 3551
fc66f95c
ED
3552 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3553 goto out_ip6_dst_ops;
3554
8ed67789
DL
3555 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3556 sizeof(*net->ipv6.ip6_null_entry),
3557 GFP_KERNEL);
3558 if (!net->ipv6.ip6_null_entry)
fc66f95c 3559 goto out_ip6_dst_entries;
d8d1f30b 3560 net->ipv6.ip6_null_entry->dst.path =
8ed67789 3561 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 3562 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3563 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3564 ip6_template_metrics, true);
8ed67789
DL
3565
3566#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3567 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3568 sizeof(*net->ipv6.ip6_prohibit_entry),
3569 GFP_KERNEL);
68fffc67
PZ
3570 if (!net->ipv6.ip6_prohibit_entry)
3571 goto out_ip6_null_entry;
d8d1f30b 3572 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 3573 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 3574 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3575 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3576 ip6_template_metrics, true);
8ed67789
DL
3577
3578 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3579 sizeof(*net->ipv6.ip6_blk_hole_entry),
3580 GFP_KERNEL);
68fffc67
PZ
3581 if (!net->ipv6.ip6_blk_hole_entry)
3582 goto out_ip6_prohibit_entry;
d8d1f30b 3583 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 3584 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 3585 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3586 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3587 ip6_template_metrics, true);
8ed67789
DL
3588#endif
3589
b339a47c
PZ
3590 net->ipv6.sysctl.flush_delay = 0;
3591 net->ipv6.sysctl.ip6_rt_max_size = 4096;
3592 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3593 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3594 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3595 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3596 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3597 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3598
6891a346
BT
3599 net->ipv6.ip6_rt_gc_expire = 30*HZ;
3600
8ed67789
DL
3601 ret = 0;
3602out:
3603 return ret;
f2fc6a54 3604
68fffc67
PZ
3605#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3606out_ip6_prohibit_entry:
3607 kfree(net->ipv6.ip6_prohibit_entry);
3608out_ip6_null_entry:
3609 kfree(net->ipv6.ip6_null_entry);
3610#endif
fc66f95c
ED
3611out_ip6_dst_entries:
3612 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 3613out_ip6_dst_ops:
f2fc6a54 3614 goto out;
cdb18761
DL
3615}
3616
2c8c1e72 3617static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 3618{
8ed67789
DL
3619 kfree(net->ipv6.ip6_null_entry);
3620#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3621 kfree(net->ipv6.ip6_prohibit_entry);
3622 kfree(net->ipv6.ip6_blk_hole_entry);
3623#endif
41bb78b4 3624 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
3625}
3626
d189634e
TG
3627static int __net_init ip6_route_net_init_late(struct net *net)
3628{
3629#ifdef CONFIG_PROC_FS
d4beaa66
G
3630 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3631 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
d189634e
TG
3632#endif
3633 return 0;
3634}
3635
3636static void __net_exit ip6_route_net_exit_late(struct net *net)
3637{
3638#ifdef CONFIG_PROC_FS
ece31ffd
G
3639 remove_proc_entry("ipv6_route", net->proc_net);
3640 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
3641#endif
3642}
3643
cdb18761
DL
3644static struct pernet_operations ip6_route_net_ops = {
3645 .init = ip6_route_net_init,
3646 .exit = ip6_route_net_exit,
3647};
3648
c3426b47
DM
3649static int __net_init ipv6_inetpeer_init(struct net *net)
3650{
3651 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3652
3653 if (!bp)
3654 return -ENOMEM;
3655 inet_peer_base_init(bp);
3656 net->ipv6.peers = bp;
3657 return 0;
3658}
3659
3660static void __net_exit ipv6_inetpeer_exit(struct net *net)
3661{
3662 struct inet_peer_base *bp = net->ipv6.peers;
3663
3664 net->ipv6.peers = NULL;
56a6b248 3665 inetpeer_invalidate_tree(bp);
c3426b47
DM
3666 kfree(bp);
3667}
3668
2b823f72 3669static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
3670 .init = ipv6_inetpeer_init,
3671 .exit = ipv6_inetpeer_exit,
3672};
3673
d189634e
TG
3674static struct pernet_operations ip6_route_net_late_ops = {
3675 .init = ip6_route_net_init_late,
3676 .exit = ip6_route_net_exit_late,
3677};
3678
8ed67789
DL
3679static struct notifier_block ip6_route_dev_notifier = {
3680 .notifier_call = ip6_route_dev_notify,
3681 .priority = 0,
3682};
3683
433d49c3 3684int __init ip6_route_init(void)
1da177e4 3685{
433d49c3 3686 int ret;
8d0b94af 3687 int cpu;
433d49c3 3688
9a7ec3a9
DL
3689 ret = -ENOMEM;
3690 ip6_dst_ops_template.kmem_cachep =
e5d679f3 3691 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 3692 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 3693 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 3694 goto out;
14e50e57 3695
fc66f95c 3696 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 3697 if (ret)
bdb3289f 3698 goto out_kmem_cache;
bdb3289f 3699
c3426b47
DM
3700 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3701 if (ret)
e8803b6c 3702 goto out_dst_entries;
2a0c451a 3703
7e52b33b
DM
3704 ret = register_pernet_subsys(&ip6_route_net_ops);
3705 if (ret)
3706 goto out_register_inetpeer;
c3426b47 3707
5dc121e9
AE
3708 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3709
8ed67789
DL
3710 /* Registering of the loopback is done before this portion of code,
3711 * the loopback reference in rt6_info will not be taken, do it
3712 * manually for init_net */
d8d1f30b 3713 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3714 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3715 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3716 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 3717 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 3718 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3719 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3720 #endif
e8803b6c 3721 ret = fib6_init();
433d49c3 3722 if (ret)
8ed67789 3723 goto out_register_subsys;
433d49c3 3724
433d49c3
DL
3725 ret = xfrm6_init();
3726 if (ret)
e8803b6c 3727 goto out_fib6_init;
c35b7e72 3728
433d49c3
DL
3729 ret = fib6_rules_init();
3730 if (ret)
3731 goto xfrm6_init;
7e5449c2 3732
d189634e
TG
3733 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3734 if (ret)
3735 goto fib6_rules_init;
3736
433d49c3 3737 ret = -ENOBUFS;
c7ac8679
GR
3738 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3739 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3740 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
d189634e 3741 goto out_register_late_subsys;
c127ea2c 3742
8ed67789 3743 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 3744 if (ret)
d189634e 3745 goto out_register_late_subsys;
8ed67789 3746
8d0b94af
MKL
3747 for_each_possible_cpu(cpu) {
3748 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
3749
3750 INIT_LIST_HEAD(&ul->head);
3751 spin_lock_init(&ul->lock);
3752 }
3753
433d49c3
DL
3754out:
3755 return ret;
3756
d189634e
TG
3757out_register_late_subsys:
3758 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 3759fib6_rules_init:
433d49c3
DL
3760 fib6_rules_cleanup();
3761xfrm6_init:
433d49c3 3762 xfrm6_fini();
2a0c451a
TG
3763out_fib6_init:
3764 fib6_gc_cleanup();
8ed67789
DL
3765out_register_subsys:
3766 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
3767out_register_inetpeer:
3768 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
3769out_dst_entries:
3770 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 3771out_kmem_cache:
f2fc6a54 3772 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 3773 goto out;
1da177e4
LT
3774}
3775
3776void ip6_route_cleanup(void)
3777{
8ed67789 3778 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 3779 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 3780 fib6_rules_cleanup();
1da177e4 3781 xfrm6_fini();
1da177e4 3782 fib6_gc_cleanup();
c3426b47 3783 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 3784 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3785 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3786 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3787}
This page took 1.343652 seconds and 5 git commands to generate.