[NETNS]: Add namespace parameter to __ip_route_output_key.
[deliverable/linux.git] / net / ipv4 / fib_semantics.c
1 /*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: semantics.
7 *
8 * Version: $Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
9 *
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
18 #include <asm/uaccess.h>
19 #include <asm/system.h>
20 #include <linux/bitops.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 #include <linux/jiffies.h>
24 #include <linux/mm.h>
25 #include <linux/string.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/errno.h>
29 #include <linux/in.h>
30 #include <linux/inet.h>
31 #include <linux/inetdevice.h>
32 #include <linux/netdevice.h>
33 #include <linux/if_arp.h>
34 #include <linux/proc_fs.h>
35 #include <linux/skbuff.h>
36 #include <linux/init.h>
37
38 #include <net/arp.h>
39 #include <net/ip.h>
40 #include <net/protocol.h>
41 #include <net/route.h>
42 #include <net/tcp.h>
43 #include <net/sock.h>
44 #include <net/ip_fib.h>
45 #include <net/netlink.h>
46 #include <net/nexthop.h>
47
48 #include "fib_lookup.h"
49
50 static DEFINE_SPINLOCK(fib_info_lock);
51 static struct hlist_head *fib_info_hash;
52 static struct hlist_head *fib_info_laddrhash;
53 static unsigned int fib_hash_size;
54 static unsigned int fib_info_cnt;
55
56 #define DEVINDEX_HASHBITS 8
57 #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
58 static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
59
/*
 * Nexthop iteration helpers.
 *
 * for_nexthops(fi) / change_nexthops(fi) open a brace, declare the loop
 * index "nhsel" and the cursor "nh" (const for for_nexthops, writable for
 * change_nexthops) and start a for loop over the nexthops of fi.  The
 * loop body follows the macro and the extra brace is closed again with
 * endfor_nexthops(fi).
 */
#ifdef CONFIG_IP_ROUTE_MULTIPATH

static DEFINE_SPINLOCK(fib_multipath_lock);

#define for_nexthops(fi) {						\
	int nhsel;							\
	const struct fib_nh *nh;					\
	for (nhsel = 0, nh = (fi)->fib_nh;				\
	     nhsel < (fi)->fib_nhs;					\
	     nh++, nhsel++)

#define change_nexthops(fi) {						\
	int nhsel;							\
	struct fib_nh *nh;						\
	for (nhsel = 0, nh = (struct fib_nh *)((fi)->fib_nh);		\
	     nhsel < (fi)->fib_nhs;					\
	     nh++, nhsel++)

#else /* CONFIG_IP_ROUTE_MULTIPATH */

/*
 * Without multipath only the first nexthop is visited; the one-pass loop
 * is kept so that break/continue in the body still work.  Hopefully gcc
 * optimizes the dummy loop away.
 */

#define for_nexthops(fi) {						\
	int nhsel = 0;							\
	const struct fib_nh *nh = (fi)->fib_nh;				\
	for (nhsel = 0; nhsel < 1; nhsel++)

#define change_nexthops(fi) {						\
	int nhsel = 0;							\
	struct fib_nh *nh = (struct fib_nh *)((fi)->fib_nh);		\
	for (nhsel = 0; nhsel < 1; nhsel++)

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

#define endfor_nexthops(fi) }
83
84
85 static const struct
86 {
87 int error;
88 u8 scope;
89 } fib_props[RTN_MAX + 1] = {
90 {
91 .error = 0,
92 .scope = RT_SCOPE_NOWHERE,
93 }, /* RTN_UNSPEC */
94 {
95 .error = 0,
96 .scope = RT_SCOPE_UNIVERSE,
97 }, /* RTN_UNICAST */
98 {
99 .error = 0,
100 .scope = RT_SCOPE_HOST,
101 }, /* RTN_LOCAL */
102 {
103 .error = 0,
104 .scope = RT_SCOPE_LINK,
105 }, /* RTN_BROADCAST */
106 {
107 .error = 0,
108 .scope = RT_SCOPE_LINK,
109 }, /* RTN_ANYCAST */
110 {
111 .error = 0,
112 .scope = RT_SCOPE_UNIVERSE,
113 }, /* RTN_MULTICAST */
114 {
115 .error = -EINVAL,
116 .scope = RT_SCOPE_UNIVERSE,
117 }, /* RTN_BLACKHOLE */
118 {
119 .error = -EHOSTUNREACH,
120 .scope = RT_SCOPE_UNIVERSE,
121 }, /* RTN_UNREACHABLE */
122 {
123 .error = -EACCES,
124 .scope = RT_SCOPE_UNIVERSE,
125 }, /* RTN_PROHIBIT */
126 {
127 .error = -EAGAIN,
128 .scope = RT_SCOPE_UNIVERSE,
129 }, /* RTN_THROW */
130 {
131 .error = -EINVAL,
132 .scope = RT_SCOPE_NOWHERE,
133 }, /* RTN_NAT */
134 {
135 .error = -EINVAL,
136 .scope = RT_SCOPE_NOWHERE,
137 }, /* RTN_XRESOLVE */
138 };
139
140
141 /* Release a nexthop info record */
142
143 void free_fib_info(struct fib_info *fi)
144 {
145 if (fi->fib_dead == 0) {
146 printk(KERN_WARNING "Freeing alive fib_info %p\n", fi);
147 return;
148 }
149 change_nexthops(fi) {
150 if (nh->nh_dev)
151 dev_put(nh->nh_dev);
152 nh->nh_dev = NULL;
153 } endfor_nexthops(fi);
154 fib_info_cnt--;
155 kfree(fi);
156 }
157
158 void fib_release_info(struct fib_info *fi)
159 {
160 spin_lock_bh(&fib_info_lock);
161 if (fi && --fi->fib_treeref == 0) {
162 hlist_del(&fi->fib_hash);
163 if (fi->fib_prefsrc)
164 hlist_del(&fi->fib_lhash);
165 change_nexthops(fi) {
166 if (!nh->nh_dev)
167 continue;
168 hlist_del(&nh->nh_hash);
169 } endfor_nexthops(fi)
170 fi->fib_dead = 1;
171 fib_info_put(fi);
172 }
173 spin_unlock_bh(&fib_info_lock);
174 }
175
176 static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
177 {
178 const struct fib_nh *onh = ofi->fib_nh;
179
180 for_nexthops(fi) {
181 if (nh->nh_oif != onh->nh_oif ||
182 nh->nh_gw != onh->nh_gw ||
183 nh->nh_scope != onh->nh_scope ||
184 #ifdef CONFIG_IP_ROUTE_MULTIPATH
185 nh->nh_weight != onh->nh_weight ||
186 #endif
187 #ifdef CONFIG_NET_CLS_ROUTE
188 nh->nh_tclassid != onh->nh_tclassid ||
189 #endif
190 ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
191 return -1;
192 onh++;
193 } endfor_nexthops(fi);
194 return 0;
195 }
196
197 static inline unsigned int fib_devindex_hashfn(unsigned int val)
198 {
199 unsigned int mask = DEVINDEX_HASHSIZE - 1;
200
201 return (val ^
202 (val >> DEVINDEX_HASHBITS) ^
203 (val >> (DEVINDEX_HASHBITS * 2))) & mask;
204 }
205
206 static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
207 {
208 unsigned int mask = (fib_hash_size - 1);
209 unsigned int val = fi->fib_nhs;
210
211 val ^= fi->fib_protocol;
212 val ^= (__force u32)fi->fib_prefsrc;
213 val ^= fi->fib_priority;
214 for_nexthops(fi) {
215 val ^= fib_devindex_hashfn(nh->nh_oif);
216 } endfor_nexthops(fi)
217
218 return (val ^ (val >> 7) ^ (val >> 12)) & mask;
219 }
220
221 static struct fib_info *fib_find_info(const struct fib_info *nfi)
222 {
223 struct hlist_head *head;
224 struct hlist_node *node;
225 struct fib_info *fi;
226 unsigned int hash;
227
228 hash = fib_info_hashfn(nfi);
229 head = &fib_info_hash[hash];
230
231 hlist_for_each_entry(fi, node, head, fib_hash) {
232 if (fi->fib_nhs != nfi->fib_nhs)
233 continue;
234 if (nfi->fib_protocol == fi->fib_protocol &&
235 nfi->fib_prefsrc == fi->fib_prefsrc &&
236 nfi->fib_priority == fi->fib_priority &&
237 memcmp(nfi->fib_metrics, fi->fib_metrics,
238 sizeof(fi->fib_metrics)) == 0 &&
239 ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
240 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
241 return fi;
242 }
243
244 return NULL;
245 }
246
247 /* Check, that the gateway is already configured.
248 Used only by redirect accept routine.
249 */
250
251 int ip_fib_check_default(__be32 gw, struct net_device *dev)
252 {
253 struct hlist_head *head;
254 struct hlist_node *node;
255 struct fib_nh *nh;
256 unsigned int hash;
257
258 spin_lock(&fib_info_lock);
259
260 hash = fib_devindex_hashfn(dev->ifindex);
261 head = &fib_info_devhash[hash];
262 hlist_for_each_entry(nh, node, head, nh_hash) {
263 if (nh->nh_dev == dev &&
264 nh->nh_gw == gw &&
265 !(nh->nh_flags&RTNH_F_DEAD)) {
266 spin_unlock(&fib_info_lock);
267 return 0;
268 }
269 }
270
271 spin_unlock(&fib_info_lock);
272
273 return -1;
274 }
275
276 static inline size_t fib_nlmsg_size(struct fib_info *fi)
277 {
278 size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg))
279 + nla_total_size(4) /* RTA_TABLE */
280 + nla_total_size(4) /* RTA_DST */
281 + nla_total_size(4) /* RTA_PRIORITY */
282 + nla_total_size(4); /* RTA_PREFSRC */
283
284 /* space for nested metrics */
285 payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
286
287 if (fi->fib_nhs) {
288 /* Also handles the special case fib_nhs == 1 */
289
290 /* each nexthop is packed in an attribute */
291 size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
292
293 /* may contain flow and gateway attribute */
294 nhsize += 2 * nla_total_size(4);
295
296 /* all nexthops are packed in a nested attribute */
297 payload += nla_total_size(fi->fib_nhs * nhsize);
298 }
299
300 return payload;
301 }
302
303 void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
304 int dst_len, u32 tb_id, struct nl_info *info,
305 unsigned int nlm_flags)
306 {
307 struct sk_buff *skb;
308 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
309 int err = -ENOBUFS;
310
311 skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL);
312 if (skb == NULL)
313 goto errout;
314
315 err = fib_dump_info(skb, info->pid, seq, event, tb_id,
316 fa->fa_type, fa->fa_scope, key, dst_len,
317 fa->fa_tos, fa->fa_info, nlm_flags);
318 if (err < 0) {
319 /* -EMSGSIZE implies BUG in fib_nlmsg_size() */
320 WARN_ON(err == -EMSGSIZE);
321 kfree_skb(skb);
322 goto errout;
323 }
324 err = rtnl_notify(skb, info->nl_net, info->pid, RTNLGRP_IPV4_ROUTE,
325 info->nlh, GFP_KERNEL);
326 errout:
327 if (err < 0)
328 rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err);
329 }
330
331 /* Return the first fib alias matching TOS with
332 * priority less than or equal to PRIO.
333 */
334 struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
335 {
336 if (fah) {
337 struct fib_alias *fa;
338 list_for_each_entry(fa, fah, fa_list) {
339 if (fa->fa_tos > tos)
340 continue;
341 if (fa->fa_info->fib_priority >= prio ||
342 fa->fa_tos < tos)
343 return fa;
344 }
345 }
346 return NULL;
347 }
348
349 int fib_detect_death(struct fib_info *fi, int order,
350 struct fib_info **last_resort, int *last_idx, int dflt)
351 {
352 struct neighbour *n;
353 int state = NUD_NONE;
354
355 n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
356 if (n) {
357 state = n->nud_state;
358 neigh_release(n);
359 }
360 if (state==NUD_REACHABLE)
361 return 0;
362 if ((state&NUD_VALID) && order != dflt)
363 return 0;
364 if ((state&NUD_VALID) ||
365 (*last_idx<0 && order > dflt)) {
366 *last_resort = fi;
367 *last_idx = order;
368 }
369 return 1;
370 }
371
372 #ifdef CONFIG_IP_ROUTE_MULTIPATH
373
/*
 * Count the rtnexthop entries in a buffer of @remaining bytes starting
 * at @rtnh.  Returns 0 if bytes are left over after the last complete
 * entry, because that implies an invalid nexthop configuration.
 */
static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
{
	int count;

	for (count = 0; rtnh_ok(rtnh, remaining); count++)
		rtnh = rtnh_next(rtnh, &remaining);

	if (remaining > 0)
		return 0;

	return count;
}
386
387 static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
388 int remaining, struct fib_config *cfg)
389 {
390 change_nexthops(fi) {
391 int attrlen;
392
393 if (!rtnh_ok(rtnh, remaining))
394 return -EINVAL;
395
396 nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
397 nh->nh_oif = rtnh->rtnh_ifindex;
398 nh->nh_weight = rtnh->rtnh_hops + 1;
399
400 attrlen = rtnh_attrlen(rtnh);
401 if (attrlen > 0) {
402 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
403
404 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
405 nh->nh_gw = nla ? nla_get_be32(nla) : 0;
406 #ifdef CONFIG_NET_CLS_ROUTE
407 nla = nla_find(attrs, attrlen, RTA_FLOW);
408 nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
409 #endif
410 }
411
412 rtnh = rtnh_next(rtnh, &remaining);
413 } endfor_nexthops(fi);
414
415 return 0;
416 }
417
418 #endif
419
420 int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
421 {
422 #ifdef CONFIG_IP_ROUTE_MULTIPATH
423 struct rtnexthop *rtnh;
424 int remaining;
425 #endif
426
427 if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
428 return 1;
429
430 if (cfg->fc_oif || cfg->fc_gw) {
431 if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
432 (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw))
433 return 0;
434 return 1;
435 }
436
437 #ifdef CONFIG_IP_ROUTE_MULTIPATH
438 if (cfg->fc_mp == NULL)
439 return 0;
440
441 rtnh = cfg->fc_mp;
442 remaining = cfg->fc_mp_len;
443
444 for_nexthops(fi) {
445 int attrlen;
446
447 if (!rtnh_ok(rtnh, remaining))
448 return -EINVAL;
449
450 if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif)
451 return 1;
452
453 attrlen = rtnh_attrlen(rtnh);
454 if (attrlen < 0) {
455 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
456
457 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
458 if (nla && nla_get_be32(nla) != nh->nh_gw)
459 return 1;
460 #ifdef CONFIG_NET_CLS_ROUTE
461 nla = nla_find(attrs, attrlen, RTA_FLOW);
462 if (nla && nla_get_u32(nla) != nh->nh_tclassid)
463 return 1;
464 #endif
465 }
466
467 rtnh = rtnh_next(rtnh, &remaining);
468 } endfor_nexthops(fi);
469 #endif
470 return 0;
471 }
472
473
474 /*
475 Picture
476 -------
477
478 Semantics of nexthop is very messy for historical reasons.
479 We have to take into account, that:
480 a) gateway can be actually local interface address,
481 so that gatewayed route is direct.
482 b) gateway must be on-link address, possibly
483 described not by an ifaddr, but also by a direct route.
484 c) If both gateway and interface are specified, they should not
485 contradict.
486 d) If we use tunnel routes, gateway could be not on-link.
487
488 Attempt to reconcile all of these (alas, self-contradictory) conditions
489 results in pretty ugly and hairy code with obscure logic.
490
491 I chose to generalize it instead, so that the size
492 of code does not increase practically, but it becomes
493 much more general.
494 Every prefix is assigned a "scope" value: "host" is local address,
495 "link" is direct route,
496 [ ... "site" ... "interior" ... ]
497 and "universe" is true gateway route with global meaning.
498
499 Every prefix refers to a set of "nexthop"s (gw, oif),
500 where gw must have narrower scope. This recursion stops
501 when gw has LOCAL scope or if "nexthop" is declared ONLINK,
502 which means that gw is forced to be on link.
503
504 Code is still hairy, but now it is apparently logically
505 consistent and very flexible. E.g. as a by-product it allows
506 independent exterior and interior routing processes
507 to co-exist in peace.
508
509 Normally it looks as following.
510
511 {universe prefix} -> (gw, oif) [scope link]
512 |
513 |-> {link prefix} -> (gw, oif) [scope local]
514 |
515 |-> {local prefix} (terminal node)
516 */
517
518 static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
519 struct fib_nh *nh)
520 {
521 int err;
522 struct net *net;
523
524 net = cfg->fc_nlinfo.nl_net;
525 if (nh->nh_gw) {
526 struct fib_result res;
527
528 #ifdef CONFIG_IP_ROUTE_PERVASIVE
529 if (nh->nh_flags&RTNH_F_PERVASIVE)
530 return 0;
531 #endif
532 if (nh->nh_flags&RTNH_F_ONLINK) {
533 struct net_device *dev;
534
535 if (cfg->fc_scope >= RT_SCOPE_LINK)
536 return -EINVAL;
537 if (inet_addr_type(net, nh->nh_gw) != RTN_UNICAST)
538 return -EINVAL;
539 if ((dev = __dev_get_by_index(net, nh->nh_oif)) == NULL)
540 return -ENODEV;
541 if (!(dev->flags&IFF_UP))
542 return -ENETDOWN;
543 nh->nh_dev = dev;
544 dev_hold(dev);
545 nh->nh_scope = RT_SCOPE_LINK;
546 return 0;
547 }
548 {
549 struct flowi fl = {
550 .nl_u = {
551 .ip4_u = {
552 .daddr = nh->nh_gw,
553 .scope = cfg->fc_scope + 1,
554 },
555 },
556 .oif = nh->nh_oif,
557 };
558
559 /* It is not necessary, but requires a bit of thinking */
560 if (fl.fl4_scope < RT_SCOPE_LINK)
561 fl.fl4_scope = RT_SCOPE_LINK;
562 if ((err = fib_lookup(net, &fl, &res)) != 0)
563 return err;
564 }
565 err = -EINVAL;
566 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
567 goto out;
568 nh->nh_scope = res.scope;
569 nh->nh_oif = FIB_RES_OIF(res);
570 if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
571 goto out;
572 dev_hold(nh->nh_dev);
573 err = -ENETDOWN;
574 if (!(nh->nh_dev->flags & IFF_UP))
575 goto out;
576 err = 0;
577 out:
578 fib_res_put(&res);
579 return err;
580 } else {
581 struct in_device *in_dev;
582
583 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
584 return -EINVAL;
585
586 in_dev = inetdev_by_index(net, nh->nh_oif);
587 if (in_dev == NULL)
588 return -ENODEV;
589 if (!(in_dev->dev->flags&IFF_UP)) {
590 in_dev_put(in_dev);
591 return -ENETDOWN;
592 }
593 nh->nh_dev = in_dev->dev;
594 dev_hold(nh->nh_dev);
595 nh->nh_scope = RT_SCOPE_HOST;
596 in_dev_put(in_dev);
597 }
598 return 0;
599 }
600
601 static inline unsigned int fib_laddr_hashfn(__be32 val)
602 {
603 unsigned int mask = (fib_hash_size - 1);
604
605 return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask;
606 }
607
608 static struct hlist_head *fib_hash_alloc(int bytes)
609 {
610 if (bytes <= PAGE_SIZE)
611 return kzalloc(bytes, GFP_KERNEL);
612 else
613 return (struct hlist_head *)
614 __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(bytes));
615 }
616
617 static void fib_hash_free(struct hlist_head *hash, int bytes)
618 {
619 if (!hash)
620 return;
621
622 if (bytes <= PAGE_SIZE)
623 kfree(hash);
624 else
625 free_pages((unsigned long) hash, get_order(bytes));
626 }
627
628 static void fib_hash_move(struct hlist_head *new_info_hash,
629 struct hlist_head *new_laddrhash,
630 unsigned int new_size)
631 {
632 struct hlist_head *old_info_hash, *old_laddrhash;
633 unsigned int old_size = fib_hash_size;
634 unsigned int i, bytes;
635
636 spin_lock_bh(&fib_info_lock);
637 old_info_hash = fib_info_hash;
638 old_laddrhash = fib_info_laddrhash;
639 fib_hash_size = new_size;
640
641 for (i = 0; i < old_size; i++) {
642 struct hlist_head *head = &fib_info_hash[i];
643 struct hlist_node *node, *n;
644 struct fib_info *fi;
645
646 hlist_for_each_entry_safe(fi, node, n, head, fib_hash) {
647 struct hlist_head *dest;
648 unsigned int new_hash;
649
650 hlist_del(&fi->fib_hash);
651
652 new_hash = fib_info_hashfn(fi);
653 dest = &new_info_hash[new_hash];
654 hlist_add_head(&fi->fib_hash, dest);
655 }
656 }
657 fib_info_hash = new_info_hash;
658
659 for (i = 0; i < old_size; i++) {
660 struct hlist_head *lhead = &fib_info_laddrhash[i];
661 struct hlist_node *node, *n;
662 struct fib_info *fi;
663
664 hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) {
665 struct hlist_head *ldest;
666 unsigned int new_hash;
667
668 hlist_del(&fi->fib_lhash);
669
670 new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
671 ldest = &new_laddrhash[new_hash];
672 hlist_add_head(&fi->fib_lhash, ldest);
673 }
674 }
675 fib_info_laddrhash = new_laddrhash;
676
677 spin_unlock_bh(&fib_info_lock);
678
679 bytes = old_size * sizeof(struct hlist_head *);
680 fib_hash_free(old_info_hash, bytes);
681 fib_hash_free(old_laddrhash, bytes);
682 }
683
684 struct fib_info *fib_create_info(struct fib_config *cfg)
685 {
686 int err;
687 struct fib_info *fi = NULL;
688 struct fib_info *ofi;
689 int nhs = 1;
690
691 /* Fast check to catch the most weird cases */
692 if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
693 goto err_inval;
694
695 #ifdef CONFIG_IP_ROUTE_MULTIPATH
696 if (cfg->fc_mp) {
697 nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len);
698 if (nhs == 0)
699 goto err_inval;
700 }
701 #endif
702
703 err = -ENOBUFS;
704 if (fib_info_cnt >= fib_hash_size) {
705 unsigned int new_size = fib_hash_size << 1;
706 struct hlist_head *new_info_hash;
707 struct hlist_head *new_laddrhash;
708 unsigned int bytes;
709
710 if (!new_size)
711 new_size = 1;
712 bytes = new_size * sizeof(struct hlist_head *);
713 new_info_hash = fib_hash_alloc(bytes);
714 new_laddrhash = fib_hash_alloc(bytes);
715 if (!new_info_hash || !new_laddrhash) {
716 fib_hash_free(new_info_hash, bytes);
717 fib_hash_free(new_laddrhash, bytes);
718 } else
719 fib_hash_move(new_info_hash, new_laddrhash, new_size);
720
721 if (!fib_hash_size)
722 goto failure;
723 }
724
725 fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
726 if (fi == NULL)
727 goto failure;
728 fib_info_cnt++;
729
730 fi->fib_protocol = cfg->fc_protocol;
731 fi->fib_flags = cfg->fc_flags;
732 fi->fib_priority = cfg->fc_priority;
733 fi->fib_prefsrc = cfg->fc_prefsrc;
734
735 fi->fib_nhs = nhs;
736 change_nexthops(fi) {
737 nh->nh_parent = fi;
738 } endfor_nexthops(fi)
739
740 if (cfg->fc_mx) {
741 struct nlattr *nla;
742 int remaining;
743
744 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
745 int type = nla_type(nla);
746
747 if (type) {
748 if (type > RTAX_MAX)
749 goto err_inval;
750 fi->fib_metrics[type - 1] = nla_get_u32(nla);
751 }
752 }
753 }
754
755 if (cfg->fc_mp) {
756 #ifdef CONFIG_IP_ROUTE_MULTIPATH
757 err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg);
758 if (err != 0)
759 goto failure;
760 if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif)
761 goto err_inval;
762 if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
763 goto err_inval;
764 #ifdef CONFIG_NET_CLS_ROUTE
765 if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
766 goto err_inval;
767 #endif
768 #else
769 goto err_inval;
770 #endif
771 } else {
772 struct fib_nh *nh = fi->fib_nh;
773
774 nh->nh_oif = cfg->fc_oif;
775 nh->nh_gw = cfg->fc_gw;
776 nh->nh_flags = cfg->fc_flags;
777 #ifdef CONFIG_NET_CLS_ROUTE
778 nh->nh_tclassid = cfg->fc_flow;
779 #endif
780 #ifdef CONFIG_IP_ROUTE_MULTIPATH
781 nh->nh_weight = 1;
782 #endif
783 }
784
785 if (fib_props[cfg->fc_type].error) {
786 if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
787 goto err_inval;
788 goto link_it;
789 }
790
791 if (cfg->fc_scope > RT_SCOPE_HOST)
792 goto err_inval;
793
794 if (cfg->fc_scope == RT_SCOPE_HOST) {
795 struct fib_nh *nh = fi->fib_nh;
796
797 /* Local address is added. */
798 if (nhs != 1 || nh->nh_gw)
799 goto err_inval;
800 nh->nh_scope = RT_SCOPE_NOWHERE;
801 nh->nh_dev = dev_get_by_index(cfg->fc_nlinfo.nl_net,
802 fi->fib_nh->nh_oif);
803 err = -ENODEV;
804 if (nh->nh_dev == NULL)
805 goto failure;
806 } else {
807 change_nexthops(fi) {
808 if ((err = fib_check_nh(cfg, fi, nh)) != 0)
809 goto failure;
810 } endfor_nexthops(fi)
811 }
812
813 if (fi->fib_prefsrc) {
814 if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
815 fi->fib_prefsrc != cfg->fc_dst)
816 if (inet_addr_type(cfg->fc_nlinfo.nl_net,
817 fi->fib_prefsrc) != RTN_LOCAL)
818 goto err_inval;
819 }
820
821 link_it:
822 if ((ofi = fib_find_info(fi)) != NULL) {
823 fi->fib_dead = 1;
824 free_fib_info(fi);
825 ofi->fib_treeref++;
826 return ofi;
827 }
828
829 fi->fib_treeref++;
830 atomic_inc(&fi->fib_clntref);
831 spin_lock_bh(&fib_info_lock);
832 hlist_add_head(&fi->fib_hash,
833 &fib_info_hash[fib_info_hashfn(fi)]);
834 if (fi->fib_prefsrc) {
835 struct hlist_head *head;
836
837 head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
838 hlist_add_head(&fi->fib_lhash, head);
839 }
840 change_nexthops(fi) {
841 struct hlist_head *head;
842 unsigned int hash;
843
844 if (!nh->nh_dev)
845 continue;
846 hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
847 head = &fib_info_devhash[hash];
848 hlist_add_head(&nh->nh_hash, head);
849 } endfor_nexthops(fi)
850 spin_unlock_bh(&fib_info_lock);
851 return fi;
852
853 err_inval:
854 err = -EINVAL;
855
856 failure:
857 if (fi) {
858 fi->fib_dead = 1;
859 free_fib_info(fi);
860 }
861
862 return ERR_PTR(err);
863 }
864
865 /* Note! fib_semantic_match intentionally uses RCU list functions. */
866 int fib_semantic_match(struct list_head *head, const struct flowi *flp,
867 struct fib_result *res, __be32 zone, __be32 mask,
868 int prefixlen)
869 {
870 struct fib_alias *fa;
871 int nh_sel = 0;
872
873 list_for_each_entry_rcu(fa, head, fa_list) {
874 int err;
875
876 if (fa->fa_tos &&
877 fa->fa_tos != flp->fl4_tos)
878 continue;
879
880 if (fa->fa_scope < flp->fl4_scope)
881 continue;
882
883 fa->fa_state |= FA_S_ACCESSED;
884
885 err = fib_props[fa->fa_type].error;
886 if (err == 0) {
887 struct fib_info *fi = fa->fa_info;
888
889 if (fi->fib_flags & RTNH_F_DEAD)
890 continue;
891
892 switch (fa->fa_type) {
893 case RTN_UNICAST:
894 case RTN_LOCAL:
895 case RTN_BROADCAST:
896 case RTN_ANYCAST:
897 case RTN_MULTICAST:
898 for_nexthops(fi) {
899 if (nh->nh_flags&RTNH_F_DEAD)
900 continue;
901 if (!flp->oif || flp->oif == nh->nh_oif)
902 break;
903 }
904 #ifdef CONFIG_IP_ROUTE_MULTIPATH
905 if (nhsel < fi->fib_nhs) {
906 nh_sel = nhsel;
907 goto out_fill_res;
908 }
909 #else
910 if (nhsel < 1) {
911 goto out_fill_res;
912 }
913 #endif
914 endfor_nexthops(fi);
915 continue;
916
917 default:
918 printk(KERN_WARNING "fib_semantic_match bad type %#x\n",
919 fa->fa_type);
920 return -EINVAL;
921 }
922 }
923 return err;
924 }
925 return 1;
926
927 out_fill_res:
928 res->prefixlen = prefixlen;
929 res->nh_sel = nh_sel;
930 res->type = fa->fa_type;
931 res->scope = fa->fa_scope;
932 res->fi = fa->fa_info;
933 atomic_inc(&res->fi->fib_clntref);
934 return 0;
935 }
936
937 /* Find appropriate source address to this destination */
938
939 __be32 __fib_res_prefsrc(struct fib_result *res)
940 {
941 return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
942 }
943
944 int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
945 u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos,
946 struct fib_info *fi, unsigned int flags)
947 {
948 struct nlmsghdr *nlh;
949 struct rtmsg *rtm;
950
951 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags);
952 if (nlh == NULL)
953 return -EMSGSIZE;
954
955 rtm = nlmsg_data(nlh);
956 rtm->rtm_family = AF_INET;
957 rtm->rtm_dst_len = dst_len;
958 rtm->rtm_src_len = 0;
959 rtm->rtm_tos = tos;
960 rtm->rtm_table = tb_id;
961 NLA_PUT_U32(skb, RTA_TABLE, tb_id);
962 rtm->rtm_type = type;
963 rtm->rtm_flags = fi->fib_flags;
964 rtm->rtm_scope = scope;
965 rtm->rtm_protocol = fi->fib_protocol;
966
967 if (rtm->rtm_dst_len)
968 NLA_PUT_BE32(skb, RTA_DST, dst);
969
970 if (fi->fib_priority)
971 NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority);
972
973 if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
974 goto nla_put_failure;
975
976 if (fi->fib_prefsrc)
977 NLA_PUT_BE32(skb, RTA_PREFSRC, fi->fib_prefsrc);
978
979 if (fi->fib_nhs == 1) {
980 if (fi->fib_nh->nh_gw)
981 NLA_PUT_BE32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw);
982
983 if (fi->fib_nh->nh_oif)
984 NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
985 #ifdef CONFIG_NET_CLS_ROUTE
986 if (fi->fib_nh[0].nh_tclassid)
987 NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
988 #endif
989 }
990 #ifdef CONFIG_IP_ROUTE_MULTIPATH
991 if (fi->fib_nhs > 1) {
992 struct rtnexthop *rtnh;
993 struct nlattr *mp;
994
995 mp = nla_nest_start(skb, RTA_MULTIPATH);
996 if (mp == NULL)
997 goto nla_put_failure;
998
999 for_nexthops(fi) {
1000 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
1001 if (rtnh == NULL)
1002 goto nla_put_failure;
1003
1004 rtnh->rtnh_flags = nh->nh_flags & 0xFF;
1005 rtnh->rtnh_hops = nh->nh_weight - 1;
1006 rtnh->rtnh_ifindex = nh->nh_oif;
1007
1008 if (nh->nh_gw)
1009 NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw);
1010 #ifdef CONFIG_NET_CLS_ROUTE
1011 if (nh->nh_tclassid)
1012 NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
1013 #endif
1014 /* length of rtnetlink header + attributes */
1015 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
1016 } endfor_nexthops(fi);
1017
1018 nla_nest_end(skb, mp);
1019 }
1020 #endif
1021 return nlmsg_end(skb, nlh);
1022
1023 nla_put_failure:
1024 nlmsg_cancel(skb, nlh);
1025 return -EMSGSIZE;
1026 }
1027
1028 /*
1029 Update FIB if:
1030 - local address disappeared -> we must delete all the entries
1031 referring to it.
1032 - device went down -> we must shutdown all nexthops going via it.
1033 */
1034
1035 int fib_sync_down(__be32 local, struct net_device *dev, int force)
1036 {
1037 int ret = 0;
1038 int scope = RT_SCOPE_NOWHERE;
1039
1040 if (force)
1041 scope = -1;
1042
1043 if (local && fib_info_laddrhash) {
1044 unsigned int hash = fib_laddr_hashfn(local);
1045 struct hlist_head *head = &fib_info_laddrhash[hash];
1046 struct hlist_node *node;
1047 struct fib_info *fi;
1048
1049 hlist_for_each_entry(fi, node, head, fib_lhash) {
1050 if (fi->fib_prefsrc == local) {
1051 fi->fib_flags |= RTNH_F_DEAD;
1052 ret++;
1053 }
1054 }
1055 }
1056
1057 if (dev) {
1058 struct fib_info *prev_fi = NULL;
1059 unsigned int hash = fib_devindex_hashfn(dev->ifindex);
1060 struct hlist_head *head = &fib_info_devhash[hash];
1061 struct hlist_node *node;
1062 struct fib_nh *nh;
1063
1064 hlist_for_each_entry(nh, node, head, nh_hash) {
1065 struct fib_info *fi = nh->nh_parent;
1066 int dead;
1067
1068 BUG_ON(!fi->fib_nhs);
1069 if (nh->nh_dev != dev || fi == prev_fi)
1070 continue;
1071 prev_fi = fi;
1072 dead = 0;
1073 change_nexthops(fi) {
1074 if (nh->nh_flags&RTNH_F_DEAD)
1075 dead++;
1076 else if (nh->nh_dev == dev &&
1077 nh->nh_scope != scope) {
1078 nh->nh_flags |= RTNH_F_DEAD;
1079 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1080 spin_lock_bh(&fib_multipath_lock);
1081 fi->fib_power -= nh->nh_power;
1082 nh->nh_power = 0;
1083 spin_unlock_bh(&fib_multipath_lock);
1084 #endif
1085 dead++;
1086 }
1087 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1088 if (force > 1 && nh->nh_dev == dev) {
1089 dead = fi->fib_nhs;
1090 break;
1091 }
1092 #endif
1093 } endfor_nexthops(fi)
1094 if (dead == fi->fib_nhs) {
1095 fi->fib_flags |= RTNH_F_DEAD;
1096 ret++;
1097 }
1098 }
1099 }
1100
1101 return ret;
1102 }
1103
1104 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1105
1106 /*
1107 Dead device goes up. We wake up dead nexthops.
1108 It makes sense only for multipath routes.
1109 */
1110
1111 int fib_sync_up(struct net_device *dev)
1112 {
1113 struct fib_info *prev_fi;
1114 unsigned int hash;
1115 struct hlist_head *head;
1116 struct hlist_node *node;
1117 struct fib_nh *nh;
1118 int ret;
1119
1120 if (!(dev->flags&IFF_UP))
1121 return 0;
1122
1123 prev_fi = NULL;
1124 hash = fib_devindex_hashfn(dev->ifindex);
1125 head = &fib_info_devhash[hash];
1126 ret = 0;
1127
1128 hlist_for_each_entry(nh, node, head, nh_hash) {
1129 struct fib_info *fi = nh->nh_parent;
1130 int alive;
1131
1132 BUG_ON(!fi->fib_nhs);
1133 if (nh->nh_dev != dev || fi == prev_fi)
1134 continue;
1135
1136 prev_fi = fi;
1137 alive = 0;
1138 change_nexthops(fi) {
1139 if (!(nh->nh_flags&RTNH_F_DEAD)) {
1140 alive++;
1141 continue;
1142 }
1143 if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
1144 continue;
1145 if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev))
1146 continue;
1147 alive++;
1148 spin_lock_bh(&fib_multipath_lock);
1149 nh->nh_power = 0;
1150 nh->nh_flags &= ~RTNH_F_DEAD;
1151 spin_unlock_bh(&fib_multipath_lock);
1152 } endfor_nexthops(fi)
1153
1154 if (alive > 0) {
1155 fi->fib_flags &= ~RTNH_F_DEAD;
1156 ret++;
1157 }
1158 }
1159
1160 return ret;
1161 }
1162
1163 /*
1164 The algorithm is suboptimal, but it provides really
1165 fair weighted route distribution.
1166 */
1167
1168 void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
1169 {
1170 struct fib_info *fi = res->fi;
1171 int w;
1172
1173 spin_lock_bh(&fib_multipath_lock);
1174 if (fi->fib_power <= 0) {
1175 int power = 0;
1176 change_nexthops(fi) {
1177 if (!(nh->nh_flags&RTNH_F_DEAD)) {
1178 power += nh->nh_weight;
1179 nh->nh_power = nh->nh_weight;
1180 }
1181 } endfor_nexthops(fi);
1182 fi->fib_power = power;
1183 if (power <= 0) {
1184 spin_unlock_bh(&fib_multipath_lock);
1185 /* Race condition: route has just become dead. */
1186 res->nh_sel = 0;
1187 return;
1188 }
1189 }
1190
1191
1192 /* w should be random number [0..fi->fib_power-1],
1193 it is pretty bad approximation.
1194 */
1195
1196 w = jiffies % fi->fib_power;
1197
1198 change_nexthops(fi) {
1199 if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
1200 if ((w -= nh->nh_power) <= 0) {
1201 nh->nh_power--;
1202 fi->fib_power--;
1203 res->nh_sel = nhsel;
1204 spin_unlock_bh(&fib_multipath_lock);
1205 return;
1206 }
1207 }
1208 } endfor_nexthops(fi);
1209
1210 /* Race condition: route has just become dead. */
1211 res->nh_sel = 0;
1212 spin_unlock_bh(&fib_multipath_lock);
1213 }
1214 #endif
This page took 0.121642 seconds and 5 git commands to generate.