ipv4: fib table algorithm performance improvement
[deliverable/linux.git] / net / ipv4 / fib_frontend.c
1 /*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16 #include <linux/module.h>
17 #include <asm/uaccess.h>
18 #include <asm/system.h>
19 #include <linux/bitops.h>
20 #include <linux/capability.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 #include <linux/mm.h>
24 #include <linux/string.h>
25 #include <linux/socket.h>
26 #include <linux/sockios.h>
27 #include <linux/errno.h>
28 #include <linux/in.h>
29 #include <linux/inet.h>
30 #include <linux/inetdevice.h>
31 #include <linux/netdevice.h>
32 #include <linux/if_addr.h>
33 #include <linux/if_arp.h>
34 #include <linux/skbuff.h>
35 #include <linux/init.h>
36 #include <linux/list.h>
37
38 #include <net/ip.h>
39 #include <net/protocol.h>
40 #include <net/route.h>
41 #include <net/tcp.h>
42 #include <net/sock.h>
43 #include <net/arp.h>
44 #include <net/ip_fib.h>
45 #include <net/rtnetlink.h>
46
47 #ifndef CONFIG_IP_MULTIPLE_TABLES
48
49 static int __net_init fib4_rules_init(struct net *net)
50 {
51 struct fib_table *local_table, *main_table;
52
53 local_table = fib_hash_table(RT_TABLE_LOCAL);
54 if (local_table == NULL)
55 return -ENOMEM;
56
57 main_table = fib_hash_table(RT_TABLE_MAIN);
58 if (main_table == NULL)
59 goto fail;
60
61 hlist_add_head_rcu(&local_table->tb_hlist,
62 &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
63 hlist_add_head_rcu(&main_table->tb_hlist,
64 &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
65 return 0;
66
67 fail:
68 kfree(local_table);
69 return -ENOMEM;
70 }
71 #else
72
73 struct fib_table *fib_new_table(struct net *net, u32 id)
74 {
75 struct fib_table *tb;
76 unsigned int h;
77
78 if (id == 0)
79 id = RT_TABLE_MAIN;
80 tb = fib_get_table(net, id);
81 if (tb)
82 return tb;
83
84 tb = fib_hash_table(id);
85 if (!tb)
86 return NULL;
87 h = id & (FIB_TABLE_HASHSZ - 1);
88 hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
89 return tb;
90 }
91
92 struct fib_table *fib_get_table(struct net *net, u32 id)
93 {
94 struct fib_table *tb;
95 struct hlist_node *node;
96 struct hlist_head *head;
97 unsigned int h;
98
99 if (id == 0)
100 id = RT_TABLE_MAIN;
101 h = id & (FIB_TABLE_HASHSZ - 1);
102
103 rcu_read_lock();
104 head = &net->ipv4.fib_table_hash[h];
105 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
106 if (tb->tb_id == id) {
107 rcu_read_unlock();
108 return tb;
109 }
110 }
111 rcu_read_unlock();
112 return NULL;
113 }
114 #endif /* CONFIG_IP_MULTIPLE_TABLES */
115
116 void fib_select_default(struct net *net,
117 const struct flowi *flp, struct fib_result *res)
118 {
119 struct fib_table *tb;
120 int table = RT_TABLE_MAIN;
121 #ifdef CONFIG_IP_MULTIPLE_TABLES
122 if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
123 return;
124 table = res->r->table;
125 #endif
126 tb = fib_get_table(net, table);
127 if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
128 fib_table_select_default(tb, flp, res);
129 }
130
131 static void fib_flush(struct net *net)
132 {
133 int flushed = 0;
134 struct fib_table *tb;
135 struct hlist_node *node;
136 struct hlist_head *head;
137 unsigned int h;
138
139 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
140 head = &net->ipv4.fib_table_hash[h];
141 hlist_for_each_entry(tb, node, head, tb_hlist)
142 flushed += fib_table_flush(tb);
143 }
144
145 if (flushed)
146 rt_cache_flush(net, -1);
147 }
148
149 /*
150 * Find the first device with a given source address.
151 */
152
153 struct net_device * ip_dev_find(struct net *net, __be32 addr)
154 {
155 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
156 struct fib_result res;
157 struct net_device *dev = NULL;
158 struct fib_table *local_table;
159
160 #ifdef CONFIG_IP_MULTIPLE_TABLES
161 res.r = NULL;
162 #endif
163
164 local_table = fib_get_table(net, RT_TABLE_LOCAL);
165 if (!local_table || fib_table_lookup(local_table, &fl, &res))
166 return NULL;
167 if (res.type != RTN_LOCAL)
168 goto out;
169 dev = FIB_RES_DEV(res);
170
171 if (dev)
172 dev_hold(dev);
173 out:
174 fib_res_put(&res);
175 return dev;
176 }
177
178 /*
179 * Find address type as if only "dev" was present in the system. If
180 * on_dev is NULL then all interfaces are taken into consideration.
181 */
182 static inline unsigned __inet_dev_addr_type(struct net *net,
183 const struct net_device *dev,
184 __be32 addr)
185 {
186 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
187 struct fib_result res;
188 unsigned ret = RTN_BROADCAST;
189 struct fib_table *local_table;
190
191 if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
192 return RTN_BROADCAST;
193 if (ipv4_is_multicast(addr))
194 return RTN_MULTICAST;
195
196 #ifdef CONFIG_IP_MULTIPLE_TABLES
197 res.r = NULL;
198 #endif
199
200 local_table = fib_get_table(net, RT_TABLE_LOCAL);
201 if (local_table) {
202 ret = RTN_UNICAST;
203 if (!fib_table_lookup(local_table, &fl, &res)) {
204 if (!dev || dev == res.fi->fib_dev)
205 ret = res.type;
206 fib_res_put(&res);
207 }
208 }
209 return ret;
210 }
211
212 unsigned int inet_addr_type(struct net *net, __be32 addr)
213 {
214 return __inet_dev_addr_type(net, NULL, addr);
215 }
216
217 unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
218 __be32 addr)
219 {
220 return __inet_dev_addr_type(net, dev, addr);
221 }
222
223 /* Given (packet source, input interface) and optional (dst, oif, tos):
224 - (main) check, that source is valid i.e. not broadcast or our local
225 address.
226 - figure out what "logical" interface this packet arrived
227 and calculate "specific destination" address.
228 - check, that packet arrived from expected physical interface.
229 */
230
231 int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
232 struct net_device *dev, __be32 *spec_dst, u32 *itag)
233 {
234 struct in_device *in_dev;
235 struct flowi fl = { .nl_u = { .ip4_u =
236 { .daddr = src,
237 .saddr = dst,
238 .tos = tos } },
239 .iif = oif };
240 struct fib_result res;
241 int no_addr, rpf;
242 int ret;
243 struct net *net;
244
245 no_addr = rpf = 0;
246 rcu_read_lock();
247 in_dev = __in_dev_get_rcu(dev);
248 if (in_dev) {
249 no_addr = in_dev->ifa_list == NULL;
250 rpf = IN_DEV_RPFILTER(in_dev);
251 }
252 rcu_read_unlock();
253
254 if (in_dev == NULL)
255 goto e_inval;
256
257 net = dev_net(dev);
258 if (fib_lookup(net, &fl, &res))
259 goto last_resort;
260 if (res.type != RTN_UNICAST)
261 goto e_inval_res;
262 *spec_dst = FIB_RES_PREFSRC(res);
263 fib_combine_itag(itag, &res);
264 #ifdef CONFIG_IP_ROUTE_MULTIPATH
265 if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
266 #else
267 if (FIB_RES_DEV(res) == dev)
268 #endif
269 {
270 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
271 fib_res_put(&res);
272 return ret;
273 }
274 fib_res_put(&res);
275 if (no_addr)
276 goto last_resort;
277 if (rpf == 1)
278 goto e_inval;
279 fl.oif = dev->ifindex;
280
281 ret = 0;
282 if (fib_lookup(net, &fl, &res) == 0) {
283 if (res.type == RTN_UNICAST) {
284 *spec_dst = FIB_RES_PREFSRC(res);
285 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
286 }
287 fib_res_put(&res);
288 }
289 return ret;
290
291 last_resort:
292 if (rpf)
293 goto e_inval;
294 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
295 *itag = 0;
296 return 0;
297
298 e_inval_res:
299 fib_res_put(&res);
300 e_inval:
301 return -EINVAL;
302 }
303
304 static inline __be32 sk_extract_addr(struct sockaddr *addr)
305 {
306 return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
307 }
308
309 static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
310 {
311 struct nlattr *nla;
312
313 nla = (struct nlattr *) ((char *) mx + len);
314 nla->nla_type = type;
315 nla->nla_len = nla_attr_size(4);
316 *(u32 *) nla_data(nla) = value;
317
318 return len + nla_total_size(4);
319 }
320
321 static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
322 struct fib_config *cfg)
323 {
324 __be32 addr;
325 int plen;
326
327 memset(cfg, 0, sizeof(*cfg));
328 cfg->fc_nlinfo.nl_net = net;
329
330 if (rt->rt_dst.sa_family != AF_INET)
331 return -EAFNOSUPPORT;
332
333 /*
334 * Check mask for validity:
335 * a) it must be contiguous.
336 * b) destination must have all host bits clear.
337 * c) if application forgot to set correct family (AF_INET),
338 * reject request unless it is absolutely clear i.e.
339 * both family and mask are zero.
340 */
341 plen = 32;
342 addr = sk_extract_addr(&rt->rt_dst);
343 if (!(rt->rt_flags & RTF_HOST)) {
344 __be32 mask = sk_extract_addr(&rt->rt_genmask);
345
346 if (rt->rt_genmask.sa_family != AF_INET) {
347 if (mask || rt->rt_genmask.sa_family)
348 return -EAFNOSUPPORT;
349 }
350
351 if (bad_mask(mask, addr))
352 return -EINVAL;
353
354 plen = inet_mask_len(mask);
355 }
356
357 cfg->fc_dst_len = plen;
358 cfg->fc_dst = addr;
359
360 if (cmd != SIOCDELRT) {
361 cfg->fc_nlflags = NLM_F_CREATE;
362 cfg->fc_protocol = RTPROT_BOOT;
363 }
364
365 if (rt->rt_metric)
366 cfg->fc_priority = rt->rt_metric - 1;
367
368 if (rt->rt_flags & RTF_REJECT) {
369 cfg->fc_scope = RT_SCOPE_HOST;
370 cfg->fc_type = RTN_UNREACHABLE;
371 return 0;
372 }
373
374 cfg->fc_scope = RT_SCOPE_NOWHERE;
375 cfg->fc_type = RTN_UNICAST;
376
377 if (rt->rt_dev) {
378 char *colon;
379 struct net_device *dev;
380 char devname[IFNAMSIZ];
381
382 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
383 return -EFAULT;
384
385 devname[IFNAMSIZ-1] = 0;
386 colon = strchr(devname, ':');
387 if (colon)
388 *colon = 0;
389 dev = __dev_get_by_name(net, devname);
390 if (!dev)
391 return -ENODEV;
392 cfg->fc_oif = dev->ifindex;
393 if (colon) {
394 struct in_ifaddr *ifa;
395 struct in_device *in_dev = __in_dev_get_rtnl(dev);
396 if (!in_dev)
397 return -ENODEV;
398 *colon = ':';
399 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
400 if (strcmp(ifa->ifa_label, devname) == 0)
401 break;
402 if (ifa == NULL)
403 return -ENODEV;
404 cfg->fc_prefsrc = ifa->ifa_local;
405 }
406 }
407
408 addr = sk_extract_addr(&rt->rt_gateway);
409 if (rt->rt_gateway.sa_family == AF_INET && addr) {
410 cfg->fc_gw = addr;
411 if (rt->rt_flags & RTF_GATEWAY &&
412 inet_addr_type(net, addr) == RTN_UNICAST)
413 cfg->fc_scope = RT_SCOPE_UNIVERSE;
414 }
415
416 if (cmd == SIOCDELRT)
417 return 0;
418
419 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
420 return -EINVAL;
421
422 if (cfg->fc_scope == RT_SCOPE_NOWHERE)
423 cfg->fc_scope = RT_SCOPE_LINK;
424
425 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
426 struct nlattr *mx;
427 int len = 0;
428
429 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
430 if (mx == NULL)
431 return -ENOMEM;
432
433 if (rt->rt_flags & RTF_MTU)
434 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
435
436 if (rt->rt_flags & RTF_WINDOW)
437 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
438
439 if (rt->rt_flags & RTF_IRTT)
440 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
441
442 cfg->fc_mx = mx;
443 cfg->fc_mx_len = len;
444 }
445
446 return 0;
447 }
448
449 /*
450 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
451 */
452
453 int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
454 {
455 struct fib_config cfg;
456 struct rtentry rt;
457 int err;
458
459 switch (cmd) {
460 case SIOCADDRT: /* Add a route */
461 case SIOCDELRT: /* Delete a route */
462 if (!capable(CAP_NET_ADMIN))
463 return -EPERM;
464
465 if (copy_from_user(&rt, arg, sizeof(rt)))
466 return -EFAULT;
467
468 rtnl_lock();
469 err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
470 if (err == 0) {
471 struct fib_table *tb;
472
473 if (cmd == SIOCDELRT) {
474 tb = fib_get_table(net, cfg.fc_table);
475 if (tb)
476 err = fib_table_delete(tb, &cfg);
477 else
478 err = -ESRCH;
479 } else {
480 tb = fib_new_table(net, cfg.fc_table);
481 if (tb)
482 err = fib_table_insert(tb, &cfg);
483 else
484 err = -ENOBUFS;
485 }
486
487 /* allocated by rtentry_to_fib_config() */
488 kfree(cfg.fc_mx);
489 }
490 rtnl_unlock();
491 return err;
492 }
493 return -EINVAL;
494 }
495
496 const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
497 [RTA_DST] = { .type = NLA_U32 },
498 [RTA_SRC] = { .type = NLA_U32 },
499 [RTA_IIF] = { .type = NLA_U32 },
500 [RTA_OIF] = { .type = NLA_U32 },
501 [RTA_GATEWAY] = { .type = NLA_U32 },
502 [RTA_PRIORITY] = { .type = NLA_U32 },
503 [RTA_PREFSRC] = { .type = NLA_U32 },
504 [RTA_METRICS] = { .type = NLA_NESTED },
505 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
506 [RTA_FLOW] = { .type = NLA_U32 },
507 };
508
509 static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
510 struct nlmsghdr *nlh, struct fib_config *cfg)
511 {
512 struct nlattr *attr;
513 int err, remaining;
514 struct rtmsg *rtm;
515
516 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
517 if (err < 0)
518 goto errout;
519
520 memset(cfg, 0, sizeof(*cfg));
521
522 rtm = nlmsg_data(nlh);
523 cfg->fc_dst_len = rtm->rtm_dst_len;
524 cfg->fc_tos = rtm->rtm_tos;
525 cfg->fc_table = rtm->rtm_table;
526 cfg->fc_protocol = rtm->rtm_protocol;
527 cfg->fc_scope = rtm->rtm_scope;
528 cfg->fc_type = rtm->rtm_type;
529 cfg->fc_flags = rtm->rtm_flags;
530 cfg->fc_nlflags = nlh->nlmsg_flags;
531
532 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
533 cfg->fc_nlinfo.nlh = nlh;
534 cfg->fc_nlinfo.nl_net = net;
535
536 if (cfg->fc_type > RTN_MAX) {
537 err = -EINVAL;
538 goto errout;
539 }
540
541 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
542 switch (nla_type(attr)) {
543 case RTA_DST:
544 cfg->fc_dst = nla_get_be32(attr);
545 break;
546 case RTA_OIF:
547 cfg->fc_oif = nla_get_u32(attr);
548 break;
549 case RTA_GATEWAY:
550 cfg->fc_gw = nla_get_be32(attr);
551 break;
552 case RTA_PRIORITY:
553 cfg->fc_priority = nla_get_u32(attr);
554 break;
555 case RTA_PREFSRC:
556 cfg->fc_prefsrc = nla_get_be32(attr);
557 break;
558 case RTA_METRICS:
559 cfg->fc_mx = nla_data(attr);
560 cfg->fc_mx_len = nla_len(attr);
561 break;
562 case RTA_MULTIPATH:
563 cfg->fc_mp = nla_data(attr);
564 cfg->fc_mp_len = nla_len(attr);
565 break;
566 case RTA_FLOW:
567 cfg->fc_flow = nla_get_u32(attr);
568 break;
569 case RTA_TABLE:
570 cfg->fc_table = nla_get_u32(attr);
571 break;
572 }
573 }
574
575 return 0;
576 errout:
577 return err;
578 }
579
580 static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
581 {
582 struct net *net = sock_net(skb->sk);
583 struct fib_config cfg;
584 struct fib_table *tb;
585 int err;
586
587 err = rtm_to_fib_config(net, skb, nlh, &cfg);
588 if (err < 0)
589 goto errout;
590
591 tb = fib_get_table(net, cfg.fc_table);
592 if (tb == NULL) {
593 err = -ESRCH;
594 goto errout;
595 }
596
597 err = fib_table_delete(tb, &cfg);
598 errout:
599 return err;
600 }
601
602 static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
603 {
604 struct net *net = sock_net(skb->sk);
605 struct fib_config cfg;
606 struct fib_table *tb;
607 int err;
608
609 err = rtm_to_fib_config(net, skb, nlh, &cfg);
610 if (err < 0)
611 goto errout;
612
613 tb = fib_new_table(net, cfg.fc_table);
614 if (tb == NULL) {
615 err = -ENOBUFS;
616 goto errout;
617 }
618
619 err = fib_table_insert(tb, &cfg);
620 errout:
621 return err;
622 }
623
624 static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
625 {
626 struct net *net = sock_net(skb->sk);
627 unsigned int h, s_h;
628 unsigned int e = 0, s_e;
629 struct fib_table *tb;
630 struct hlist_node *node;
631 struct hlist_head *head;
632 int dumped = 0;
633
634 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
635 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
636 return ip_rt_dump(skb, cb);
637
638 s_h = cb->args[0];
639 s_e = cb->args[1];
640
641 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
642 e = 0;
643 head = &net->ipv4.fib_table_hash[h];
644 hlist_for_each_entry(tb, node, head, tb_hlist) {
645 if (e < s_e)
646 goto next;
647 if (dumped)
648 memset(&cb->args[2], 0, sizeof(cb->args) -
649 2 * sizeof(cb->args[0]));
650 if (fib_table_dump(tb, skb, cb) < 0)
651 goto out;
652 dumped = 1;
653 next:
654 e++;
655 }
656 }
657 out:
658 cb->args[1] = e;
659 cb->args[0] = h;
660
661 return skb->len;
662 }
663
664 /* Prepare and feed intra-kernel routing request.
665 Really, it should be netlink message, but :-( netlink
666 can be not configured, so that we feed it directly
667 to fib engine. It is legal, because all events occur
668 only when netlink is already locked.
669 */
670
671 static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
672 {
673 struct net *net = dev_net(ifa->ifa_dev->dev);
674 struct fib_table *tb;
675 struct fib_config cfg = {
676 .fc_protocol = RTPROT_KERNEL,
677 .fc_type = type,
678 .fc_dst = dst,
679 .fc_dst_len = dst_len,
680 .fc_prefsrc = ifa->ifa_local,
681 .fc_oif = ifa->ifa_dev->dev->ifindex,
682 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
683 .fc_nlinfo = {
684 .nl_net = net,
685 },
686 };
687
688 if (type == RTN_UNICAST)
689 tb = fib_new_table(net, RT_TABLE_MAIN);
690 else
691 tb = fib_new_table(net, RT_TABLE_LOCAL);
692
693 if (tb == NULL)
694 return;
695
696 cfg.fc_table = tb->tb_id;
697
698 if (type != RTN_LOCAL)
699 cfg.fc_scope = RT_SCOPE_LINK;
700 else
701 cfg.fc_scope = RT_SCOPE_HOST;
702
703 if (cmd == RTM_NEWROUTE)
704 fib_table_insert(tb, &cfg);
705 else
706 fib_table_delete(tb, &cfg);
707 }
708
709 void fib_add_ifaddr(struct in_ifaddr *ifa)
710 {
711 struct in_device *in_dev = ifa->ifa_dev;
712 struct net_device *dev = in_dev->dev;
713 struct in_ifaddr *prim = ifa;
714 __be32 mask = ifa->ifa_mask;
715 __be32 addr = ifa->ifa_local;
716 __be32 prefix = ifa->ifa_address&mask;
717
718 if (ifa->ifa_flags&IFA_F_SECONDARY) {
719 prim = inet_ifa_byprefix(in_dev, prefix, mask);
720 if (prim == NULL) {
721 printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
722 return;
723 }
724 }
725
726 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
727
728 if (!(dev->flags&IFF_UP))
729 return;
730
731 /* Add broadcast address, if it is explicitly assigned. */
732 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
733 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
734
735 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
736 (prefix != addr || ifa->ifa_prefixlen < 32)) {
737 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
738 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
739
740 /* Add network specific broadcasts, when it takes a sense */
741 if (ifa->ifa_prefixlen < 31) {
742 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
743 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
744 }
745 }
746 }
747
748 static void fib_del_ifaddr(struct in_ifaddr *ifa)
749 {
750 struct in_device *in_dev = ifa->ifa_dev;
751 struct net_device *dev = in_dev->dev;
752 struct in_ifaddr *ifa1;
753 struct in_ifaddr *prim = ifa;
754 __be32 brd = ifa->ifa_address|~ifa->ifa_mask;
755 __be32 any = ifa->ifa_address&ifa->ifa_mask;
756 #define LOCAL_OK 1
757 #define BRD_OK 2
758 #define BRD0_OK 4
759 #define BRD1_OK 8
760 unsigned ok = 0;
761
762 if (!(ifa->ifa_flags&IFA_F_SECONDARY))
763 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
764 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
765 else {
766 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
767 if (prim == NULL) {
768 printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
769 return;
770 }
771 }
772
773 /* Deletion is more complicated than add.
774 We should take care of not to delete too much :-)
775
776 Scan address list to be sure that addresses are really gone.
777 */
778
779 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
780 if (ifa->ifa_local == ifa1->ifa_local)
781 ok |= LOCAL_OK;
782 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
783 ok |= BRD_OK;
784 if (brd == ifa1->ifa_broadcast)
785 ok |= BRD1_OK;
786 if (any == ifa1->ifa_broadcast)
787 ok |= BRD0_OK;
788 }
789
790 if (!(ok&BRD_OK))
791 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
792 if (!(ok&BRD1_OK))
793 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
794 if (!(ok&BRD0_OK))
795 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
796 if (!(ok&LOCAL_OK)) {
797 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
798
799 /* Check, that this local address finally disappeared. */
800 if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
801 /* And the last, but not the least thing.
802 We must flush stray FIB entries.
803
804 First of all, we scan fib_info list searching
805 for stray nexthop entries, then ignite fib_flush.
806 */
807 if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
808 fib_flush(dev_net(dev));
809 }
810 }
811 #undef LOCAL_OK
812 #undef BRD_OK
813 #undef BRD0_OK
814 #undef BRD1_OK
815 }
816
817 static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
818 {
819
820 struct fib_result res;
821 struct flowi fl = { .mark = frn->fl_mark,
822 .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
823 .tos = frn->fl_tos,
824 .scope = frn->fl_scope } } };
825
826 #ifdef CONFIG_IP_MULTIPLE_TABLES
827 res.r = NULL;
828 #endif
829
830 frn->err = -ENOENT;
831 if (tb) {
832 local_bh_disable();
833
834 frn->tb_id = tb->tb_id;
835 frn->err = fib_table_lookup(tb, &fl, &res);
836
837 if (!frn->err) {
838 frn->prefixlen = res.prefixlen;
839 frn->nh_sel = res.nh_sel;
840 frn->type = res.type;
841 frn->scope = res.scope;
842 fib_res_put(&res);
843 }
844 local_bh_enable();
845 }
846 }
847
848 static void nl_fib_input(struct sk_buff *skb)
849 {
850 struct net *net;
851 struct fib_result_nl *frn;
852 struct nlmsghdr *nlh;
853 struct fib_table *tb;
854 u32 pid;
855
856 net = sock_net(skb->sk);
857 nlh = nlmsg_hdr(skb);
858 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
859 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
860 return;
861
862 skb = skb_clone(skb, GFP_KERNEL);
863 if (skb == NULL)
864 return;
865 nlh = nlmsg_hdr(skb);
866
867 frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
868 tb = fib_get_table(net, frn->tb_id_in);
869
870 nl_fib_lookup(frn, tb);
871
872 pid = NETLINK_CB(skb).pid; /* pid of sending process */
873 NETLINK_CB(skb).pid = 0; /* from kernel */
874 NETLINK_CB(skb).dst_group = 0; /* unicast */
875 netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
876 }
877
878 static int nl_fib_lookup_init(struct net *net)
879 {
880 struct sock *sk;
881 sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
882 nl_fib_input, NULL, THIS_MODULE);
883 if (sk == NULL)
884 return -EAFNOSUPPORT;
885 net->ipv4.fibnl = sk;
886 return 0;
887 }
888
889 static void nl_fib_lookup_exit(struct net *net)
890 {
891 netlink_kernel_release(net->ipv4.fibnl);
892 net->ipv4.fibnl = NULL;
893 }
894
/*
 * Take the device's routes down.  The FIB is flushed when
 * fib_sync_down_dev() returns non-zero; the route cache flush and
 * arp_ifdown() are always performed.  force is passed through to
 * fib_sync_down_dev() unchanged.
 */
static void fib_disable_ip(struct net_device *dev, int force)
{
	int stale = fib_sync_down_dev(dev, force);

	if (stale)
		fib_flush(dev_net(dev));

	rt_cache_flush(dev_net(dev), 0);
	arp_ifdown(dev);
}
902
903 static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
904 {
905 struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
906 struct net_device *dev = ifa->ifa_dev->dev;
907
908 switch (event) {
909 case NETDEV_UP:
910 fib_add_ifaddr(ifa);
911 #ifdef CONFIG_IP_ROUTE_MULTIPATH
912 fib_sync_up(dev);
913 #endif
914 rt_cache_flush(dev_net(dev), -1);
915 break;
916 case NETDEV_DOWN:
917 fib_del_ifaddr(ifa);
918 if (ifa->ifa_dev->ifa_list == NULL) {
919 /* Last address was deleted from this interface.
920 Disable IP.
921 */
922 fib_disable_ip(dev, 1);
923 } else {
924 rt_cache_flush(dev_net(dev), -1);
925 }
926 break;
927 }
928 return NOTIFY_DONE;
929 }
930
931 static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
932 {
933 struct net_device *dev = ptr;
934 struct in_device *in_dev = __in_dev_get_rtnl(dev);
935
936 if (event == NETDEV_UNREGISTER) {
937 fib_disable_ip(dev, 2);
938 return NOTIFY_DONE;
939 }
940
941 if (!in_dev)
942 return NOTIFY_DONE;
943
944 switch (event) {
945 case NETDEV_UP:
946 for_ifa(in_dev) {
947 fib_add_ifaddr(ifa);
948 } endfor_ifa(in_dev);
949 #ifdef CONFIG_IP_ROUTE_MULTIPATH
950 fib_sync_up(dev);
951 #endif
952 rt_cache_flush(dev_net(dev), -1);
953 break;
954 case NETDEV_DOWN:
955 fib_disable_ip(dev, 0);
956 break;
957 case NETDEV_CHANGEMTU:
958 case NETDEV_CHANGE:
959 rt_cache_flush(dev_net(dev), 0);
960 break;
961 }
962 return NOTIFY_DONE;
963 }
964
965 static struct notifier_block fib_inetaddr_notifier = {
966 .notifier_call = fib_inetaddr_event,
967 };
968
969 static struct notifier_block fib_netdev_notifier = {
970 .notifier_call = fib_netdev_event,
971 };
972
973 static int __net_init ip_fib_net_init(struct net *net)
974 {
975 int err;
976 unsigned int i;
977
978 net->ipv4.fib_table_hash = kzalloc(
979 sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
980 if (net->ipv4.fib_table_hash == NULL)
981 return -ENOMEM;
982
983 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
984 INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
985
986 err = fib4_rules_init(net);
987 if (err < 0)
988 goto fail;
989 return 0;
990
991 fail:
992 kfree(net->ipv4.fib_table_hash);
993 return err;
994 }
995
996 static void __net_exit ip_fib_net_exit(struct net *net)
997 {
998 unsigned int i;
999
1000 #ifdef CONFIG_IP_MULTIPLE_TABLES
1001 fib4_rules_exit(net);
1002 #endif
1003
1004 for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
1005 struct fib_table *tb;
1006 struct hlist_head *head;
1007 struct hlist_node *node, *tmp;
1008
1009 head = &net->ipv4.fib_table_hash[i];
1010 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
1011 hlist_del(node);
1012 fib_table_flush(tb);
1013 kfree(tb);
1014 }
1015 }
1016 kfree(net->ipv4.fib_table_hash);
1017 }
1018
1019 static int __net_init fib_net_init(struct net *net)
1020 {
1021 int error;
1022
1023 error = ip_fib_net_init(net);
1024 if (error < 0)
1025 goto out;
1026 error = nl_fib_lookup_init(net);
1027 if (error < 0)
1028 goto out_nlfl;
1029 error = fib_proc_init(net);
1030 if (error < 0)
1031 goto out_proc;
1032 out:
1033 return error;
1034
1035 out_proc:
1036 nl_fib_lookup_exit(net);
1037 out_nlfl:
1038 ip_fib_net_exit(net);
1039 goto out;
1040 }
1041
1042 static void __net_exit fib_net_exit(struct net *net)
1043 {
1044 fib_proc_exit(net);
1045 nl_fib_lookup_exit(net);
1046 ip_fib_net_exit(net);
1047 }
1048
1049 static struct pernet_operations fib_net_ops = {
1050 .init = fib_net_init,
1051 .exit = fib_net_exit,
1052 };
1053
1054 void __init ip_fib_init(void)
1055 {
1056 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
1057 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
1058 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
1059
1060 register_pernet_subsys(&fib_net_ops);
1061 register_netdevice_notifier(&fib_netdev_notifier);
1062 register_inetaddr_notifier(&fib_inetaddr_notifier);
1063
1064 fib_hash_init();
1065 }
1066
1067 EXPORT_SYMBOL(inet_addr_type);
1068 EXPORT_SYMBOL(inet_dev_addr_type);
1069 EXPORT_SYMBOL(ip_dev_find);
This page took 0.053549 seconds and 5 git commands to generate.