net: Add getsockopt support for TCP thin-streams
[deliverable/linux.git] / net / ipv4 / fib_frontend.c
1 /*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16 #include <linux/module.h>
17 #include <asm/uaccess.h>
18 #include <asm/system.h>
19 #include <linux/bitops.h>
20 #include <linux/capability.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 #include <linux/mm.h>
24 #include <linux/string.h>
25 #include <linux/socket.h>
26 #include <linux/sockios.h>
27 #include <linux/errno.h>
28 #include <linux/in.h>
29 #include <linux/inet.h>
30 #include <linux/inetdevice.h>
31 #include <linux/netdevice.h>
32 #include <linux/if_addr.h>
33 #include <linux/if_arp.h>
34 #include <linux/skbuff.h>
35 #include <linux/init.h>
36 #include <linux/list.h>
37 #include <linux/slab.h>
38
39 #include <net/ip.h>
40 #include <net/protocol.h>
41 #include <net/route.h>
42 #include <net/tcp.h>
43 #include <net/sock.h>
44 #include <net/arp.h>
45 #include <net/ip_fib.h>
46 #include <net/rtnetlink.h>
47
48 #ifndef CONFIG_IP_MULTIPLE_TABLES
49
50 static int __net_init fib4_rules_init(struct net *net)
51 {
52 struct fib_table *local_table, *main_table;
53
54 local_table = fib_hash_table(RT_TABLE_LOCAL);
55 if (local_table == NULL)
56 return -ENOMEM;
57
58 main_table = fib_hash_table(RT_TABLE_MAIN);
59 if (main_table == NULL)
60 goto fail;
61
62 hlist_add_head_rcu(&local_table->tb_hlist,
63 &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
64 hlist_add_head_rcu(&main_table->tb_hlist,
65 &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
66 return 0;
67
68 fail:
69 kfree(local_table);
70 return -ENOMEM;
71 }
72 #else
73
74 struct fib_table *fib_new_table(struct net *net, u32 id)
75 {
76 struct fib_table *tb;
77 unsigned int h;
78
79 if (id == 0)
80 id = RT_TABLE_MAIN;
81 tb = fib_get_table(net, id);
82 if (tb)
83 return tb;
84
85 tb = fib_hash_table(id);
86 if (!tb)
87 return NULL;
88 h = id & (FIB_TABLE_HASHSZ - 1);
89 hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
90 return tb;
91 }
92
93 struct fib_table *fib_get_table(struct net *net, u32 id)
94 {
95 struct fib_table *tb;
96 struct hlist_node *node;
97 struct hlist_head *head;
98 unsigned int h;
99
100 if (id == 0)
101 id = RT_TABLE_MAIN;
102 h = id & (FIB_TABLE_HASHSZ - 1);
103
104 rcu_read_lock();
105 head = &net->ipv4.fib_table_hash[h];
106 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
107 if (tb->tb_id == id) {
108 rcu_read_unlock();
109 return tb;
110 }
111 }
112 rcu_read_unlock();
113 return NULL;
114 }
115 #endif /* CONFIG_IP_MULTIPLE_TABLES */
116
117 void fib_select_default(struct net *net,
118 const struct flowi *flp, struct fib_result *res)
119 {
120 struct fib_table *tb;
121 int table = RT_TABLE_MAIN;
122 #ifdef CONFIG_IP_MULTIPLE_TABLES
123 if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
124 return;
125 table = res->r->table;
126 #endif
127 tb = fib_get_table(net, table);
128 if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
129 fib_table_select_default(tb, flp, res);
130 }
131
132 static void fib_flush(struct net *net)
133 {
134 int flushed = 0;
135 struct fib_table *tb;
136 struct hlist_node *node;
137 struct hlist_head *head;
138 unsigned int h;
139
140 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
141 head = &net->ipv4.fib_table_hash[h];
142 hlist_for_each_entry(tb, node, head, tb_hlist)
143 flushed += fib_table_flush(tb);
144 }
145
146 if (flushed)
147 rt_cache_flush(net, -1);
148 }
149
150 /*
151 * Find the first device with a given source address.
152 */
153
154 struct net_device * ip_dev_find(struct net *net, __be32 addr)
155 {
156 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
157 struct fib_result res;
158 struct net_device *dev = NULL;
159 struct fib_table *local_table;
160
161 #ifdef CONFIG_IP_MULTIPLE_TABLES
162 res.r = NULL;
163 #endif
164
165 local_table = fib_get_table(net, RT_TABLE_LOCAL);
166 if (!local_table || fib_table_lookup(local_table, &fl, &res))
167 return NULL;
168 if (res.type != RTN_LOCAL)
169 goto out;
170 dev = FIB_RES_DEV(res);
171
172 if (dev)
173 dev_hold(dev);
174 out:
175 fib_res_put(&res);
176 return dev;
177 }
178 EXPORT_SYMBOL(ip_dev_find);
179
180 /*
181 * Find address type as if only "dev" was present in the system. If
182 * on_dev is NULL then all interfaces are taken into consideration.
183 */
184 static inline unsigned __inet_dev_addr_type(struct net *net,
185 const struct net_device *dev,
186 __be32 addr)
187 {
188 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
189 struct fib_result res;
190 unsigned ret = RTN_BROADCAST;
191 struct fib_table *local_table;
192
193 if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
194 return RTN_BROADCAST;
195 if (ipv4_is_multicast(addr))
196 return RTN_MULTICAST;
197
198 #ifdef CONFIG_IP_MULTIPLE_TABLES
199 res.r = NULL;
200 #endif
201
202 local_table = fib_get_table(net, RT_TABLE_LOCAL);
203 if (local_table) {
204 ret = RTN_UNICAST;
205 if (!fib_table_lookup(local_table, &fl, &res)) {
206 if (!dev || dev == res.fi->fib_dev)
207 ret = res.type;
208 fib_res_put(&res);
209 }
210 }
211 return ret;
212 }
213
214 unsigned int inet_addr_type(struct net *net, __be32 addr)
215 {
216 return __inet_dev_addr_type(net, NULL, addr);
217 }
218 EXPORT_SYMBOL(inet_addr_type);
219
220 unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
221 __be32 addr)
222 {
223 return __inet_dev_addr_type(net, dev, addr);
224 }
225 EXPORT_SYMBOL(inet_dev_addr_type);
226
/* Given (packet source, input interface) and optional (dst, oif, tos):
 * - (main) check that the source is valid, i.e. not broadcast or our local
 *   address.
 * - figure out what "logical" interface this packet arrived on
 *   and calculate the "specific destination" address.
 * - check that the packet arrived from the expected physical interface.
 *
 * The lookup key is built in the reverse direction: the packet source is
 * used as the flow destination and the packet destination as the flow
 * source.
 *
 * On success *spec_dst is filled in and the return value is 0 or 1
 * (1 when the nexthop scope of the matched route is >= RT_SCOPE_HOST).
 * Returns -EINVAL when the device has no in_device or the matched route is
 * neither unicast nor (local with accept_local set), and -EXDEV when the
 * reverse-path filter setting rejects the packet.
 */

int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
			struct net_device *dev, __be32 *spec_dst,
			u32 *itag, u32 mark)
{
	struct in_device *in_dev;
	struct flowi fl = { .nl_u = { .ip4_u =
				      { .daddr = src,
					.saddr = dst,
					.tos = tos } },
			    .mark = mark,
			    .iif = oif };

	struct fib_result res;
	int no_addr, rpf, accept_local;
	int ret;
	struct net *net;

	no_addr = rpf = accept_local = 0;
	/* Snapshot the per-device settings while the in_device is protected. */
	rcu_read_lock();
	in_dev = __in_dev_get_rcu(dev);
	if (in_dev) {
		no_addr = in_dev->ifa_list == NULL;
		rpf = IN_DEV_RPFILTER(in_dev);
		accept_local = IN_DEV_ACCEPT_LOCAL(in_dev);
		/* Ignore the mark in the lookup unless the device opts in. */
		if (mark && !IN_DEV_SRC_VMARK(in_dev))
			fl.mark = 0;
	}
	rcu_read_unlock();

	/* Only the NULL-ness of in_dev is used past this point. */
	if (in_dev == NULL)
		goto e_inval;

	net = dev_net(dev);
	if (fib_lookup(net, &fl, &res))
		goto last_resort;
	if (res.type != RTN_UNICAST) {
		if (res.type != RTN_LOCAL || !accept_local)
			goto e_inval_res;
	}
	*spec_dst = FIB_RES_PREFSRC(res);
	fib_combine_itag(itag, &res);
	/* Accept when the route back to the source uses the arrival device
	 * (or, with multipath enabled, when the route has several nexthops).
	 */
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
#else
	if (FIB_RES_DEV(res) == dev)
#endif
	{
		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
		fib_res_put(&res);
		return ret;
	}
	fib_res_put(&res);
	if (no_addr)
		goto last_resort;
	/* Only rpf == 1 rejects here; any non-zero rpf rejects at last_resort. */
	if (rpf == 1)
		goto e_rpf;
	/* Retry the lookup constrained to the arrival device as output. */
	fl.oif = dev->ifindex;

	ret = 0;
	if (fib_lookup(net, &fl, &res) == 0) {
		if (res.type == RTN_UNICAST) {
			*spec_dst = FIB_RES_PREFSRC(res);
			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
		}
		fib_res_put(&res);
	}
	return ret;

last_resort:
	if (rpf)
		goto e_rpf;
	/* No usable route: pick an address from the device and clear the tag. */
	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
	*itag = 0;
	return 0;

e_inval_res:
	fib_res_put(&res);
e_inval:
	return -EINVAL;
e_rpf:
	return -EXDEV;
}
317
318 static inline __be32 sk_extract_addr(struct sockaddr *addr)
319 {
320 return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
321 }
322
323 static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
324 {
325 struct nlattr *nla;
326
327 nla = (struct nlattr *) ((char *) mx + len);
328 nla->nla_type = type;
329 nla->nla_len = nla_attr_size(4);
330 *(u32 *) nla_data(nla) = value;
331
332 return len + nla_total_size(4);
333 }
334
335 static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
336 struct fib_config *cfg)
337 {
338 __be32 addr;
339 int plen;
340
341 memset(cfg, 0, sizeof(*cfg));
342 cfg->fc_nlinfo.nl_net = net;
343
344 if (rt->rt_dst.sa_family != AF_INET)
345 return -EAFNOSUPPORT;
346
347 /*
348 * Check mask for validity:
349 * a) it must be contiguous.
350 * b) destination must have all host bits clear.
351 * c) if application forgot to set correct family (AF_INET),
352 * reject request unless it is absolutely clear i.e.
353 * both family and mask are zero.
354 */
355 plen = 32;
356 addr = sk_extract_addr(&rt->rt_dst);
357 if (!(rt->rt_flags & RTF_HOST)) {
358 __be32 mask = sk_extract_addr(&rt->rt_genmask);
359
360 if (rt->rt_genmask.sa_family != AF_INET) {
361 if (mask || rt->rt_genmask.sa_family)
362 return -EAFNOSUPPORT;
363 }
364
365 if (bad_mask(mask, addr))
366 return -EINVAL;
367
368 plen = inet_mask_len(mask);
369 }
370
371 cfg->fc_dst_len = plen;
372 cfg->fc_dst = addr;
373
374 if (cmd != SIOCDELRT) {
375 cfg->fc_nlflags = NLM_F_CREATE;
376 cfg->fc_protocol = RTPROT_BOOT;
377 }
378
379 if (rt->rt_metric)
380 cfg->fc_priority = rt->rt_metric - 1;
381
382 if (rt->rt_flags & RTF_REJECT) {
383 cfg->fc_scope = RT_SCOPE_HOST;
384 cfg->fc_type = RTN_UNREACHABLE;
385 return 0;
386 }
387
388 cfg->fc_scope = RT_SCOPE_NOWHERE;
389 cfg->fc_type = RTN_UNICAST;
390
391 if (rt->rt_dev) {
392 char *colon;
393 struct net_device *dev;
394 char devname[IFNAMSIZ];
395
396 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
397 return -EFAULT;
398
399 devname[IFNAMSIZ-1] = 0;
400 colon = strchr(devname, ':');
401 if (colon)
402 *colon = 0;
403 dev = __dev_get_by_name(net, devname);
404 if (!dev)
405 return -ENODEV;
406 cfg->fc_oif = dev->ifindex;
407 if (colon) {
408 struct in_ifaddr *ifa;
409 struct in_device *in_dev = __in_dev_get_rtnl(dev);
410 if (!in_dev)
411 return -ENODEV;
412 *colon = ':';
413 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
414 if (strcmp(ifa->ifa_label, devname) == 0)
415 break;
416 if (ifa == NULL)
417 return -ENODEV;
418 cfg->fc_prefsrc = ifa->ifa_local;
419 }
420 }
421
422 addr = sk_extract_addr(&rt->rt_gateway);
423 if (rt->rt_gateway.sa_family == AF_INET && addr) {
424 cfg->fc_gw = addr;
425 if (rt->rt_flags & RTF_GATEWAY &&
426 inet_addr_type(net, addr) == RTN_UNICAST)
427 cfg->fc_scope = RT_SCOPE_UNIVERSE;
428 }
429
430 if (cmd == SIOCDELRT)
431 return 0;
432
433 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
434 return -EINVAL;
435
436 if (cfg->fc_scope == RT_SCOPE_NOWHERE)
437 cfg->fc_scope = RT_SCOPE_LINK;
438
439 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
440 struct nlattr *mx;
441 int len = 0;
442
443 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
444 if (mx == NULL)
445 return -ENOMEM;
446
447 if (rt->rt_flags & RTF_MTU)
448 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
449
450 if (rt->rt_flags & RTF_WINDOW)
451 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
452
453 if (rt->rt_flags & RTF_IRTT)
454 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
455
456 cfg->fc_mx = mx;
457 cfg->fc_mx_len = len;
458 }
459
460 return 0;
461 }
462
463 /*
464 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
465 */
466
467 int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
468 {
469 struct fib_config cfg;
470 struct rtentry rt;
471 int err;
472
473 switch (cmd) {
474 case SIOCADDRT: /* Add a route */
475 case SIOCDELRT: /* Delete a route */
476 if (!capable(CAP_NET_ADMIN))
477 return -EPERM;
478
479 if (copy_from_user(&rt, arg, sizeof(rt)))
480 return -EFAULT;
481
482 rtnl_lock();
483 err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
484 if (err == 0) {
485 struct fib_table *tb;
486
487 if (cmd == SIOCDELRT) {
488 tb = fib_get_table(net, cfg.fc_table);
489 if (tb)
490 err = fib_table_delete(tb, &cfg);
491 else
492 err = -ESRCH;
493 } else {
494 tb = fib_new_table(net, cfg.fc_table);
495 if (tb)
496 err = fib_table_insert(tb, &cfg);
497 else
498 err = -ENOBUFS;
499 }
500
501 /* allocated by rtentry_to_fib_config() */
502 kfree(cfg.fc_mx);
503 }
504 rtnl_unlock();
505 return err;
506 }
507 return -EINVAL;
508 }
509
510 const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
511 [RTA_DST] = { .type = NLA_U32 },
512 [RTA_SRC] = { .type = NLA_U32 },
513 [RTA_IIF] = { .type = NLA_U32 },
514 [RTA_OIF] = { .type = NLA_U32 },
515 [RTA_GATEWAY] = { .type = NLA_U32 },
516 [RTA_PRIORITY] = { .type = NLA_U32 },
517 [RTA_PREFSRC] = { .type = NLA_U32 },
518 [RTA_METRICS] = { .type = NLA_NESTED },
519 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
520 [RTA_FLOW] = { .type = NLA_U32 },
521 };
522
523 static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
524 struct nlmsghdr *nlh, struct fib_config *cfg)
525 {
526 struct nlattr *attr;
527 int err, remaining;
528 struct rtmsg *rtm;
529
530 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
531 if (err < 0)
532 goto errout;
533
534 memset(cfg, 0, sizeof(*cfg));
535
536 rtm = nlmsg_data(nlh);
537 cfg->fc_dst_len = rtm->rtm_dst_len;
538 cfg->fc_tos = rtm->rtm_tos;
539 cfg->fc_table = rtm->rtm_table;
540 cfg->fc_protocol = rtm->rtm_protocol;
541 cfg->fc_scope = rtm->rtm_scope;
542 cfg->fc_type = rtm->rtm_type;
543 cfg->fc_flags = rtm->rtm_flags;
544 cfg->fc_nlflags = nlh->nlmsg_flags;
545
546 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
547 cfg->fc_nlinfo.nlh = nlh;
548 cfg->fc_nlinfo.nl_net = net;
549
550 if (cfg->fc_type > RTN_MAX) {
551 err = -EINVAL;
552 goto errout;
553 }
554
555 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
556 switch (nla_type(attr)) {
557 case RTA_DST:
558 cfg->fc_dst = nla_get_be32(attr);
559 break;
560 case RTA_OIF:
561 cfg->fc_oif = nla_get_u32(attr);
562 break;
563 case RTA_GATEWAY:
564 cfg->fc_gw = nla_get_be32(attr);
565 break;
566 case RTA_PRIORITY:
567 cfg->fc_priority = nla_get_u32(attr);
568 break;
569 case RTA_PREFSRC:
570 cfg->fc_prefsrc = nla_get_be32(attr);
571 break;
572 case RTA_METRICS:
573 cfg->fc_mx = nla_data(attr);
574 cfg->fc_mx_len = nla_len(attr);
575 break;
576 case RTA_MULTIPATH:
577 cfg->fc_mp = nla_data(attr);
578 cfg->fc_mp_len = nla_len(attr);
579 break;
580 case RTA_FLOW:
581 cfg->fc_flow = nla_get_u32(attr);
582 break;
583 case RTA_TABLE:
584 cfg->fc_table = nla_get_u32(attr);
585 break;
586 }
587 }
588
589 return 0;
590 errout:
591 return err;
592 }
593
594 static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
595 {
596 struct net *net = sock_net(skb->sk);
597 struct fib_config cfg;
598 struct fib_table *tb;
599 int err;
600
601 err = rtm_to_fib_config(net, skb, nlh, &cfg);
602 if (err < 0)
603 goto errout;
604
605 tb = fib_get_table(net, cfg.fc_table);
606 if (tb == NULL) {
607 err = -ESRCH;
608 goto errout;
609 }
610
611 err = fib_table_delete(tb, &cfg);
612 errout:
613 return err;
614 }
615
616 static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
617 {
618 struct net *net = sock_net(skb->sk);
619 struct fib_config cfg;
620 struct fib_table *tb;
621 int err;
622
623 err = rtm_to_fib_config(net, skb, nlh, &cfg);
624 if (err < 0)
625 goto errout;
626
627 tb = fib_new_table(net, cfg.fc_table);
628 if (tb == NULL) {
629 err = -ENOBUFS;
630 goto errout;
631 }
632
633 err = fib_table_insert(tb, &cfg);
634 errout:
635 return err;
636 }
637
638 static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
639 {
640 struct net *net = sock_net(skb->sk);
641 unsigned int h, s_h;
642 unsigned int e = 0, s_e;
643 struct fib_table *tb;
644 struct hlist_node *node;
645 struct hlist_head *head;
646 int dumped = 0;
647
648 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
649 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
650 return ip_rt_dump(skb, cb);
651
652 s_h = cb->args[0];
653 s_e = cb->args[1];
654
655 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
656 e = 0;
657 head = &net->ipv4.fib_table_hash[h];
658 hlist_for_each_entry(tb, node, head, tb_hlist) {
659 if (e < s_e)
660 goto next;
661 if (dumped)
662 memset(&cb->args[2], 0, sizeof(cb->args) -
663 2 * sizeof(cb->args[0]));
664 if (fib_table_dump(tb, skb, cb) < 0)
665 goto out;
666 dumped = 1;
667 next:
668 e++;
669 }
670 }
671 out:
672 cb->args[1] = e;
673 cb->args[0] = h;
674
675 return skb->len;
676 }
677
678 /* Prepare and feed intra-kernel routing request.
679 Really, it should be netlink message, but :-( netlink
680 can be not configured, so that we feed it directly
681 to fib engine. It is legal, because all events occur
682 only when netlink is already locked.
683 */
684
685 static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
686 {
687 struct net *net = dev_net(ifa->ifa_dev->dev);
688 struct fib_table *tb;
689 struct fib_config cfg = {
690 .fc_protocol = RTPROT_KERNEL,
691 .fc_type = type,
692 .fc_dst = dst,
693 .fc_dst_len = dst_len,
694 .fc_prefsrc = ifa->ifa_local,
695 .fc_oif = ifa->ifa_dev->dev->ifindex,
696 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
697 .fc_nlinfo = {
698 .nl_net = net,
699 },
700 };
701
702 if (type == RTN_UNICAST)
703 tb = fib_new_table(net, RT_TABLE_MAIN);
704 else
705 tb = fib_new_table(net, RT_TABLE_LOCAL);
706
707 if (tb == NULL)
708 return;
709
710 cfg.fc_table = tb->tb_id;
711
712 if (type != RTN_LOCAL)
713 cfg.fc_scope = RT_SCOPE_LINK;
714 else
715 cfg.fc_scope = RT_SCOPE_HOST;
716
717 if (cmd == RTM_NEWROUTE)
718 fib_table_insert(tb, &cfg);
719 else
720 fib_table_delete(tb, &cfg);
721 }
722
723 void fib_add_ifaddr(struct in_ifaddr *ifa)
724 {
725 struct in_device *in_dev = ifa->ifa_dev;
726 struct net_device *dev = in_dev->dev;
727 struct in_ifaddr *prim = ifa;
728 __be32 mask = ifa->ifa_mask;
729 __be32 addr = ifa->ifa_local;
730 __be32 prefix = ifa->ifa_address&mask;
731
732 if (ifa->ifa_flags&IFA_F_SECONDARY) {
733 prim = inet_ifa_byprefix(in_dev, prefix, mask);
734 if (prim == NULL) {
735 printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
736 return;
737 }
738 }
739
740 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
741
742 if (!(dev->flags&IFF_UP))
743 return;
744
745 /* Add broadcast address, if it is explicitly assigned. */
746 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
747 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
748
749 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
750 (prefix != addr || ifa->ifa_prefixlen < 32)) {
751 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
752 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
753
754 /* Add network specific broadcasts, when it takes a sense */
755 if (ifa->ifa_prefixlen < 31) {
756 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
757 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
758 }
759 }
760 }
761
762 static void fib_del_ifaddr(struct in_ifaddr *ifa)
763 {
764 struct in_device *in_dev = ifa->ifa_dev;
765 struct net_device *dev = in_dev->dev;
766 struct in_ifaddr *ifa1;
767 struct in_ifaddr *prim = ifa;
768 __be32 brd = ifa->ifa_address|~ifa->ifa_mask;
769 __be32 any = ifa->ifa_address&ifa->ifa_mask;
770 #define LOCAL_OK 1
771 #define BRD_OK 2
772 #define BRD0_OK 4
773 #define BRD1_OK 8
774 unsigned ok = 0;
775
776 if (!(ifa->ifa_flags&IFA_F_SECONDARY))
777 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
778 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
779 else {
780 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
781 if (prim == NULL) {
782 printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
783 return;
784 }
785 }
786
787 /* Deletion is more complicated than add.
788 We should take care of not to delete too much :-)
789
790 Scan address list to be sure that addresses are really gone.
791 */
792
793 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
794 if (ifa->ifa_local == ifa1->ifa_local)
795 ok |= LOCAL_OK;
796 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
797 ok |= BRD_OK;
798 if (brd == ifa1->ifa_broadcast)
799 ok |= BRD1_OK;
800 if (any == ifa1->ifa_broadcast)
801 ok |= BRD0_OK;
802 }
803
804 if (!(ok&BRD_OK))
805 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
806 if (!(ok&BRD1_OK))
807 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
808 if (!(ok&BRD0_OK))
809 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
810 if (!(ok&LOCAL_OK)) {
811 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
812
813 /* Check, that this local address finally disappeared. */
814 if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
815 /* And the last, but not the least thing.
816 We must flush stray FIB entries.
817
818 First of all, we scan fib_info list searching
819 for stray nexthop entries, then ignite fib_flush.
820 */
821 if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
822 fib_flush(dev_net(dev));
823 }
824 }
825 #undef LOCAL_OK
826 #undef BRD_OK
827 #undef BRD0_OK
828 #undef BRD1_OK
829 }
830
831 static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
832 {
833
834 struct fib_result res;
835 struct flowi fl = { .mark = frn->fl_mark,
836 .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
837 .tos = frn->fl_tos,
838 .scope = frn->fl_scope } } };
839
840 #ifdef CONFIG_IP_MULTIPLE_TABLES
841 res.r = NULL;
842 #endif
843
844 frn->err = -ENOENT;
845 if (tb) {
846 local_bh_disable();
847
848 frn->tb_id = tb->tb_id;
849 frn->err = fib_table_lookup(tb, &fl, &res);
850
851 if (!frn->err) {
852 frn->prefixlen = res.prefixlen;
853 frn->nh_sel = res.nh_sel;
854 frn->type = res.type;
855 frn->scope = res.scope;
856 fib_res_put(&res);
857 }
858 local_bh_enable();
859 }
860 }
861
862 static void nl_fib_input(struct sk_buff *skb)
863 {
864 struct net *net;
865 struct fib_result_nl *frn;
866 struct nlmsghdr *nlh;
867 struct fib_table *tb;
868 u32 pid;
869
870 net = sock_net(skb->sk);
871 nlh = nlmsg_hdr(skb);
872 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
873 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
874 return;
875
876 skb = skb_clone(skb, GFP_KERNEL);
877 if (skb == NULL)
878 return;
879 nlh = nlmsg_hdr(skb);
880
881 frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
882 tb = fib_get_table(net, frn->tb_id_in);
883
884 nl_fib_lookup(frn, tb);
885
886 pid = NETLINK_CB(skb).pid; /* pid of sending process */
887 NETLINK_CB(skb).pid = 0; /* from kernel */
888 NETLINK_CB(skb).dst_group = 0; /* unicast */
889 netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
890 }
891
892 static int __net_init nl_fib_lookup_init(struct net *net)
893 {
894 struct sock *sk;
895 sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
896 nl_fib_input, NULL, THIS_MODULE);
897 if (sk == NULL)
898 return -EAFNOSUPPORT;
899 net->ipv4.fibnl = sk;
900 return 0;
901 }
902
/*
 * Release the per-namespace fib lookup netlink socket and clear the
 * pointer stored in net->ipv4.fibnl.
 */
static void nl_fib_lookup_exit(struct net *net)
{
	netlink_kernel_release(net->ipv4.fibnl);
	net->ipv4.fibnl = NULL;
}
908
/*
 * Take the device's routes down: flush the FIB if fib_sync_down_dev()
 * returns non-zero, then flush the routing cache with the given delay and
 * notify ARP that the device went down.
 */
static void fib_disable_ip(struct net_device *dev, int force, int delay)
{
	int need_flush = fib_sync_down_dev(dev, force);

	if (need_flush)
		fib_flush(dev_net(dev));

	rt_cache_flush(dev_net(dev), delay);
	arp_ifdown(dev);
}
916
917 static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
918 {
919 struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
920 struct net_device *dev = ifa->ifa_dev->dev;
921
922 switch (event) {
923 case NETDEV_UP:
924 fib_add_ifaddr(ifa);
925 #ifdef CONFIG_IP_ROUTE_MULTIPATH
926 fib_sync_up(dev);
927 #endif
928 rt_cache_flush(dev_net(dev), -1);
929 break;
930 case NETDEV_DOWN:
931 fib_del_ifaddr(ifa);
932 if (ifa->ifa_dev->ifa_list == NULL) {
933 /* Last address was deleted from this interface.
934 Disable IP.
935 */
936 fib_disable_ip(dev, 1, 0);
937 } else {
938 rt_cache_flush(dev_net(dev), -1);
939 }
940 break;
941 }
942 return NOTIFY_DONE;
943 }
944
945 static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
946 {
947 struct net_device *dev = ptr;
948 struct in_device *in_dev = __in_dev_get_rtnl(dev);
949
950 if (event == NETDEV_UNREGISTER) {
951 fib_disable_ip(dev, 2, -1);
952 return NOTIFY_DONE;
953 }
954
955 if (!in_dev)
956 return NOTIFY_DONE;
957
958 switch (event) {
959 case NETDEV_UP:
960 for_ifa(in_dev) {
961 fib_add_ifaddr(ifa);
962 } endfor_ifa(in_dev);
963 #ifdef CONFIG_IP_ROUTE_MULTIPATH
964 fib_sync_up(dev);
965 #endif
966 rt_cache_flush(dev_net(dev), -1);
967 break;
968 case NETDEV_DOWN:
969 fib_disable_ip(dev, 0, 0);
970 break;
971 case NETDEV_CHANGEMTU:
972 case NETDEV_CHANGE:
973 rt_cache_flush(dev_net(dev), 0);
974 break;
975 case NETDEV_UNREGISTER_BATCH:
976 rt_cache_flush_batch();
977 break;
978 }
979 return NOTIFY_DONE;
980 }
981
/* Notifier block routing inetaddr events to fib_inetaddr_event(). */
static struct notifier_block fib_inetaddr_notifier = {
	.notifier_call = fib_inetaddr_event,
};
985
/* Notifier block routing netdevice events to fib_netdev_event(). */
static struct notifier_block fib_netdev_notifier = {
	.notifier_call = fib_netdev_event,
};
989
990 static int __net_init ip_fib_net_init(struct net *net)
991 {
992 int err;
993 unsigned int i;
994
995 net->ipv4.fib_table_hash = kzalloc(
996 sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
997 if (net->ipv4.fib_table_hash == NULL)
998 return -ENOMEM;
999
1000 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
1001 INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
1002
1003 err = fib4_rules_init(net);
1004 if (err < 0)
1005 goto fail;
1006 return 0;
1007
1008 fail:
1009 kfree(net->ipv4.fib_table_hash);
1010 return err;
1011 }
1012
1013 static void ip_fib_net_exit(struct net *net)
1014 {
1015 unsigned int i;
1016
1017 #ifdef CONFIG_IP_MULTIPLE_TABLES
1018 fib4_rules_exit(net);
1019 #endif
1020
1021 for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
1022 struct fib_table *tb;
1023 struct hlist_head *head;
1024 struct hlist_node *node, *tmp;
1025
1026 head = &net->ipv4.fib_table_hash[i];
1027 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
1028 hlist_del(node);
1029 fib_table_flush(tb);
1030 kfree(tb);
1031 }
1032 }
1033 kfree(net->ipv4.fib_table_hash);
1034 }
1035
1036 static int __net_init fib_net_init(struct net *net)
1037 {
1038 int error;
1039
1040 error = ip_fib_net_init(net);
1041 if (error < 0)
1042 goto out;
1043 error = nl_fib_lookup_init(net);
1044 if (error < 0)
1045 goto out_nlfl;
1046 error = fib_proc_init(net);
1047 if (error < 0)
1048 goto out_proc;
1049 out:
1050 return error;
1051
1052 out_proc:
1053 nl_fib_lookup_exit(net);
1054 out_nlfl:
1055 ip_fib_net_exit(net);
1056 goto out;
1057 }
1058
/*
 * Per-namespace teardown; undoes fib_net_init() in reverse order:
 * proc entries, fib lookup netlink socket, then the FIB tables.
 */
static void __net_exit fib_net_exit(struct net *net)
{
	fib_proc_exit(net);
	nl_fib_lookup_exit(net);
	ip_fib_net_exit(net);
}
1065
/* Per-network-namespace init/exit hooks for the FIB frontend. */
static struct pernet_operations fib_net_ops = {
	.init = fib_net_init,
	.exit = fib_net_exit,
};
1070
/*
 * Boot-time initialisation of the FIB frontend: register the rtnetlink
 * route handlers, the per-namespace operations and the netdevice/inetaddr
 * notifiers, then call fib_hash_init().
 * NOTE(review): the return values of the register_*() calls are not
 * checked here.
 */
void __init ip_fib_init(void)
{
	/* RTM_GETROUTE only gets a dump handler here; its doit slot is NULL. */
	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);

	register_pernet_subsys(&fib_net_ops);
	register_netdevice_notifier(&fib_netdev_notifier);
	register_inetaddr_notifier(&fib_inetaddr_notifier);

	fib_hash_init();
}
This page took 0.067642 seconds and 5 git commands to generate.