8a01bfb7c87379772655db29a111bcfbcd3c1f39
[deliverable/linux.git] / net / ipv4 / devinet.c
1 /*
2 * NET3 IP device support routines.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Derived from the IP parts of dev.c 1.0.19
10 * Authors: Ross Biro
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
13 *
14 * Additional Authors:
15 * Alan Cox, <gw4pts@gw4pts.ampr.org>
16 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17 *
18 * Changes:
19 * Alexey Kuznetsov: pa_* fields are replaced with ifaddr
20 * lists.
21 * Cyrus Durgin: updated for kmod
22 * Matthias Andree: in devinet_ioctl, compare label and
23 * address (4.4BSD alias style support),
24 * fall back to comparing just the label
25 * if no match found.
26 */
27
28
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58
59 #include <net/arp.h>
60 #include <net/ip.h>
61 #include <net/route.h>
62 #include <net/ip_fib.h>
63 #include <net/rtnetlink.h>
64 #include <net/net_namespace.h>
65
66 #include "fib_lookup.h"
67
68 static struct ipv4_devconf ipv4_devconf = {
69 .data = {
70 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
71 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
72 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
73 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
74 },
75 };
76
77 static struct ipv4_devconf ipv4_devconf_dflt = {
78 .data = {
79 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
80 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
81 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
82 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
83 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
84 },
85 };
86
/*
 * Read attribute @attr from the default device configuration that
 * namespace @net points at (net->ipv4.devconf_dflt).
 *
 * @net is parenthesised in the expansion so that an arbitrary pointer
 * expression (not just a plain identifier) binds correctly to "->".
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
89
90 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
91 [IFA_LOCAL] = { .type = NLA_U32 },
92 [IFA_ADDRESS] = { .type = NLA_U32 },
93 [IFA_BROADCAST] = { .type = NLA_U32 },
94 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
95 };
96
/* inet_addr_hash()'s shifting (folding in 8-bit steps and asking
 * hash_ptr() for 8 bits) depends on this IN4_ADDR_HSIZE value.  If you
 * change this define, make the matching changes to inet_addr_hash().
 */
#define IN4_ADDR_HSIZE 256
/* Hash buckets of in_ifaddr entries, indexed by inet_addr_hash(). */
static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
/* Taken around every insertion into / removal from inet_addr_lst. */
static DEFINE_SPINLOCK(inet_addr_hash_lock);
104
105 static inline unsigned int inet_addr_hash(struct net *net, __be32 addr)
106 {
107 u32 val = (__force u32) addr ^ hash_ptr(net, 8);
108
109 return ((val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24)) &
110 (IN4_ADDR_HSIZE - 1));
111 }
112
113 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
114 {
115 unsigned int hash = inet_addr_hash(net, ifa->ifa_local);
116
117 spin_lock(&inet_addr_hash_lock);
118 hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
119 spin_unlock(&inet_addr_hash_lock);
120 }
121
/*
 * Unlink @ifa from the global address hash.  The removal is done under
 * inet_addr_hash_lock with hlist_del_init_rcu().
 */
static void inet_hash_remove(struct in_ifaddr *ifa)
{
	spin_lock(&inet_addr_hash_lock);
	hlist_del_init_rcu(&ifa->hash);
	spin_unlock(&inet_addr_hash_lock);
}
128
129 /**
130 * __ip_dev_find - find the first device with a given source address.
131 * @net: the net namespace
132 * @addr: the source address
133 * @devref: if true, take a reference on the found device
134 *
135 * If a caller uses devref=false, it should be protected by RCU, or RTNL
136 */
137 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
138 {
139 unsigned int hash = inet_addr_hash(net, addr);
140 struct net_device *result = NULL;
141 struct in_ifaddr *ifa;
142 struct hlist_node *node;
143
144 rcu_read_lock();
145 hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) {
146 struct net_device *dev = ifa->ifa_dev->dev;
147
148 if (!net_eq(dev_net(dev), net))
149 continue;
150 if (ifa->ifa_local == addr) {
151 result = dev;
152 break;
153 }
154 }
155 if (!result) {
156 struct flowi4 fl4 = { .daddr = addr };
157 struct fib_result res = { 0 };
158 struct fib_table *local;
159
160 /* Fallback to FIB local table so that communication
161 * over loopback subnets work.
162 */
163 local = fib_get_table(net, RT_TABLE_LOCAL);
164 if (local &&
165 !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
166 res.type == RTN_LOCAL)
167 result = FIB_RES_DEV(res);
168 }
169 if (result && devref)
170 dev_hold(result);
171 rcu_read_unlock();
172 return result;
173 }
174 EXPORT_SYMBOL(__ip_dev_find);
175
/* Forward declaration; the definition is not in this part of the file. */
static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);

/* Notifier chain invoked with NETDEV_UP / NETDEV_DOWN and an in_ifaddr. */
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy);
#ifdef CONFIG_SYSCTL
static void devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
/* Without CONFIG_SYSCTL the (un)registration hooks are empty stubs. */
static inline void devinet_sysctl_register(struct in_device *idev)
{
}
static inline void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif
192
193 /* Locks all the inet devices. */
194
/*
 * Allocate a zero-filled in_ifaddr with GFP_KERNEL.
 * Returns whatever kzalloc() returns; callers in this file check the
 * result for NULL.
 */
static struct in_ifaddr *inet_alloc_ifa(void)
{
	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
}
199
200 static void inet_rcu_free_ifa(struct rcu_head *head)
201 {
202 struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
203 if (ifa->ifa_dev)
204 in_dev_put(ifa->ifa_dev);
205 kfree(ifa);
206 }
207
/*
 * Schedule @ifa for release: inet_rcu_free_ifa() is queued through
 * call_rcu(), so the actual free is deferred to that callback.
 */
static inline void inet_free_ifa(struct in_ifaddr *ifa)
{
	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
}
212
213 void in_dev_finish_destroy(struct in_device *idev)
214 {
215 struct net_device *dev = idev->dev;
216
217 WARN_ON(idev->ifa_list);
218 WARN_ON(idev->mc_list);
219 #ifdef NET_REFCNT_DEBUG
220 printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
221 idev, dev ? dev->name : "NIL");
222 #endif
223 dev_put(dev);
224 if (!idev->dead)
225 pr_err("Freeing alive in_device %p\n", idev);
226 else
227 kfree(idev);
228 }
229 EXPORT_SYMBOL(in_dev_finish_destroy);
230
231 static struct in_device *inetdev_init(struct net_device *dev)
232 {
233 struct in_device *in_dev;
234
235 ASSERT_RTNL();
236
237 in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
238 if (!in_dev)
239 goto out;
240 memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
241 sizeof(in_dev->cnf));
242 in_dev->cnf.sysctl = NULL;
243 in_dev->dev = dev;
244 in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
245 if (!in_dev->arp_parms)
246 goto out_kfree;
247 if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
248 dev_disable_lro(dev);
249 /* Reference in_dev->dev */
250 dev_hold(dev);
251 /* Account for reference dev->ip_ptr (below) */
252 in_dev_hold(in_dev);
253
254 devinet_sysctl_register(in_dev);
255 ip_mc_init_dev(in_dev);
256 if (dev->flags & IFF_UP)
257 ip_mc_up(in_dev);
258
259 /* we can receive as soon as ip_ptr is set -- do this last */
260 rcu_assign_pointer(dev->ip_ptr, in_dev);
261 out:
262 return in_dev;
263 out_kfree:
264 kfree(in_dev);
265 in_dev = NULL;
266 goto out;
267 }
268
/*
 * RCU callback queued by inetdev_destroy(): drops one reference on the
 * in_device that embeds @head.
 */
static void in_dev_rcu_put(struct rcu_head *head)
{
	struct in_device *idev = container_of(head, struct in_device, rcu_head);
	in_dev_put(idev);
}
274
275 static void inetdev_destroy(struct in_device *in_dev)
276 {
277 struct in_ifaddr *ifa;
278 struct net_device *dev;
279
280 ASSERT_RTNL();
281
282 dev = in_dev->dev;
283
284 in_dev->dead = 1;
285
286 ip_mc_destroy_dev(in_dev);
287
288 while ((ifa = in_dev->ifa_list) != NULL) {
289 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
290 inet_free_ifa(ifa);
291 }
292
293 RCU_INIT_POINTER(dev->ip_ptr, NULL);
294
295 devinet_sysctl_unregister(in_dev);
296 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
297 arp_ifdown(dev);
298
299 call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
300 }
301
302 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
303 {
304 rcu_read_lock();
305 for_primary_ifa(in_dev) {
306 if (inet_ifa_match(a, ifa)) {
307 if (!b || inet_ifa_match(b, ifa)) {
308 rcu_read_unlock();
309 return 1;
310 }
311 }
312 } endfor_ifa(in_dev);
313 rcu_read_unlock();
314 return 0;
315 }
316
/*
 * Remove the address *@ifap from @in_dev's address list and announce it.
 *
 * @in_dev:  device whose ifa_list is edited
 * @ifap:    location of the list link that points at the address to remove
 * @destroy: non-zero to also pass the removed address to inet_free_ifa()
 * @nlh:     netlink request header forwarded to rtmsg_ifa()
 * @pid:     netlink pid forwarded to rtmsg_ifa()
 *
 * When the removed address is a primary one, its secondaries (same mask,
 * matching subnet) are either deleted as well or, if
 * IN_DEV_PROMOTE_SECONDARIES() is set, the first of them is promoted to
 * primary.  Caller holds RTNL.
 */
static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			   int destroy, struct nlmsghdr *nlh, u32 pid)
{
	struct in_ifaddr *promote = NULL;
	struct in_ifaddr *ifa, *ifa1 = *ifap;
	struct in_ifaddr *last_prim = in_dev->ifa_list;
	struct in_ifaddr *prev_prom = NULL;
	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

	ASSERT_RTNL();

	/* 1. Deleting a primary ifaddr forces deletion of all secondaries
	 * unless alias promotion is set
	 **/

	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
		struct in_ifaddr **ifap1 = &ifa1->ifa_next;

		while ((ifa = *ifap1) != NULL) {
			/* Track the last primary after ifa1 whose scope
			 * value is not below ifa1's; a promoted address is
			 * re-linked right behind it.
			 */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
			    ifa1->ifa_scope <= ifa->ifa_scope)
				last_prim = ifa;

			/* Skip everything that is not a secondary of ifa1. */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
			    ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
				ifap1 = &ifa->ifa_next;
				prev_prom = ifa;
				continue;
			}

			if (!do_promote) {
				inet_hash_remove(ifa);
				*ifap1 = ifa->ifa_next;

				rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
				blocking_notifier_call_chain(&inetaddr_chain,
						NETDEV_DOWN, ifa);
				inet_free_ifa(ifa);
			} else {
				/* First matching secondary becomes primary. */
				promote = ifa;
				break;
			}
		}
	}

	/* On promotion all secondaries from the subnet are changing
	 * the primary IP, we must remove all their routes silently
	 * and later add them back with the new prefsrc. Do this
	 * while all addresses are on the device list.
	 */
	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa))
			fib_del_ifaddr(ifa, ifa1);
	}

	/* 2. Unlink it */

	*ifap = ifa1->ifa_next;
	inet_hash_remove(ifa1);

	/* 3. Announce address deletion */

	/* Send message first, then call notifier.
	   At first sight, FIB update triggered by notifier
	   will refer to already deleted ifaddr, that could confuse
	   netlink listeners. It is not true: look, gated sees
	   that route deleted and if it still thinks that ifaddr
	   is valid, it will try to restore deleted routes... Grr.
	   So that, this order is correct.
	 */
	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

	if (promote) {
		/* Remember the successor before promote is re-linked. */
		struct in_ifaddr *next_sec = promote->ifa_next;

		/* Move the promoted address behind the last primary. */
		if (prev_prom) {
			prev_prom->ifa_next = promote->ifa_next;
			promote->ifa_next = last_prim->ifa_next;
			last_prim->ifa_next = promote;
		}

		promote->ifa_flags &= ~IFA_F_SECONDARY;
		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
		blocking_notifier_call_chain(&inetaddr_chain,
				NETDEV_UP, promote);
		/* Re-add the routes of the remaining secondaries. */
		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
			if (ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa))
					continue;
			fib_add_ifaddr(ifa);
		}

	}
	if (destroy)
		inet_free_ifa(ifa1);
}
416
/*
 * Convenience wrapper around __inet_del_ifa() for callers that have no
 * netlink request: passes a NULL header and pid 0.
 */
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy)
{
	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
}
422
423 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
424 u32 pid)
425 {
426 struct in_device *in_dev = ifa->ifa_dev;
427 struct in_ifaddr *ifa1, **ifap, **last_primary;
428
429 ASSERT_RTNL();
430
431 if (!ifa->ifa_local) {
432 inet_free_ifa(ifa);
433 return 0;
434 }
435
436 ifa->ifa_flags &= ~IFA_F_SECONDARY;
437 last_primary = &in_dev->ifa_list;
438
439 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
440 ifap = &ifa1->ifa_next) {
441 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
442 ifa->ifa_scope <= ifa1->ifa_scope)
443 last_primary = &ifa1->ifa_next;
444 if (ifa1->ifa_mask == ifa->ifa_mask &&
445 inet_ifa_match(ifa1->ifa_address, ifa)) {
446 if (ifa1->ifa_local == ifa->ifa_local) {
447 inet_free_ifa(ifa);
448 return -EEXIST;
449 }
450 if (ifa1->ifa_scope != ifa->ifa_scope) {
451 inet_free_ifa(ifa);
452 return -EINVAL;
453 }
454 ifa->ifa_flags |= IFA_F_SECONDARY;
455 }
456 }
457
458 if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
459 net_srandom(ifa->ifa_local);
460 ifap = last_primary;
461 }
462
463 ifa->ifa_next = *ifap;
464 *ifap = ifa;
465
466 inet_hash_insert(dev_net(in_dev->dev), ifa);
467
468 /* Send message first, then call notifier.
469 Notifier will trigger FIB update, so that
470 listeners of netlink will know about new ifaddr */
471 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
472 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
473
474 return 0;
475 }
476
/*
 * Convenience wrapper around __inet_insert_ifa() for callers that have
 * no netlink request: passes a NULL header and pid 0 and returns its
 * result unchanged.
 */
static int inet_insert_ifa(struct in_ifaddr *ifa)
{
	return __inet_insert_ifa(ifa, NULL, 0);
}
481
482 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
483 {
484 struct in_device *in_dev = __in_dev_get_rtnl(dev);
485
486 ASSERT_RTNL();
487
488 if (!in_dev) {
489 inet_free_ifa(ifa);
490 return -ENOBUFS;
491 }
492 ipv4_devconf_setall(in_dev);
493 if (ifa->ifa_dev != in_dev) {
494 WARN_ON(ifa->ifa_dev);
495 in_dev_hold(in_dev);
496 ifa->ifa_dev = in_dev;
497 }
498 if (ipv4_is_loopback(ifa->ifa_local))
499 ifa->ifa_scope = RT_SCOPE_HOST;
500 return inet_insert_ifa(ifa);
501 }
502
503 /* Caller must hold RCU or RTNL :
504 * We dont take a reference on found in_device
505 */
506 struct in_device *inetdev_by_index(struct net *net, int ifindex)
507 {
508 struct net_device *dev;
509 struct in_device *in_dev = NULL;
510
511 rcu_read_lock();
512 dev = dev_get_by_index_rcu(net, ifindex);
513 if (dev)
514 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
515 rcu_read_unlock();
516 return in_dev;
517 }
518 EXPORT_SYMBOL(inetdev_by_index);
519
520 /* Called only from RTNL semaphored context. No locks. */
521
522 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
523 __be32 mask)
524 {
525 ASSERT_RTNL();
526
527 for_primary_ifa(in_dev) {
528 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
529 return ifa;
530 } endfor_ifa(in_dev);
531 return NULL;
532 }
533
534 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
535 {
536 struct net *net = sock_net(skb->sk);
537 struct nlattr *tb[IFA_MAX+1];
538 struct in_device *in_dev;
539 struct ifaddrmsg *ifm;
540 struct in_ifaddr *ifa, **ifap;
541 int err = -EINVAL;
542
543 ASSERT_RTNL();
544
545 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
546 if (err < 0)
547 goto errout;
548
549 ifm = nlmsg_data(nlh);
550 in_dev = inetdev_by_index(net, ifm->ifa_index);
551 if (in_dev == NULL) {
552 err = -ENODEV;
553 goto errout;
554 }
555
556 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
557 ifap = &ifa->ifa_next) {
558 if (tb[IFA_LOCAL] &&
559 ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
560 continue;
561
562 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
563 continue;
564
565 if (tb[IFA_ADDRESS] &&
566 (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
567 !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
568 continue;
569
570 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
571 return 0;
572 }
573
574 err = -EADDRNOTAVAIL;
575 errout:
576 return err;
577 }
578
579 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
580 {
581 struct nlattr *tb[IFA_MAX+1];
582 struct in_ifaddr *ifa;
583 struct ifaddrmsg *ifm;
584 struct net_device *dev;
585 struct in_device *in_dev;
586 int err;
587
588 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
589 if (err < 0)
590 goto errout;
591
592 ifm = nlmsg_data(nlh);
593 err = -EINVAL;
594 if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
595 goto errout;
596
597 dev = __dev_get_by_index(net, ifm->ifa_index);
598 err = -ENODEV;
599 if (dev == NULL)
600 goto errout;
601
602 in_dev = __in_dev_get_rtnl(dev);
603 err = -ENOBUFS;
604 if (in_dev == NULL)
605 goto errout;
606
607 ifa = inet_alloc_ifa();
608 if (ifa == NULL)
609 /*
610 * A potential indev allocation can be left alive, it stays
611 * assigned to its device and is destroy with it.
612 */
613 goto errout;
614
615 ipv4_devconf_setall(in_dev);
616 in_dev_hold(in_dev);
617
618 if (tb[IFA_ADDRESS] == NULL)
619 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
620
621 INIT_HLIST_NODE(&ifa->hash);
622 ifa->ifa_prefixlen = ifm->ifa_prefixlen;
623 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
624 ifa->ifa_flags = ifm->ifa_flags;
625 ifa->ifa_scope = ifm->ifa_scope;
626 ifa->ifa_dev = in_dev;
627
628 ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
629 ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
630
631 if (tb[IFA_BROADCAST])
632 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
633
634 if (tb[IFA_LABEL])
635 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
636 else
637 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
638
639 return ifa;
640
641 errout:
642 return ERR_PTR(err);
643 }
644
645 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
646 {
647 struct net *net = sock_net(skb->sk);
648 struct in_ifaddr *ifa;
649
650 ASSERT_RTNL();
651
652 ifa = rtm_to_ifaddr(net, nlh);
653 if (IS_ERR(ifa))
654 return PTR_ERR(ifa);
655
656 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
657 }
658
659 /*
660 * Determine a default network mask, based on the IP address.
661 */
662
663 static inline int inet_abc_len(__be32 addr)
664 {
665 int rc = -1; /* Something else, probably a multicast. */
666
667 if (ipv4_is_zeronet(addr))
668 rc = 0;
669 else {
670 __u32 haddr = ntohl(addr);
671
672 if (IN_CLASSA(haddr))
673 rc = 8;
674 else if (IN_CLASSB(haddr))
675 rc = 16;
676 else if (IN_CLASSC(haddr))
677 rc = 24;
678 }
679
680 return rc;
681 }
682
683
684 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
685 {
686 struct ifreq ifr;
687 struct sockaddr_in sin_orig;
688 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
689 struct in_device *in_dev;
690 struct in_ifaddr **ifap = NULL;
691 struct in_ifaddr *ifa = NULL;
692 struct net_device *dev;
693 char *colon;
694 int ret = -EFAULT;
695 int tryaddrmatch = 0;
696
697 /*
698 * Fetch the caller's info block into kernel space
699 */
700
701 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
702 goto out;
703 ifr.ifr_name[IFNAMSIZ - 1] = 0;
704
705 /* save original address for comparison */
706 memcpy(&sin_orig, sin, sizeof(*sin));
707
708 colon = strchr(ifr.ifr_name, ':');
709 if (colon)
710 *colon = 0;
711
712 dev_load(net, ifr.ifr_name);
713
714 switch (cmd) {
715 case SIOCGIFADDR: /* Get interface address */
716 case SIOCGIFBRDADDR: /* Get the broadcast address */
717 case SIOCGIFDSTADDR: /* Get the destination address */
718 case SIOCGIFNETMASK: /* Get the netmask for the interface */
719 /* Note that these ioctls will not sleep,
720 so that we do not impose a lock.
721 One day we will be forced to put shlock here (I mean SMP)
722 */
723 tryaddrmatch = (sin_orig.sin_family == AF_INET);
724 memset(sin, 0, sizeof(*sin));
725 sin->sin_family = AF_INET;
726 break;
727
728 case SIOCSIFFLAGS:
729 ret = -EACCES;
730 if (!capable(CAP_NET_ADMIN))
731 goto out;
732 break;
733 case SIOCSIFADDR: /* Set interface address (and family) */
734 case SIOCSIFBRDADDR: /* Set the broadcast address */
735 case SIOCSIFDSTADDR: /* Set the destination address */
736 case SIOCSIFNETMASK: /* Set the netmask for the interface */
737 ret = -EACCES;
738 if (!capable(CAP_NET_ADMIN))
739 goto out;
740 ret = -EINVAL;
741 if (sin->sin_family != AF_INET)
742 goto out;
743 break;
744 default:
745 ret = -EINVAL;
746 goto out;
747 }
748
749 rtnl_lock();
750
751 ret = -ENODEV;
752 dev = __dev_get_by_name(net, ifr.ifr_name);
753 if (!dev)
754 goto done;
755
756 if (colon)
757 *colon = ':';
758
759 in_dev = __in_dev_get_rtnl(dev);
760 if (in_dev) {
761 if (tryaddrmatch) {
762 /* Matthias Andree */
763 /* compare label and address (4.4BSD style) */
764 /* note: we only do this for a limited set of ioctls
765 and only if the original address family was AF_INET.
766 This is checked above. */
767 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
768 ifap = &ifa->ifa_next) {
769 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
770 sin_orig.sin_addr.s_addr ==
771 ifa->ifa_local) {
772 break; /* found */
773 }
774 }
775 }
776 /* we didn't get a match, maybe the application is
777 4.3BSD-style and passed in junk so we fall back to
778 comparing just the label */
779 if (!ifa) {
780 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
781 ifap = &ifa->ifa_next)
782 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
783 break;
784 }
785 }
786
787 ret = -EADDRNOTAVAIL;
788 if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
789 goto done;
790
791 switch (cmd) {
792 case SIOCGIFADDR: /* Get interface address */
793 sin->sin_addr.s_addr = ifa->ifa_local;
794 goto rarok;
795
796 case SIOCGIFBRDADDR: /* Get the broadcast address */
797 sin->sin_addr.s_addr = ifa->ifa_broadcast;
798 goto rarok;
799
800 case SIOCGIFDSTADDR: /* Get the destination address */
801 sin->sin_addr.s_addr = ifa->ifa_address;
802 goto rarok;
803
804 case SIOCGIFNETMASK: /* Get the netmask for the interface */
805 sin->sin_addr.s_addr = ifa->ifa_mask;
806 goto rarok;
807
808 case SIOCSIFFLAGS:
809 if (colon) {
810 ret = -EADDRNOTAVAIL;
811 if (!ifa)
812 break;
813 ret = 0;
814 if (!(ifr.ifr_flags & IFF_UP))
815 inet_del_ifa(in_dev, ifap, 1);
816 break;
817 }
818 ret = dev_change_flags(dev, ifr.ifr_flags);
819 break;
820
821 case SIOCSIFADDR: /* Set interface address (and family) */
822 ret = -EINVAL;
823 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
824 break;
825
826 if (!ifa) {
827 ret = -ENOBUFS;
828 ifa = inet_alloc_ifa();
829 INIT_HLIST_NODE(&ifa->hash);
830 if (!ifa)
831 break;
832 if (colon)
833 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
834 else
835 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
836 } else {
837 ret = 0;
838 if (ifa->ifa_local == sin->sin_addr.s_addr)
839 break;
840 inet_del_ifa(in_dev, ifap, 0);
841 ifa->ifa_broadcast = 0;
842 ifa->ifa_scope = 0;
843 }
844
845 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
846
847 if (!(dev->flags & IFF_POINTOPOINT)) {
848 ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
849 ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
850 if ((dev->flags & IFF_BROADCAST) &&
851 ifa->ifa_prefixlen < 31)
852 ifa->ifa_broadcast = ifa->ifa_address |
853 ~ifa->ifa_mask;
854 } else {
855 ifa->ifa_prefixlen = 32;
856 ifa->ifa_mask = inet_make_mask(32);
857 }
858 ret = inet_set_ifa(dev, ifa);
859 break;
860
861 case SIOCSIFBRDADDR: /* Set the broadcast address */
862 ret = 0;
863 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
864 inet_del_ifa(in_dev, ifap, 0);
865 ifa->ifa_broadcast = sin->sin_addr.s_addr;
866 inet_insert_ifa(ifa);
867 }
868 break;
869
870 case SIOCSIFDSTADDR: /* Set the destination address */
871 ret = 0;
872 if (ifa->ifa_address == sin->sin_addr.s_addr)
873 break;
874 ret = -EINVAL;
875 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
876 break;
877 ret = 0;
878 inet_del_ifa(in_dev, ifap, 0);
879 ifa->ifa_address = sin->sin_addr.s_addr;
880 inet_insert_ifa(ifa);
881 break;
882
883 case SIOCSIFNETMASK: /* Set the netmask for the interface */
884
885 /*
886 * The mask we set must be legal.
887 */
888 ret = -EINVAL;
889 if (bad_mask(sin->sin_addr.s_addr, 0))
890 break;
891 ret = 0;
892 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
893 __be32 old_mask = ifa->ifa_mask;
894 inet_del_ifa(in_dev, ifap, 0);
895 ifa->ifa_mask = sin->sin_addr.s_addr;
896 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
897
898 /* See if current broadcast address matches
899 * with current netmask, then recalculate
900 * the broadcast address. Otherwise it's a
901 * funny address, so don't touch it since
902 * the user seems to know what (s)he's doing...
903 */
904 if ((dev->flags & IFF_BROADCAST) &&
905 (ifa->ifa_prefixlen < 31) &&
906 (ifa->ifa_broadcast ==
907 (ifa->ifa_local|~old_mask))) {
908 ifa->ifa_broadcast = (ifa->ifa_local |
909 ~sin->sin_addr.s_addr);
910 }
911 inet_insert_ifa(ifa);
912 }
913 break;
914 }
915 done:
916 rtnl_unlock();
917 out:
918 return ret;
919 rarok:
920 rtnl_unlock();
921 ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
922 goto out;
923 }
924
925 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
926 {
927 struct in_device *in_dev = __in_dev_get_rtnl(dev);
928 struct in_ifaddr *ifa;
929 struct ifreq ifr;
930 int done = 0;
931
932 if (!in_dev)
933 goto out;
934
935 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
936 if (!buf) {
937 done += sizeof(ifr);
938 continue;
939 }
940 if (len < (int) sizeof(ifr))
941 break;
942 memset(&ifr, 0, sizeof(struct ifreq));
943 if (ifa->ifa_label)
944 strcpy(ifr.ifr_name, ifa->ifa_label);
945 else
946 strcpy(ifr.ifr_name, dev->name);
947
948 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
949 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
950 ifa->ifa_local;
951
952 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
953 done = -EFAULT;
954 break;
955 }
956 buf += sizeof(struct ifreq);
957 len -= sizeof(struct ifreq);
958 done += sizeof(struct ifreq);
959 }
960 out:
961 return done;
962 }
963
964 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
965 {
966 __be32 addr = 0;
967 struct in_device *in_dev;
968 struct net *net = dev_net(dev);
969
970 rcu_read_lock();
971 in_dev = __in_dev_get_rcu(dev);
972 if (!in_dev)
973 goto no_in_dev;
974
975 for_primary_ifa(in_dev) {
976 if (ifa->ifa_scope > scope)
977 continue;
978 if (!dst || inet_ifa_match(dst, ifa)) {
979 addr = ifa->ifa_local;
980 break;
981 }
982 if (!addr)
983 addr = ifa->ifa_local;
984 } endfor_ifa(in_dev);
985
986 if (addr)
987 goto out_unlock;
988 no_in_dev:
989
990 /* Not loopback addresses on loopback should be preferred
991 in this case. It is importnat that lo is the first interface
992 in dev_base list.
993 */
994 for_each_netdev_rcu(net, dev) {
995 in_dev = __in_dev_get_rcu(dev);
996 if (!in_dev)
997 continue;
998
999 for_primary_ifa(in_dev) {
1000 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1001 ifa->ifa_scope <= scope) {
1002 addr = ifa->ifa_local;
1003 goto out_unlock;
1004 }
1005 } endfor_ifa(in_dev);
1006 }
1007 out_unlock:
1008 rcu_read_unlock();
1009 return addr;
1010 }
1011 EXPORT_SYMBOL(inet_select_addr);
1012
/*
 * Per-device worker for inet_confirm_addr().
 *
 * Walks the addresses of @in_dev looking for two things at once:
 *  - "addr": the first address that equals @local (or any address when
 *    @local is 0) and whose scope value does not exceed @scope;
 *  - "same": an address whose subnet matches both @local and @dst, a
 *    zero value acting as a wildcard for either.
 *
 * Returns the selected address when a matching subnet was found, and 0
 * otherwise.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
			      __be32 local, int scope)
{
	int same = 0;
	__be32 addr = 0;

	for_ifa(in_dev) {
		if (!addr &&
		    (local == ifa->ifa_local || !local) &&
		    ifa->ifa_scope <= scope) {
			addr = ifa->ifa_local;
			/* Subnet already confirmed earlier: we are done. */
			if (same)
				break;
		}
		if (!same) {
			same = (!local || inet_ifa_match(local, ifa)) &&
				(!dst || inet_ifa_match(dst, ifa));
			if (same && addr) {
				if (local || !dst)
					break;
				/* Is the selected addr into dst subnet? */
				if (inet_ifa_match(addr, ifa))
					break;
				/* No, then can we use new local src? */
				if (ifa->ifa_scope <= scope) {
					addr = ifa->ifa_local;
					break;
				}
				/* search for large dst subnet for addr */
				same = 0;
			}
		}
	} endfor_ifa(in_dev);

	return same ? addr : 0;
}
1049
1050 /*
1051 * Confirm that local IP address exists using wildcards:
1052 * - in_dev: only on this interface, 0=any interface
1053 * - dst: only in the same subnet as dst, 0=any dst
1054 * - local: address, 0=autoselect the local address
1055 * - scope: maximum allowed scope value for the local address
1056 */
1057 __be32 inet_confirm_addr(struct in_device *in_dev,
1058 __be32 dst, __be32 local, int scope)
1059 {
1060 __be32 addr = 0;
1061 struct net_device *dev;
1062 struct net *net;
1063
1064 if (scope != RT_SCOPE_LINK)
1065 return confirm_addr_indev(in_dev, dst, local, scope);
1066
1067 net = dev_net(in_dev->dev);
1068 rcu_read_lock();
1069 for_each_netdev_rcu(net, dev) {
1070 in_dev = __in_dev_get_rcu(dev);
1071 if (in_dev) {
1072 addr = confirm_addr_indev(in_dev, dst, local, scope);
1073 if (addr)
1074 break;
1075 }
1076 }
1077 rcu_read_unlock();
1078
1079 return addr;
1080 }
1081 EXPORT_SYMBOL(inet_confirm_addr);
1082
1083 /*
1084 * Device notifier
1085 */
1086
/*
 * Add @nb to inetaddr_chain, the chain this file calls with NETDEV_UP /
 * NETDEV_DOWN and the affected in_ifaddr.  Returns the result of
 * blocking_notifier_chain_register().
 */
int register_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(register_inetaddr_notifier);
1092
/*
 * Remove @nb from inetaddr_chain.  Returns the result of
 * blocking_notifier_chain_unregister().
 */
int unregister_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(unregister_inetaddr_notifier);
1098
1099 /* Rename ifa_labels for a device name change. Make some effort to preserve
1100 * existing alias numbering and to create unique labels if possible.
1101 */
1102 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1103 {
1104 struct in_ifaddr *ifa;
1105 int named = 0;
1106
1107 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1108 char old[IFNAMSIZ], *dot;
1109
1110 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1111 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1112 if (named++ == 0)
1113 goto skip;
1114 dot = strchr(old, ':');
1115 if (dot == NULL) {
1116 sprintf(old, ":%d", named);
1117 dot = old;
1118 }
1119 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1120 strcat(ifa->ifa_label, dot);
1121 else
1122 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1123 skip:
1124 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1125 }
1126 }
1127
/*
 * Tell whether @mtu is large enough for IPv4 to be enabled on a device;
 * the threshold used by this file is 68 bytes.
 */
static inline bool inetdev_valid_mtu(unsigned int mtu)
{
	const unsigned int min_ipv4_mtu = 68;

	return !(mtu < min_ipv4_mtu);
}
1132
1133 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1134 struct in_device *in_dev)
1135
1136 {
1137 struct in_ifaddr *ifa;
1138
1139 for (ifa = in_dev->ifa_list; ifa;
1140 ifa = ifa->ifa_next) {
1141 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1142 ifa->ifa_local, dev,
1143 ifa->ifa_local, NULL,
1144 dev->dev_addr, NULL);
1145 }
1146 }
1147
/*
 * Netdevice notifier callback: keeps the IPv4 state of a device in step
 * with the device's life cycle.  @ptr is the net_device the event is
 * about.  Always returns NOTIFY_DONE, except when creating the
 * in_device on NETDEV_REGISTER fails (-ENOMEM).
 *
 * Called only under RTNL semaphore
 */

static int inetdev_event(struct notifier_block *this, unsigned long event,
			 void *ptr)
{
	struct net_device *dev = ptr;
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	ASSERT_RTNL();

	/* No IPv4 state yet: only events that can create it matter. */
	if (!in_dev) {
		if (event == NETDEV_REGISTER) {
			in_dev = inetdev_init(dev);
			if (!in_dev)
				return notifier_from_errno(-ENOMEM);
			if (dev->flags & IFF_LOOPBACK) {
				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
			}
		} else if (event == NETDEV_CHANGEMTU) {
			/* Re-enabling IP */
			if (inetdev_valid_mtu(dev->mtu))
				in_dev = inetdev_init(dev);
		}
		goto out;
	}

	switch (event) {
	case NETDEV_REGISTER:
		/* An in_device must not exist before registration. */
		printk(KERN_DEBUG "inetdev_event: bug\n");
		RCU_INIT_POINTER(dev->ip_ptr, NULL);
		break;
	case NETDEV_UP:
		if (!inetdev_valid_mtu(dev->mtu))
			break;
		/* Loopback devices get 127.0.0.1/8 automatically. */
		if (dev->flags & IFF_LOOPBACK) {
			struct in_ifaddr *ifa = inet_alloc_ifa();

			if (ifa) {
				INIT_HLIST_NODE(&ifa->hash);
				ifa->ifa_local =
				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
				ifa->ifa_prefixlen = 8;
				ifa->ifa_mask = inet_make_mask(8);
				in_dev_hold(in_dev);
				ifa->ifa_dev = in_dev;
				ifa->ifa_scope = RT_SCOPE_HOST;
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
				inet_insert_ifa(ifa);
			}
		}
		ip_mc_up(in_dev);
		/* fall through */
	case NETDEV_CHANGEADDR:
		if (!IN_DEV_ARP_NOTIFY(in_dev))
			break;
		/* fall through */
	case NETDEV_NOTIFY_PEERS:
		/* Send gratuitous ARP to notify of link change */
		inetdev_send_gratuitous_arp(dev, in_dev);
		break;
	case NETDEV_DOWN:
		ip_mc_down(in_dev);
		break;
	case NETDEV_PRE_TYPE_CHANGE:
		ip_mc_unmap(in_dev);
		break;
	case NETDEV_POST_TYPE_CHANGE:
		ip_mc_remap(in_dev);
		break;
	case NETDEV_CHANGEMTU:
		if (inetdev_valid_mtu(dev->mtu))
			break;
		/* disable IP when MTU is not enough */
		/* fall through */
	case NETDEV_UNREGISTER:
		inetdev_destroy(in_dev);
		break;
	case NETDEV_CHANGENAME:
		/* Do not notify about label change, this event is
		 * not interesting to applications using netlink.
		 */
		/* NOTE(review): inetdev_changename() does send RTM_NEWADDR
		 * for every address, so the comment above looks stale.
		 */
		inetdev_changename(dev, in_dev);

		/* Re-register sysctl so it picks up the new name. */
		devinet_sysctl_unregister(in_dev);
		devinet_sysctl_register(in_dev);
		break;
	}
out:
	return NOTIFY_DONE;
}
1238
1239 static struct notifier_block ip_netdev_notifier = {
1240 .notifier_call = inetdev_event,
1241 };
1242
1243 static inline size_t inet_nlmsg_size(void)
1244 {
1245 return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1246 + nla_total_size(4) /* IFA_ADDRESS */
1247 + nla_total_size(4) /* IFA_LOCAL */
1248 + nla_total_size(4) /* IFA_BROADCAST */
1249 + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1250 }
1251
1252 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1253 u32 pid, u32 seq, int event, unsigned int flags)
1254 {
1255 struct ifaddrmsg *ifm;
1256 struct nlmsghdr *nlh;
1257
1258 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1259 if (nlh == NULL)
1260 return -EMSGSIZE;
1261
1262 ifm = nlmsg_data(nlh);
1263 ifm->ifa_family = AF_INET;
1264 ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1265 ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1266 ifm->ifa_scope = ifa->ifa_scope;
1267 ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1268
1269 if ((ifa->ifa_address &&
1270 nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1271 (ifa->ifa_local &&
1272 nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1273 (ifa->ifa_broadcast &&
1274 nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1275 (ifa->ifa_label[0] &&
1276 nla_put_string(skb, IFA_LABEL, ifa->ifa_label)))
1277 goto nla_put_failure;
1278
1279 return nlmsg_end(skb, nlh);
1280
1281 nla_put_failure:
1282 nlmsg_cancel(skb, nlh);
1283 return -EMSGSIZE;
1284 }
1285
1286 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1287 {
1288 struct net *net = sock_net(skb->sk);
1289 int h, s_h;
1290 int idx, s_idx;
1291 int ip_idx, s_ip_idx;
1292 struct net_device *dev;
1293 struct in_device *in_dev;
1294 struct in_ifaddr *ifa;
1295 struct hlist_head *head;
1296 struct hlist_node *node;
1297
1298 s_h = cb->args[0];
1299 s_idx = idx = cb->args[1];
1300 s_ip_idx = ip_idx = cb->args[2];
1301
1302 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1303 idx = 0;
1304 head = &net->dev_index_head[h];
1305 rcu_read_lock();
1306 hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
1307 if (idx < s_idx)
1308 goto cont;
1309 if (h > s_h || idx > s_idx)
1310 s_ip_idx = 0;
1311 in_dev = __in_dev_get_rcu(dev);
1312 if (!in_dev)
1313 goto cont;
1314
1315 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1316 ifa = ifa->ifa_next, ip_idx++) {
1317 if (ip_idx < s_ip_idx)
1318 continue;
1319 if (inet_fill_ifaddr(skb, ifa,
1320 NETLINK_CB(cb->skb).pid,
1321 cb->nlh->nlmsg_seq,
1322 RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1323 rcu_read_unlock();
1324 goto done;
1325 }
1326 }
1327 cont:
1328 idx++;
1329 }
1330 rcu_read_unlock();
1331 }
1332
1333 done:
1334 cb->args[0] = h;
1335 cb->args[1] = idx;
1336 cb->args[2] = ip_idx;
1337
1338 return skb->len;
1339 }
1340
1341 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1342 u32 pid)
1343 {
1344 struct sk_buff *skb;
1345 u32 seq = nlh ? nlh->nlmsg_seq : 0;
1346 int err = -ENOBUFS;
1347 struct net *net;
1348
1349 net = dev_net(ifa->ifa_dev->dev);
1350 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1351 if (skb == NULL)
1352 goto errout;
1353
1354 err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1355 if (err < 0) {
1356 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1357 WARN_ON(err == -EMSGSIZE);
1358 kfree_skb(skb);
1359 goto errout;
1360 }
1361 rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1362 return;
1363 errout:
1364 if (err < 0)
1365 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1366 }
1367
1368 static size_t inet_get_link_af_size(const struct net_device *dev)
1369 {
1370 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1371
1372 if (!in_dev)
1373 return 0;
1374
1375 return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1376 }
1377
1378 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1379 {
1380 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1381 struct nlattr *nla;
1382 int i;
1383
1384 if (!in_dev)
1385 return -ENODATA;
1386
1387 nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1388 if (nla == NULL)
1389 return -EMSGSIZE;
1390
1391 for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1392 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1393
1394 return 0;
1395 }
1396
1397 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1398 [IFLA_INET_CONF] = { .type = NLA_NESTED },
1399 };
1400
1401 static int inet_validate_link_af(const struct net_device *dev,
1402 const struct nlattr *nla)
1403 {
1404 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1405 int err, rem;
1406
1407 if (dev && !__in_dev_get_rtnl(dev))
1408 return -EAFNOSUPPORT;
1409
1410 err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1411 if (err < 0)
1412 return err;
1413
1414 if (tb[IFLA_INET_CONF]) {
1415 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1416 int cfgid = nla_type(a);
1417
1418 if (nla_len(a) < 4)
1419 return -EINVAL;
1420
1421 if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1422 return -EINVAL;
1423 }
1424 }
1425
1426 return 0;
1427 }
1428
1429 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1430 {
1431 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1432 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1433 int rem;
1434
1435 if (!in_dev)
1436 return -EAFNOSUPPORT;
1437
1438 if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1439 BUG();
1440
1441 if (tb[IFLA_INET_CONF]) {
1442 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1443 ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1444 }
1445
1446 return 0;
1447 }
1448
1449 #ifdef CONFIG_SYSCTL
1450
1451 static void devinet_copy_dflt_conf(struct net *net, int i)
1452 {
1453 struct net_device *dev;
1454
1455 rcu_read_lock();
1456 for_each_netdev_rcu(net, dev) {
1457 struct in_device *in_dev;
1458
1459 in_dev = __in_dev_get_rcu(dev);
1460 if (in_dev && !test_bit(i, in_dev->cnf.state))
1461 in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1462 }
1463 rcu_read_unlock();
1464 }
1465
1466 /* called with RTNL locked */
1467 static void inet_forward_change(struct net *net)
1468 {
1469 struct net_device *dev;
1470 int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1471
1472 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1473 IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1474
1475 for_each_netdev(net, dev) {
1476 struct in_device *in_dev;
1477 if (on)
1478 dev_disable_lro(dev);
1479 rcu_read_lock();
1480 in_dev = __in_dev_get_rcu(dev);
1481 if (in_dev)
1482 IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1483 rcu_read_unlock();
1484 }
1485 }
1486
1487 static int devinet_conf_proc(ctl_table *ctl, int write,
1488 void __user *buffer,
1489 size_t *lenp, loff_t *ppos)
1490 {
1491 int old_value = *(int *)ctl->data;
1492 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1493 int new_value = *(int *)ctl->data;
1494
1495 if (write) {
1496 struct ipv4_devconf *cnf = ctl->extra1;
1497 struct net *net = ctl->extra2;
1498 int i = (int *)ctl->data - cnf->data;
1499
1500 set_bit(i, cnf->state);
1501
1502 if (cnf == net->ipv4.devconf_dflt)
1503 devinet_copy_dflt_conf(net, i);
1504 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1)
1505 if ((new_value == 0) && (old_value != 0))
1506 rt_cache_flush(net, 0);
1507 }
1508
1509 return ret;
1510 }
1511
1512 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1513 void __user *buffer,
1514 size_t *lenp, loff_t *ppos)
1515 {
1516 int *valp = ctl->data;
1517 int val = *valp;
1518 loff_t pos = *ppos;
1519 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1520
1521 if (write && *valp != val) {
1522 struct net *net = ctl->extra2;
1523
1524 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1525 if (!rtnl_trylock()) {
1526 /* Restore the original values before restarting */
1527 *valp = val;
1528 *ppos = pos;
1529 return restart_syscall();
1530 }
1531 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1532 inet_forward_change(net);
1533 } else if (*valp) {
1534 struct ipv4_devconf *cnf = ctl->extra1;
1535 struct in_device *idev =
1536 container_of(cnf, struct in_device, cnf);
1537 dev_disable_lro(idev->dev);
1538 }
1539 rtnl_unlock();
1540 rt_cache_flush(net, 0);
1541 }
1542 }
1543
1544 return ret;
1545 }
1546
1547 static int ipv4_doint_and_flush(ctl_table *ctl, int write,
1548 void __user *buffer,
1549 size_t *lenp, loff_t *ppos)
1550 {
1551 int *valp = ctl->data;
1552 int val = *valp;
1553 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1554 struct net *net = ctl->extra2;
1555
1556 if (write && *valp != val)
1557 rt_cache_flush(net, 0);
1558
1559 return ret;
1560 }
1561
/*
 * Builders for the entries of the devinet sysctl template table.
 *
 * DEVINET_SYSCTL_ENTRY describes one integer of the global ipv4_devconf:
 * @attr selects the IPV4_DEVCONF_<attr> slot (the constants are 1-based,
 * the data array is 0-based), @name is the procfs file name, @mval the
 * file mode and @proc the handler.  The remaining macros are shorthands:
 *   RW       - mode 0644, handled by devinet_conf_proc
 *   RO       - mode 0444, handled by devinet_conf_proc
 *   COMPLEX  - mode 0644, caller supplied handler
 *   FLUSHING - mode 0644, handled by ipv4_doint_and_flush
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= &ipv4_devconf.data[ \
					IPV4_DEVCONF_ ## attr - 1], \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
1584
1585 static struct devinet_sysctl_table {
1586 struct ctl_table_header *sysctl_header;
1587 struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
1588 char *dev_name;
1589 } devinet_sysctl = {
1590 .devinet_vars = {
1591 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1592 devinet_sysctl_forward),
1593 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1594
1595 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1596 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1597 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1598 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1599 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1600 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1601 "accept_source_route"),
1602 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
1603 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
1604 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1605 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1606 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1607 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1608 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1609 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1610 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1611 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1612 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1613 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1614 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
1615
1616 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1617 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1618 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1619 "force_igmp_version"),
1620 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1621 "promote_secondaries"),
1622 },
1623 };
1624
1625 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1626 struct ipv4_devconf *p)
1627 {
1628 int i;
1629 struct devinet_sysctl_table *t;
1630
1631 #define DEVINET_CTL_PATH_DEV 3
1632
1633 struct ctl_path devinet_ctl_path[] = {
1634 { .procname = "net", },
1635 { .procname = "ipv4", },
1636 { .procname = "conf", },
1637 { /* to be set */ },
1638 { },
1639 };
1640
1641 t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1642 if (!t)
1643 goto out;
1644
1645 for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1646 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1647 t->devinet_vars[i].extra1 = p;
1648 t->devinet_vars[i].extra2 = net;
1649 }
1650
1651 /*
1652 * Make a copy of dev_name, because '.procname' is regarded as const
1653 * by sysctl and we wouldn't want anyone to change it under our feet
1654 * (see SIOCSIFNAME).
1655 */
1656 t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1657 if (!t->dev_name)
1658 goto free;
1659
1660 devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1661
1662 t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1663 t->devinet_vars);
1664 if (!t->sysctl_header)
1665 goto free_procname;
1666
1667 p->sysctl = t;
1668 return 0;
1669
1670 free_procname:
1671 kfree(t->dev_name);
1672 free:
1673 kfree(t);
1674 out:
1675 return -ENOBUFS;
1676 }
1677
1678 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1679 {
1680 struct devinet_sysctl_table *t = cnf->sysctl;
1681
1682 if (t == NULL)
1683 return;
1684
1685 cnf->sysctl = NULL;
1686 unregister_net_sysctl_table(t->sysctl_header);
1687 kfree(t->dev_name);
1688 kfree(t);
1689 }
1690
1691 static void devinet_sysctl_register(struct in_device *idev)
1692 {
1693 neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1694 __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1695 &idev->cnf);
1696 }
1697
1698 static void devinet_sysctl_unregister(struct in_device *idev)
1699 {
1700 __devinet_sysctl_unregister(&idev->cnf);
1701 neigh_sysctl_unregister(idev->arp_parms);
1702 }
1703
1704 static struct ctl_table ctl_forward_entry[] = {
1705 {
1706 .procname = "ip_forward",
1707 .data = &ipv4_devconf.data[
1708 IPV4_DEVCONF_FORWARDING - 1],
1709 .maxlen = sizeof(int),
1710 .mode = 0644,
1711 .proc_handler = devinet_sysctl_forward,
1712 .extra1 = &ipv4_devconf,
1713 .extra2 = &init_net,
1714 },
1715 { },
1716 };
1717
1718 static __net_initdata struct ctl_path net_ipv4_path[] = {
1719 { .procname = "net", },
1720 { .procname = "ipv4", },
1721 { },
1722 };
1723 #endif
1724
1725 static __net_init int devinet_init_net(struct net *net)
1726 {
1727 int err;
1728 struct ipv4_devconf *all, *dflt;
1729 #ifdef CONFIG_SYSCTL
1730 struct ctl_table *tbl = ctl_forward_entry;
1731 struct ctl_table_header *forw_hdr;
1732 #endif
1733
1734 err = -ENOMEM;
1735 all = &ipv4_devconf;
1736 dflt = &ipv4_devconf_dflt;
1737
1738 if (!net_eq(net, &init_net)) {
1739 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1740 if (all == NULL)
1741 goto err_alloc_all;
1742
1743 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1744 if (dflt == NULL)
1745 goto err_alloc_dflt;
1746
1747 #ifdef CONFIG_SYSCTL
1748 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1749 if (tbl == NULL)
1750 goto err_alloc_ctl;
1751
1752 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
1753 tbl[0].extra1 = all;
1754 tbl[0].extra2 = net;
1755 #endif
1756 }
1757
1758 #ifdef CONFIG_SYSCTL
1759 err = __devinet_sysctl_register(net, "all", all);
1760 if (err < 0)
1761 goto err_reg_all;
1762
1763 err = __devinet_sysctl_register(net, "default", dflt);
1764 if (err < 0)
1765 goto err_reg_dflt;
1766
1767 err = -ENOMEM;
1768 forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1769 if (forw_hdr == NULL)
1770 goto err_reg_ctl;
1771 net->ipv4.forw_hdr = forw_hdr;
1772 #endif
1773
1774 net->ipv4.devconf_all = all;
1775 net->ipv4.devconf_dflt = dflt;
1776 return 0;
1777
1778 #ifdef CONFIG_SYSCTL
1779 err_reg_ctl:
1780 __devinet_sysctl_unregister(dflt);
1781 err_reg_dflt:
1782 __devinet_sysctl_unregister(all);
1783 err_reg_all:
1784 if (tbl != ctl_forward_entry)
1785 kfree(tbl);
1786 err_alloc_ctl:
1787 #endif
1788 if (dflt != &ipv4_devconf_dflt)
1789 kfree(dflt);
1790 err_alloc_dflt:
1791 if (all != &ipv4_devconf)
1792 kfree(all);
1793 err_alloc_all:
1794 return err;
1795 }
1796
1797 static __net_exit void devinet_exit_net(struct net *net)
1798 {
1799 #ifdef CONFIG_SYSCTL
1800 struct ctl_table *tbl;
1801
1802 tbl = net->ipv4.forw_hdr->ctl_table_arg;
1803 unregister_net_sysctl_table(net->ipv4.forw_hdr);
1804 __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1805 __devinet_sysctl_unregister(net->ipv4.devconf_all);
1806 kfree(tbl);
1807 #endif
1808 kfree(net->ipv4.devconf_dflt);
1809 kfree(net->ipv4.devconf_all);
1810 }
1811
1812 static __net_initdata struct pernet_operations devinet_ops = {
1813 .init = devinet_init_net,
1814 .exit = devinet_exit_net,
1815 };
1816
1817 static struct rtnl_af_ops inet_af_ops = {
1818 .family = AF_INET,
1819 .fill_link_af = inet_fill_link_af,
1820 .get_link_af_size = inet_get_link_af_size,
1821 .validate_link_af = inet_validate_link_af,
1822 .set_link_af = inet_set_link_af,
1823 };
1824
1825 void __init devinet_init(void)
1826 {
1827 int i;
1828
1829 for (i = 0; i < IN4_ADDR_HSIZE; i++)
1830 INIT_HLIST_HEAD(&inet_addr_lst[i]);
1831
1832 register_pernet_subsys(&devinet_ops);
1833
1834 register_gifconf(PF_INET, inet_gifconf);
1835 register_netdevice_notifier(&ip_netdev_notifier);
1836
1837 rtnl_af_register(&inet_af_ops);
1838
1839 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
1840 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
1841 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
1842 }
1843
This page took 0.065284 seconds and 4 git commands to generate.