Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux...
[deliverable/linux.git] / net / ipv4 / devinet.c
1 /*
2 * NET3 IP device support routines.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Derived from the IP parts of dev.c 1.0.19
10 * Authors: Ross Biro
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
13 *
14 * Additional Authors:
15 * Alan Cox, <gw4pts@gw4pts.ampr.org>
16 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17 *
18 * Changes:
19 * Alexey Kuznetsov: pa_* fields are replaced with ifaddr
20 * lists.
21 * Cyrus Durgin: updated for kmod
22 * Matthias Andree: in devinet_ioctl, compare label and
23 * address (4.4BSD alias style support),
24 * fall back to comparing just the label
25 * if no match found.
26 */
27
28
29 #include <asm/uaccess.h>
30 #include <asm/system.h>
31 #include <linux/bitops.h>
32 #include <linux/capability.h>
33 #include <linux/module.h>
34 #include <linux/types.h>
35 #include <linux/kernel.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #include <linux/hash.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66
67 static struct ipv4_devconf ipv4_devconf = {
68 .data = {
69 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
70 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
71 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
72 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
73 },
74 };
75
76 static struct ipv4_devconf ipv4_devconf_dflt = {
77 .data = {
78 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
79 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
80 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
81 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
82 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
83 },
84 };
85
/*
 * Read devconf attribute @attr from the default configuration hanging
 * off network namespace @net.  @net is parenthesised so that the macro
 * also expands correctly when the argument is an expression rather than
 * a plain identifier.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
88
89 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
90 [IFA_LOCAL] = { .type = NLA_U32 },
91 [IFA_ADDRESS] = { .type = NLA_U32 },
92 [IFA_BROADCAST] = { .type = NLA_U32 },
93 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
94 };
95
96 /* inet_addr_hash's shifting is dependent upon this IN4_ADDR_HSIZE
97 * value. So if you change this define, make appropriate changes to
98 * inet_addr_hash as well.
99 */
100 #define IN4_ADDR_HSIZE 256
101 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
102 static DEFINE_SPINLOCK(inet_addr_hash_lock);
103
104 static inline unsigned int inet_addr_hash(struct net *net, __be32 addr)
105 {
106 u32 val = (__force u32) addr ^ hash_ptr(net, 8);
107
108 return ((val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24)) &
109 (IN4_ADDR_HSIZE - 1));
110 }
111
112 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
113 {
114 unsigned int hash = inet_addr_hash(net, ifa->ifa_local);
115
116 spin_lock(&inet_addr_hash_lock);
117 hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
118 spin_unlock(&inet_addr_hash_lock);
119 }
120
121 static void inet_hash_remove(struct in_ifaddr *ifa)
122 {
123 spin_lock(&inet_addr_hash_lock);
124 hlist_del_init_rcu(&ifa->hash);
125 spin_unlock(&inet_addr_hash_lock);
126 }
127
128 /**
129 * __ip_dev_find - find the first device with a given source address.
130 * @net: the net namespace
131 * @addr: the source address
132 * @devref: if true, take a reference on the found device
133 *
134 * If a caller uses devref=false, it should be protected by RCU, or RTNL
135 */
136 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
137 {
138 unsigned int hash = inet_addr_hash(net, addr);
139 struct net_device *result = NULL;
140 struct in_ifaddr *ifa;
141 struct hlist_node *node;
142
143 rcu_read_lock();
144 hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) {
145 struct net_device *dev = ifa->ifa_dev->dev;
146
147 if (!net_eq(dev_net(dev), net))
148 continue;
149 if (ifa->ifa_local == addr) {
150 result = dev;
151 break;
152 }
153 }
154 if (result && devref)
155 dev_hold(result);
156 rcu_read_unlock();
157 return result;
158 }
159 EXPORT_SYMBOL(__ip_dev_find);
160
161 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
162
163 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
164 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
165 int destroy);
166 #ifdef CONFIG_SYSCTL
167 static void devinet_sysctl_register(struct in_device *idev);
168 static void devinet_sysctl_unregister(struct in_device *idev);
169 #else
170 static inline void devinet_sysctl_register(struct in_device *idev)
171 {
172 }
173 static inline void devinet_sysctl_unregister(struct in_device *idev)
174 {
175 }
176 #endif
177
178 /* Locks all the inet devices. */
179
180 static struct in_ifaddr *inet_alloc_ifa(void)
181 {
182 return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
183 }
184
185 static void inet_rcu_free_ifa(struct rcu_head *head)
186 {
187 struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
188 if (ifa->ifa_dev)
189 in_dev_put(ifa->ifa_dev);
190 kfree(ifa);
191 }
192
193 static inline void inet_free_ifa(struct in_ifaddr *ifa)
194 {
195 call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
196 }
197
198 void in_dev_finish_destroy(struct in_device *idev)
199 {
200 struct net_device *dev = idev->dev;
201
202 WARN_ON(idev->ifa_list);
203 WARN_ON(idev->mc_list);
204 #ifdef NET_REFCNT_DEBUG
205 printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
206 idev, dev ? dev->name : "NIL");
207 #endif
208 dev_put(dev);
209 if (!idev->dead)
210 pr_err("Freeing alive in_device %p\n", idev);
211 else
212 kfree(idev);
213 }
214 EXPORT_SYMBOL(in_dev_finish_destroy);
215
216 static struct in_device *inetdev_init(struct net_device *dev)
217 {
218 struct in_device *in_dev;
219
220 ASSERT_RTNL();
221
222 in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
223 if (!in_dev)
224 goto out;
225 memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
226 sizeof(in_dev->cnf));
227 in_dev->cnf.sysctl = NULL;
228 in_dev->dev = dev;
229 in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
230 if (!in_dev->arp_parms)
231 goto out_kfree;
232 if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
233 dev_disable_lro(dev);
234 /* Reference in_dev->dev */
235 dev_hold(dev);
236 /* Account for reference dev->ip_ptr (below) */
237 in_dev_hold(in_dev);
238
239 devinet_sysctl_register(in_dev);
240 ip_mc_init_dev(in_dev);
241 if (dev->flags & IFF_UP)
242 ip_mc_up(in_dev);
243
244 /* we can receive as soon as ip_ptr is set -- do this last */
245 rcu_assign_pointer(dev->ip_ptr, in_dev);
246 out:
247 return in_dev;
248 out_kfree:
249 kfree(in_dev);
250 in_dev = NULL;
251 goto out;
252 }
253
254 static void in_dev_rcu_put(struct rcu_head *head)
255 {
256 struct in_device *idev = container_of(head, struct in_device, rcu_head);
257 in_dev_put(idev);
258 }
259
260 static void inetdev_destroy(struct in_device *in_dev)
261 {
262 struct in_ifaddr *ifa;
263 struct net_device *dev;
264
265 ASSERT_RTNL();
266
267 dev = in_dev->dev;
268
269 in_dev->dead = 1;
270
271 ip_mc_destroy_dev(in_dev);
272
273 while ((ifa = in_dev->ifa_list) != NULL) {
274 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
275 inet_free_ifa(ifa);
276 }
277
278 rcu_assign_pointer(dev->ip_ptr, NULL);
279
280 devinet_sysctl_unregister(in_dev);
281 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
282 arp_ifdown(dev);
283
284 call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
285 }
286
287 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
288 {
289 rcu_read_lock();
290 for_primary_ifa(in_dev) {
291 if (inet_ifa_match(a, ifa)) {
292 if (!b || inet_ifa_match(b, ifa)) {
293 rcu_read_unlock();
294 return 1;
295 }
296 }
297 } endfor_ifa(in_dev);
298 rcu_read_unlock();
299 return 0;
300 }
301
302 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
303 int destroy, struct nlmsghdr *nlh, u32 pid)
304 {
305 struct in_ifaddr *promote = NULL;
306 struct in_ifaddr *ifa, *ifa1 = *ifap;
307 struct in_ifaddr *last_prim = in_dev->ifa_list;
308 struct in_ifaddr *prev_prom = NULL;
309 int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
310
311 ASSERT_RTNL();
312
313 /* 1. Deleting primary ifaddr forces deletion all secondaries
314 * unless alias promotion is set
315 **/
316
317 if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
318 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
319
320 while ((ifa = *ifap1) != NULL) {
321 if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
322 ifa1->ifa_scope <= ifa->ifa_scope)
323 last_prim = ifa;
324
325 if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
326 ifa1->ifa_mask != ifa->ifa_mask ||
327 !inet_ifa_match(ifa1->ifa_address, ifa)) {
328 ifap1 = &ifa->ifa_next;
329 prev_prom = ifa;
330 continue;
331 }
332
333 if (!do_promote) {
334 inet_hash_remove(ifa);
335 *ifap1 = ifa->ifa_next;
336
337 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
338 blocking_notifier_call_chain(&inetaddr_chain,
339 NETDEV_DOWN, ifa);
340 inet_free_ifa(ifa);
341 } else {
342 promote = ifa;
343 break;
344 }
345 }
346 }
347
348 /* 2. Unlink it */
349
350 *ifap = ifa1->ifa_next;
351 inet_hash_remove(ifa1);
352
353 /* 3. Announce address deletion */
354
355 /* Send message first, then call notifier.
356 At first sight, FIB update triggered by notifier
357 will refer to already deleted ifaddr, that could confuse
358 netlink listeners. It is not true: look, gated sees
359 that route deleted and if it still thinks that ifaddr
360 is valid, it will try to restore deleted routes... Grr.
361 So that, this order is correct.
362 */
363 rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
364 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
365
366 if (promote) {
367
368 if (prev_prom) {
369 prev_prom->ifa_next = promote->ifa_next;
370 promote->ifa_next = last_prim->ifa_next;
371 last_prim->ifa_next = promote;
372 }
373
374 promote->ifa_flags &= ~IFA_F_SECONDARY;
375 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
376 blocking_notifier_call_chain(&inetaddr_chain,
377 NETDEV_UP, promote);
378 for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
379 if (ifa1->ifa_mask != ifa->ifa_mask ||
380 !inet_ifa_match(ifa1->ifa_address, ifa))
381 continue;
382 fib_add_ifaddr(ifa);
383 }
384
385 }
386 if (destroy)
387 inet_free_ifa(ifa1);
388 }
389
390 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
391 int destroy)
392 {
393 __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
394 }
395
396 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
397 u32 pid)
398 {
399 struct in_device *in_dev = ifa->ifa_dev;
400 struct in_ifaddr *ifa1, **ifap, **last_primary;
401
402 ASSERT_RTNL();
403
404 if (!ifa->ifa_local) {
405 inet_free_ifa(ifa);
406 return 0;
407 }
408
409 ifa->ifa_flags &= ~IFA_F_SECONDARY;
410 last_primary = &in_dev->ifa_list;
411
412 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
413 ifap = &ifa1->ifa_next) {
414 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
415 ifa->ifa_scope <= ifa1->ifa_scope)
416 last_primary = &ifa1->ifa_next;
417 if (ifa1->ifa_mask == ifa->ifa_mask &&
418 inet_ifa_match(ifa1->ifa_address, ifa)) {
419 if (ifa1->ifa_local == ifa->ifa_local) {
420 inet_free_ifa(ifa);
421 return -EEXIST;
422 }
423 if (ifa1->ifa_scope != ifa->ifa_scope) {
424 inet_free_ifa(ifa);
425 return -EINVAL;
426 }
427 ifa->ifa_flags |= IFA_F_SECONDARY;
428 }
429 }
430
431 if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
432 net_srandom(ifa->ifa_local);
433 ifap = last_primary;
434 }
435
436 ifa->ifa_next = *ifap;
437 *ifap = ifa;
438
439 inet_hash_insert(dev_net(in_dev->dev), ifa);
440
441 /* Send message first, then call notifier.
442 Notifier will trigger FIB update, so that
443 listeners of netlink will know about new ifaddr */
444 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
445 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
446
447 return 0;
448 }
449
450 static int inet_insert_ifa(struct in_ifaddr *ifa)
451 {
452 return __inet_insert_ifa(ifa, NULL, 0);
453 }
454
455 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
456 {
457 struct in_device *in_dev = __in_dev_get_rtnl(dev);
458
459 ASSERT_RTNL();
460
461 if (!in_dev) {
462 inet_free_ifa(ifa);
463 return -ENOBUFS;
464 }
465 ipv4_devconf_setall(in_dev);
466 if (ifa->ifa_dev != in_dev) {
467 WARN_ON(ifa->ifa_dev);
468 in_dev_hold(in_dev);
469 ifa->ifa_dev = in_dev;
470 }
471 if (ipv4_is_loopback(ifa->ifa_local))
472 ifa->ifa_scope = RT_SCOPE_HOST;
473 return inet_insert_ifa(ifa);
474 }
475
476 /* Caller must hold RCU or RTNL :
477 * We dont take a reference on found in_device
478 */
479 struct in_device *inetdev_by_index(struct net *net, int ifindex)
480 {
481 struct net_device *dev;
482 struct in_device *in_dev = NULL;
483
484 rcu_read_lock();
485 dev = dev_get_by_index_rcu(net, ifindex);
486 if (dev)
487 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
488 rcu_read_unlock();
489 return in_dev;
490 }
491 EXPORT_SYMBOL(inetdev_by_index);
492
493 /* Called only from RTNL semaphored context. No locks. */
494
495 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
496 __be32 mask)
497 {
498 ASSERT_RTNL();
499
500 for_primary_ifa(in_dev) {
501 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
502 return ifa;
503 } endfor_ifa(in_dev);
504 return NULL;
505 }
506
507 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
508 {
509 struct net *net = sock_net(skb->sk);
510 struct nlattr *tb[IFA_MAX+1];
511 struct in_device *in_dev;
512 struct ifaddrmsg *ifm;
513 struct in_ifaddr *ifa, **ifap;
514 int err = -EINVAL;
515
516 ASSERT_RTNL();
517
518 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
519 if (err < 0)
520 goto errout;
521
522 ifm = nlmsg_data(nlh);
523 in_dev = inetdev_by_index(net, ifm->ifa_index);
524 if (in_dev == NULL) {
525 err = -ENODEV;
526 goto errout;
527 }
528
529 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
530 ifap = &ifa->ifa_next) {
531 if (tb[IFA_LOCAL] &&
532 ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
533 continue;
534
535 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
536 continue;
537
538 if (tb[IFA_ADDRESS] &&
539 (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
540 !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
541 continue;
542
543 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
544 return 0;
545 }
546
547 err = -EADDRNOTAVAIL;
548 errout:
549 return err;
550 }
551
552 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
553 {
554 struct nlattr *tb[IFA_MAX+1];
555 struct in_ifaddr *ifa;
556 struct ifaddrmsg *ifm;
557 struct net_device *dev;
558 struct in_device *in_dev;
559 int err;
560
561 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
562 if (err < 0)
563 goto errout;
564
565 ifm = nlmsg_data(nlh);
566 err = -EINVAL;
567 if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
568 goto errout;
569
570 dev = __dev_get_by_index(net, ifm->ifa_index);
571 err = -ENODEV;
572 if (dev == NULL)
573 goto errout;
574
575 in_dev = __in_dev_get_rtnl(dev);
576 err = -ENOBUFS;
577 if (in_dev == NULL)
578 goto errout;
579
580 ifa = inet_alloc_ifa();
581 if (ifa == NULL)
582 /*
583 * A potential indev allocation can be left alive, it stays
584 * assigned to its device and is destroy with it.
585 */
586 goto errout;
587
588 ipv4_devconf_setall(in_dev);
589 in_dev_hold(in_dev);
590
591 if (tb[IFA_ADDRESS] == NULL)
592 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
593
594 INIT_HLIST_NODE(&ifa->hash);
595 ifa->ifa_prefixlen = ifm->ifa_prefixlen;
596 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
597 ifa->ifa_flags = ifm->ifa_flags;
598 ifa->ifa_scope = ifm->ifa_scope;
599 ifa->ifa_dev = in_dev;
600
601 ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
602 ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
603
604 if (tb[IFA_BROADCAST])
605 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
606
607 if (tb[IFA_LABEL])
608 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
609 else
610 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
611
612 return ifa;
613
614 errout:
615 return ERR_PTR(err);
616 }
617
618 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
619 {
620 struct net *net = sock_net(skb->sk);
621 struct in_ifaddr *ifa;
622
623 ASSERT_RTNL();
624
625 ifa = rtm_to_ifaddr(net, nlh);
626 if (IS_ERR(ifa))
627 return PTR_ERR(ifa);
628
629 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
630 }
631
632 /*
633 * Determine a default network mask, based on the IP address.
634 */
635
636 static inline int inet_abc_len(__be32 addr)
637 {
638 int rc = -1; /* Something else, probably a multicast. */
639
640 if (ipv4_is_zeronet(addr))
641 rc = 0;
642 else {
643 __u32 haddr = ntohl(addr);
644
645 if (IN_CLASSA(haddr))
646 rc = 8;
647 else if (IN_CLASSB(haddr))
648 rc = 16;
649 else if (IN_CLASSC(haddr))
650 rc = 24;
651 }
652
653 return rc;
654 }
655
656
657 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
658 {
659 struct ifreq ifr;
660 struct sockaddr_in sin_orig;
661 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
662 struct in_device *in_dev;
663 struct in_ifaddr **ifap = NULL;
664 struct in_ifaddr *ifa = NULL;
665 struct net_device *dev;
666 char *colon;
667 int ret = -EFAULT;
668 int tryaddrmatch = 0;
669
670 /*
671 * Fetch the caller's info block into kernel space
672 */
673
674 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
675 goto out;
676 ifr.ifr_name[IFNAMSIZ - 1] = 0;
677
678 /* save original address for comparison */
679 memcpy(&sin_orig, sin, sizeof(*sin));
680
681 colon = strchr(ifr.ifr_name, ':');
682 if (colon)
683 *colon = 0;
684
685 dev_load(net, ifr.ifr_name);
686
687 switch (cmd) {
688 case SIOCGIFADDR: /* Get interface address */
689 case SIOCGIFBRDADDR: /* Get the broadcast address */
690 case SIOCGIFDSTADDR: /* Get the destination address */
691 case SIOCGIFNETMASK: /* Get the netmask for the interface */
692 /* Note that these ioctls will not sleep,
693 so that we do not impose a lock.
694 One day we will be forced to put shlock here (I mean SMP)
695 */
696 tryaddrmatch = (sin_orig.sin_family == AF_INET);
697 memset(sin, 0, sizeof(*sin));
698 sin->sin_family = AF_INET;
699 break;
700
701 case SIOCSIFFLAGS:
702 ret = -EACCES;
703 if (!capable(CAP_NET_ADMIN))
704 goto out;
705 break;
706 case SIOCSIFADDR: /* Set interface address (and family) */
707 case SIOCSIFBRDADDR: /* Set the broadcast address */
708 case SIOCSIFDSTADDR: /* Set the destination address */
709 case SIOCSIFNETMASK: /* Set the netmask for the interface */
710 ret = -EACCES;
711 if (!capable(CAP_NET_ADMIN))
712 goto out;
713 ret = -EINVAL;
714 if (sin->sin_family != AF_INET)
715 goto out;
716 break;
717 default:
718 ret = -EINVAL;
719 goto out;
720 }
721
722 rtnl_lock();
723
724 ret = -ENODEV;
725 dev = __dev_get_by_name(net, ifr.ifr_name);
726 if (!dev)
727 goto done;
728
729 if (colon)
730 *colon = ':';
731
732 in_dev = __in_dev_get_rtnl(dev);
733 if (in_dev) {
734 if (tryaddrmatch) {
735 /* Matthias Andree */
736 /* compare label and address (4.4BSD style) */
737 /* note: we only do this for a limited set of ioctls
738 and only if the original address family was AF_INET.
739 This is checked above. */
740 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
741 ifap = &ifa->ifa_next) {
742 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
743 sin_orig.sin_addr.s_addr ==
744 ifa->ifa_local) {
745 break; /* found */
746 }
747 }
748 }
749 /* we didn't get a match, maybe the application is
750 4.3BSD-style and passed in junk so we fall back to
751 comparing just the label */
752 if (!ifa) {
753 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
754 ifap = &ifa->ifa_next)
755 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
756 break;
757 }
758 }
759
760 ret = -EADDRNOTAVAIL;
761 if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
762 goto done;
763
764 switch (cmd) {
765 case SIOCGIFADDR: /* Get interface address */
766 sin->sin_addr.s_addr = ifa->ifa_local;
767 goto rarok;
768
769 case SIOCGIFBRDADDR: /* Get the broadcast address */
770 sin->sin_addr.s_addr = ifa->ifa_broadcast;
771 goto rarok;
772
773 case SIOCGIFDSTADDR: /* Get the destination address */
774 sin->sin_addr.s_addr = ifa->ifa_address;
775 goto rarok;
776
777 case SIOCGIFNETMASK: /* Get the netmask for the interface */
778 sin->sin_addr.s_addr = ifa->ifa_mask;
779 goto rarok;
780
781 case SIOCSIFFLAGS:
782 if (colon) {
783 ret = -EADDRNOTAVAIL;
784 if (!ifa)
785 break;
786 ret = 0;
787 if (!(ifr.ifr_flags & IFF_UP))
788 inet_del_ifa(in_dev, ifap, 1);
789 break;
790 }
791 ret = dev_change_flags(dev, ifr.ifr_flags);
792 break;
793
794 case SIOCSIFADDR: /* Set interface address (and family) */
795 ret = -EINVAL;
796 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
797 break;
798
799 if (!ifa) {
800 ret = -ENOBUFS;
801 ifa = inet_alloc_ifa();
802 INIT_HLIST_NODE(&ifa->hash);
803 if (!ifa)
804 break;
805 if (colon)
806 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
807 else
808 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
809 } else {
810 ret = 0;
811 if (ifa->ifa_local == sin->sin_addr.s_addr)
812 break;
813 inet_del_ifa(in_dev, ifap, 0);
814 ifa->ifa_broadcast = 0;
815 ifa->ifa_scope = 0;
816 }
817
818 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
819
820 if (!(dev->flags & IFF_POINTOPOINT)) {
821 ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
822 ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
823 if ((dev->flags & IFF_BROADCAST) &&
824 ifa->ifa_prefixlen < 31)
825 ifa->ifa_broadcast = ifa->ifa_address |
826 ~ifa->ifa_mask;
827 } else {
828 ifa->ifa_prefixlen = 32;
829 ifa->ifa_mask = inet_make_mask(32);
830 }
831 ret = inet_set_ifa(dev, ifa);
832 break;
833
834 case SIOCSIFBRDADDR: /* Set the broadcast address */
835 ret = 0;
836 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
837 inet_del_ifa(in_dev, ifap, 0);
838 ifa->ifa_broadcast = sin->sin_addr.s_addr;
839 inet_insert_ifa(ifa);
840 }
841 break;
842
843 case SIOCSIFDSTADDR: /* Set the destination address */
844 ret = 0;
845 if (ifa->ifa_address == sin->sin_addr.s_addr)
846 break;
847 ret = -EINVAL;
848 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
849 break;
850 ret = 0;
851 inet_del_ifa(in_dev, ifap, 0);
852 ifa->ifa_address = sin->sin_addr.s_addr;
853 inet_insert_ifa(ifa);
854 break;
855
856 case SIOCSIFNETMASK: /* Set the netmask for the interface */
857
858 /*
859 * The mask we set must be legal.
860 */
861 ret = -EINVAL;
862 if (bad_mask(sin->sin_addr.s_addr, 0))
863 break;
864 ret = 0;
865 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
866 __be32 old_mask = ifa->ifa_mask;
867 inet_del_ifa(in_dev, ifap, 0);
868 ifa->ifa_mask = sin->sin_addr.s_addr;
869 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
870
871 /* See if current broadcast address matches
872 * with current netmask, then recalculate
873 * the broadcast address. Otherwise it's a
874 * funny address, so don't touch it since
875 * the user seems to know what (s)he's doing...
876 */
877 if ((dev->flags & IFF_BROADCAST) &&
878 (ifa->ifa_prefixlen < 31) &&
879 (ifa->ifa_broadcast ==
880 (ifa->ifa_local|~old_mask))) {
881 ifa->ifa_broadcast = (ifa->ifa_local |
882 ~sin->sin_addr.s_addr);
883 }
884 inet_insert_ifa(ifa);
885 }
886 break;
887 }
888 done:
889 rtnl_unlock();
890 out:
891 return ret;
892 rarok:
893 rtnl_unlock();
894 ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
895 goto out;
896 }
897
898 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
899 {
900 struct in_device *in_dev = __in_dev_get_rtnl(dev);
901 struct in_ifaddr *ifa;
902 struct ifreq ifr;
903 int done = 0;
904
905 if (!in_dev)
906 goto out;
907
908 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
909 if (!buf) {
910 done += sizeof(ifr);
911 continue;
912 }
913 if (len < (int) sizeof(ifr))
914 break;
915 memset(&ifr, 0, sizeof(struct ifreq));
916 if (ifa->ifa_label)
917 strcpy(ifr.ifr_name, ifa->ifa_label);
918 else
919 strcpy(ifr.ifr_name, dev->name);
920
921 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
922 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
923 ifa->ifa_local;
924
925 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
926 done = -EFAULT;
927 break;
928 }
929 buf += sizeof(struct ifreq);
930 len -= sizeof(struct ifreq);
931 done += sizeof(struct ifreq);
932 }
933 out:
934 return done;
935 }
936
937 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
938 {
939 __be32 addr = 0;
940 struct in_device *in_dev;
941 struct net *net = dev_net(dev);
942
943 rcu_read_lock();
944 in_dev = __in_dev_get_rcu(dev);
945 if (!in_dev)
946 goto no_in_dev;
947
948 for_primary_ifa(in_dev) {
949 if (ifa->ifa_scope > scope)
950 continue;
951 if (!dst || inet_ifa_match(dst, ifa)) {
952 addr = ifa->ifa_local;
953 break;
954 }
955 if (!addr)
956 addr = ifa->ifa_local;
957 } endfor_ifa(in_dev);
958
959 if (addr)
960 goto out_unlock;
961 no_in_dev:
962
963 /* Not loopback addresses on loopback should be preferred
964 in this case. It is importnat that lo is the first interface
965 in dev_base list.
966 */
967 for_each_netdev_rcu(net, dev) {
968 in_dev = __in_dev_get_rcu(dev);
969 if (!in_dev)
970 continue;
971
972 for_primary_ifa(in_dev) {
973 if (ifa->ifa_scope != RT_SCOPE_LINK &&
974 ifa->ifa_scope <= scope) {
975 addr = ifa->ifa_local;
976 goto out_unlock;
977 }
978 } endfor_ifa(in_dev);
979 }
980 out_unlock:
981 rcu_read_unlock();
982 return addr;
983 }
984 EXPORT_SYMBOL(inet_select_addr);
985
986 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
987 __be32 local, int scope)
988 {
989 int same = 0;
990 __be32 addr = 0;
991
992 for_ifa(in_dev) {
993 if (!addr &&
994 (local == ifa->ifa_local || !local) &&
995 ifa->ifa_scope <= scope) {
996 addr = ifa->ifa_local;
997 if (same)
998 break;
999 }
1000 if (!same) {
1001 same = (!local || inet_ifa_match(local, ifa)) &&
1002 (!dst || inet_ifa_match(dst, ifa));
1003 if (same && addr) {
1004 if (local || !dst)
1005 break;
1006 /* Is the selected addr into dst subnet? */
1007 if (inet_ifa_match(addr, ifa))
1008 break;
1009 /* No, then can we use new local src? */
1010 if (ifa->ifa_scope <= scope) {
1011 addr = ifa->ifa_local;
1012 break;
1013 }
1014 /* search for large dst subnet for addr */
1015 same = 0;
1016 }
1017 }
1018 } endfor_ifa(in_dev);
1019
1020 return same ? addr : 0;
1021 }
1022
1023 /*
1024 * Confirm that local IP address exists using wildcards:
1025 * - in_dev: only on this interface, 0=any interface
1026 * - dst: only in the same subnet as dst, 0=any dst
1027 * - local: address, 0=autoselect the local address
1028 * - scope: maximum allowed scope value for the local address
1029 */
1030 __be32 inet_confirm_addr(struct in_device *in_dev,
1031 __be32 dst, __be32 local, int scope)
1032 {
1033 __be32 addr = 0;
1034 struct net_device *dev;
1035 struct net *net;
1036
1037 if (scope != RT_SCOPE_LINK)
1038 return confirm_addr_indev(in_dev, dst, local, scope);
1039
1040 net = dev_net(in_dev->dev);
1041 rcu_read_lock();
1042 for_each_netdev_rcu(net, dev) {
1043 in_dev = __in_dev_get_rcu(dev);
1044 if (in_dev) {
1045 addr = confirm_addr_indev(in_dev, dst, local, scope);
1046 if (addr)
1047 break;
1048 }
1049 }
1050 rcu_read_unlock();
1051
1052 return addr;
1053 }
1054
1055 /*
1056 * Device notifier
1057 */
1058
1059 int register_inetaddr_notifier(struct notifier_block *nb)
1060 {
1061 return blocking_notifier_chain_register(&inetaddr_chain, nb);
1062 }
1063 EXPORT_SYMBOL(register_inetaddr_notifier);
1064
1065 int unregister_inetaddr_notifier(struct notifier_block *nb)
1066 {
1067 return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1068 }
1069 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1070
1071 /* Rename ifa_labels for a device name change. Make some effort to preserve
1072 * existing alias numbering and to create unique labels if possible.
1073 */
1074 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1075 {
1076 struct in_ifaddr *ifa;
1077 int named = 0;
1078
1079 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1080 char old[IFNAMSIZ], *dot;
1081
1082 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1083 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1084 if (named++ == 0)
1085 goto skip;
1086 dot = strchr(old, ':');
1087 if (dot == NULL) {
1088 sprintf(old, ":%d", named);
1089 dot = old;
1090 }
1091 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1092 strcat(ifa->ifa_label, dot);
1093 else
1094 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1095 skip:
1096 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1097 }
1098 }
1099
/* True when @mtu is at least 68, the smallest MTU this file accepts. */
static inline bool inetdev_valid_mtu(unsigned mtu)
{
	return !(mtu < 68);
}
1104
1105 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1106 struct in_device *in_dev)
1107
1108 {
1109 struct in_ifaddr *ifa = in_dev->ifa_list;
1110
1111 if (!ifa)
1112 return;
1113
1114 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1115 ifa->ifa_local, dev,
1116 ifa->ifa_local, NULL,
1117 dev->dev_addr, NULL);
1118 }
1119
1120 /* Called only under RTNL semaphore */
1121
1122 static int inetdev_event(struct notifier_block *this, unsigned long event,
1123 void *ptr)
1124 {
1125 struct net_device *dev = ptr;
1126 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1127
1128 ASSERT_RTNL();
1129
1130 if (!in_dev) {
1131 if (event == NETDEV_REGISTER) {
1132 in_dev = inetdev_init(dev);
1133 if (!in_dev)
1134 return notifier_from_errno(-ENOMEM);
1135 if (dev->flags & IFF_LOOPBACK) {
1136 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1137 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1138 }
1139 } else if (event == NETDEV_CHANGEMTU) {
1140 /* Re-enabling IP */
1141 if (inetdev_valid_mtu(dev->mtu))
1142 in_dev = inetdev_init(dev);
1143 }
1144 goto out;
1145 }
1146
1147 switch (event) {
1148 case NETDEV_REGISTER:
1149 printk(KERN_DEBUG "inetdev_event: bug\n");
1150 rcu_assign_pointer(dev->ip_ptr, NULL);
1151 break;
1152 case NETDEV_UP:
1153 if (!inetdev_valid_mtu(dev->mtu))
1154 break;
1155 if (dev->flags & IFF_LOOPBACK) {
1156 struct in_ifaddr *ifa = inet_alloc_ifa();
1157
1158 if (ifa) {
1159 INIT_HLIST_NODE(&ifa->hash);
1160 ifa->ifa_local =
1161 ifa->ifa_address = htonl(INADDR_LOOPBACK);
1162 ifa->ifa_prefixlen = 8;
1163 ifa->ifa_mask = inet_make_mask(8);
1164 in_dev_hold(in_dev);
1165 ifa->ifa_dev = in_dev;
1166 ifa->ifa_scope = RT_SCOPE_HOST;
1167 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1168 inet_insert_ifa(ifa);
1169 }
1170 }
1171 ip_mc_up(in_dev);
1172 /* fall through */
1173 case NETDEV_CHANGEADDR:
1174 if (!IN_DEV_ARP_NOTIFY(in_dev))
1175 break;
1176 /* fall through */
1177 case NETDEV_NOTIFY_PEERS:
1178 /* Send gratuitous ARP to notify of link change */
1179 inetdev_send_gratuitous_arp(dev, in_dev);
1180 break;
1181 case NETDEV_DOWN:
1182 ip_mc_down(in_dev);
1183 break;
1184 case NETDEV_PRE_TYPE_CHANGE:
1185 ip_mc_unmap(in_dev);
1186 break;
1187 case NETDEV_POST_TYPE_CHANGE:
1188 ip_mc_remap(in_dev);
1189 break;
1190 case NETDEV_CHANGEMTU:
1191 if (inetdev_valid_mtu(dev->mtu))
1192 break;
1193 /* disable IP when MTU is not enough */
1194 case NETDEV_UNREGISTER:
1195 inetdev_destroy(in_dev);
1196 break;
1197 case NETDEV_CHANGENAME:
1198 /* Do not notify about label change, this event is
1199 * not interesting to applications using netlink.
1200 */
1201 inetdev_changename(dev, in_dev);
1202
1203 devinet_sysctl_unregister(in_dev);
1204 devinet_sysctl_register(in_dev);
1205 break;
1206 }
1207 out:
1208 return NOTIFY_DONE;
1209 }
1210
1211 static struct notifier_block ip_netdev_notifier = {
1212 .notifier_call = inetdev_event,
1213 };
1214
1215 static inline size_t inet_nlmsg_size(void)
1216 {
1217 return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1218 + nla_total_size(4) /* IFA_ADDRESS */
1219 + nla_total_size(4) /* IFA_LOCAL */
1220 + nla_total_size(4) /* IFA_BROADCAST */
1221 + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1222 }
1223
1224 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1225 u32 pid, u32 seq, int event, unsigned int flags)
1226 {
1227 struct ifaddrmsg *ifm;
1228 struct nlmsghdr *nlh;
1229
1230 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1231 if (nlh == NULL)
1232 return -EMSGSIZE;
1233
1234 ifm = nlmsg_data(nlh);
1235 ifm->ifa_family = AF_INET;
1236 ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1237 ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1238 ifm->ifa_scope = ifa->ifa_scope;
1239 ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1240
1241 if (ifa->ifa_address)
1242 NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1243
1244 if (ifa->ifa_local)
1245 NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1246
1247 if (ifa->ifa_broadcast)
1248 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1249
1250 if (ifa->ifa_label[0])
1251 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1252
1253 return nlmsg_end(skb, nlh);
1254
1255 nla_put_failure:
1256 nlmsg_cancel(skb, nlh);
1257 return -EMSGSIZE;
1258 }
1259
1260 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1261 {
1262 struct net *net = sock_net(skb->sk);
1263 int h, s_h;
1264 int idx, s_idx;
1265 int ip_idx, s_ip_idx;
1266 struct net_device *dev;
1267 struct in_device *in_dev;
1268 struct in_ifaddr *ifa;
1269 struct hlist_head *head;
1270 struct hlist_node *node;
1271
1272 s_h = cb->args[0];
1273 s_idx = idx = cb->args[1];
1274 s_ip_idx = ip_idx = cb->args[2];
1275
1276 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1277 idx = 0;
1278 head = &net->dev_index_head[h];
1279 rcu_read_lock();
1280 hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
1281 if (idx < s_idx)
1282 goto cont;
1283 if (h > s_h || idx > s_idx)
1284 s_ip_idx = 0;
1285 in_dev = __in_dev_get_rcu(dev);
1286 if (!in_dev)
1287 goto cont;
1288
1289 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1290 ifa = ifa->ifa_next, ip_idx++) {
1291 if (ip_idx < s_ip_idx)
1292 continue;
1293 if (inet_fill_ifaddr(skb, ifa,
1294 NETLINK_CB(cb->skb).pid,
1295 cb->nlh->nlmsg_seq,
1296 RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1297 rcu_read_unlock();
1298 goto done;
1299 }
1300 }
1301 cont:
1302 idx++;
1303 }
1304 rcu_read_unlock();
1305 }
1306
1307 done:
1308 cb->args[0] = h;
1309 cb->args[1] = idx;
1310 cb->args[2] = ip_idx;
1311
1312 return skb->len;
1313 }
1314
1315 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1316 u32 pid)
1317 {
1318 struct sk_buff *skb;
1319 u32 seq = nlh ? nlh->nlmsg_seq : 0;
1320 int err = -ENOBUFS;
1321 struct net *net;
1322
1323 net = dev_net(ifa->ifa_dev->dev);
1324 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1325 if (skb == NULL)
1326 goto errout;
1327
1328 err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1329 if (err < 0) {
1330 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1331 WARN_ON(err == -EMSGSIZE);
1332 kfree_skb(skb);
1333 goto errout;
1334 }
1335 rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1336 return;
1337 errout:
1338 if (err < 0)
1339 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1340 }
1341
1342 static size_t inet_get_link_af_size(const struct net_device *dev)
1343 {
1344 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1345
1346 if (!in_dev)
1347 return 0;
1348
1349 return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1350 }
1351
1352 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1353 {
1354 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1355 struct nlattr *nla;
1356 int i;
1357
1358 if (!in_dev)
1359 return -ENODATA;
1360
1361 nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1362 if (nla == NULL)
1363 return -EMSGSIZE;
1364
1365 for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1366 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1367
1368 return 0;
1369 }
1370
1371 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1372 [IFLA_INET_CONF] = { .type = NLA_NESTED },
1373 };
1374
1375 static int inet_validate_link_af(const struct net_device *dev,
1376 const struct nlattr *nla)
1377 {
1378 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1379 int err, rem;
1380
1381 if (dev && !__in_dev_get_rtnl(dev))
1382 return -EAFNOSUPPORT;
1383
1384 err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1385 if (err < 0)
1386 return err;
1387
1388 if (tb[IFLA_INET_CONF]) {
1389 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1390 int cfgid = nla_type(a);
1391
1392 if (nla_len(a) < 4)
1393 return -EINVAL;
1394
1395 if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1396 return -EINVAL;
1397 }
1398 }
1399
1400 return 0;
1401 }
1402
1403 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1404 {
1405 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1406 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1407 int rem;
1408
1409 if (!in_dev)
1410 return -EAFNOSUPPORT;
1411
1412 if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1413 BUG();
1414
1415 if (tb[IFLA_INET_CONF]) {
1416 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1417 ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1418 }
1419
1420 return 0;
1421 }
1422
1423 #ifdef CONFIG_SYSCTL
1424
1425 static void devinet_copy_dflt_conf(struct net *net, int i)
1426 {
1427 struct net_device *dev;
1428
1429 rcu_read_lock();
1430 for_each_netdev_rcu(net, dev) {
1431 struct in_device *in_dev;
1432
1433 in_dev = __in_dev_get_rcu(dev);
1434 if (in_dev && !test_bit(i, in_dev->cnf.state))
1435 in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1436 }
1437 rcu_read_unlock();
1438 }
1439
1440 /* called with RTNL locked */
1441 static void inet_forward_change(struct net *net)
1442 {
1443 struct net_device *dev;
1444 int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1445
1446 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1447 IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1448
1449 for_each_netdev(net, dev) {
1450 struct in_device *in_dev;
1451 if (on)
1452 dev_disable_lro(dev);
1453 rcu_read_lock();
1454 in_dev = __in_dev_get_rcu(dev);
1455 if (in_dev)
1456 IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1457 rcu_read_unlock();
1458 }
1459 }
1460
1461 static int devinet_conf_proc(ctl_table *ctl, int write,
1462 void __user *buffer,
1463 size_t *lenp, loff_t *ppos)
1464 {
1465 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1466
1467 if (write) {
1468 struct ipv4_devconf *cnf = ctl->extra1;
1469 struct net *net = ctl->extra2;
1470 int i = (int *)ctl->data - cnf->data;
1471
1472 set_bit(i, cnf->state);
1473
1474 if (cnf == net->ipv4.devconf_dflt)
1475 devinet_copy_dflt_conf(net, i);
1476 }
1477
1478 return ret;
1479 }
1480
1481 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1482 void __user *buffer,
1483 size_t *lenp, loff_t *ppos)
1484 {
1485 int *valp = ctl->data;
1486 int val = *valp;
1487 loff_t pos = *ppos;
1488 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1489
1490 if (write && *valp != val) {
1491 struct net *net = ctl->extra2;
1492
1493 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1494 if (!rtnl_trylock()) {
1495 /* Restore the original values before restarting */
1496 *valp = val;
1497 *ppos = pos;
1498 return restart_syscall();
1499 }
1500 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1501 inet_forward_change(net);
1502 } else if (*valp) {
1503 struct ipv4_devconf *cnf = ctl->extra1;
1504 struct in_device *idev =
1505 container_of(cnf, struct in_device, cnf);
1506 dev_disable_lro(idev->dev);
1507 }
1508 rtnl_unlock();
1509 rt_cache_flush(net, 0);
1510 }
1511 }
1512
1513 return ret;
1514 }
1515
1516 static int ipv4_doint_and_flush(ctl_table *ctl, int write,
1517 void __user *buffer,
1518 size_t *lenp, loff_t *ppos)
1519 {
1520 int *valp = ctl->data;
1521 int val = *valp;
1522 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1523 struct net *net = ctl->extra2;
1524
1525 if (write && *valp != val)
1526 rt_cache_flush(net, 0);
1527
1528 return ret;
1529 }
1530
/*
 * Build one ctl_table initializer for IPV4_DEVCONF_<attr>.  The entry
 * points at slot (IPV4_DEVCONF_<attr> - 1) of the global ipv4_devconf;
 * __devinet_sysctl_register() later rebases .data and .extra1 onto the
 * devconf actually being registered.
 */
#define DEVINET_SYSCTL_ENTRY(attr, pname, perm, handler) \
	{ \
		.procname	= pname, \
		.data		= &ipv4_devconf.data[ \
					IPV4_DEVCONF_ ## attr - 1], \
		.maxlen		= sizeof(int), \
		.mode		= perm, \
		.proc_handler	= handler, \
		.extra1		= &ipv4_devconf, \
	}

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, pname) \
	DEVINET_SYSCTL_ENTRY(attr, pname, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, pname) \
	DEVINET_SYSCTL_ENTRY(attr, pname, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, pname, handler) \
	DEVINET_SYSCTL_ENTRY(attr, pname, 0644, handler)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, pname) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, pname, ipv4_doint_and_flush)
1553
1554 static struct devinet_sysctl_table {
1555 struct ctl_table_header *sysctl_header;
1556 struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
1557 char *dev_name;
1558 } devinet_sysctl = {
1559 .devinet_vars = {
1560 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1561 devinet_sysctl_forward),
1562 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1563
1564 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1565 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1566 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1567 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1568 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1569 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1570 "accept_source_route"),
1571 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
1572 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
1573 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1574 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1575 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1576 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1577 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1578 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1579 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1580 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1581 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1582 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1583 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
1584
1585 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1586 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1587 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1588 "force_igmp_version"),
1589 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1590 "promote_secondaries"),
1591 },
1592 };
1593
1594 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1595 struct ipv4_devconf *p)
1596 {
1597 int i;
1598 struct devinet_sysctl_table *t;
1599
1600 #define DEVINET_CTL_PATH_DEV 3
1601
1602 struct ctl_path devinet_ctl_path[] = {
1603 { .procname = "net", },
1604 { .procname = "ipv4", },
1605 { .procname = "conf", },
1606 { /* to be set */ },
1607 { },
1608 };
1609
1610 t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1611 if (!t)
1612 goto out;
1613
1614 for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1615 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1616 t->devinet_vars[i].extra1 = p;
1617 t->devinet_vars[i].extra2 = net;
1618 }
1619
1620 /*
1621 * Make a copy of dev_name, because '.procname' is regarded as const
1622 * by sysctl and we wouldn't want anyone to change it under our feet
1623 * (see SIOCSIFNAME).
1624 */
1625 t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1626 if (!t->dev_name)
1627 goto free;
1628
1629 devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1630
1631 t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1632 t->devinet_vars);
1633 if (!t->sysctl_header)
1634 goto free_procname;
1635
1636 p->sysctl = t;
1637 return 0;
1638
1639 free_procname:
1640 kfree(t->dev_name);
1641 free:
1642 kfree(t);
1643 out:
1644 return -ENOBUFS;
1645 }
1646
1647 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1648 {
1649 struct devinet_sysctl_table *t = cnf->sysctl;
1650
1651 if (t == NULL)
1652 return;
1653
1654 cnf->sysctl = NULL;
1655 unregister_sysctl_table(t->sysctl_header);
1656 kfree(t->dev_name);
1657 kfree(t);
1658 }
1659
1660 static void devinet_sysctl_register(struct in_device *idev)
1661 {
1662 neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1663 __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1664 &idev->cnf);
1665 }
1666
1667 static void devinet_sysctl_unregister(struct in_device *idev)
1668 {
1669 __devinet_sysctl_unregister(&idev->cnf);
1670 neigh_sysctl_unregister(idev->arp_parms);
1671 }
1672
1673 static struct ctl_table ctl_forward_entry[] = {
1674 {
1675 .procname = "ip_forward",
1676 .data = &ipv4_devconf.data[
1677 IPV4_DEVCONF_FORWARDING - 1],
1678 .maxlen = sizeof(int),
1679 .mode = 0644,
1680 .proc_handler = devinet_sysctl_forward,
1681 .extra1 = &ipv4_devconf,
1682 .extra2 = &init_net,
1683 },
1684 { },
1685 };
1686
1687 static __net_initdata struct ctl_path net_ipv4_path[] = {
1688 { .procname = "net", },
1689 { .procname = "ipv4", },
1690 { },
1691 };
1692 #endif
1693
1694 static __net_init int devinet_init_net(struct net *net)
1695 {
1696 int err;
1697 struct ipv4_devconf *all, *dflt;
1698 #ifdef CONFIG_SYSCTL
1699 struct ctl_table *tbl = ctl_forward_entry;
1700 struct ctl_table_header *forw_hdr;
1701 #endif
1702
1703 err = -ENOMEM;
1704 all = &ipv4_devconf;
1705 dflt = &ipv4_devconf_dflt;
1706
1707 if (!net_eq(net, &init_net)) {
1708 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1709 if (all == NULL)
1710 goto err_alloc_all;
1711
1712 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1713 if (dflt == NULL)
1714 goto err_alloc_dflt;
1715
1716 #ifdef CONFIG_SYSCTL
1717 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1718 if (tbl == NULL)
1719 goto err_alloc_ctl;
1720
1721 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
1722 tbl[0].extra1 = all;
1723 tbl[0].extra2 = net;
1724 #endif
1725 }
1726
1727 #ifdef CONFIG_SYSCTL
1728 err = __devinet_sysctl_register(net, "all", all);
1729 if (err < 0)
1730 goto err_reg_all;
1731
1732 err = __devinet_sysctl_register(net, "default", dflt);
1733 if (err < 0)
1734 goto err_reg_dflt;
1735
1736 err = -ENOMEM;
1737 forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1738 if (forw_hdr == NULL)
1739 goto err_reg_ctl;
1740 net->ipv4.forw_hdr = forw_hdr;
1741 #endif
1742
1743 net->ipv4.devconf_all = all;
1744 net->ipv4.devconf_dflt = dflt;
1745 return 0;
1746
1747 #ifdef CONFIG_SYSCTL
1748 err_reg_ctl:
1749 __devinet_sysctl_unregister(dflt);
1750 err_reg_dflt:
1751 __devinet_sysctl_unregister(all);
1752 err_reg_all:
1753 if (tbl != ctl_forward_entry)
1754 kfree(tbl);
1755 err_alloc_ctl:
1756 #endif
1757 if (dflt != &ipv4_devconf_dflt)
1758 kfree(dflt);
1759 err_alloc_dflt:
1760 if (all != &ipv4_devconf)
1761 kfree(all);
1762 err_alloc_all:
1763 return err;
1764 }
1765
1766 static __net_exit void devinet_exit_net(struct net *net)
1767 {
1768 #ifdef CONFIG_SYSCTL
1769 struct ctl_table *tbl;
1770
1771 tbl = net->ipv4.forw_hdr->ctl_table_arg;
1772 unregister_net_sysctl_table(net->ipv4.forw_hdr);
1773 __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1774 __devinet_sysctl_unregister(net->ipv4.devconf_all);
1775 kfree(tbl);
1776 #endif
1777 kfree(net->ipv4.devconf_dflt);
1778 kfree(net->ipv4.devconf_all);
1779 }
1780
1781 static __net_initdata struct pernet_operations devinet_ops = {
1782 .init = devinet_init_net,
1783 .exit = devinet_exit_net,
1784 };
1785
1786 static struct rtnl_af_ops inet_af_ops = {
1787 .family = AF_INET,
1788 .fill_link_af = inet_fill_link_af,
1789 .get_link_af_size = inet_get_link_af_size,
1790 .validate_link_af = inet_validate_link_af,
1791 .set_link_af = inet_set_link_af,
1792 };
1793
1794 void __init devinet_init(void)
1795 {
1796 int i;
1797
1798 for (i = 0; i < IN4_ADDR_HSIZE; i++)
1799 INIT_HLIST_HEAD(&inet_addr_lst[i]);
1800
1801 register_pernet_subsys(&devinet_ops);
1802
1803 register_gifconf(PF_INET, inet_gifconf);
1804 register_netdevice_notifier(&ip_netdev_notifier);
1805
1806 rtnl_af_register(&inet_af_ops);
1807
1808 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1809 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1810 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
1811 }
1812
This page took 0.093217 seconds and 5 git commands to generate.