/*
 *	Generic address resolution entity
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
 *	Harald Welte		Add neighbour cache statistics like rtstat
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <linux/times.h>
#include <net/net_namespace.h>
#include <net/neighbour.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/log2.h>
#include <linux/inetdevice.h>
#include <net/addrconf.h>

#define DEBUG
#define NEIGH_DEBUG 1
#define neigh_dbg(level, fmt, ...)		\
do {						\
	if (level <= NEIGH_DEBUG)		\
		pr_debug(fmt, ##__VA_ARGS__);	\
} while (0)

#define PNEIGH_HASHMASK		0xF

static void neigh_timer_handler(unsigned long arg);
static void __neigh_notify(struct neighbour *n, int type, int flags);
static void neigh_update_notify(struct neighbour *neigh);
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);

#ifdef CONFIG_PROC_FS
static const struct file_operations neigh_stat_seq_fops;
#endif

/*
   Neighbour hash table buckets are protected with rwlock tbl->lock.

   - All the scans/updates to hash buckets MUST be made under this lock.
   - NOTHING clever should be made under this lock: no callbacks
     to protocol backends, no attempts to send something to network.
     It will result in deadlocks if the backend/driver wants to use the
     neighbour cache.
   - If the entry requires some non-trivial actions, increase
     its reference count and release table lock.

   Neighbour entries are protected:
   - with reference count.
   - with rwlock neigh->lock

   Reference count prevents destruction.

   neigh->lock mainly serializes ll address data and its validity state.
   However, the same lock is used to protect other entry fields:
   - timer
   - resolution queue

   Again, nothing clever shall be done under neigh->lock;
   the most complicated procedure we allow there is dev->hard_header.
   It is assumed that dev->hard_header is simple and does
   not make callbacks to neighbour tables.
 */
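
/* For example, code that finds an entry during a locked bucket scan and
 * then needs to do real work with it typically follows this pattern
 * (a minimal sketch, not a complete function):
 *
 *	write_lock_bh(&tbl->lock);
 *	n = ...walk a hash bucket...;
 *	neigh_hold(n);
 *	write_unlock_bh(&tbl->lock);
 *	...now it is safe to call into drivers or transmit using n...
 *	neigh_release(n);
 */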

static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
	kfree_skb(skb);
	return -ENETDOWN;
}

static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	if (neigh->parms->neigh_cleanup)
		neigh->parms->neigh_cleanup(neigh);

	__neigh_notify(neigh, RTM_DELNEIGH, 0);
	neigh_release(neigh);
}

/*
 * Returns a value uniformly distributed in the interval
 * [base/2, 3*base/2). This matches the default IPv6 behaviour and is
 * not overridable, because it is a really reasonable choice.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
	return base ? (prandom_u32() % base) + (base >> 1) : 0;
}
EXPORT_SYMBOL(neigh_rand_reach_time);
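
/* Example: with the usual default BASE_REACHABLE_TIME of 30 seconds,
 * neigh_rand_reach_time(30 * HZ) returns a value uniformly distributed
 * in [15 * HZ, 45 * HZ), i.e. reachable_time ends up between 15 and 45
 * seconds.
 */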

static int neigh_forced_gc(struct neigh_table *tbl)
{
	int shrunk = 0;
	int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[i];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			/* Neighbour record may be discarded if:
			 * - nobody refers to it.
			 * - it is not permanent
			 */
			write_lock(&n->lock);
			if (atomic_read(&n->refcnt) == 1 &&
			    !(n->nud_state & NUD_PERMANENT)) {
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
				n->dead = 1;
				shrunk = 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);
			np = &n->next;
		}
	}

	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);

	return shrunk;
}

static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
	neigh_hold(n);
	if (unlikely(mod_timer(&n->timer, when))) {
		printk("NEIGH: BUG, double timer add, state is %x\n",
		       n->nud_state);
		dump_stack();
	}
}

static int neigh_del_timer(struct neighbour *n)
{
	if ((n->nud_state & NUD_IN_TIMER) &&
	    del_timer(&n->timer)) {
		neigh_release(n);
		return 1;
	}
	return 0;
}

static void pneigh_queue_purge(struct sk_buff_head *list)
{
	struct sk_buff *skb;

	while ((skb = skb_dequeue(list)) != NULL) {
		dev_put(skb->dev);
		kfree_skb(skb);
	}
}

static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
{
	int i;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
			rcu_assign_pointer(*np,
				   rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
			write_lock(&n->lock);
			neigh_del_timer(n);
			n->dead = 1;

			if (atomic_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				   We must destroy the neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to a safe state.
				 */
				__skb_queue_purge(&n->arp_queue);
				n->arp_queue_len_bytes = 0;
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				neigh_dbg(2, "neigh %p is stray\n", n);
			}
			write_unlock(&n->lock);
			neigh_cleanup_and_release(n);
		}
	}
}

void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);

int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	pneigh_ifdown(tbl, dev);
	write_unlock_bh(&tbl->lock);

	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);

static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

	entries = atomic_inc_return(&tbl->entries) - 1;
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3)
			goto out_entries;
	}

	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
	if (!n)
		goto out_entries;

	__skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated	  = n->used = now;
	n->nud_state	  = NUD_NONE;
	n->output	  = neigh_blackhole;
	seqlock_init(&n->hh.hh_lock);
	n->parms	  = neigh_parms_clone(&tbl->parms);
	setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl		  = tbl;
	atomic_set(&n->refcnt, 1);
	n->dead		  = 1;
out:
	return n;

out_entries:
	atomic_dec(&tbl->entries);
	goto out;
}
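
/* The gc_thresh knobs used above and in neigh_periodic_work() behave
 * roughly as follows (the numbers are the usual ARP defaults; all
 * three are sysctls):
 *
 *	gc_thresh1 (128)  - neigh_periodic_work() does not reap entries
 *			    while the table is smaller than this.
 *	gc_thresh2 (512)  - neigh_alloc() triggers a forced GC above
 *			    this if the last flush was over 5s ago.
 *	gc_thresh3 (1024) - hard limit: the allocation fails unless a
 *			    forced GC manages to free something.
 */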

static void neigh_get_hash_rnd(u32 *x)
{
	get_random_bytes(x, sizeof(*x));
	*x |= 1;
}

static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
	size_t size = (1 << shift) * sizeof(struct neighbour *);
	struct neigh_hash_table *ret;
	struct neighbour __rcu **buckets;
	int i;

	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	if (!ret)
		return NULL;
	if (size <= PAGE_SIZE)
		buckets = kzalloc(size, GFP_ATOMIC);
	else
		buckets = (struct neighbour __rcu **)
			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					   get_order(size));
	if (!buckets) {
		kfree(ret);
		return NULL;
	}
	ret->hash_buckets = buckets;
	ret->hash_shift = shift;
	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
		neigh_get_hash_rnd(&ret->hash_rnd[i]);
	return ret;
}

static void neigh_hash_free_rcu(struct rcu_head *head)
{
	struct neigh_hash_table *nht = container_of(head,
						    struct neigh_hash_table,
						    rcu);
	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
	struct neighbour __rcu **buckets = nht->hash_buckets;

	if (size <= PAGE_SIZE)
		kfree(buckets);
	else
		free_pages((unsigned long)buckets, get_order(size));
	kfree(nht);
}

static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_shift)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_shift);
	if (!new_nht)
		return old_nht;

	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
		struct neighbour *n, *next;

		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
		     n != NULL;
		     n = next) {
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);

			hash >>= (32 - new_nht->hash_shift);
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
}
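
/* Note that growing the hash never blocks lockless readers: they keep
 * walking the old buckets under rcu_read_lock_bh() until
 * rcu_assign_pointer(tbl->nht, new_nht) publishes the new table, and
 * the old bucket array is freed only after a grace period via the
 * call_rcu() above.
 */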

struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	n = __neigh_lookup_noref(tbl, pkey, dev);
	if (n) {
		if (!atomic_inc_not_zero(&n->refcnt))
			n = NULL;
		NEIGH_CACHE_STAT_INC(tbl, hits);
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup);

struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
				     const void *pkey)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (!memcmp(n->primary_key, pkey, key_len) &&
		    net_eq(dev_net(n->dev), net)) {
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);

struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
				 struct net_device *dev, bool want_ref)
{
	u32 hash_val;
	int key_len = tbl->key_len;
	int error;
	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
	struct neigh_hash_table *nht;

	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
					    lockdep_is_held(&tbl->lock))) {
		if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
			if (want_ref)
				neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;
	if (want_ref)
		neigh_hold(n);
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	neigh_dbg(2, "neigh %p is created\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	neigh_release(n);
	goto out;
}
EXPORT_SYMBOL(__neigh_create);
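
/* A minimal lookup-or-create sequence, roughly as the protocols use it:
 *
 *	n = __neigh_lookup_errno(tbl, &addr, dev);
 *	if (IS_ERR(n))
 *		return PTR_ERR(n);
 *	...use n...
 *	neigh_release(n);
 *
 * __neigh_lookup_errno() falls back to neigh_create(), and thus to
 * __neigh_create() above, when no entry exists yet.
 */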

static u32 pneigh_hash(const void *pkey, int key_len)
{
	u32 hash_val = *(u32 *)(pkey + key_len - 4);
	hash_val ^= (hash_val >> 16);
	hash_val ^= hash_val >> 8;
	hash_val ^= hash_val >> 4;
	hash_val &= PNEIGH_HASHMASK;
	return hash_val;
}
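
/* Example: for an IPv4 key the four final key bytes are folded down by
 * the shifts above to a value in [0, PNEIGH_HASHMASK], i.e. one of only
 * 16 chains; the proxy table is expected to stay small.
 */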

static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
					      struct net *net,
					      const void *pkey,
					      int key_len,
					      struct net_device *dev)
{
	while (n) {
		if (!memcmp(n->key, pkey, key_len) &&
		    net_eq(pneigh_net(n), net) &&
		    (n->dev == dev || !n->dev))
			return n;
		n = n->next;
	}
	return NULL;
}

struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
		struct net *net, const void *pkey, struct net_device *dev)
{
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
				 net, pkey, key_len, dev);
}
EXPORT_SYMBOL_GPL(__pneigh_lookup);

struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
				    struct net *net, const void *pkey,
				    struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	read_lock_bh(&tbl->lock);
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
	read_unlock_bh(&tbl->lock);

	if (n || !creat)
		goto out;

	ASSERT_RTNL();

	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

	write_pnet(&n->net, hold_net(net));
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	if (dev)
		dev_hold(dev);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		if (dev)
			dev_put(dev);
		release_net(net);
		kfree(n);
		n = NULL;
		goto out;
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
EXPORT_SYMBOL(pneigh_lookup);

int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			*np = n->next;
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
			release_net(pneigh_net(n));
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}

static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				*np = n->next;
				if (tbl->pdestructor)
					tbl->pdestructor(n);
				if (n->dev)
					dev_put(n->dev);
				release_net(pneigh_net(n));
				kfree(n);
				continue;
			}
			np = &n->next;
		}
	}
	return -ENOENT;
}

static void neigh_parms_destroy(struct neigh_parms *parms);

static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (atomic_dec_and_test(&parms->refcnt))
		neigh_parms_destroy(parms);
}

/*
 *	neighbour must already be out of the table;
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct net_device *dev = neigh->dev;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
		pr_warn("Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	if (neigh_del_timer(neigh))
		pr_warn("Impossible event\n");

	write_lock_bh(&neigh->lock);
	__skb_queue_purge(&neigh->arp_queue);
	write_unlock_bh(&neigh->lock);
	neigh->arp_queue_len_bytes = 0;

	if (dev->netdev_ops->ndo_neigh_destroy)
		dev->netdev_ops->ndo_neigh_destroy(neigh);

	dev_put(dev);
	neigh_parms_put(neigh->parms);

	neigh_dbg(2, "neigh %p is destroyed\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);

/* Neighbour state is suspicious;
   disable fast path.

   Called with write_locked neigh.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is suspected\n", neigh);

	neigh->output = neigh->ops->output;
}

/* Neighbour state is OK;
   enable fast path.

   Called with write_locked neigh.
 */
static void neigh_connect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is connected\n", neigh);

	neigh->output = neigh->ops->connected_output;
}

static void neigh_periodic_work(struct work_struct *work)
{
	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
	struct neighbour *n;
	struct neighbour __rcu **np;
	unsigned int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/*
	 *	periodically recompute ReachableTime from random function
	 */

	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
		struct neigh_parms *p;
		tbl->last_rand = jiffies;
		list_for_each_entry(p, &tbl->parms_list, list)
			p->reachable_time =
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
	}

	if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
		goto out;

	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
		np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
				lockdep_is_held(&tbl->lock))) != NULL) {
			unsigned int state;

			write_lock(&n->lock);

			state = n->nud_state;
			if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
				write_unlock(&n->lock);
				goto next_elt;
			}

			if (time_before(n->used, n->confirmed))
				n->used = n->confirmed;

			if (atomic_read(&n->refcnt) == 1 &&
			    (state == NUD_FAILED ||
			     time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
				*np = n->next;
				n->dead = 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);

next_elt:
			np = &n->next;
		}
		/*
		 * It's fine to release lock here, even if hash table
		 * grows while we are preempted.
		 */
		write_unlock_bh(&tbl->lock);
		cond_resched();
		write_lock_bh(&tbl->lock);
		nht = rcu_dereference_protected(tbl->nht,
						lockdep_is_held(&tbl->lock));
	}
out:
	/* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
	 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
	 * BASE_REACHABLE_TIME.
	 */
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			   NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
	write_unlock_bh(&tbl->lock);
}

static __inline__ int neigh_max_probes(struct neighbour *n)
{
	struct neigh_parms *p = n->parms;
	int max_probes = NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES);
	if (!(n->nud_state & NUD_PROBE))
		max_probes += NEIGH_VAR(p, MCAST_PROBES);
	return max_probes;
}
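
/* Example with the common ARP defaults (mcast_probes = 3,
 * ucast_probes = 3, app_probes = 0): an entry in NUD_PROBE is given up
 * to 3 unicast probes, while one still in NUD_INCOMPLETE gets
 * 3 + 3 = 6 attempts before the timer handler below declares it
 * NUD_FAILED.
 */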

static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	neigh_dbg(2, "neigh %p is failed\n", neigh);
	neigh->updated = jiffies;

	/* This is a delicate place. error_report is a complicated
	   routine; in particular, it can hit the same neighbour entry!

	   So we take care to be accurate here and avoid an endless
	   loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	__skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}

static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
	/* keep skb alive even if arp_queue overflows */
	if (skb)
		skb = skb_copy(skb, GFP_ATOMIC);
	write_unlock(&neigh->lock);
	neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
	kfree_skb(skb);
}

/* Called when a timer expires for a neighbour entry. */

static void neigh_timer_handler(unsigned long arg)
{
	unsigned long now, next;
	struct neighbour *neigh = (struct neighbour *)arg;
	unsigned int state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			neigh_dbg(2, "neigh %p is still alive\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used +
					  NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is delayed\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
		} else {
			neigh_dbg(2, "neigh %p is suspected\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed +
				   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			neigh_dbg(2, "neigh %p is probed\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
	}

	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh_invalidate(neigh);
		goto out;
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		if (time_before(next, jiffies + HZ/2))
			next = jiffies + HZ/2;
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		neigh_probe(neigh);
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh);

	neigh_release(neigh);
}
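
/* Summary of the timer-driven NUD transitions implemented above:
 *
 *	REACHABLE -> STALE		confirmation older than reachable_time
 *	REACHABLE -> DELAY		recently used but unconfirmed
 *	DELAY -> REACHABLE		confirmed within DELAY_PROBE_TIME
 *	DELAY -> PROBE			no confirmation arrived
 *	PROBE/INCOMPLETE -> FAILED	neigh_max_probes() exhausted
 *
 * STALE and FAILED carry no timer; they are left via traffic
 * (__neigh_event_send() below) or reaped by garbage collection.
 */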

int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
		    NEIGH_VAR(neigh->parms, APP_PROBES)) {
			unsigned long next, now = jiffies;

			atomic_set(&neigh->probes,
				   NEIGH_VAR(neigh->parms, UCAST_PROBES));
			neigh->nud_state = NUD_INCOMPLETE;
			neigh->updated = now;
			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
					 HZ/2);
			neigh_add_timer(neigh, next);
			immediate_probe = true;
		} else {
			neigh->nud_state = NUD_FAILED;
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb(skb);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		neigh_dbg(2, "neigh %p is delayed\n", neigh);
		neigh->nud_state = NUD_DELAY;
		neigh->updated = jiffies;
		neigh_add_timer(neigh, jiffies +
				NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
				struct sk_buff *buff;

				buff = __skb_dequeue(&neigh->arp_queue);
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
				kfree_skb(buff);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
			neigh->arp_queue_len_bytes += skb->truesize;
		}
		rc = 1;
	}
out_unlock_bh:
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	return rc;
}
EXPORT_SYMBOL(__neigh_event_send);

static void neigh_update_hhs(struct neighbour *neigh)
{
	struct hh_cache *hh;
	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
		= NULL;

	if (neigh->dev->header_ops)
		update = neigh->dev->header_ops->cache_update;

	if (update) {
		hh = &neigh->hh;
		if (hh->hh_len) {
			write_seqlock_bh(&hh->hh_lock);
			update(hh, neigh->dev, neigh->ha);
			write_sequnlock_bh(&hh->hh_lock);
		}
	}
}

/* Generic update routine.
   -- lladdr is the new lladdr, or NULL if none is supplied.
   -- new    is the new state.
   -- flags
	NEIGH_UPDATE_F_OVERRIDE allows overriding an existing lladdr
				if it differs.
	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect an existing "connected"
				lladdr instead of overriding it
				if it differs. It also allows retaining
				the current state if lladdr is unchanged.
	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.

	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding an existing
				NTF_ROUTER flag.
	NEIGH_UPDATE_F_ISROUTER	indicates whether the neighbour is known
				as a router.

   Caller MUST hold a reference count on the entry.
 */

int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags)
{
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev;
	int update_isrouter = 0;

	write_lock_bh(&neigh->lock);

	dev    = neigh->dev;
	old    = neigh->nud_state;
	err    = -EPERM;

	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;

	if (!(new & NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare new lladdr with cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID))
			goto out;
		lladdr = neigh->ha;
	}

	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;
	neigh->updated = jiffies;

	/* If the entry was valid and the address has not changed,
	   do not change the entry state if the new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
			     (old & NUD_CONNECTED)))
				new = old;
		}
	}

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		neigh->nud_state = new;
		notify = 1;
	}

	if (lladdr != neigh->ha) {
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				(NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid a dead loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;
			write_unlock_bh(&neigh->lock);

			rcu_read_lock();

			/* Why not just use 'neigh' as-is? The problem is that
			 * things such as shaper, eql, and sch_teql can end up
			 * using alternative, different, neigh objects to output
			 * the packet in the output path.  So what we need to do
			 * here is re-lookup the top-level neigh in the path so
			 * we can reinject the packet there.
			 */
			n2 = NULL;
			if (dst) {
				n2 = dst_neigh_lookup_skb(dst, skb);
				if (n2)
					n1 = n2;
			}
			n1->output(n1, skb);
			if (n2)
				neigh_release(n2);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		__skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter) {
		neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
			(neigh->flags | NTF_ROUTER) :
			(neigh->flags & ~NTF_ROUTER);
	}
	write_unlock_bh(&neigh->lock);

	if (notify)
		neigh_update_notify(neigh);

	return err;
}
EXPORT_SYMBOL(neigh_update);
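
/* Example: ARP confirms a freshly resolved address with something like
 *
 *	neigh_update(n, lladdr, NUD_REACHABLE, NEIGH_UPDATE_F_OVERRIDE);
 *
 * while administrative removal (see neigh_delete() below) passes
 * NUD_FAILED with NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN.
 */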

/* Update the neigh to listen temporarily for probe responses, even if it is
 * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
 */
void __neigh_set_probe_once(struct neighbour *neigh)
{
	neigh->updated = jiffies;
	if (!(neigh->nud_state & NUD_FAILED))
		return;
	neigh->nud_state = NUD_INCOMPLETE;
	atomic_set(&neigh->probes, neigh_max_probes(neigh));
	neigh_add_timer(neigh,
			jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME));
}
EXPORT_SYMBOL(__neigh_set_probe_once);

struct neighbour *neigh_event_ns(struct neigh_table *tbl,
				 u8 *lladdr, void *saddr,
				 struct net_device *dev)
{
	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
						 lladdr || !dev->addr_len);
	if (neigh)
		neigh_update(neigh, lladdr, NUD_STALE,
			     NEIGH_UPDATE_F_OVERRIDE);
	return neigh;
}
EXPORT_SYMBOL(neigh_event_ns);

/* called with read_lock_bh(&n->lock); */
static void neigh_hh_init(struct neighbour *n)
{
	struct net_device *dev = n->dev;
	__be16 prot = n->tbl->protocol;
	struct hh_cache	*hh = &n->hh;

	write_lock_bh(&n->lock);

	/* Only one thread can come in here and initialize the
	 * hh_cache entry.
	 */
	if (!hh->hh_len)
		dev->header_ops->cache(n, hh, prot);

	write_unlock_bh(&n->lock);
}

/* Slow and careful. */

int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc = 0;

	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		if (dev->header_ops->cache && !neigh->hh.hh_len)
			neigh_hh_init(neigh);

		do {
			__skb_pull(skb, skb_network_offset(skb));
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = dev_queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);

/* As fast as possible without hh cache */

int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct net_device *dev = neigh->dev;
	unsigned int seq;
	int err;

	do {
		__skb_pull(skb, skb_network_offset(skb));
		seq = read_seqbegin(&neigh->ha_lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      neigh->ha, NULL, skb->len);
	} while (read_seqretry(&neigh->ha_lock, seq));

	if (err >= 0)
		err = dev_queue_xmit(skb);
	else {
		err = -EINVAL;
		kfree_skb(skb);
	}
	return err;
}
EXPORT_SYMBOL(neigh_connected_output);
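
/* Both output paths above snapshot neigh->ha under the ha_lock seqlock:
 * if neigh_update() rewrites the hardware address between
 * read_seqbegin() and read_seqretry(), the header is simply built again
 * with the fresh address, so no lock needs to be held across
 * dev_hard_header().
 */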

int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);

static void neigh_proxy_process(unsigned long arg)
{
	struct neigh_table *tbl = (struct neigh_table *)arg;
	long sched_next = 0;
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			struct net_device *dev = skb->dev;

			__skb_unlink(skb, &tbl->proxy_queue);
			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
				tbl->proxy_redo(skb);
				rcu_read_unlock();
			} else {
				kfree_skb(skb);
			}

			dev_put(dev);
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}

void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
		    struct sk_buff *skb)
{
	unsigned long now = jiffies;

	unsigned long sched_next = now + (prandom_u32() %
					  NEIGH_VAR(p, PROXY_DELAY));

	if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
		kfree_skb(skb);
		return;
	}

	NEIGH_CB(skb)->sched_next = sched_next;
	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;

	spin_lock(&tbl->proxy_queue.lock);
	if (del_timer(&tbl->proxy_timer)) {
		if (time_before(tbl->proxy_timer.expires, sched_next))
			sched_next = tbl->proxy_timer.expires;
	}
	skb_dst_drop(skb);
	dev_hold(skb->dev);
	__skb_queue_tail(&tbl->proxy_queue, skb);
	mod_timer(&tbl->proxy_timer, sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
EXPORT_SYMBOL(pneigh_enqueue);

static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
						     struct net *net, int ifindex)
{
	struct neigh_parms *p;

	list_for_each_entry(p, &tbl->parms_list, list) {
		if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
		    (!p->dev && !ifindex && net_eq(net, &init_net)))
			return p;
	}

	return NULL;
}

struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
				      struct neigh_table *tbl)
{
	struct neigh_parms *p;
	struct net *net = dev_net(dev);
	const struct net_device_ops *ops = dev->netdev_ops;

	p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
	if (p) {
		p->tbl		  = tbl;
		atomic_set(&p->refcnt, 1);
		p->reachable_time =
			neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
		dev_hold(dev);
		p->dev = dev;
		write_pnet(&p->net, hold_net(net));
		p->sysctl_table = NULL;

		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
			release_net(net);
			dev_put(dev);
			kfree(p);
			return NULL;
		}

		write_lock_bh(&tbl->lock);
		list_add(&p->list, &tbl->parms.list);
		write_unlock_bh(&tbl->lock);

		neigh_parms_data_state_cleanall(p);
	}
	return p;
}
EXPORT_SYMBOL(neigh_parms_alloc);

static void neigh_rcu_free_parms(struct rcu_head *head)
{
	struct neigh_parms *parms =
		container_of(head, struct neigh_parms, rcu_head);

	neigh_parms_put(parms);
}

void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	list_del(&parms->list);
	parms->dead = 1;
	write_unlock_bh(&tbl->lock);
	if (parms->dev)
		dev_put(parms->dev);
	call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
}
EXPORT_SYMBOL(neigh_parms_release);

static void neigh_parms_destroy(struct neigh_parms *parms)
{
	release_net(neigh_parms_net(parms));
	kfree(parms);
}

static struct lock_class_key neigh_table_proxy_queue_class;

static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;

void neigh_table_init(int index, struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	INIT_LIST_HEAD(&tbl->parms_list);
	list_add(&tbl->parms.list, &tbl->parms_list);
	write_pnet(&tbl->parms.net, &init_net);
	atomic_set(&tbl->parms.refcnt, 1);
	tbl->parms.reachable_time =
		neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
			      &neigh_stat_seq_fops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	if (!tbl->entry_size)
		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
					tbl->key_len, NEIGH_PRIV_ALIGN);
	else
		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);

	rwlock_init(&tbl->lock);
	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			   tbl->parms.reachable_time);
	setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
	skb_queue_head_init_class(&tbl->proxy_queue,
				  &neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	tbl->last_rand	= now + tbl->parms.reachable_time * 20;

	neigh_tables[index] = tbl;
}
EXPORT_SYMBOL(neigh_table_init);

int neigh_table_clear(int index, struct neigh_table *tbl)
{
	neigh_tables[index] = NULL;
	/* It is not clean... Fix it to unload IPv6 module safely */
	cancel_delayed_work_sync(&tbl->gc_work);
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	neigh_ifdown(tbl, NULL);
	if (atomic_read(&tbl->entries))
		pr_crit("neighbour leakage\n");

	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
	tbl->nht = NULL;

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

	remove_proc_entry(tbl->id, init_net.proc_net_stat);

	free_percpu(tbl->stats);
	tbl->stats = NULL;

	return 0;
}
EXPORT_SYMBOL(neigh_table_clear);

static struct neigh_table *neigh_find_table(int family)
{
	struct neigh_table *tbl = NULL;

	switch (family) {
	case AF_INET:
		tbl = neigh_tables[NEIGH_ARP_TABLE];
		break;
	case AF_INET6:
		tbl = neigh_tables[NEIGH_ND_TABLE];
		break;
	case AF_DECnet:
		tbl = neigh_tables[NEIGH_DN_TABLE];
		break;
	}

	return tbl;
}

static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
	struct neigh_table *tbl;
	struct neighbour *neigh;
	struct net_device *dev = NULL;
	int err = -EINVAL;

	ASSERT_RTNL();
	if (nlmsg_len(nlh) < sizeof(*ndm))
		goto out;

	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (dst_attr == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(dst_attr) < tbl->key_len)
		goto out;

	if (ndm->ndm_flags & NTF_PROXY) {
		err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
		goto out;
	}

	if (dev == NULL)
		goto out;

	neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
	if (neigh == NULL) {
		err = -ENOENT;
		goto out;
	}

	err = neigh_update(neigh, NULL, NUD_FAILED,
			   NEIGH_UPDATE_F_OVERRIDE |
			   NEIGH_UPDATE_F_ADMIN);
	neigh_release(neigh);

out:
	return err;
}

static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	struct neighbour *neigh;
	void *dst, *lladdr;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
	if (err < 0)
		goto out;

	err = -EINVAL;
	if (tb[NDA_DST] == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
			goto out;
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(tb[NDA_DST]) < tbl->key_len)
		goto out;
	dst = nla_data(tb[NDA_DST]);
	lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

	if (ndm->ndm_flags & NTF_PROXY) {
		struct pneigh_entry *pn;

		err = -ENOBUFS;
		pn = pneigh_lookup(tbl, net, dst, dev, 1);
		if (pn) {
			pn->flags = ndm->ndm_flags;
			err = 0;
		}
		goto out;
	}

	if (dev == NULL)
		goto out;

	neigh = neigh_lookup(tbl, dst, dev);
	if (neigh == NULL) {
		if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
			err = -ENOENT;
			goto out;
		}

		neigh = __neigh_lookup_errno(tbl, dst, dev);
		if (IS_ERR(neigh)) {
			err = PTR_ERR(neigh);
			goto out;
		}
	} else {
		if (nlh->nlmsg_flags & NLM_F_EXCL) {
			err = -EEXIST;
			neigh_release(neigh);
			goto out;
		}

		if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
			flags &= ~NEIGH_UPDATE_F_OVERRIDE;
	}

	if (ndm->ndm_flags & NTF_USE) {
		neigh_event_send(neigh, NULL);
		err = 0;
	} else
		err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
	neigh_release(neigh);

out:
	return err;
}

static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, NDTA_PARMS);
	if (nest == NULL)
		return -ENOBUFS;

	if ((parms->dev &&
	     nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
	    nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) ||
	    nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
	    /* approximate value for deprecated QUEUE_LEN (in packets) */
	    nla_put_u32(skb, NDTPA_QUEUE_LEN,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
	    nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
	    nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
	    nla_put_u32(skb, NDTPA_UCAST_PROBES,
			NEIGH_VAR(parms, UCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_PROBES,
			NEIGH_VAR(parms, MCAST_PROBES)) ||
	    nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) ||
	    nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
			  NEIGH_VAR(parms, BASE_REACHABLE_TIME)) ||
	    nla_put_msecs(skb, NDTPA_GC_STALETIME,
			  NEIGH_VAR(parms, GC_STALETIME)) ||
	    nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
			  NEIGH_VAR(parms, DELAY_PROBE_TIME)) ||
	    nla_put_msecs(skb, NDTPA_RETRANS_TIME,
			  NEIGH_VAR(parms, RETRANS_TIME)) ||
	    nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
			  NEIGH_VAR(parms, ANYCAST_DELAY)) ||
	    nla_put_msecs(skb, NDTPA_PROXY_DELAY,
			  NEIGH_VAR(parms, PROXY_DELAY)) ||
	    nla_put_msecs(skb, NDTPA_LOCKTIME,
			  NEIGH_VAR(parms, LOCKTIME)))
		goto nla_put_failure;
	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
}

static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *ndtmsg;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
	    nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval) ||
	    nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
	    nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
	    nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
		goto nla_put_failure;
	{
		unsigned long now = jiffies;
		unsigned int flush_delta = now - tbl->last_flush;
		unsigned int rand_delta = now - tbl->last_rand;
		struct neigh_hash_table *nht;
		struct ndt_config ndc = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
		};

		rcu_read_lock_bh();
		nht = rcu_dereference_bh(tbl->nht);
		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
		rcu_read_unlock_bh();

		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
			goto nla_put_failure;
	}

	{
		int cpu;
		struct ndt_stats ndst;

		memset(&ndst, 0, sizeof(ndst));

		for_each_possible_cpu(cpu) {
			struct neigh_statistics	*st;

			st = per_cpu_ptr(tbl->stats, cpu);
			ndst.ndts_allocs		+= st->allocs;
			ndst.ndts_destroys		+= st->destroys;
			ndst.ndts_hash_grows		+= st->hash_grows;
			ndst.ndts_res_failed		+= st->res_failed;
			ndst.ndts_lookups		+= st->lookups;
			ndst.ndts_hits			+= st->hits;
			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
		}

		if (nla_put(skb, NDTA_STATS, sizeof(ndst), &ndst))
			goto nla_put_failure;
	}

	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int neightbl_fill_param_info(struct sk_buff *skb,
				    struct neigh_table *tbl,
				    struct neigh_parms *parms,
				    u32 pid, u32 seq, int type,
				    unsigned int flags)
{
	struct ndtmsg *ndtmsg;
	struct nlmsghdr *nlh;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
	    neightbl_fill_parms(skb, parms) < 0)
		goto errout;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;
errout:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
	[NDTA_NAME]		= { .type = NLA_STRING },
	[NDTA_THRESH1]		= { .type = NLA_U32 },
	[NDTA_THRESH2]		= { .type = NLA_U32 },
	[NDTA_THRESH3]		= { .type = NLA_U32 },
	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
	[NDTA_PARMS]		= { .type = NLA_NESTED },
};

static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
};

static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct neigh_table *tbl;
	struct ndtmsg *ndtmsg;
	struct nlattr *tb[NDTA_MAX+1];
	bool found = false;
	int err, tidx;

	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
			  nl_neightbl_policy);
	if (err < 0)
		goto errout;

	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	ndtmsg = nlmsg_data(nlh);

	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		tbl = neigh_tables[tidx];
		if (!tbl)
			continue;
		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;
		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
			found = true;
			break;
		}
	}

	if (!found)
		return -ENOENT;

	/*
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
	 */
	write_lock_bh(&tbl->lock);

	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
		struct neigh_parms *p;
		int i, ifindex = 0;

		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
				       nl_ntbl_parm_policy);
		if (err < 0)
			goto errout_tbl_lock;

		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

		p = lookup_neigh_parms(tbl, net, ifindex);
		if (p == NULL) {
			err = -ENOENT;
			goto errout_tbl_lock;
		}

		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;

			switch (i) {
			case NDTPA_QUEUE_LEN:
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]) *
					      SKB_TRUESIZE(ETH_FRAME_LEN));
				break;
			case NDTPA_QUEUE_LENBYTES:
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_PROXY_QLEN:
				NEIGH_VAR_SET(p, PROXY_QLEN,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_APP_PROBES:
				NEIGH_VAR_SET(p, APP_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_UCAST_PROBES:
				NEIGH_VAR_SET(p, UCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_PROBES:
				NEIGH_VAR_SET(p, MCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_BASE_REACHABLE_TIME:
				NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
					      nla_get_msecs(tbp[i]));
				/* update reachable_time as well, otherwise, the change will
				 * only be effective after the next time neigh_periodic_work
				 * decides to recompute it (can be multiple minutes)
				 */
				p->reachable_time =
					neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
				break;
			case NDTPA_GC_STALETIME:
				NEIGH_VAR_SET(p, GC_STALETIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_DELAY_PROBE_TIME:
				NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_RETRANS_TIME:
				NEIGH_VAR_SET(p, RETRANS_TIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_ANYCAST_DELAY:
				NEIGH_VAR_SET(p, ANYCAST_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_PROXY_DELAY:
				NEIGH_VAR_SET(p, PROXY_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_LOCKTIME:
				NEIGH_VAR_SET(p, LOCKTIME,
					      nla_get_msecs(tbp[i]));
				break;
			}
		}
	}

	err = -ENOENT;
	if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
	     tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
	    !net_eq(net, &init_net))
		goto errout_tbl_lock;

	if (tb[NDTA_THRESH1])
		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);

	if (tb[NDTA_THRESH2])
		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);

	if (tb[NDTA_THRESH3])
		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);

	if (tb[NDTA_GC_INTERVAL])
		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);

	err = 0;

errout_tbl_lock:
	write_unlock_bh(&tbl->lock);
errout:
	return err;
}
2072
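/*
 * Conversion used above, factored out as an illustrative sketch (this
 * helper is not in the original file): the legacy NDTPA_QUEUE_LEN
 * attribute counts packets, while the kernel stores the limit in bytes,
 * sized for worst-case Ethernet frames.
 */
static inline u32 neigh_pkts_to_qlen_bytes(u32 pkts)
{
	return pkts * SKB_TRUESIZE(ETH_FRAME_LEN);
}
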
2073 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2074 {
2075 struct net *net = sock_net(skb->sk);
2076 int family, tidx, nidx = 0;
2077 int tbl_skip = cb->args[0];
2078 int neigh_skip = cb->args[1];
2079 struct neigh_table *tbl;
2080
2081 family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2082
2083 for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2084 struct neigh_parms *p;
2085
2086 tbl = neigh_tables[tidx];
2087 if (!tbl)
2088 continue;
2089
2090 if (tidx < tbl_skip || (family && tbl->family != family))
2091 continue;
2092
2093 if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
2094 cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2095 NLM_F_MULTI) < 0)
2096 break;
2097
2098 nidx = 0;
2099 p = list_next_entry(&tbl->parms, list);
2100 list_for_each_entry_from(p, &tbl->parms_list, list) {
2101 if (!net_eq(neigh_parms_net(p), net))
2102 continue;
2103
2104 if (nidx < neigh_skip)
2105 goto next;
2106
2107 if (neightbl_fill_param_info(skb, tbl, p,
2108 NETLINK_CB(cb->skb).portid,
2109 cb->nlh->nlmsg_seq,
2110 RTM_NEWNEIGHTBL,
2111 NLM_F_MULTI) < 0)
2112 goto out;
2113 next:
2114 nidx++;
2115 }
2116
2117 neigh_skip = 0;
2118 }
2119 out:
2120 cb->args[0] = tidx;
2121 cb->args[1] = nidx;
2122
2123 return skb->len;
2124 }
2125
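/*
 * Sketch of the netlink dump resume protocol used throughout this file
 * (hypothetical names, illustration only): a dump callback is invoked
 * repeatedly until it stops adding messages, parking its progress in
 * cb->args[] between invocations:
 *
 *	static int example_dump(struct sk_buff *skb, struct netlink_callback *cb)
 *	{
 *		int idx, s_idx = cb->args[0];
 *
 *		for (idx = 0; idx < nr_objects; idx++) {
 *			if (idx < s_idx)
 *				continue;
 *			if (example_fill(skb, idx) < 0)
 *				break;
 *		}
 *		cb->args[0] = idx;
 *		return skb->len;
 *	}
 *
 * neightbl_dump_info() above does exactly this with two nested indices:
 * args[0] for the table, args[1] for the per-device parms entry.
 */
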
2126 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2127 u32 pid, u32 seq, int type, unsigned int flags)
2128 {
2129 unsigned long now = jiffies;
2130 struct nda_cacheinfo ci;
2131 struct nlmsghdr *nlh;
2132 struct ndmsg *ndm;
2133
2134 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2135 if (nlh == NULL)
2136 return -EMSGSIZE;
2137
2138 ndm = nlmsg_data(nlh);
2139 ndm->ndm_family = neigh->ops->family;
2140 ndm->ndm_pad1 = 0;
2141 ndm->ndm_pad2 = 0;
2142 ndm->ndm_flags = neigh->flags;
2143 ndm->ndm_type = neigh->type;
2144 ndm->ndm_ifindex = neigh->dev->ifindex;
2145
2146 if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
2147 goto nla_put_failure;
2148
2149 read_lock_bh(&neigh->lock);
2150 ndm->ndm_state = neigh->nud_state;
2151 if (neigh->nud_state & NUD_VALID) {
2152 char haddr[MAX_ADDR_LEN];
2153
2154 neigh_ha_snapshot(haddr, neigh, neigh->dev);
2155 if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2156 read_unlock_bh(&neigh->lock);
2157 goto nla_put_failure;
2158 }
2159 }
2160
2161 ci.ndm_used = jiffies_to_clock_t(now - neigh->used);
2162 ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2163 ci.ndm_updated = jiffies_to_clock_t(now - neigh->updated);
2164 ci.ndm_refcnt = atomic_read(&neigh->refcnt) - 1;
2165 read_unlock_bh(&neigh->lock);
2166
2167 if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
2168 nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
2169 goto nla_put_failure;
2170
2171 nlmsg_end(skb, nlh);
2172 return 0;
2173
2174 nla_put_failure:
2175 nlmsg_cancel(skb, nlh);
2176 return -EMSGSIZE;
2177 }
2178
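/*
 * Resulting wire format, reconstructed from the code above for
 * reference: an RTM_NEWNEIGH message is a struct ndmsg followed by
 *
 *	NDA_DST		tbl->key_len bytes	protocol address
 *	NDA_LLADDR	dev->addr_len bytes	link-layer address
 *						(only while NUD_VALID)
 *	NDA_PROBES	u32			probe counter
 *	NDA_CACHEINFO	struct nda_cacheinfo	used/confirmed/updated
 *						times and refcnt
 */
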
2179 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2180 u32 pid, u32 seq, int type, unsigned int flags,
2181 struct neigh_table *tbl)
2182 {
2183 struct nlmsghdr *nlh;
2184 struct ndmsg *ndm;
2185
2186 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2187 if (nlh == NULL)
2188 return -EMSGSIZE;
2189
2190 ndm = nlmsg_data(nlh);
2191 ndm->ndm_family = tbl->family;
2192 ndm->ndm_pad1 = 0;
2193 ndm->ndm_pad2 = 0;
2194 ndm->ndm_flags = pn->flags | NTF_PROXY;
2195 ndm->ndm_type = RTN_UNICAST;
2196 ndm->ndm_ifindex = pn->dev->ifindex;
2197 ndm->ndm_state = NUD_NONE;
2198
2199 if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2200 goto nla_put_failure;
2201
2202 nlmsg_end(skb, nlh);
2203 return 0;
2204
2205 nla_put_failure:
2206 nlmsg_cancel(skb, nlh);
2207 return -EMSGSIZE;
2208 }
2209
2210 static void neigh_update_notify(struct neighbour *neigh)
2211 {
2212 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2213 __neigh_notify(neigh, RTM_NEWNEIGH, 0);
2214 }
2215
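/*
 * Consumer sketch (hypothetical module code, not part of this file):
 * anything interested in these updates can hook the netevent chain
 * fired above:
 *
 *	static int my_netevent_cb(struct notifier_block *nb,
 *				  unsigned long event, void *ptr)
 *	{
 *		struct neighbour *n = ptr;
 *
 *		if (event == NETEVENT_NEIGH_UPDATE)
 *			pr_debug("neigh %pM updated\n", n->ha);
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block my_nb = {
 *		.notifier_call = my_netevent_cb,
 *	};
 *
 *	register_netevent_notifier(&my_nb);
 */
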
2216 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2217 struct netlink_callback *cb)
2218 {
2219 struct net *net = sock_net(skb->sk);
2220 struct neighbour *n;
2221 int rc, h, s_h = cb->args[1];
2222 int idx, s_idx = idx = cb->args[2];
2223 struct neigh_hash_table *nht;
2224
2225 rcu_read_lock_bh();
2226 nht = rcu_dereference_bh(tbl->nht);
2227
2228 for (h = s_h; h < (1 << nht->hash_shift); h++) {
2229 if (h > s_h)
2230 s_idx = 0;
2231 for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2232 n != NULL;
2233 n = rcu_dereference_bh(n->next)) {
2234 if (!net_eq(dev_net(n->dev), net))
2235 continue;
2236 if (idx < s_idx)
2237 goto next;
2238 if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2239 cb->nlh->nlmsg_seq,
2240 RTM_NEWNEIGH,
2241 NLM_F_MULTI) < 0) {
2242 rc = -1;
2243 goto out;
2244 }
2245 next:
2246 idx++;
2247 }
2248 }
2249 rc = skb->len;
2250 out:
2251 rcu_read_unlock_bh();
2252 cb->args[1] = h;
2253 cb->args[2] = idx;
2254 return rc;
2255 }
2256
2257 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2258 struct netlink_callback *cb)
2259 {
2260 struct pneigh_entry *n;
2261 struct net *net = sock_net(skb->sk);
2262 int rc, h, s_h = cb->args[3];
2263 int idx, s_idx = idx = cb->args[4];
2264
2265 read_lock_bh(&tbl->lock);
2266
2267 for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2268 if (h > s_h)
2269 s_idx = 0;
2270 for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2271 if (dev_net(n->dev) != net)
2272 continue;
2273 if (idx < s_idx)
2274 goto next;
2275 if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2276 cb->nlh->nlmsg_seq,
2277 RTM_NEWNEIGH,
2278 NLM_F_MULTI, tbl) < 0) {
2279 read_unlock_bh(&tbl->lock);
2280 rc = -1;
2281 goto out;
2282 }
2283 next:
2284 idx++;
2285 }
2286 }
2287
2288 read_unlock_bh(&tbl->lock);
2289 rc = skb->len;
2290 out:
2291 cb->args[3] = h;
2292 cb->args[4] = idx;
2293 return rc;
2294
2295 }
2296
2297 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2298 {
2299 struct neigh_table *tbl;
2300 int t, family, s_t;
2301 int proxy = 0;
2302 int err;
2303
2304 family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2305
2306 /* Check for the presence of a full ndmsg structure; its family
2307 * member is at the same offset in both ndmsg and rtgenmsg.
2308 */
2309 if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
2310 ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
2311 proxy = 1;
2312
2313 s_t = cb->args[0];
2314
2315 for (t = 0; t < NEIGH_NR_TABLES; t++) {
2316 tbl = neigh_tables[t];
2317
2318 if (!tbl)
2319 continue;
2320 if (t < s_t || (family && tbl->family != family))
2321 continue;
2322 if (t > s_t)
2323 memset(&cb->args[1], 0, sizeof(cb->args) -
2324 sizeof(cb->args[0]));
2325 if (proxy)
2326 err = pneigh_dump_table(tbl, skb, cb);
2327 else
2328 err = neigh_dump_table(tbl, skb, cb);
2329 if (err < 0)
2330 break;
2331 }
2332
2333 cb->args[0] = t;
2334 return skb->len;
2335 }
2336
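/*
 * Requesting proxy entries, sketched from the check above (hypothetical
 * userspace snippet): a dump whose payload is a full struct ndmsg with
 * ndm_flags == NTF_PROXY walks the pneigh tables instead:
 *
 *	struct {
 *		struct nlmsghdr nlh;
 *		struct ndmsg	ndm;
 *	} req = {
 *		.nlh.nlmsg_len	 = NLMSG_LENGTH(sizeof(struct ndmsg)),
 *		.nlh.nlmsg_type	 = RTM_GETNEIGH,
 *		.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
 *		.ndm.ndm_flags	 = NTF_PROXY,
 *	};
 *
 * A plain struct rtgenmsg payload (or ndm_flags of zero) selects the
 * regular neighbour tables.
 */
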
2337 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2338 {
2339 int chain;
2340 struct neigh_hash_table *nht;
2341
2342 rcu_read_lock_bh();
2343 nht = rcu_dereference_bh(tbl->nht);
2344
2345 read_lock(&tbl->lock); /* avoid resizes */
2346 for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2347 struct neighbour *n;
2348
2349 for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2350 n != NULL;
2351 n = rcu_dereference_bh(n->next))
2352 cb(n, cookie);
2353 }
2354 read_unlock(&tbl->lock);
2355 rcu_read_unlock_bh();
2356 }
2357 EXPORT_SYMBOL(neigh_for_each);
2358
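/*
 * Usage sketch for neigh_for_each() (hypothetical callback): the walk
 * above runs under tbl->lock and rcu_read_lock_bh(), so the callback
 * must not sleep:
 *
 *	static void count_neigh(struct neighbour *n, void *cookie)
 *	{
 *		(*(int *)cookie)++;
 *	}
 *
 *	int count = 0;
 *	neigh_for_each(&arp_tbl, count_neigh, &count);
 */
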
2359 /* The tbl->lock must be held as a writer and BH disabled. */
2360 void __neigh_for_each_release(struct neigh_table *tbl,
2361 int (*cb)(struct neighbour *))
2362 {
2363 int chain;
2364 struct neigh_hash_table *nht;
2365
2366 nht = rcu_dereference_protected(tbl->nht,
2367 lockdep_is_held(&tbl->lock));
2368 for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2369 struct neighbour *n;
2370 struct neighbour __rcu **np;
2371
2372 np = &nht->hash_buckets[chain];
2373 while ((n = rcu_dereference_protected(*np,
2374 lockdep_is_held(&tbl->lock))) != NULL) {
2375 int release;
2376
2377 write_lock(&n->lock);
2378 release = cb(n);
2379 if (release) {
2380 rcu_assign_pointer(*np,
2381 rcu_dereference_protected(n->next,
2382 lockdep_is_held(&tbl->lock)));
2383 n->dead = 1;
2384 } else
2385 np = &n->next;
2386 write_unlock(&n->lock);
2387 if (release)
2388 neigh_cleanup_and_release(n);
2389 }
2390 }
2391 }
2392 EXPORT_SYMBOL(__neigh_for_each_release);
2393
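/*
 * Usage sketch (hypothetical callback): drop every entry that nothing
 * but the table still references, honouring the locking contract stated
 * above:
 *
 *	static int release_if_idle(struct neighbour *n)
 *	{
 *		return atomic_read(&n->refcnt) == 1;
 *	}
 *
 *	write_lock_bh(&tbl->lock);
 *	__neigh_for_each_release(tbl, release_if_idle);
 *	write_unlock_bh(&tbl->lock);
 */
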
2394 int neigh_xmit(int family, struct net_device *dev,
2395 const void *addr, struct sk_buff *skb)
2396 {
2397 int err;
2398 if (family == AF_PACKET) {
2399 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
2400 addr, NULL, skb->len);
2401 if (err < 0)
2402 goto out_kfree_skb;
2403 err = dev_queue_xmit(skb);
2404 } else {
2405 struct neigh_table *tbl;
2406 struct neighbour *neigh;
2407
2408 err = -ENETDOWN;
2409 tbl = neigh_find_table(family);
2410 if (!tbl)
2411 goto out;
rcu_read_lock_bh();
neigh = __neigh_lookup_noref(tbl, addr, dev);
if (!neigh)
neigh = __neigh_create(tbl, addr, dev, false);
err = PTR_ERR(neigh);
if (IS_ERR(neigh)) {
rcu_read_unlock_bh();
goto out_kfree_skb;
}
err = neigh->output(neigh, skb);
rcu_read_unlock_bh();
}
2420 out:
2421 return err;
2422 out_kfree_skb:
2423 kfree_skb(skb);
2424 goto out;
2425 }
2426 EXPORT_SYMBOL(neigh_xmit);
2427
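/*
 * Caller sketch (illustrative, hypothetical variable names): transmit
 * an skb to a known next-hop address without keeping a neighbour
 * reference around:
 *
 *	err = neigh_xmit(AF_INET, dev, &next_hop_addr, skb);
 */
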
2428 #ifdef CONFIG_PROC_FS
2429
2430 static struct neighbour *neigh_get_first(struct seq_file *seq)
2431 {
2432 struct neigh_seq_state *state = seq->private;
2433 struct net *net = seq_file_net(seq);
2434 struct neigh_hash_table *nht = state->nht;
2435 struct neighbour *n = NULL;
2436 int bucket = state->bucket;
2437
2438 state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2439 for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
2440 n = rcu_dereference_bh(nht->hash_buckets[bucket]);
2441
2442 while (n) {
2443 if (!net_eq(dev_net(n->dev), net))
2444 goto next;
2445 if (state->neigh_sub_iter) {
2446 loff_t fakep = 0;
2447 void *v;
2448
2449 v = state->neigh_sub_iter(state, n, &fakep);
2450 if (!v)
2451 goto next;
2452 }
2453 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2454 break;
2455 if (n->nud_state & ~NUD_NOARP)
2456 break;
2457 next:
2458 n = rcu_dereference_bh(n->next);
2459 }
2460
2461 if (n)
2462 break;
2463 }
2464 state->bucket = bucket;
2465
2466 return n;
2467 }
2468
2469 static struct neighbour *neigh_get_next(struct seq_file *seq,
2470 struct neighbour *n,
2471 loff_t *pos)
2472 {
2473 struct neigh_seq_state *state = seq->private;
2474 struct net *net = seq_file_net(seq);
2475 struct neigh_hash_table *nht = state->nht;
2476
2477 if (state->neigh_sub_iter) {
2478 void *v = state->neigh_sub_iter(state, n, pos);
2479 if (v)
2480 return n;
2481 }
2482 n = rcu_dereference_bh(n->next);
2483
2484 while (1) {
2485 while (n) {
2486 if (!net_eq(dev_net(n->dev), net))
2487 goto next;
2488 if (state->neigh_sub_iter) {
2489 void *v = state->neigh_sub_iter(state, n, pos);
2490 if (v)
2491 return n;
2492 goto next;
2493 }
2494 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2495 break;
2496
2497 if (n->nud_state & ~NUD_NOARP)
2498 break;
2499 next:
2500 n = rcu_dereference_bh(n->next);
2501 }
2502
2503 if (n)
2504 break;
2505
2506 if (++state->bucket >= (1 << nht->hash_shift))
2507 break;
2508
2509 n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
2510 }
2511
2512 if (n && pos)
2513 --(*pos);
2514 return n;
2515 }
2516
2517 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2518 {
2519 struct neighbour *n = neigh_get_first(seq);
2520
2521 if (n) {
2522 --(*pos);
2523 while (*pos) {
2524 n = neigh_get_next(seq, n, pos);
2525 if (!n)
2526 break;
2527 }
2528 }
2529 return *pos ? NULL : n;
2530 }
2531
2532 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2533 {
2534 struct neigh_seq_state *state = seq->private;
2535 struct net *net = seq_file_net(seq);
2536 struct neigh_table *tbl = state->tbl;
2537 struct pneigh_entry *pn = NULL;
2538 int bucket = state->bucket;
2539
2540 state->flags |= NEIGH_SEQ_IS_PNEIGH;
2541 for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2542 pn = tbl->phash_buckets[bucket];
2543 while (pn && !net_eq(pneigh_net(pn), net))
2544 pn = pn->next;
2545 if (pn)
2546 break;
2547 }
2548 state->bucket = bucket;
2549
2550 return pn;
2551 }
2552
2553 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2554 struct pneigh_entry *pn,
2555 loff_t *pos)
2556 {
2557 struct neigh_seq_state *state = seq->private;
2558 struct net *net = seq_file_net(seq);
2559 struct neigh_table *tbl = state->tbl;
2560
2561 do {
2562 pn = pn->next;
2563 } while (pn && !net_eq(pneigh_net(pn), net));
2564
2565 while (!pn) {
2566 if (++state->bucket > PNEIGH_HASHMASK)
2567 break;
2568 pn = tbl->phash_buckets[state->bucket];
2569 while (pn && !net_eq(pneigh_net(pn), net))
2570 pn = pn->next;
2571 if (pn)
2572 break;
2573 }
2574
2575 if (pn && pos)
2576 --(*pos);
2577
2578 return pn;
2579 }
2580
2581 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2582 {
2583 struct pneigh_entry *pn = pneigh_get_first(seq);
2584
2585 if (pn) {
2586 --(*pos);
2587 while (*pos) {
2588 pn = pneigh_get_next(seq, pn, pos);
2589 if (!pn)
2590 break;
2591 }
2592 }
2593 return *pos ? NULL : pn;
2594 }
2595
2596 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2597 {
2598 struct neigh_seq_state *state = seq->private;
2599 void *rc;
2600 loff_t idxpos = *pos;
2601
2602 rc = neigh_get_idx(seq, &idxpos);
2603 if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2604 rc = pneigh_get_idx(seq, &idxpos);
2605
2606 return rc;
2607 }
2608
2609 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2610 __acquires(rcu_bh)
2611 {
2612 struct neigh_seq_state *state = seq->private;
2613
2614 state->tbl = tbl;
2615 state->bucket = 0;
2616 state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2617
2618 rcu_read_lock_bh();
2619 state->nht = rcu_dereference_bh(tbl->nht);
2620
2621 return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
2622 }
2623 EXPORT_SYMBOL(neigh_seq_start);
2624
2625 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2626 {
2627 struct neigh_seq_state *state;
2628 void *rc;
2629
2630 if (v == SEQ_START_TOKEN) {
2631 rc = neigh_get_first(seq);
2632 goto out;
2633 }
2634
2635 state = seq->private;
2636 if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
2637 rc = neigh_get_next(seq, v, NULL);
2638 if (rc)
2639 goto out;
2640 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2641 rc = pneigh_get_first(seq);
2642 } else {
2643 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
2644 rc = pneigh_get_next(seq, v, NULL);
2645 }
2646 out:
2647 ++(*pos);
2648 return rc;
2649 }
2650 EXPORT_SYMBOL(neigh_seq_next);
2651
2652 void neigh_seq_stop(struct seq_file *seq, void *v)
2653 __releases(rcu_bh)
2654 {
2655 rcu_read_unlock_bh();
2656 }
2657 EXPORT_SYMBOL(neigh_seq_stop);
2658
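/*
 * Wiring sketch, modelled on how ARP consumes this API (names here are
 * hypothetical): a protocol supplies .start/.next/.stop from this file
 * and only implements its own ->show:
 *
 *	static void *example_seq_start(struct seq_file *seq, loff_t *pos)
 *	{
 *		return neigh_seq_start(seq, pos, &arp_tbl,
 *				       NEIGH_SEQ_SKIP_NOARP);
 *	}
 *
 *	static const struct seq_operations example_seq_ops = {
 *		.start	= example_seq_start,
 *		.next	= neigh_seq_next,
 *		.stop	= neigh_seq_stop,
 *		.show	= example_seq_show,
 *	};
 */
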
2659 /* statistics via seq_file */
2660
2661 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2662 {
2663 struct neigh_table *tbl = seq->private;
2664 int cpu;
2665
2666 if (*pos == 0)
2667 return SEQ_START_TOKEN;
2668
2669 for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2670 if (!cpu_possible(cpu))
2671 continue;
2672 *pos = cpu+1;
2673 return per_cpu_ptr(tbl->stats, cpu);
2674 }
2675 return NULL;
2676 }
2677
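/*
 * Note on the iterator encoding (added for clarity): *pos == 0 is the
 * header token and *pos == N + 1 is the slot for cpu N. That is why
 * the scan starts at *pos - 1 here but at *pos in neigh_stat_seq_next()
 * below, where the +1 from the previous step is already applied.
 */
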
2678 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2679 {
2680 struct neigh_table *tbl = seq->private;
2681 int cpu;
2682
2683 for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2684 if (!cpu_possible(cpu))
2685 continue;
2686 *pos = cpu+1;
2687 return per_cpu_ptr(tbl->stats, cpu);
2688 }
2689 return NULL;
2690 }
2691
2692 static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
2693 {
2694
2695 }
2696
2697 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2698 {
2699 struct neigh_table *tbl = seq->private;
2700 struct neigh_statistics *st = v;
2701
2702 if (v == SEQ_START_TOKEN) {
2703 seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards\n");
2704 return 0;
2705 }
2706
2707 seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
2708 "%08lx %08lx %08lx %08lx %08lx\n",
2709 atomic_read(&tbl->entries),
2710
2711 st->allocs,
2712 st->destroys,
2713 st->hash_grows,
2714
2715 st->lookups,
2716 st->hits,
2717
2718 st->res_failed,
2719
2720 st->rcv_probes_mcast,
2721 st->rcv_probes_ucast,
2722
2723 st->periodic_gc_runs,
2724 st->forced_gc_runs,
2725 st->unres_discards
2726 );
2727
2728 return 0;
2729 }
2730
2731 static const struct seq_operations neigh_stat_seq_ops = {
2732 .start = neigh_stat_seq_start,
2733 .next = neigh_stat_seq_next,
2734 .stop = neigh_stat_seq_stop,
2735 .show = neigh_stat_seq_show,
2736 };
2737
2738 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2739 {
2740 int ret = seq_open(file, &neigh_stat_seq_ops);
2741
2742 if (!ret) {
2743 struct seq_file *sf = file->private_data;
2744 sf->private = PDE_DATA(inode);
2745 }
2746 return ret;
2747 }
2748
2749 static const struct file_operations neigh_stat_seq_fops = {
2750 .owner = THIS_MODULE,
2751 .open = neigh_stat_seq_open,
2752 .read = seq_read,
2753 .llseek = seq_lseek,
2754 .release = seq_release,
2755 };
2756
2757 #endif /* CONFIG_PROC_FS */
2758
2759 static inline size_t neigh_nlmsg_size(void)
2760 {
2761 return NLMSG_ALIGN(sizeof(struct ndmsg))
2762 + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2763 + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2764 + nla_total_size(sizeof(struct nda_cacheinfo))
2765 + nla_total_size(4); /* NDA_PROBES */
2766 }
2767
2768 static void __neigh_notify(struct neighbour *n, int type, int flags)
2769 {
2770 struct net *net = dev_net(n->dev);
2771 struct sk_buff *skb;
2772 int err = -ENOBUFS;
2773
2774 skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2775 if (skb == NULL)
2776 goto errout;
2777
2778 err = neigh_fill_info(skb, n, 0, 0, type, flags);
2779 if (err < 0) {
2780 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
2781 WARN_ON(err == -EMSGSIZE);
2782 kfree_skb(skb);
2783 goto errout;
2784 }
2785 rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2786 return;
2787 errout:
2788 if (err < 0)
2789 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
2790 }
2791
2792 void neigh_app_ns(struct neighbour *n)
2793 {
2794 __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
2795 }
2796 EXPORT_SYMBOL(neigh_app_ns);
2797
2798 #ifdef CONFIG_SYSCTL
2799 static int zero;
2800 static int int_max = INT_MAX;
2801 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
2802
2803 static int proc_unres_qlen(struct ctl_table *ctl, int write,
2804 void __user *buffer, size_t *lenp, loff_t *ppos)
2805 {
2806 int size, ret;
2807 struct ctl_table tmp = *ctl;
2808
2809 tmp.extra1 = &zero;
2810 tmp.extra2 = &unres_qlen_max;
2811 tmp.data = &size;
2812
2813 size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
2814 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
2815
2816 if (write && !ret)
2817 *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
2818 return ret;
2819 }
2820
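/*
 * Worked example (assuming SKB_TRUESIZE(ETH_FRAME_LEN) is roughly 2KB
 * on a typical build): writing 101 to unres_qlen stores
 * 101 * SKB_TRUESIZE(ETH_FRAME_LEN) in unres_qlen_bytes, and reading
 * unres_qlen divides it back out, so the packet-count view round-trips
 * exactly. Writing unres_qlen_bytes directly and then reading
 * unres_qlen truncates to the nearest whole frame.
 */
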
2821 static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
2822 int family)
2823 {
2824 switch (family) {
2825 case AF_INET:
2826 return __in_dev_arp_parms_get_rcu(dev);
2827 case AF_INET6:
2828 return __in6_dev_nd_parms_get_rcu(dev);
2829 }
2830 return NULL;
2831 }
2832
2833 static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
2834 int index)
2835 {
2836 struct net_device *dev;
2837 int family = neigh_parms_family(p);
2838
2839 rcu_read_lock();
2840 for_each_netdev_rcu(net, dev) {
2841 struct neigh_parms *dst_p =
2842 neigh_get_dev_parms_rcu(dev, family);
2843
2844 if (dst_p && !test_bit(index, dst_p->data_state))
2845 dst_p->data[index] = p->data[index];
2846 }
2847 rcu_read_unlock();
2848 }
2849
2850 static void neigh_proc_update(struct ctl_table *ctl, int write)
2851 {
2852 struct net_device *dev = ctl->extra1;
2853 struct neigh_parms *p = ctl->extra2;
2854 struct net *net = neigh_parms_net(p);
2855 int index = (int *) ctl->data - p->data;
2856
2857 if (!write)
2858 return;
2859
2860 set_bit(index, p->data_state);
2861 if (!dev) /* NULL dev means this is default value */
2862 neigh_copy_dflt_parms(net, p, index);
2863 }
2864
2865 static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
2866 void __user *buffer,
2867 size_t *lenp, loff_t *ppos)
2868 {
2869 struct ctl_table tmp = *ctl;
2870 int ret;
2871
2872 tmp.extra1 = &zero;
2873 tmp.extra2 = &int_max;
2874
2875 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
2876 neigh_proc_update(ctl, write);
2877 return ret;
2878 }
2879
2880 int neigh_proc_dointvec(struct ctl_table *ctl, int write,
2881 void __user *buffer, size_t *lenp, loff_t *ppos)
2882 {
2883 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2884
2885 neigh_proc_update(ctl, write);
2886 return ret;
2887 }
2888 EXPORT_SYMBOL(neigh_proc_dointvec);
2889
2890 int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write,
2891 void __user *buffer,
2892 size_t *lenp, loff_t *ppos)
2893 {
2894 int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
2895
2896 neigh_proc_update(ctl, write);
2897 return ret;
2898 }
2899 EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
2900
2901 static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
2902 void __user *buffer,
2903 size_t *lenp, loff_t *ppos)
2904 {
2905 int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
2906
2907 neigh_proc_update(ctl, write);
2908 return ret;
2909 }
2910
2911 int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
2912 void __user *buffer,
2913 size_t *lenp, loff_t *ppos)
2914 {
2915 int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
2916
2917 neigh_proc_update(ctl, write);
2918 return ret;
2919 }
2920 EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
2921
2922 static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
2923 void __user *buffer,
2924 size_t *lenp, loff_t *ppos)
2925 {
2926 int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
2927
2928 neigh_proc_update(ctl, write);
2929 return ret;
2930 }
2931
2932 static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
2933 void __user *buffer,
2934 size_t *lenp, loff_t *ppos)
2935 {
2936 struct neigh_parms *p = ctl->extra2;
2937 int ret;
2938
2939 if (strcmp(ctl->procname, "base_reachable_time") == 0)
2940 ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
2941 else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
2942 ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
2943 else
2944 ret = -1;
2945
2946 if (write && ret == 0) {
2947 /* update reachable_time as well, otherwise, the change will
2948 * only be effective after the next time neigh_periodic_work
2949 * decides to recompute it
2950 */
2951 p->reachable_time =
2952 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
2953 }
2954 return ret;
2955 }
2956
2957 #define NEIGH_PARMS_DATA_OFFSET(index) \
2958 (&((struct neigh_parms *) 0)->data[index])
2959
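/*
 * Note (added for clarity): this is the classic offsetof construction;
 * the NULL struct neigh_parms pointer is never dereferenced, only used
 * for address arithmetic. neigh_sysctl_register() below turns the
 * offset back into a real pointer with:
 *
 *	t->neigh_vars[i].data += (long) p;
 */
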
2960 #define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
2961 [NEIGH_VAR_ ## attr] = { \
2962 .procname = name, \
2963 .data = NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
2964 .maxlen = sizeof(int), \
2965 .mode = mval, \
2966 .proc_handler = proc, \
2967 }
2968
2969 #define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
2970 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)
2971
2972 #define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
2973 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)
2974
2975 #define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
2976 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)
2977
2978 #define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(attr, name) \
2979 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies)
2980
2981 #define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
2982 NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)
2983
2984 #define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
2985 NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
2986
2987 static struct neigh_sysctl_table {
2988 struct ctl_table_header *sysctl_header;
2989 struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
2990 } neigh_sysctl_template __read_mostly = {
2991 .neigh_vars = {
2992 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
2993 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
2994 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
2995 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
2996 NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
2997 NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
2998 NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
2999 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
3000 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
3001 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
3002 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
3003 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
3004 NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
3005 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
3006 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
3007 [NEIGH_VAR_GC_INTERVAL] = {
3008 .procname = "gc_interval",
3009 .maxlen = sizeof(int),
3010 .mode = 0644,
3011 .proc_handler = proc_dointvec_jiffies,
3012 },
3013 [NEIGH_VAR_GC_THRESH1] = {
3014 .procname = "gc_thresh1",
3015 .maxlen = sizeof(int),
3016 .mode = 0644,
3017 .extra1 = &zero,
3018 .extra2 = &int_max,
3019 .proc_handler = proc_dointvec_minmax,
3020 },
3021 [NEIGH_VAR_GC_THRESH2] = {
3022 .procname = "gc_thresh2",
3023 .maxlen = sizeof(int),
3024 .mode = 0644,
3025 .extra1 = &zero,
3026 .extra2 = &int_max,
3027 .proc_handler = proc_dointvec_minmax,
3028 },
3029 [NEIGH_VAR_GC_THRESH3] = {
3030 .procname = "gc_thresh3",
3031 .maxlen = sizeof(int),
3032 .mode = 0644,
3033 .extra1 = &zero,
3034 .extra2 = &int_max,
3035 .proc_handler = proc_dointvec_minmax,
3036 },
3037 {},
3038 },
3039 };
3040
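/*
 * Resulting layout, for reference: neigh_sysctl_register() below
 * instantiates this template under paths such as (device names are
 * examples):
 *
 *	/proc/sys/net/ipv4/neigh/default/gc_thresh1
 *	/proc/sys/net/ipv4/neigh/eth0/mcast_solicit
 *	/proc/sys/net/ipv6/neigh/eth0/base_reachable_time_ms
 *
 * Per-device tables are terminated at NEIGH_VAR_GC_INTERVAL, so the
 * gc_* entries exist only under "default".
 */
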
3041 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
3042 proc_handler *handler)
3043 {
3044 int i;
3045 struct neigh_sysctl_table *t;
3046 const char *dev_name_source;
3047 char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
3048 char *p_name;
3049
3050 t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
3051 if (!t)
3052 goto err;
3053
3054 for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
3055 t->neigh_vars[i].data += (long) p;
3056 t->neigh_vars[i].extra1 = dev;
3057 t->neigh_vars[i].extra2 = p;
3058 }
3059
3060 if (dev) {
3061 dev_name_source = dev->name;
3062 /* Terminate the table early */
3063 memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
3064 sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
3065 } else {
3066 struct neigh_table *tbl = p->tbl;
3067 dev_name_source = "default";
3068 t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
3069 t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
3070 t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
3071 t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
3072 }
3073
3074 if (handler) {
3075 /* RetransTime */
3076 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
3077 /* ReachableTime */
3078 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
3079 /* RetransTime (in milliseconds) */
3080 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
3081 /* ReachableTime (in milliseconds) */
3082 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
3083 } else {
3084 /* These handlers update p->reachable_time after
3085 * base_reachable_time(_ms) is set, so that the new interval takes
3086 * effect on the next neighbour update instead of waiting for
3087 * neigh_periodic_work to recompute it (which can be several
3088 * minutes away). Any handler that replaces them should do the same.
3089 */
3090 /* ReachableTime */
3091 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
3092 neigh_proc_base_reachable_time;
3093 /* ReachableTime (in milliseconds) */
3094 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
3095 neigh_proc_base_reachable_time;
3096 }
3097
3098 /* Don't export sysctls to unprivileged users */
3099 if (neigh_parms_net(p)->user_ns != &init_user_ns)
3100 t->neigh_vars[0].procname = NULL;
3101
3102 switch (neigh_parms_family(p)) {
3103 case AF_INET:
3104 p_name = "ipv4";
3105 break;
3106 case AF_INET6:
3107 p_name = "ipv6";
3108 break;
3109 default:
3110 BUG();
3111 }
3112
3113 snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
3114 p_name, dev_name_source);
3115 t->sysctl_header =
3116 register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
3117 if (!t->sysctl_header)
3118 goto free;
3119
3120 p->sysctl_table = t;
3121 return 0;
3122
3123 free:
3124 kfree(t);
3125 err:
3126 return -ENOBUFS;
3127 }
3128 EXPORT_SYMBOL(neigh_sysctl_register);
3129
3130 void neigh_sysctl_unregister(struct neigh_parms *p)
3131 {
3132 if (p->sysctl_table) {
3133 struct neigh_sysctl_table *t = p->sysctl_table;
3134 p->sysctl_table = NULL;
3135 unregister_net_sysctl_table(t->sysctl_header);
3136 kfree(t);
3137 }
3138 }
3139 EXPORT_SYMBOL(neigh_sysctl_unregister);
3140
3141 #endif /* CONFIG_SYSCTL */
3142
3143 static int __init neigh_init(void)
3144 {
3145 rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
3146 rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
3147 rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);
3148
3149 rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
3150 NULL);
3151 rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);
3152
3153 return 0;
3154 }
3155
3156 subsys_initcall(neigh_init);
3157