netfilter: ipset: Log warning when a hash type of set gets full
include/linux/netfilter/ipset/ip_set_ahash.h

#ifndef _IP_SET_AHASH_H
#define _IP_SET_AHASH_H

#include <linux/rcupdate.h>
#include <linux/jhash.h>
#include <linux/netfilter/ipset/ip_set_timeout.h>

#define CONCAT(a, b, c)		a##b##c
#define TOKEN(a, b, c)		CONCAT(a, b, c)

#define type_pf_next		TOKEN(TYPE, PF, _elem)

/* Hashing which uses arrays to resolve clashing. The hash table is resized
 * (doubled) when searching becomes too long.
 * Internally jhash is used with the assumption that the size of the
 * stored data is a multiple of sizeof(u32). If storage supports timeout,
 * the timeout field must be the last one in the data structure - that field
 * is ignored when computing the hash key.
 *
 * Readers and resizing
 *
 * Resizing can be triggered by userspace command only, and those
 * are serialized by the nfnl mutex. During resizing the set is
 * read-locked, so the only possible concurrent operations are
 * the kernel side readers. Those must be protected by proper RCU locking.
 */
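
/* Illustrative element layout (hypothetical type, not defined here):
 * because the timeout field must come last and is excluded from the
 * hash key, an element with timeout support looks like
 *
 *	struct foo4_telem {
 *		__be32 ip;		// hashed by HKEY()
 *		unsigned long timeout;	// ignored by HKEY()
 *	};
 *
 * where only the leading data part is covered by HKEY_DATALEN below.
 */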

/* Number of elements to store in an initial array block */
#define AHASH_INIT_SIZE			4
/* Max number of elements to store in an array block */
#define AHASH_MAX_SIZE			(3*AHASH_INIT_SIZE)

/* The max number of elements can be tuned */
#ifdef IP_SET_HASH_WITH_MULTI
#define AHASH_MAX(h)			((h)->ahash_max)

static inline u8
tune_ahash_max(u8 curr, u32 multi)
{
	u32 n;

	if (multi < curr)
		return curr;

	n = curr + AHASH_INIT_SIZE;
	/* Currently, at listing, one hash bucket must fit into a message.
	 * Therefore we have a hard limit here.
	 */
	return n > curr && n <= 64 ? n : curr;
}
#define TUNE_AHASH_MAX(h, multi)	\
	((h)->ahash_max = tune_ahash_max((h)->ahash_max, multi))
#else
#define AHASH_MAX(h)			AHASH_MAX_SIZE
#define TUNE_AHASH_MAX(h, multi)
#endif
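
/* Illustration, assuming the limit starts at AHASH_MAX_SIZE (12): under
 * hash collision pressure (multi) the per-bucket limit is raised in
 * AHASH_INIT_SIZE steps, 12 -> 16 -> 20 -> ..., but never above 64,
 * because a whole bucket must still fit into one netlink message at
 * listing time.
 */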

/* A hash bucket */
struct hbucket {
	void *value;		/* the array of the values */
	u8 size;		/* size of the array */
	u8 pos;			/* position of the first free entry */
};

/* The hash table: the table size stored here in order to make resizing easy */
struct htable {
	u8 htable_bits;		/* size of hash table == 2^htable_bits */
	struct hbucket bucket[0]; /* hashtable buckets */
};

#define hbucket(h, i)		(&((h)->bucket[i]))

/* Book-keeping of the prefixes added to the set */
struct ip_set_hash_nets {
	u8 cidr;		/* the different cidr values in the set */
	u32 nets;		/* number of elements per cidr */
};

/* The generic ip_set hash structure */
struct ip_set_hash {
	struct htable *table;	/* the hash table */
	u32 maxelem;		/* max elements in the hash */
	u32 elements;		/* current element (vs timeout) */
	u32 initval;		/* random jhash init value */
	u32 timeout;		/* timeout value, if enabled */
	struct timer_list gc;	/* garbage collection when timeout enabled */
	struct type_pf_next next; /* temporary storage for uadd */
#ifdef IP_SET_HASH_WITH_MULTI
	u8 ahash_max;		/* max elements in an array block */
#endif
#ifdef IP_SET_HASH_WITH_NETMASK
	u8 netmask;		/* netmask value for subnets to store */
#endif
#ifdef IP_SET_HASH_WITH_RBTREE
	struct rb_root rbtree;
#endif
#ifdef IP_SET_HASH_WITH_NETS
	struct ip_set_hash_nets nets[0]; /* book-keeping of prefixes */
#endif
};

/* Compute htable_bits from the user input parameter hashsize */
static u8
htable_bits(u32 hashsize)
{
	/* Assume that hashsize == 2^htable_bits */
	u8 bits = fls(hashsize - 1);

	if (jhash_size(bits) != hashsize)
		/* Round up to the first 2^n value */
		bits = fls(hashsize);

	return bits;
}
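
/* Example: hashsize 1000 gives bits = fls(999) = 10, but jhash_size(10)
 * is 1024 != 1000, so the value is rounded up and a table of 2^10 = 1024
 * buckets is used. An exact power of two is taken as is.
 */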

#ifdef IP_SET_HASH_WITH_NETS

#define SET_HOST_MASK(family)	(family == AF_INET ? 32 : 128)

/* Network cidr size book-keeping when the hash stores different
 * sized networks */
static void
add_cidr(struct ip_set_hash *h, u8 cidr, u8 host_mask)
{
	u8 i;

	++h->nets[cidr-1].nets;

	pr_debug("add_cidr added %u: %u\n", cidr, h->nets[cidr-1].nets);

	if (h->nets[cidr-1].nets > 1)
		return;

	/* New cidr size */
	for (i = 0; i < host_mask && h->nets[i].cidr; i++) {
		/* Add in increasing prefix order, so larger cidr first */
		if (h->nets[i].cidr < cidr)
			swap(h->nets[i].cidr, cidr);
	}
	if (i < host_mask)
		h->nets[i].cidr = cidr;
}
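
/* Illustration: adding the first /8, /24 and /16 entries in that order
 * leaves h->nets[].cidr as {24, 16, 8, 0, ...}: the array is kept sorted
 * with the most specific (largest) cidr first, which is the order the
 * test functions probe the stored network sizes in.
 */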

static void
del_cidr(struct ip_set_hash *h, u8 cidr, u8 host_mask)
{
	u8 i;

	--h->nets[cidr-1].nets;

	pr_debug("del_cidr deleted %u: %u\n", cidr, h->nets[cidr-1].nets);

	if (h->nets[cidr-1].nets != 0)
		return;

	/* All entries with this cidr size deleted, so clean up h->nets[] */
	for (i = 0; i < host_mask - 1 && h->nets[i].cidr; i++) {
		if (h->nets[i].cidr == cidr)
			h->nets[i].cidr = cidr = h->nets[i+1].cidr;
	}
	h->nets[i - 1].cidr = 0;
}
#endif

/* Destroy the hashtable part of the set */
static void
ahash_destroy(struct htable *t)
{
	struct hbucket *n;
	u32 i;

	for (i = 0; i < jhash_size(t->htable_bits); i++) {
		n = hbucket(t, i);
		if (n->size)
			/* FIXME: use slab cache */
			kfree(n->value);
	}

	ip_set_free(t);
}

/* Calculate the actual memory size of the set data */
static size_t
ahash_memsize(const struct ip_set_hash *h, size_t dsize, u8 host_mask)
{
	u32 i;
	struct htable *t = h->table;
	size_t memsize = sizeof(*h)
			 + sizeof(*t)
#ifdef IP_SET_HASH_WITH_NETS
			 + sizeof(struct ip_set_hash_nets) * host_mask
#endif
			 + jhash_size(t->htable_bits) * sizeof(struct hbucket);

	for (i = 0; i < jhash_size(t->htable_bits); i++)
		memsize += t->bucket[i].size * dsize;

	return memsize;
}
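
/* Note: the loop above counts the allocated slots (bucket size), not
 * only the used ones (bucket pos), so the reported size includes the
 * spare entries of each array block.
 */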

/* Flush a hash type of set: destroy all elements */
static void
ip_set_hash_flush(struct ip_set *set)
{
	struct ip_set_hash *h = set->data;
	struct htable *t = h->table;
	struct hbucket *n;
	u32 i;

	for (i = 0; i < jhash_size(t->htable_bits); i++) {
		n = hbucket(t, i);
		if (n->size) {
			n->size = n->pos = 0;
			/* FIXME: use slab cache */
			kfree(n->value);
		}
	}
#ifdef IP_SET_HASH_WITH_NETS
	memset(h->nets, 0, sizeof(struct ip_set_hash_nets)
			   * SET_HOST_MASK(set->family));
#endif
	h->elements = 0;
}

/* Destroy a hash type of set */
static void
ip_set_hash_destroy(struct ip_set *set)
{
	struct ip_set_hash *h = set->data;

	if (with_timeout(h->timeout))
		del_timer_sync(&h->gc);

	ahash_destroy(h->table);
#ifdef IP_SET_HASH_WITH_RBTREE
	rbtree_destroy(&h->rbtree);
#endif
	kfree(h);

	set->data = NULL;
}

#endif /* _IP_SET_AHASH_H */
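
/* Everything below is intentionally outside the include guard: the hash
 * type implementations re-include this part once per TYPE/PF pair, with
 * the #undef block at the end of the file cleaning up in between.
 */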

#ifndef HKEY_DATALEN
#define HKEY_DATALEN	sizeof(struct type_pf_elem)
#endif

#define HKEY(data, initval, htable_bits)			\
(jhash2((u32 *)(data), HKEY_DATALEN/sizeof(u32), initval)	\
	& jhash_mask(htable_bits))
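
/* Illustration: HKEY() hashes the element as an array of u32 words and
 * masks the result down to the table size; with htable_bits == 10 the
 * key is a bucket index in [0, 1023]. Since HKEY_DATALEN defaults to
 * the timeout-less element size, a trailing timeout field never
 * influences the bucket choice.
 */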

#define CONCAT(a, b, c)		a##b##c
#define TOKEN(a, b, c)		CONCAT(a, b, c)

/* Type/family dependent function prototypes */

#define type_pf_data_equal	TOKEN(TYPE, PF, _data_equal)
#define type_pf_data_isnull	TOKEN(TYPE, PF, _data_isnull)
#define type_pf_data_copy	TOKEN(TYPE, PF, _data_copy)
#define type_pf_data_zero_out	TOKEN(TYPE, PF, _data_zero_out)
#define type_pf_data_netmask	TOKEN(TYPE, PF, _data_netmask)
#define type_pf_data_list	TOKEN(TYPE, PF, _data_list)
#define type_pf_data_tlist	TOKEN(TYPE, PF, _data_tlist)
#define type_pf_data_next	TOKEN(TYPE, PF, _data_next)

#define type_pf_elem		TOKEN(TYPE, PF, _elem)
#define type_pf_telem		TOKEN(TYPE, PF, _telem)
#define type_pf_data_timeout	TOKEN(TYPE, PF, _data_timeout)
#define type_pf_data_expired	TOKEN(TYPE, PF, _data_expired)
#define type_pf_data_timeout_set	TOKEN(TYPE, PF, _data_timeout_set)

#define type_pf_elem_add	TOKEN(TYPE, PF, _elem_add)
#define type_pf_add		TOKEN(TYPE, PF, _add)
#define type_pf_del		TOKEN(TYPE, PF, _del)
#define type_pf_test_cidrs	TOKEN(TYPE, PF, _test_cidrs)
#define type_pf_test		TOKEN(TYPE, PF, _test)

#define type_pf_elem_tadd	TOKEN(TYPE, PF, _elem_tadd)
#define type_pf_del_telem	TOKEN(TYPE, PF, _ahash_del_telem)
#define type_pf_expire		TOKEN(TYPE, PF, _expire)
#define type_pf_tadd		TOKEN(TYPE, PF, _tadd)
#define type_pf_tdel		TOKEN(TYPE, PF, _tdel)
#define type_pf_ttest_cidrs	TOKEN(TYPE, PF, _ahash_ttest_cidrs)
#define type_pf_ttest		TOKEN(TYPE, PF, _ahash_ttest)

#define type_pf_resize		TOKEN(TYPE, PF, _resize)
#define type_pf_tresize		TOKEN(TYPE, PF, _tresize)
#define type_pf_flush		ip_set_hash_flush
#define type_pf_destroy		ip_set_hash_destroy
#define type_pf_head		TOKEN(TYPE, PF, _head)
#define type_pf_list		TOKEN(TYPE, PF, _list)
#define type_pf_tlist		TOKEN(TYPE, PF, _tlist)
#define type_pf_same_set	TOKEN(TYPE, PF, _same_set)
#define type_pf_kadt		TOKEN(TYPE, PF, _kadt)
#define type_pf_uadt		TOKEN(TYPE, PF, _uadt)
#define type_pf_gc		TOKEN(TYPE, PF, _gc)
#define type_pf_gc_init		TOKEN(TYPE, PF, _gc_init)
#define type_pf_variant		TOKEN(TYPE, PF, _variant)
#define type_pf_tvariant	TOKEN(TYPE, PF, _tvariant)
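
/* For example, a type module compiled with
 *
 *	#define TYPE	hash_ip
 *	#define PF	4
 *	#include <linux/netfilter/ipset/ip_set_ahash.h>
 *
 * gets type_pf_add expanded to hash_ip4_add, type_pf_variant to
 * hash_ip4_variant and so on: one copy of the generic code is stamped
 * out per type/family pair. (Illustrative - the real modules define
 * further macros, e.g. HOST_MASK, before including this header.)
 */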

/* Flavour without timeout */

/* Get the ith element from the array block n */
#define ahash_data(n, i) \
	((struct type_pf_elem *)((n)->value) + (i))

/* Add an element to the hash table when resizing the set:
 * we spare the maintenance of the internal counters. */
static int
type_pf_elem_add(struct hbucket *n, const struct type_pf_elem *value,
		 u8 ahash_max)
{
	if (n->pos >= n->size) {
		void *tmp;

		if (n->size >= ahash_max)
			/* Trigger rehashing */
			return -EAGAIN;

		tmp = kzalloc((n->size + AHASH_INIT_SIZE)
			      * sizeof(struct type_pf_elem),
			      GFP_ATOMIC);
		if (!tmp)
			return -ENOMEM;
		if (n->size) {
			memcpy(tmp, n->value,
			       sizeof(struct type_pf_elem) * n->size);
			kfree(n->value);
		}
		n->value = tmp;
		n->size += AHASH_INIT_SIZE;
	}
	type_pf_data_copy(ahash_data(n, n->pos++), value);
	return 0;
}

/* Resize a hash: create a new hash table with doubled hashsize and
 * insert the elements into it. Repeat until we succeed or fail due
 * to memory pressure. */
static int
type_pf_resize(struct ip_set *set, bool retried)
{
	struct ip_set_hash *h = set->data;
	struct htable *t, *orig = h->table;
	u8 htable_bits = orig->htable_bits;
	const struct type_pf_elem *data;
	struct hbucket *n, *m;
	u32 i, j;
	int ret;

retry:
	ret = 0;
	htable_bits++;
	pr_debug("attempt to resize set %s from %u to %u, t %p\n",
		 set->name, orig->htable_bits, htable_bits, orig);
	if (!htable_bits) {
		/* In case we have plenty of memory :-) */
		pr_warning("Cannot increase the hashsize of set %s further\n",
			   set->name);
		return -IPSET_ERR_HASH_FULL;
	}
	t = ip_set_alloc(sizeof(*t)
			 + jhash_size(htable_bits) * sizeof(struct hbucket));
	if (!t)
		return -ENOMEM;
	t->htable_bits = htable_bits;

	read_lock_bh(&set->lock);
	for (i = 0; i < jhash_size(orig->htable_bits); i++) {
		n = hbucket(orig, i);
		for (j = 0; j < n->pos; j++) {
			data = ahash_data(n, j);
			m = hbucket(t, HKEY(data, h->initval, htable_bits));
			ret = type_pf_elem_add(m, data, AHASH_MAX(h));
			if (ret < 0) {
				read_unlock_bh(&set->lock);
				ahash_destroy(t);
				if (ret == -EAGAIN)
					goto retry;
				return ret;
			}
		}
	}

	rcu_assign_pointer(h->table, t);
	read_unlock_bh(&set->lock);

	/* Give time to other readers of the set */
	synchronize_rcu_bh();

	pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name,
		 orig->htable_bits, orig, t->htable_bits, t);
	ahash_destroy(orig);

	return 0;
}
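
/* The function above follows the locking rules from the top of the file:
 * the rehash runs with the set read-locked (keeping the serialized
 * writers out), the new table is published with rcu_assign_pointer(),
 * and the old one is freed only after synchronize_rcu_bh() has let the
 * kernel-side RCU readers drain.
 */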

static inline void
type_pf_data_next(struct ip_set_hash *h, const struct type_pf_elem *d);

/* Add an element to a hash and update the internal counters on success,
 * otherwise report the proper error code. */
static int
type_pf_add(struct ip_set *set, void *value, u32 timeout, u32 flags)
{
	struct ip_set_hash *h = set->data;
	struct htable *t;
	const struct type_pf_elem *d = value;
	struct hbucket *n;
	int i, ret = 0;
	u32 key, multi = 0;

	if (h->elements >= h->maxelem) {
		if (net_ratelimit())
			pr_warning("Set %s is full, maxelem %u reached\n",
				   set->name, h->maxelem);
		return -IPSET_ERR_HASH_FULL;
	}

	rcu_read_lock_bh();
	t = rcu_dereference_bh(h->table);
	key = HKEY(value, h->initval, t->htable_bits);
	n = hbucket(t, key);
	for (i = 0; i < n->pos; i++)
		if (type_pf_data_equal(ahash_data(n, i), d, &multi)) {
			ret = -IPSET_ERR_EXIST;
			goto out;
		}
	TUNE_AHASH_MAX(h, multi);
	ret = type_pf_elem_add(n, value, AHASH_MAX(h));
	if (ret != 0) {
		if (ret == -EAGAIN)
			type_pf_data_next(h, d);
		goto out;
	}

#ifdef IP_SET_HASH_WITH_NETS
	add_cidr(h, d->cidr, HOST_MASK);
#endif
	h->elements++;
out:
	rcu_read_unlock_bh();
	return ret;
}
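
/* On -EAGAIN the caller is expected to resize the set and retry the
 * operation; type_pf_data_next() saves the element in h->next (the
 * "temporary storage for uadd" member) so a retried add of a range can
 * resume from the element that did not fit.
 */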

/* Delete an element from the hash: swap it with the last element
 * and free up space if possible.
 */
static int
type_pf_del(struct ip_set *set, void *value, u32 timeout, u32 flags)
{
	struct ip_set_hash *h = set->data;
	struct htable *t = h->table;
	const struct type_pf_elem *d = value;
	struct hbucket *n;
	int i;
	struct type_pf_elem *data;
	u32 key, multi = 0;

	key = HKEY(value, h->initval, t->htable_bits);
	n = hbucket(t, key);
	for (i = 0; i < n->pos; i++) {
		data = ahash_data(n, i);
		if (!type_pf_data_equal(data, d, &multi))
			continue;
		if (i != n->pos - 1)
			/* Not last one */
			type_pf_data_copy(data, ahash_data(n, n->pos - 1));

		n->pos--;
		h->elements--;
#ifdef IP_SET_HASH_WITH_NETS
		del_cidr(h, d->cidr, HOST_MASK);
#endif
		if (n->pos + AHASH_INIT_SIZE < n->size) {
			void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
					    * sizeof(struct type_pf_elem),
					    GFP_ATOMIC);
			if (!tmp)
				return 0;
			n->size -= AHASH_INIT_SIZE;
			memcpy(tmp, n->value,
			       n->size * sizeof(struct type_pf_elem));
			kfree(n->value);
			n->value = tmp;
		}
		return 0;
	}

	return -IPSET_ERR_EXIST;
}

#ifdef IP_SET_HASH_WITH_NETS

/* Special test function which takes into account the different network
 * sizes added to the set */
static int
type_pf_test_cidrs(struct ip_set *set, struct type_pf_elem *d, u32 timeout)
{
	struct ip_set_hash *h = set->data;
	struct htable *t = h->table;
	struct hbucket *n;
	const struct type_pf_elem *data;
	int i, j = 0;
	u32 key, multi = 0;
	u8 host_mask = SET_HOST_MASK(set->family);

	pr_debug("test by nets\n");
	for (; j < host_mask && h->nets[j].cidr && !multi; j++) {
		type_pf_data_netmask(d, h->nets[j].cidr);
		key = HKEY(d, h->initval, t->htable_bits);
		n = hbucket(t, key);
		for (i = 0; i < n->pos; i++) {
			data = ahash_data(n, i);
			if (type_pf_data_equal(data, d, &multi))
				return 1;
		}
	}
	return 0;
}
#endif

/* Test whether the element is added to the set */
static int
type_pf_test(struct ip_set *set, void *value, u32 timeout, u32 flags)
{
	struct ip_set_hash *h = set->data;
	struct htable *t = h->table;
	struct type_pf_elem *d = value;
	struct hbucket *n;
	const struct type_pf_elem *data;
	int i;
	u32 key, multi = 0;

#ifdef IP_SET_HASH_WITH_NETS
	/* If we test an IP address and not a network address,
	 * try all possible network sizes */
	if (d->cidr == SET_HOST_MASK(set->family))
		return type_pf_test_cidrs(set, d, timeout);
#endif

	key = HKEY(d, h->initval, t->htable_bits);
	n = hbucket(t, key);
	for (i = 0; i < n->pos; i++) {
		data = ahash_data(n, i);
		if (type_pf_data_equal(data, d, &multi))
			return 1;
	}
	return 0;
}

/* Reply a HEADER request: fill out the header part of the set */
static int
type_pf_head(struct ip_set *set, struct sk_buff *skb)
{
	const struct ip_set_hash *h = set->data;
	struct nlattr *nested;
	size_t memsize;

	read_lock_bh(&set->lock);
	memsize = ahash_memsize(h, with_timeout(h->timeout)
					? sizeof(struct type_pf_telem)
					: sizeof(struct type_pf_elem),
				set->family == AF_INET ? 32 : 128);
	read_unlock_bh(&set->lock);

	nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
	if (!nested)
		goto nla_put_failure;
	NLA_PUT_NET32(skb, IPSET_ATTR_HASHSIZE,
		      htonl(jhash_size(h->table->htable_bits)));
	NLA_PUT_NET32(skb, IPSET_ATTR_MAXELEM, htonl(h->maxelem));
#ifdef IP_SET_HASH_WITH_NETMASK
	if (h->netmask != HOST_MASK)
		NLA_PUT_U8(skb, IPSET_ATTR_NETMASK, h->netmask);
#endif
	NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1));
	NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize));
	if (with_timeout(h->timeout))
		NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(h->timeout));
	ipset_nest_end(skb, nested);

	return 0;
nla_put_failure:
	return -EMSGSIZE;
}

/* Reply a LIST/SAVE request: dump the elements of the specified set */
static int
type_pf_list(const struct ip_set *set,
	     struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct ip_set_hash *h = set->data;
	const struct htable *t = h->table;
	struct nlattr *atd, *nested;
	const struct hbucket *n;
	const struct type_pf_elem *data;
	u32 first = cb->args[2];
	/* We assume that one hash bucket fits into one page */
	void *incomplete;
	int i;

	atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
	if (!atd)
		return -EMSGSIZE;
	pr_debug("list hash set %s\n", set->name);
	for (; cb->args[2] < jhash_size(t->htable_bits); cb->args[2]++) {
		incomplete = skb_tail_pointer(skb);
		n = hbucket(t, cb->args[2]);
		pr_debug("cb->args[2]: %lu, t %p n %p\n", cb->args[2], t, n);
		for (i = 0; i < n->pos; i++) {
			data = ahash_data(n, i);
			pr_debug("list hash %lu hbucket %p i %u, data %p\n",
				 cb->args[2], n, i, data);
			nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
			if (!nested) {
				if (cb->args[2] == first) {
					nla_nest_cancel(skb, atd);
					return -EMSGSIZE;
				} else
					goto nla_put_failure;
			}
			if (type_pf_data_list(skb, data))
				goto nla_put_failure;
			ipset_nest_end(skb, nested);
		}
	}
	ipset_nest_end(skb, atd);
	/* Set listing finished */
	cb->args[2] = 0;

	return 0;

nla_put_failure:
	nlmsg_trim(skb, incomplete);
	ipset_nest_end(skb, atd);
	if (unlikely(first == cb->args[2])) {
		pr_warning("Can't list set %s: one bucket does not fit into "
			   "a message. Please report it!\n", set->name);
		cb->args[2] = 0;
		return -EMSGSIZE;
	}
	return 0;
}
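
/* cb->args[2] carries the bucket index across netlink dump messages: on
 * a full skb the dump is trimmed back to the start of the current bucket
 * and resumes there in the next message. This is also why a single
 * bucket that does not fit into one message cannot be listed at all.
 */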

static int
type_pf_kadt(struct ip_set *set, const struct sk_buff *skb,
	     const struct xt_action_param *par,
	     enum ipset_adt adt, const struct ip_set_adt_opt *opt);
static int
type_pf_uadt(struct ip_set *set, struct nlattr *tb[],
	     enum ipset_adt adt, u32 *lineno, u32 flags, bool retried);

static const struct ip_set_type_variant type_pf_variant = {
	.kadt	= type_pf_kadt,
	.uadt	= type_pf_uadt,
	.adt	= {
		[IPSET_ADD] = type_pf_add,
		[IPSET_DEL] = type_pf_del,
		[IPSET_TEST] = type_pf_test,
	},
	.destroy = type_pf_destroy,
	.flush	= type_pf_flush,
	.head	= type_pf_head,
	.list	= type_pf_list,
	.resize	= type_pf_resize,
	.same_set = type_pf_same_set,
};

/* Flavour with timeout support */

#define ahash_tdata(n, i) \
	(struct type_pf_elem *)((struct type_pf_telem *)((n)->value) + (i))

static inline u32
type_pf_data_timeout(const struct type_pf_elem *data)
{
	const struct type_pf_telem *tdata =
		(const struct type_pf_telem *) data;

	return tdata->timeout;
}

static inline bool
type_pf_data_expired(const struct type_pf_elem *data)
{
	const struct type_pf_telem *tdata =
		(const struct type_pf_telem *) data;

	return ip_set_timeout_expired(tdata->timeout);
}

static inline void
type_pf_data_timeout_set(struct type_pf_elem *data, u32 timeout)
{
	struct type_pf_telem *tdata = (struct type_pf_telem *) data;

	tdata->timeout = ip_set_timeout_set(timeout);
}

static int
type_pf_elem_tadd(struct hbucket *n, const struct type_pf_elem *value,
		  u8 ahash_max, u32 timeout)
{
	struct type_pf_elem *data;

	if (n->pos >= n->size) {
		void *tmp;

		if (n->size >= ahash_max)
			/* Trigger rehashing */
			return -EAGAIN;

		tmp = kzalloc((n->size + AHASH_INIT_SIZE)
			      * sizeof(struct type_pf_telem),
			      GFP_ATOMIC);
		if (!tmp)
			return -ENOMEM;
		if (n->size) {
			memcpy(tmp, n->value,
			       sizeof(struct type_pf_telem) * n->size);
			kfree(n->value);
		}
		n->value = tmp;
		n->size += AHASH_INIT_SIZE;
	}
	data = ahash_tdata(n, n->pos++);
	type_pf_data_copy(data, value);
	type_pf_data_timeout_set(data, timeout);
	return 0;
}

/* Delete expired elements from the hashtable */
static void
type_pf_expire(struct ip_set_hash *h)
{
	struct htable *t = h->table;
	struct hbucket *n;
	struct type_pf_elem *data;
	u32 i;
	int j;

	for (i = 0; i < jhash_size(t->htable_bits); i++) {
		n = hbucket(t, i);
		for (j = 0; j < n->pos; j++) {
			data = ahash_tdata(n, j);
			if (type_pf_data_expired(data)) {
				pr_debug("expired %u/%u\n", i, j);
#ifdef IP_SET_HASH_WITH_NETS
				del_cidr(h, data->cidr, HOST_MASK);
#endif
				if (j != n->pos - 1)
					/* Not last one */
					type_pf_data_copy(data,
						ahash_tdata(n, n->pos - 1));
				n->pos--;
				h->elements--;
			}
		}
		if (n->pos + AHASH_INIT_SIZE < n->size) {
			void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
					    * sizeof(struct type_pf_telem),
					    GFP_ATOMIC);
			if (!tmp)
				/* Still try to delete expired elements */
				continue;
			n->size -= AHASH_INIT_SIZE;
			memcpy(tmp, n->value,
			       n->size * sizeof(struct type_pf_telem));
			kfree(n->value);
			n->value = tmp;
		}
	}
}

static int
type_pf_tresize(struct ip_set *set, bool retried)
{
	struct ip_set_hash *h = set->data;
	struct htable *t, *orig = h->table;
	u8 htable_bits = orig->htable_bits;
	const struct type_pf_elem *data;
	struct hbucket *n, *m;
	u32 i, j;
	int ret;

	/* Try to cleanup once */
	if (!retried) {
		i = h->elements;
		write_lock_bh(&set->lock);
		type_pf_expire(set->data);
		write_unlock_bh(&set->lock);
		if (h->elements < i)
			return 0;
	}

retry:
	ret = 0;
	htable_bits++;
	if (!htable_bits) {
		/* In case we have plenty of memory :-) */
		pr_warning("Cannot increase the hashsize of set %s further\n",
			   set->name);
		return -IPSET_ERR_HASH_FULL;
	}
	t = ip_set_alloc(sizeof(*t)
			 + jhash_size(htable_bits) * sizeof(struct hbucket));
	if (!t)
		return -ENOMEM;
	t->htable_bits = htable_bits;

	read_lock_bh(&set->lock);
	for (i = 0; i < jhash_size(orig->htable_bits); i++) {
		n = hbucket(orig, i);
		for (j = 0; j < n->pos; j++) {
			data = ahash_tdata(n, j);
			m = hbucket(t, HKEY(data, h->initval, htable_bits));
			ret = type_pf_elem_tadd(m, data, AHASH_MAX(h),
						type_pf_data_timeout(data));
			if (ret < 0) {
				read_unlock_bh(&set->lock);
				ahash_destroy(t);
				if (ret == -EAGAIN)
					goto retry;
				return ret;
			}
		}
	}

	rcu_assign_pointer(h->table, t);
	read_unlock_bh(&set->lock);

	/* Give time to other readers of the set */
	synchronize_rcu_bh();

	ahash_destroy(orig);

	return 0;
}

static int
type_pf_tadd(struct ip_set *set, void *value, u32 timeout, u32 flags)
{
	struct ip_set_hash *h = set->data;
	struct htable *t = h->table;
	const struct type_pf_elem *d = value;
	struct hbucket *n;
	struct type_pf_elem *data;
	int ret = 0, i, j = AHASH_MAX(h) + 1;
	bool flag_exist = flags & IPSET_FLAG_EXIST;
	u32 key, multi = 0;

	if (h->elements >= h->maxelem)
		/* FIXME: when set is full, we slow down here */
		type_pf_expire(h);
	if (h->elements >= h->maxelem) {
		if (net_ratelimit())
			pr_warning("Set %s is full, maxelem %u reached\n",
				   set->name, h->maxelem);
		return -IPSET_ERR_HASH_FULL;
	}

	rcu_read_lock_bh();
	t = rcu_dereference_bh(h->table);
	key = HKEY(d, h->initval, t->htable_bits);
	n = hbucket(t, key);
	for (i = 0; i < n->pos; i++) {
		data = ahash_tdata(n, i);
		if (type_pf_data_equal(data, d, &multi)) {
			if (type_pf_data_expired(data) || flag_exist)
				j = i;
			else {
				ret = -IPSET_ERR_EXIST;
				goto out;
			}
		} else if (j == AHASH_MAX(h) + 1 &&
			   type_pf_data_expired(data))
			j = i;
	}
	if (j != AHASH_MAX(h) + 1) {
		data = ahash_tdata(n, j);
#ifdef IP_SET_HASH_WITH_NETS
		del_cidr(h, data->cidr, HOST_MASK);
		add_cidr(h, d->cidr, HOST_MASK);
#endif
		type_pf_data_copy(data, d);
		type_pf_data_timeout_set(data, timeout);
		goto out;
	}
	TUNE_AHASH_MAX(h, multi);
	ret = type_pf_elem_tadd(n, d, AHASH_MAX(h), timeout);
	if (ret != 0) {
		if (ret == -EAGAIN)
			type_pf_data_next(h, d);
		goto out;
	}

#ifdef IP_SET_HASH_WITH_NETS
	add_cidr(h, d->cidr, HOST_MASK);
#endif
	h->elements++;
out:
	rcu_read_unlock_bh();
	return ret;
}
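
/* In the function above j starts at the sentinel AHASH_MAX(h) + 1, an
 * index no bucket slot can have. It is lowered to the slot of the
 * matching element (when it is expired or IPSET_FLAG_EXIST is set), or
 * to the first expired slot seen; in either case the new element reuses
 * that slot in place instead of growing the bucket.
 */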

static int
type_pf_tdel(struct ip_set *set, void *value, u32 timeout, u32 flags)
{
	struct ip_set_hash *h = set->data;
	struct htable *t = h->table;
	const struct type_pf_elem *d = value;
	struct hbucket *n;
	int i;
	struct type_pf_elem *data;
	u32 key, multi = 0;

	key = HKEY(value, h->initval, t->htable_bits);
	n = hbucket(t, key);
	for (i = 0; i < n->pos; i++) {
		data = ahash_tdata(n, i);
		if (!type_pf_data_equal(data, d, &multi))
			continue;
		if (type_pf_data_expired(data))
			return -IPSET_ERR_EXIST;
		if (i != n->pos - 1)
			/* Not last one */
			type_pf_data_copy(data, ahash_tdata(n, n->pos - 1));

		n->pos--;
		h->elements--;
#ifdef IP_SET_HASH_WITH_NETS
		del_cidr(h, d->cidr, HOST_MASK);
#endif
		if (n->pos + AHASH_INIT_SIZE < n->size) {
			void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
					    * sizeof(struct type_pf_telem),
					    GFP_ATOMIC);
			if (!tmp)
				return 0;
			n->size -= AHASH_INIT_SIZE;
			memcpy(tmp, n->value,
			       n->size * sizeof(struct type_pf_telem));
			kfree(n->value);
			n->value = tmp;
		}
		return 0;
	}

	return -IPSET_ERR_EXIST;
}

#ifdef IP_SET_HASH_WITH_NETS
static int
type_pf_ttest_cidrs(struct ip_set *set, struct type_pf_elem *d, u32 timeout)
{
	struct ip_set_hash *h = set->data;
	struct htable *t = h->table;
	struct type_pf_elem *data;
	struct hbucket *n;
	int i, j = 0;
	u32 key, multi = 0;
	u8 host_mask = SET_HOST_MASK(set->family);

	for (; j < host_mask && h->nets[j].cidr && !multi; j++) {
		type_pf_data_netmask(d, h->nets[j].cidr);
		key = HKEY(d, h->initval, t->htable_bits);
		n = hbucket(t, key);
		for (i = 0; i < n->pos; i++) {
			data = ahash_tdata(n, i);
			if (type_pf_data_equal(data, d, &multi))
				return !type_pf_data_expired(data);
		}
	}
	return 0;
}
#endif

static int
type_pf_ttest(struct ip_set *set, void *value, u32 timeout, u32 flags)
{
	struct ip_set_hash *h = set->data;
	struct htable *t = h->table;
	struct type_pf_elem *data, *d = value;
	struct hbucket *n;
	int i;
	u32 key, multi = 0;

#ifdef IP_SET_HASH_WITH_NETS
	if (d->cidr == SET_HOST_MASK(set->family))
		return type_pf_ttest_cidrs(set, d, timeout);
#endif
	key = HKEY(d, h->initval, t->htable_bits);
	n = hbucket(t, key);
	for (i = 0; i < n->pos; i++) {
		data = ahash_tdata(n, i);
		if (type_pf_data_equal(data, d, &multi))
			return !type_pf_data_expired(data);
	}
	return 0;
}

static int
type_pf_tlist(const struct ip_set *set,
	      struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct ip_set_hash *h = set->data;
	const struct htable *t = h->table;
	struct nlattr *atd, *nested;
	const struct hbucket *n;
	const struct type_pf_elem *data;
	u32 first = cb->args[2];
	/* We assume that one hash bucket fits into one page */
	void *incomplete;
	int i;

	atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
	if (!atd)
		return -EMSGSIZE;
	for (; cb->args[2] < jhash_size(t->htable_bits); cb->args[2]++) {
		incomplete = skb_tail_pointer(skb);
		n = hbucket(t, cb->args[2]);
		for (i = 0; i < n->pos; i++) {
			data = ahash_tdata(n, i);
			pr_debug("list %p %u\n", n, i);
			if (type_pf_data_expired(data))
				continue;
			pr_debug("do list %p %u\n", n, i);
			nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
			if (!nested) {
				if (cb->args[2] == first) {
					nla_nest_cancel(skb, atd);
					return -EMSGSIZE;
				} else
					goto nla_put_failure;
			}
			if (type_pf_data_tlist(skb, data))
				goto nla_put_failure;
			ipset_nest_end(skb, nested);
		}
	}
	ipset_nest_end(skb, atd);
	/* Set listing finished */
	cb->args[2] = 0;

	return 0;

nla_put_failure:
	nlmsg_trim(skb, incomplete);
	ipset_nest_end(skb, atd);
	if (unlikely(first == cb->args[2])) {
		pr_warning("Can't list set %s: one bucket does not fit into "
			   "a message. Please report it!\n", set->name);
		cb->args[2] = 0;
		return -EMSGSIZE;
	}
	return 0;
}

static const struct ip_set_type_variant type_pf_tvariant = {
	.kadt	= type_pf_kadt,
	.uadt	= type_pf_uadt,
	.adt	= {
		[IPSET_ADD] = type_pf_tadd,
		[IPSET_DEL] = type_pf_tdel,
		[IPSET_TEST] = type_pf_ttest,
	},
	.destroy = type_pf_destroy,
	.flush	= type_pf_flush,
	.head	= type_pf_head,
	.list	= type_pf_tlist,
	.resize	= type_pf_tresize,
	.same_set = type_pf_same_set,
};

static void
type_pf_gc(unsigned long ul_set)
{
	struct ip_set *set = (struct ip_set *) ul_set;
	struct ip_set_hash *h = set->data;

	pr_debug("called\n");
	write_lock_bh(&set->lock);
	type_pf_expire(h);
	write_unlock_bh(&set->lock);

	h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ;
	add_timer(&h->gc);
}

static void
type_pf_gc_init(struct ip_set *set)
{
	struct ip_set_hash *h = set->data;

	init_timer(&h->gc);
	h->gc.data = (unsigned long) set;
	h->gc.function = type_pf_gc;
	h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ;
	add_timer(&h->gc);
	pr_debug("gc initialized, run in every %u\n",
		 IPSET_GC_PERIOD(h->timeout));
}
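
/* The garbage collector is a self-rearming timer: each run sweeps the
 * expired entries under the set's write lock, then queues itself again
 * IPSET_GC_PERIOD(h->timeout) seconds later.
 */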

#undef HKEY_DATALEN
#undef HKEY
#undef type_pf_data_equal
#undef type_pf_data_isnull
#undef type_pf_data_copy
#undef type_pf_data_zero_out
#undef type_pf_data_list
#undef type_pf_data_tlist

#undef type_pf_elem
#undef type_pf_telem
#undef type_pf_data_timeout
#undef type_pf_data_expired
#undef type_pf_data_netmask
#undef type_pf_data_timeout_set

#undef type_pf_elem_add
#undef type_pf_add
#undef type_pf_del
#undef type_pf_test_cidrs
#undef type_pf_test

#undef type_pf_elem_tadd
#undef type_pf_expire
#undef type_pf_tadd
#undef type_pf_tdel
#undef type_pf_ttest_cidrs
#undef type_pf_ttest

#undef type_pf_resize
#undef type_pf_tresize
#undef type_pf_flush
#undef type_pf_destroy
#undef type_pf_head
#undef type_pf_list
#undef type_pf_tlist
#undef type_pf_same_set
#undef type_pf_kadt
#undef type_pf_uadt
#undef type_pf_gc
#undef type_pf_gc_init
#undef type_pf_variant
#undef type_pf_tvariant