/* include/linux/netfilter/ipset/ip_set_ahash.h */
#ifndef _IP_SET_AHASH_H
#define _IP_SET_AHASH_H

#include <linux/rcupdate.h>
#include <linux/jhash.h>
#include <linux/netfilter/ipset/ip_set_timeout.h>

#define CONCAT(a, b, c)         a##b##c
#define TOKEN(a, b, c)          CONCAT(a, b, c)

#define type_pf_next            TOKEN(TYPE, PF, _elem)

/* Hashing which uses arrays to resolve clashing. The hash table is resized
 * (doubled) when searching becomes too long.
 * Internally jhash is used with the assumption that the size of the
 * stored data is a multiple of sizeof(u32). If storage supports timeout,
 * the timeout field must be the last one in the data structure - that field
 * is ignored when computing the hash key.
 *
 * Readers and resizing
 *
 * Resizing can be triggered by userspace command only, and those
 * are serialized by the nfnl mutex. During resizing the set is
 * read-locked, so the only possible concurrent operations are
 * the kernel side readers. Those must be protected by proper RCU locking.
 */
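
/* A sketch of how a set type is expected to instantiate this template
 * (illustrative, mirroring the hash:* type modules; the values below
 * are examples, not definitions made by this header):
 *
 *      #define TYPE            hash_ip
 *      #define PF              4
 *      #define HOST_MASK       32
 *      #include <linux/netfilter/ipset/ip_set_ahash.h>
 *
 * TOKEN(TYPE, PF, _add) then pastes into hash_ip4_add, and the same
 * expansion produces the rest of the hash_ip4_* functions below.
 */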

/* Number of elements to store in an initial array block */
#define AHASH_INIT_SIZE         4
/* Max number of elements to store in an array block */
#define AHASH_MAX_SIZE          (3*AHASH_INIT_SIZE)

/* Max number of elements can be tuned */
#ifdef IP_SET_HASH_WITH_MULTI
#define AHASH_MAX(h)            ((h)->ahash_max)

static inline u8
tune_ahash_max(u8 curr, u32 multi)
{
        u32 n;

        if (multi < curr)
                return curr;

        n = curr + AHASH_INIT_SIZE;
        /* Currently, at listing, one hash bucket must fit into a message.
         * Therefore we have a hard limit here.
         */
        return n > curr && n <= 64 ? n : curr;
}
#define TUNE_AHASH_MAX(h, multi) \
        ((h)->ahash_max = tune_ahash_max((h)->ahash_max, multi))
#else
#define AHASH_MAX(h)            AHASH_MAX_SIZE
#define TUNE_AHASH_MAX(h, multi)
#endif
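
/* A sketch of the growth this implies with the defaults above: a
 * bucket's array is (re)allocated in AHASH_INIT_SIZE steps, growing
 * 4 -> 8 -> 12 entries, until it reaches AHASH_MAX; once a full
 * bucket is hit at that limit, the add functions below return -EAGAIN
 * and the whole hash table is resized instead.
 */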

/* A hash bucket */
struct hbucket {
        void *value;            /* the array of the values */
        u8 size;                /* size of the array */
        u8 pos;                 /* position of the first free entry */
};

/* The hash table: the table size is stored here to make resizing easy */
struct htable {
        u8 htable_bits;         /* size of hash table == 2^htable_bits */
        struct hbucket bucket[0]; /* hashtable buckets */
};

#define hbucket(h, i)           (&((h)->bucket[i]))

/* Book-keeping of the prefixes added to the set */
struct ip_set_hash_nets {
        u8 cidr;                /* the different cidr values in the set */
        u32 nets;               /* number of elements per cidr */
};

/* The generic ip_set hash structure */
struct ip_set_hash {
        struct htable *table;   /* the hash table */
        u32 maxelem;            /* max elements in the hash */
        u32 elements;           /* current number of elements (vs timeout) */
        u32 initval;            /* random jhash init value */
        u32 timeout;            /* timeout value, if enabled */
        struct timer_list gc;   /* garbage collection when timeout enabled */
        struct type_pf_next next; /* temporary storage for uadd */
#ifdef IP_SET_HASH_WITH_MULTI
        u8 ahash_max;           /* max elements in an array block */
#endif
#ifdef IP_SET_HASH_WITH_NETMASK
        u8 netmask;             /* netmask value for subnets to store */
#endif
#ifdef IP_SET_HASH_WITH_RBTREE
        struct rb_root rbtree;
#endif
#ifdef IP_SET_HASH_WITH_NETS
        struct ip_set_hash_nets nets[0]; /* book-keeping of prefixes */
#endif
};

/* Compute htable_bits from the user input parameter hashsize */
static u8
htable_bits(u32 hashsize)
{
        /* Assume that hashsize == 2^htable_bits */
        u8 bits = fls(hashsize - 1);

        if (jhash_size(bits) != hashsize)
                /* Round up to the first 2^n value */
                bits = fls(hashsize);

        return bits;
}
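
/* For example, htable_bits(1024) == 10, as 1024 is already 2^10, while
 * htable_bits(1000) also returns 10: jhash_size(fls(999)) == 1024 does
 * not equal 1000, so the size is rounded up to the next power of two
 * rather than down.
 */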

#ifdef IP_SET_HASH_WITH_NETS

#define SET_HOST_MASK(family)   (family == AF_INET ? 32 : 128)

/* Network cidr size bookkeeping when the hash stores different-sized
 * networks */
static void
add_cidr(struct ip_set_hash *h, u8 cidr, u8 host_mask)
{
        u8 i;

        ++h->nets[cidr-1].nets;

        pr_debug("add_cidr added %u: %u\n", cidr, h->nets[cidr-1].nets);

        if (h->nets[cidr-1].nets > 1)
                return;

        /* New cidr size */
        for (i = 0; i < host_mask && h->nets[i].cidr; i++) {
                /* Add in increasing prefix order, so larger cidr first */
                if (h->nets[i].cidr < cidr)
                        swap(h->nets[i].cidr, cidr);
        }
        if (i < host_mask)
                h->nets[i].cidr = cidr;
}
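
/* Example of the ordering maintained above: after adding /8, /16 and
 * /24 entries (in any order), nets[0].cidr == 24, nets[1].cidr == 16
 * and nets[2].cidr == 8, so lookups walk from the most specific prefix
 * to the least specific one.
 */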

static void
del_cidr(struct ip_set_hash *h, u8 cidr, u8 host_mask)
{
        u8 i;

        --h->nets[cidr-1].nets;

        pr_debug("del_cidr deleted %u: %u\n", cidr, h->nets[cidr-1].nets);

        if (h->nets[cidr-1].nets != 0)
                return;

        /* All entries with this cidr size deleted, so cleanup h->nets[] */
        for (i = 0; i < host_mask - 1 && h->nets[i].cidr; i++) {
                if (h->nets[i].cidr == cidr)
                        h->nets[i].cidr = cidr = h->nets[i+1].cidr;
        }
        h->nets[i - 1].cidr = 0;
}
#endif

/* Destroy the hashtable part of the set */
static void
ahash_destroy(struct htable *t)
{
        struct hbucket *n;
        u32 i;

        for (i = 0; i < jhash_size(t->htable_bits); i++) {
                n = hbucket(t, i);
                if (n->size)
                        /* FIXME: use slab cache */
                        kfree(n->value);
        }

        ip_set_free(t);
}

/* Calculate the actual memory size of the set data */
static size_t
ahash_memsize(const struct ip_set_hash *h, size_t dsize, u8 host_mask)
{
        u32 i;
        struct htable *t = h->table;
        size_t memsize = sizeof(*h)
                         + sizeof(*t)
#ifdef IP_SET_HASH_WITH_NETS
                         + sizeof(struct ip_set_hash_nets) * host_mask
#endif
                         + jhash_size(t->htable_bits) * sizeof(struct hbucket);

        for (i = 0; i < jhash_size(t->htable_bits); i++)
                memsize += t->bucket[i].size * dsize;

        return memsize;
}
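
/* Worked example, assuming htable_bits == 10 and an empty set: the
 * result is sizeof(*h) + sizeof(*t) + 1024 * sizeof(struct hbucket),
 * plus the nets[] bookkeeping for the _WITH_NETS types; each stored
 * element then adds dsize bytes on top.
 */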

/* Flush a hash type of set: destroy all elements */
static void
ip_set_hash_flush(struct ip_set *set)
{
        struct ip_set_hash *h = set->data;
        struct htable *t = h->table;
        struct hbucket *n;
        u32 i;

        for (i = 0; i < jhash_size(t->htable_bits); i++) {
                n = hbucket(t, i);
                if (n->size) {
                        n->size = n->pos = 0;
                        /* FIXME: use slab cache */
                        kfree(n->value);
                }
        }
#ifdef IP_SET_HASH_WITH_NETS
        memset(h->nets, 0, sizeof(struct ip_set_hash_nets)
                           * SET_HOST_MASK(set->family));
#endif
        h->elements = 0;
}

/* Destroy a hash type of set */
static void
ip_set_hash_destroy(struct ip_set *set)
{
        struct ip_set_hash *h = set->data;

        if (with_timeout(h->timeout))
                del_timer_sync(&h->gc);

        ahash_destroy(h->table);
#ifdef IP_SET_HASH_WITH_RBTREE
        rbtree_destroy(&h->rbtree);
#endif
        kfree(h);

        set->data = NULL;
}

#endif /* _IP_SET_AHASH_H */

#ifndef HKEY_DATALEN
#define HKEY_DATALEN    sizeof(struct type_pf_elem)
#endif

#define HKEY(data, initval, htable_bits)                        \
(jhash2((u32 *)(data), HKEY_DATALEN/sizeof(u32), initval)       \
        & jhash_mask(htable_bits))
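
/* A minimal sketch of what HKEY consumes, assuming a hypothetical
 * one-word element (the names below are illustrative, not defined by
 * this header):
 *
 *      struct foo4_elem {
 *              __be32 ip;
 *      };
 *
 * With the default HKEY_DATALEN == sizeof(struct foo4_elem), the macro
 * hashes HKEY_DATALEN/sizeof(u32) == 1 word with jhash2() and masks the
 * result down to the table size with jhash_mask(htable_bits). Because
 * the default length covers only the plain element, the trailing
 * timeout field of the _telem variant stays out of the key, as the
 * comment at the top of the file requires.
 */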

#define CONCAT(a, b, c)         a##b##c
#define TOKEN(a, b, c)          CONCAT(a, b, c)

/* Type/family dependent function prototypes */

#define type_pf_data_equal      TOKEN(TYPE, PF, _data_equal)
#define type_pf_data_isnull     TOKEN(TYPE, PF, _data_isnull)
#define type_pf_data_copy       TOKEN(TYPE, PF, _data_copy)
#define type_pf_data_zero_out   TOKEN(TYPE, PF, _data_zero_out)
#define type_pf_data_netmask    TOKEN(TYPE, PF, _data_netmask)
#define type_pf_data_list       TOKEN(TYPE, PF, _data_list)
#define type_pf_data_tlist      TOKEN(TYPE, PF, _data_tlist)
#define type_pf_data_next       TOKEN(TYPE, PF, _data_next)

#define type_pf_elem            TOKEN(TYPE, PF, _elem)
#define type_pf_telem           TOKEN(TYPE, PF, _telem)
#define type_pf_data_timeout    TOKEN(TYPE, PF, _data_timeout)
#define type_pf_data_expired    TOKEN(TYPE, PF, _data_expired)
#define type_pf_data_timeout_set TOKEN(TYPE, PF, _data_timeout_set)

#define type_pf_elem_add        TOKEN(TYPE, PF, _elem_add)
#define type_pf_add             TOKEN(TYPE, PF, _add)
#define type_pf_del             TOKEN(TYPE, PF, _del)
#define type_pf_test_cidrs      TOKEN(TYPE, PF, _test_cidrs)
#define type_pf_test            TOKEN(TYPE, PF, _test)

#define type_pf_elem_tadd       TOKEN(TYPE, PF, _elem_tadd)
#define type_pf_del_telem       TOKEN(TYPE, PF, _ahash_del_telem)
#define type_pf_expire          TOKEN(TYPE, PF, _expire)
#define type_pf_tadd            TOKEN(TYPE, PF, _tadd)
#define type_pf_tdel            TOKEN(TYPE, PF, _tdel)
#define type_pf_ttest_cidrs     TOKEN(TYPE, PF, _ahash_ttest_cidrs)
#define type_pf_ttest           TOKEN(TYPE, PF, _ahash_ttest)

#define type_pf_resize          TOKEN(TYPE, PF, _resize)
#define type_pf_tresize         TOKEN(TYPE, PF, _tresize)
#define type_pf_flush           ip_set_hash_flush
#define type_pf_destroy         ip_set_hash_destroy
#define type_pf_head            TOKEN(TYPE, PF, _head)
#define type_pf_list            TOKEN(TYPE, PF, _list)
#define type_pf_tlist           TOKEN(TYPE, PF, _tlist)
#define type_pf_same_set        TOKEN(TYPE, PF, _same_set)
#define type_pf_kadt            TOKEN(TYPE, PF, _kadt)
#define type_pf_uadt            TOKEN(TYPE, PF, _uadt)
#define type_pf_gc              TOKEN(TYPE, PF, _gc)
#define type_pf_gc_init         TOKEN(TYPE, PF, _gc_init)
#define type_pf_variant         TOKEN(TYPE, PF, _variant)
#define type_pf_tvariant        TOKEN(TYPE, PF, _tvariant)
/* Flavour without timeout */

/* Get the ith element from the array block n */
#define ahash_data(n, i)        \
        ((struct type_pf_elem *)((n)->value) + (i))

/* Add an element to the hash table when resizing the set:
 * we skip maintaining the internal counters. */
static int
type_pf_elem_add(struct hbucket *n, const struct type_pf_elem *value,
                 u8 ahash_max)
{
        if (n->pos >= n->size) {
                void *tmp;

                if (n->size >= ahash_max)
                        /* Trigger rehashing */
                        return -EAGAIN;

                tmp = kzalloc((n->size + AHASH_INIT_SIZE)
                              * sizeof(struct type_pf_elem),
                              GFP_ATOMIC);
                if (!tmp)
                        return -ENOMEM;
                if (n->size) {
                        memcpy(tmp, n->value,
                               sizeof(struct type_pf_elem) * n->size);
                        kfree(n->value);
                }
                n->value = tmp;
                n->size += AHASH_INIT_SIZE;
        }
        type_pf_data_copy(ahash_data(n, n->pos++), value);
        return 0;
}

/* Resize a hash: create a new hash table, doubling the hash size,
 * and insert the elements into it. Repeat until we succeed or
 * fail due to memory pressure. */
static int
type_pf_resize(struct ip_set *set, bool retried)
{
        struct ip_set_hash *h = set->data;
        struct htable *t, *orig = h->table;
        u8 htable_bits = orig->htable_bits;
        const struct type_pf_elem *data;
        struct hbucket *n, *m;
        u32 i, j;
        int ret;

retry:
        ret = 0;
        htable_bits++;
        pr_debug("attempt to resize set %s from %u to %u, t %p\n",
                 set->name, orig->htable_bits, htable_bits, orig);
        if (!htable_bits)
                /* In case we have plenty of memory :-) */
                return -IPSET_ERR_HASH_FULL;
        t = ip_set_alloc(sizeof(*t)
                         + jhash_size(htable_bits) * sizeof(struct hbucket));
        if (!t)
                return -ENOMEM;
        t->htable_bits = htable_bits;

        read_lock_bh(&set->lock);
        for (i = 0; i < jhash_size(orig->htable_bits); i++) {
                n = hbucket(orig, i);
                for (j = 0; j < n->pos; j++) {
                        data = ahash_data(n, j);
                        m = hbucket(t, HKEY(data, h->initval, htable_bits));
                        ret = type_pf_elem_add(m, data, AHASH_MAX(h));
                        if (ret < 0) {
                                read_unlock_bh(&set->lock);
                                ahash_destroy(t);
                                if (ret == -EAGAIN)
                                        goto retry;
                                return ret;
                        }
                }
        }

        rcu_assign_pointer(h->table, t);
        read_unlock_bh(&set->lock);

        /* Give time to other readers of the set */
        synchronize_rcu_bh();

        pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name,
                 orig->htable_bits, orig, t->htable_bits, t);
        ahash_destroy(orig);

        return 0;
}

static inline void
type_pf_data_next(struct ip_set_hash *h, const struct type_pf_elem *d);

/* Add an element to a hash and update the internal counters on success,
 * otherwise report the proper error code. */
static int
type_pf_add(struct ip_set *set, void *value, u32 timeout, u32 flags)
{
        struct ip_set_hash *h = set->data;
        struct htable *t;
        const struct type_pf_elem *d = value;
        struct hbucket *n;
        int i, ret = 0;
        u32 key, multi = 0;

        if (h->elements >= h->maxelem)
                return -IPSET_ERR_HASH_FULL;

        rcu_read_lock_bh();
        t = rcu_dereference_bh(h->table);
        key = HKEY(value, h->initval, t->htable_bits);
        n = hbucket(t, key);
        for (i = 0; i < n->pos; i++)
                if (type_pf_data_equal(ahash_data(n, i), d, &multi)) {
                        ret = -IPSET_ERR_EXIST;
                        goto out;
                }
        TUNE_AHASH_MAX(h, multi);
        ret = type_pf_elem_add(n, value, AHASH_MAX(h));
        if (ret != 0) {
                if (ret == -EAGAIN)
                        type_pf_data_next(h, d);
                goto out;
        }

#ifdef IP_SET_HASH_WITH_NETS
        add_cidr(h, d->cidr, HOST_MASK);
#endif
        h->elements++;
out:
        rcu_read_unlock_bh();
        return ret;
}

/* Delete an element from the hash: swap it with the last element
 * and free up space if possible.
 */
static int
type_pf_del(struct ip_set *set, void *value, u32 timeout, u32 flags)
{
        struct ip_set_hash *h = set->data;
        struct htable *t = h->table;
        const struct type_pf_elem *d = value;
        struct hbucket *n;
        int i;
        struct type_pf_elem *data;
        u32 key, multi = 0;

        key = HKEY(value, h->initval, t->htable_bits);
        n = hbucket(t, key);
        for (i = 0; i < n->pos; i++) {
                data = ahash_data(n, i);
                if (!type_pf_data_equal(data, d, &multi))
                        continue;
                if (i != n->pos - 1)
                        /* Not last one */
                        type_pf_data_copy(data, ahash_data(n, n->pos - 1));

                n->pos--;
                h->elements--;
#ifdef IP_SET_HASH_WITH_NETS
                del_cidr(h, d->cidr, HOST_MASK);
#endif
                if (n->pos + AHASH_INIT_SIZE < n->size) {
                        void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
                                            * sizeof(struct type_pf_elem),
                                            GFP_ATOMIC);
                        if (!tmp)
                                return 0;
                        n->size -= AHASH_INIT_SIZE;
                        memcpy(tmp, n->value,
                               n->size * sizeof(struct type_pf_elem));
                        kfree(n->value);
                        n->value = tmp;
                }
                return 0;
        }

        return -IPSET_ERR_EXIST;
}

#ifdef IP_SET_HASH_WITH_NETS

/* Special test function which takes into account the different network
 * sizes added to the set */
static int
type_pf_test_cidrs(struct ip_set *set, struct type_pf_elem *d, u32 timeout)
{
        struct ip_set_hash *h = set->data;
        struct htable *t = h->table;
        struct hbucket *n;
        const struct type_pf_elem *data;
        int i, j = 0;
        u32 key, multi = 0;
        u8 host_mask = SET_HOST_MASK(set->family);

        pr_debug("test by nets\n");
        for (; j < host_mask && h->nets[j].cidr && !multi; j++) {
                type_pf_data_netmask(d, h->nets[j].cidr);
                key = HKEY(d, h->initval, t->htable_bits);
                n = hbucket(t, key);
                for (i = 0; i < n->pos; i++) {
                        data = ahash_data(n, i);
                        if (type_pf_data_equal(data, d, &multi))
                                return 1;
                }
        }
        return 0;
}
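
/* Example of the walk above, assuming the set holds 192.168.0.0/24:
 * nets[] records cidr 24, so testing the address 192.168.0.5 masks the
 * element down to 192.168.0.0 via type_pf_data_netmask(d, 24) and hits
 * the stored network in a single probe, instead of hashing the address
 * under every possible prefix length.
 */
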
#endif

/* Test whether the element is added to the set */
static int
type_pf_test(struct ip_set *set, void *value, u32 timeout, u32 flags)
{
        struct ip_set_hash *h = set->data;
        struct htable *t = h->table;
        struct type_pf_elem *d = value;
        struct hbucket *n;
        const struct type_pf_elem *data;
        int i;
        u32 key, multi = 0;

#ifdef IP_SET_HASH_WITH_NETS
        /* If we test an IP address and not a network address,
         * try all possible network sizes */
        if (d->cidr == SET_HOST_MASK(set->family))
                return type_pf_test_cidrs(set, d, timeout);
#endif

        key = HKEY(d, h->initval, t->htable_bits);
        n = hbucket(t, key);
        for (i = 0; i < n->pos; i++) {
                data = ahash_data(n, i);
                if (type_pf_data_equal(data, d, &multi))
                        return 1;
        }
        return 0;
}

/* Reply a HEADER request: fill out the header part of the set */
static int
type_pf_head(struct ip_set *set, struct sk_buff *skb)
{
        const struct ip_set_hash *h = set->data;
        struct nlattr *nested;
        size_t memsize;

        read_lock_bh(&set->lock);
        memsize = ahash_memsize(h, with_timeout(h->timeout)
                                        ? sizeof(struct type_pf_telem)
                                        : sizeof(struct type_pf_elem),
                                set->family == AF_INET ? 32 : 128);
        read_unlock_bh(&set->lock);

        nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
        if (!nested)
                goto nla_put_failure;
        NLA_PUT_NET32(skb, IPSET_ATTR_HASHSIZE,
                      htonl(jhash_size(h->table->htable_bits)));
        NLA_PUT_NET32(skb, IPSET_ATTR_MAXELEM, htonl(h->maxelem));
#ifdef IP_SET_HASH_WITH_NETMASK
        if (h->netmask != HOST_MASK)
                NLA_PUT_U8(skb, IPSET_ATTR_NETMASK, h->netmask);
#endif
        NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1));
        NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize));
        if (with_timeout(h->timeout))
                NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(h->timeout));
        ipset_nest_end(skb, nested);

        return 0;
nla_put_failure:
        return -EMSGSIZE;
}

/* Reply a LIST/SAVE request: dump the elements of the specified set */
static int
type_pf_list(const struct ip_set *set,
             struct sk_buff *skb, struct netlink_callback *cb)
{
        const struct ip_set_hash *h = set->data;
        const struct htable *t = h->table;
        struct nlattr *atd, *nested;
        const struct hbucket *n;
        const struct type_pf_elem *data;
        u32 first = cb->args[2];
        /* We assume that one hash bucket fits into one page */
        void *incomplete;
        int i;

        atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
        if (!atd)
                return -EMSGSIZE;
        pr_debug("list hash set %s\n", set->name);
        for (; cb->args[2] < jhash_size(t->htable_bits); cb->args[2]++) {
                incomplete = skb_tail_pointer(skb);
                n = hbucket(t, cb->args[2]);
                pr_debug("cb->args[2]: %lu, t %p n %p\n", cb->args[2], t, n);
                for (i = 0; i < n->pos; i++) {
                        data = ahash_data(n, i);
                        pr_debug("list hash %lu hbucket %p i %u, data %p\n",
                                 cb->args[2], n, i, data);
                        nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
                        if (!nested) {
                                if (cb->args[2] == first) {
                                        nla_nest_cancel(skb, atd);
                                        return -EMSGSIZE;
                                } else
                                        goto nla_put_failure;
                        }
                        if (type_pf_data_list(skb, data))
                                goto nla_put_failure;
                        ipset_nest_end(skb, nested);
                }
        }
        ipset_nest_end(skb, atd);
        /* Set listing finished */
        cb->args[2] = 0;

        return 0;

nla_put_failure:
        nlmsg_trim(skb, incomplete);
        ipset_nest_end(skb, atd);
        if (unlikely(first == cb->args[2])) {
                pr_warning("Can't list set %s: one bucket does not fit into "
                           "a message. Please report it!\n", set->name);
                cb->args[2] = 0;
                return -EMSGSIZE;
        }
        return 0;
}

static int
type_pf_kadt(struct ip_set *set, const struct sk_buff *skb,
             const struct xt_action_param *par,
             enum ipset_adt adt, const struct ip_set_adt_opt *opt);
static int
type_pf_uadt(struct ip_set *set, struct nlattr *tb[],
             enum ipset_adt adt, u32 *lineno, u32 flags, bool retried);

static const struct ip_set_type_variant type_pf_variant = {
        .kadt   = type_pf_kadt,
        .uadt   = type_pf_uadt,
        .adt    = {
                [IPSET_ADD] = type_pf_add,
                [IPSET_DEL] = type_pf_del,
                [IPSET_TEST] = type_pf_test,
        },
        .destroy = type_pf_destroy,
        .flush  = type_pf_flush,
        .head   = type_pf_head,
        .list   = type_pf_list,
        .resize = type_pf_resize,
        .same_set = type_pf_same_set,
};

/* Flavour with timeout support */

#define ahash_tdata(n, i) \
        (struct type_pf_elem *)((struct type_pf_telem *)((n)->value) + (i))

static inline u32
type_pf_data_timeout(const struct type_pf_elem *data)
{
        const struct type_pf_telem *tdata =
                (const struct type_pf_telem *) data;

        return tdata->timeout;
}

static inline bool
type_pf_data_expired(const struct type_pf_elem *data)
{
        const struct type_pf_telem *tdata =
                (const struct type_pf_telem *) data;

        return ip_set_timeout_expired(tdata->timeout);
}

static inline void
type_pf_data_timeout_set(struct type_pf_elem *data, u32 timeout)
{
        struct type_pf_telem *tdata = (struct type_pf_telem *) data;

        tdata->timeout = ip_set_timeout_set(timeout);
}
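
/* The casts above assume that a timeout-capable element embeds the
 * plain element as its leading member, with the timeout as the final
 * field - a hypothetical layout (names illustrative, not defined by
 * this header):
 *
 *      struct foo4_telem {
 *              __be32 ip;              the struct foo4_elem part
 *              unsigned long timeout;  managed via ip_set_timeout_*()
 *      };
 */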

static int
type_pf_elem_tadd(struct hbucket *n, const struct type_pf_elem *value,
                  u8 ahash_max, u32 timeout)
{
        struct type_pf_elem *data;

        if (n->pos >= n->size) {
                void *tmp;

                if (n->size >= ahash_max)
                        /* Trigger rehashing */
                        return -EAGAIN;

                tmp = kzalloc((n->size + AHASH_INIT_SIZE)
                              * sizeof(struct type_pf_telem),
                              GFP_ATOMIC);
                if (!tmp)
                        return -ENOMEM;
                if (n->size) {
                        memcpy(tmp, n->value,
                               sizeof(struct type_pf_telem) * n->size);
                        kfree(n->value);
                }
                n->value = tmp;
                n->size += AHASH_INIT_SIZE;
        }
        data = ahash_tdata(n, n->pos++);
        type_pf_data_copy(data, value);
        type_pf_data_timeout_set(data, timeout);
        return 0;
}

/* Delete expired elements from the hashtable */
static void
type_pf_expire(struct ip_set_hash *h)
{
        struct htable *t = h->table;
        struct hbucket *n;
        struct type_pf_elem *data;
        u32 i;
        int j;

        for (i = 0; i < jhash_size(t->htable_bits); i++) {
                n = hbucket(t, i);
                for (j = 0; j < n->pos; j++) {
                        data = ahash_tdata(n, j);
                        if (type_pf_data_expired(data)) {
                                pr_debug("expired %u/%u\n", i, j);
#ifdef IP_SET_HASH_WITH_NETS
                                del_cidr(h, data->cidr, HOST_MASK);
#endif
                                if (j != n->pos - 1)
                                        /* Not last one */
                                        type_pf_data_copy(data,
                                                ahash_tdata(n, n->pos - 1));
                                n->pos--;
                                h->elements--;
                        }
                }
                if (n->pos + AHASH_INIT_SIZE < n->size) {
                        void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
                                            * sizeof(struct type_pf_telem),
                                            GFP_ATOMIC);
                        if (!tmp)
                                /* Still try to delete expired elements */
                                continue;
                        n->size -= AHASH_INIT_SIZE;
                        memcpy(tmp, n->value,
                               n->size * sizeof(struct type_pf_telem));
                        kfree(n->value);
                        n->value = tmp;
                }
        }
}

static int
type_pf_tresize(struct ip_set *set, bool retried)
{
        struct ip_set_hash *h = set->data;
        struct htable *t, *orig = h->table;
        u8 htable_bits = orig->htable_bits;
        const struct type_pf_elem *data;
        struct hbucket *n, *m;
        u32 i, j;
        int ret;

        /* Try to cleanup once */
        if (!retried) {
                i = h->elements;
                write_lock_bh(&set->lock);
                type_pf_expire(set->data);
                write_unlock_bh(&set->lock);
                if (h->elements < i)
                        return 0;
        }

retry:
        ret = 0;
        htable_bits++;
        if (!htable_bits)
                /* In case we have plenty of memory :-) */
                return -IPSET_ERR_HASH_FULL;
        t = ip_set_alloc(sizeof(*t)
                         + jhash_size(htable_bits) * sizeof(struct hbucket));
        if (!t)
                return -ENOMEM;
        t->htable_bits = htable_bits;

        read_lock_bh(&set->lock);
        for (i = 0; i < jhash_size(orig->htable_bits); i++) {
                n = hbucket(orig, i);
                for (j = 0; j < n->pos; j++) {
                        data = ahash_tdata(n, j);
                        m = hbucket(t, HKEY(data, h->initval, htable_bits));
                        ret = type_pf_elem_tadd(m, data, AHASH_MAX(h),
                                                type_pf_data_timeout(data));
                        if (ret < 0) {
                                read_unlock_bh(&set->lock);
                                ahash_destroy(t);
                                if (ret == -EAGAIN)
                                        goto retry;
                                return ret;
                        }
                }
        }

        rcu_assign_pointer(h->table, t);
        read_unlock_bh(&set->lock);

        /* Give time to other readers of the set */
        synchronize_rcu_bh();

        ahash_destroy(orig);

        return 0;
}

static int
type_pf_tadd(struct ip_set *set, void *value, u32 timeout, u32 flags)
{
        struct ip_set_hash *h = set->data;
        struct htable *t = h->table;
        const struct type_pf_elem *d = value;
        struct hbucket *n;
        struct type_pf_elem *data;
        int ret = 0, i, j = AHASH_MAX(h) + 1;
        bool flag_exist = flags & IPSET_FLAG_EXIST;
        u32 key, multi = 0;

        if (h->elements >= h->maxelem)
                /* FIXME: when set is full, we slow down here */
                type_pf_expire(h);
        if (h->elements >= h->maxelem)
                return -IPSET_ERR_HASH_FULL;

        rcu_read_lock_bh();
        t = rcu_dereference_bh(h->table);
        key = HKEY(d, h->initval, t->htable_bits);
        n = hbucket(t, key);
        for (i = 0; i < n->pos; i++) {
                data = ahash_tdata(n, i);
                if (type_pf_data_equal(data, d, &multi)) {
                        if (type_pf_data_expired(data) || flag_exist)
                                j = i;
                        else {
                                ret = -IPSET_ERR_EXIST;
                                goto out;
                        }
                } else if (j == AHASH_MAX(h) + 1 &&
                           type_pf_data_expired(data))
                        j = i;
        }
        if (j != AHASH_MAX(h) + 1) {
                data = ahash_tdata(n, j);
#ifdef IP_SET_HASH_WITH_NETS
                del_cidr(h, data->cidr, HOST_MASK);
                add_cidr(h, d->cidr, HOST_MASK);
#endif
                type_pf_data_copy(data, d);
                type_pf_data_timeout_set(data, timeout);
                goto out;
        }
        TUNE_AHASH_MAX(h, multi);
        ret = type_pf_elem_tadd(n, d, AHASH_MAX(h), timeout);
        if (ret != 0) {
                if (ret == -EAGAIN)
                        type_pf_data_next(h, d);
                goto out;
        }

#ifdef IP_SET_HASH_WITH_NETS
        add_cidr(h, d->cidr, HOST_MASK);
#endif
        h->elements++;
out:
        rcu_read_unlock_bh();
        return ret;
}

static int
type_pf_tdel(struct ip_set *set, void *value, u32 timeout, u32 flags)
{
        struct ip_set_hash *h = set->data;
        struct htable *t = h->table;
        const struct type_pf_elem *d = value;
        struct hbucket *n;
        int i;
        struct type_pf_elem *data;
        u32 key, multi = 0;

        key = HKEY(value, h->initval, t->htable_bits);
        n = hbucket(t, key);
        for (i = 0; i < n->pos; i++) {
                data = ahash_tdata(n, i);
                if (!type_pf_data_equal(data, d, &multi))
                        continue;
                if (type_pf_data_expired(data))
                        return -IPSET_ERR_EXIST;
                if (i != n->pos - 1)
                        /* Not last one */
                        type_pf_data_copy(data, ahash_tdata(n, n->pos - 1));

                n->pos--;
                h->elements--;
#ifdef IP_SET_HASH_WITH_NETS
                del_cidr(h, d->cidr, HOST_MASK);
#endif
                if (n->pos + AHASH_INIT_SIZE < n->size) {
                        void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
                                            * sizeof(struct type_pf_telem),
                                            GFP_ATOMIC);
                        if (!tmp)
                                return 0;
                        n->size -= AHASH_INIT_SIZE;
                        memcpy(tmp, n->value,
                               n->size * sizeof(struct type_pf_telem));
                        kfree(n->value);
                        n->value = tmp;
                }
                return 0;
        }

        return -IPSET_ERR_EXIST;
}

#ifdef IP_SET_HASH_WITH_NETS
static int
type_pf_ttest_cidrs(struct ip_set *set, struct type_pf_elem *d, u32 timeout)
{
        struct ip_set_hash *h = set->data;
        struct htable *t = h->table;
        struct type_pf_elem *data;
        struct hbucket *n;
        int i, j = 0;
        u32 key, multi = 0;
        u8 host_mask = SET_HOST_MASK(set->family);

        for (; j < host_mask && h->nets[j].cidr && !multi; j++) {
                type_pf_data_netmask(d, h->nets[j].cidr);
                key = HKEY(d, h->initval, t->htable_bits);
                n = hbucket(t, key);
                for (i = 0; i < n->pos; i++) {
                        data = ahash_tdata(n, i);
                        if (type_pf_data_equal(data, d, &multi))
                                return !type_pf_data_expired(data);
                }
        }
        return 0;
}
#endif

static int
type_pf_ttest(struct ip_set *set, void *value, u32 timeout, u32 flags)
{
        struct ip_set_hash *h = set->data;
        struct htable *t = h->table;
        struct type_pf_elem *data, *d = value;
        struct hbucket *n;
        int i;
        u32 key, multi = 0;

#ifdef IP_SET_HASH_WITH_NETS
        if (d->cidr == SET_HOST_MASK(set->family))
                return type_pf_ttest_cidrs(set, d, timeout);
#endif
        key = HKEY(d, h->initval, t->htable_bits);
        n = hbucket(t, key);
        for (i = 0; i < n->pos; i++) {
                data = ahash_tdata(n, i);
                if (type_pf_data_equal(data, d, &multi))
                        return !type_pf_data_expired(data);
        }
        return 0;
}

static int
type_pf_tlist(const struct ip_set *set,
              struct sk_buff *skb, struct netlink_callback *cb)
{
        const struct ip_set_hash *h = set->data;
        const struct htable *t = h->table;
        struct nlattr *atd, *nested;
        const struct hbucket *n;
        const struct type_pf_elem *data;
        u32 first = cb->args[2];
        /* We assume that one hash bucket fits into one page */
        void *incomplete;
        int i;

        atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
        if (!atd)
                return -EMSGSIZE;
        for (; cb->args[2] < jhash_size(t->htable_bits); cb->args[2]++) {
                incomplete = skb_tail_pointer(skb);
                n = hbucket(t, cb->args[2]);
                for (i = 0; i < n->pos; i++) {
                        data = ahash_tdata(n, i);
                        pr_debug("list %p %u\n", n, i);
                        if (type_pf_data_expired(data))
                                continue;
                        pr_debug("do list %p %u\n", n, i);
                        nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
                        if (!nested) {
                                if (cb->args[2] == first) {
                                        nla_nest_cancel(skb, atd);
                                        return -EMSGSIZE;
                                } else
                                        goto nla_put_failure;
                        }
                        if (type_pf_data_tlist(skb, data))
                                goto nla_put_failure;
                        ipset_nest_end(skb, nested);
                }
        }
        ipset_nest_end(skb, atd);
        /* Set listing finished */
        cb->args[2] = 0;

        return 0;

nla_put_failure:
        nlmsg_trim(skb, incomplete);
        ipset_nest_end(skb, atd);
        if (unlikely(first == cb->args[2])) {
                pr_warning("Can't list set %s: one bucket does not fit into "
                           "a message. Please report it!\n", set->name);
                cb->args[2] = 0;
                return -EMSGSIZE;
        }
        return 0;
}

static const struct ip_set_type_variant type_pf_tvariant = {
        .kadt   = type_pf_kadt,
        .uadt   = type_pf_uadt,
        .adt    = {
                [IPSET_ADD] = type_pf_tadd,
                [IPSET_DEL] = type_pf_tdel,
                [IPSET_TEST] = type_pf_ttest,
        },
        .destroy = type_pf_destroy,
        .flush  = type_pf_flush,
        .head   = type_pf_head,
        .list   = type_pf_tlist,
        .resize = type_pf_tresize,
        .same_set = type_pf_same_set,
};

static void
type_pf_gc(unsigned long ul_set)
{
        struct ip_set *set = (struct ip_set *) ul_set;
        struct ip_set_hash *h = set->data;

        pr_debug("called\n");
        write_lock_bh(&set->lock);
        type_pf_expire(h);
        write_unlock_bh(&set->lock);

        h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ;
        add_timer(&h->gc);
}

static void
type_pf_gc_init(struct ip_set *set)
{
        struct ip_set_hash *h = set->data;

        init_timer(&h->gc);
        h->gc.data = (unsigned long) set;
        h->gc.function = type_pf_gc;
        h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ;
        add_timer(&h->gc);
        pr_debug("gc initialized, run in every %u\n",
                 IPSET_GC_PERIOD(h->timeout));
}

#undef HKEY_DATALEN
#undef HKEY
#undef type_pf_data_equal
#undef type_pf_data_isnull
#undef type_pf_data_copy
#undef type_pf_data_zero_out
#undef type_pf_data_list
#undef type_pf_data_tlist

#undef type_pf_elem
#undef type_pf_telem
#undef type_pf_data_timeout
#undef type_pf_data_expired
#undef type_pf_data_netmask
#undef type_pf_data_timeout_set

#undef type_pf_elem_add
#undef type_pf_add
#undef type_pf_del
#undef type_pf_test_cidrs
#undef type_pf_test

#undef type_pf_elem_tadd
#undef type_pf_expire
#undef type_pf_tadd
#undef type_pf_tdel
#undef type_pf_ttest_cidrs
#undef type_pf_ttest

#undef type_pf_resize
#undef type_pf_tresize
#undef type_pf_flush
#undef type_pf_destroy
#undef type_pf_head
#undef type_pf_list
#undef type_pf_tlist
#undef type_pf_same_set
#undef type_pf_kadt
#undef type_pf_uadt
#undef type_pf_gc
#undef type_pf_gc_init
#undef type_pf_variant
#undef type_pf_tvariant