1 #ifndef _IP_SET_AHASH_H
2 #define _IP_SET_AHASH_H
4 #include <linux/rcupdate.h>
5 #include <linux/jhash.h>
6 #include <linux/netfilter/ipset/ip_set_timeout.h>
8 #define CONCAT(a, b, c) a##b##c
9 #define TOKEN(a, b, c) CONCAT(a, b, c)
11 #define type_pf_next TOKEN(TYPE, PF, _elem)
13 /* Hashing which uses arrays to resolve clashing. The hash table is resized
14 * (doubled) when searching becomes too long.
15 * Internally jhash is used with the assumption that the size of the
16 * stored data is a multiple of sizeof(u32). If storage supports timeout,
17 * the timeout field must be the last one in the data structure - that field
18 * is ignored when computing the hash key.
20 * Readers and resizing
22 * Resizing can be triggered by userspace command only, and those
23 * are serialized by the nfnl mutex. During resizing the set is
24 * read-locked, so the only possible concurrent operations are
25 * the kernel side readers. Those must be protected by proper RCU locking.
28 /* Number of elements to store in an initial array block */
29 #define AHASH_INIT_SIZE 4
30 /* Max number of elements to store in an array block */
31 #define AHASH_MAX_SIZE (3*AHASH_INIT_SIZE)
33 /* Max number of elements can be tuned */
34 #ifdef IP_SET_HASH_WITH_MULTI
35 #define AHASH_MAX(h) ((h)->ahash_max)
38 tune_ahash_max(u8 curr
, u32 multi
)
45 n
= curr
+ AHASH_INIT_SIZE
;
46 /* Currently, at listing one hash bucket must fit into a message.
47 * Therefore we have a hard limit here.
49 return n
> curr
&& n
<= 64 ? n
: curr
;
51 #define TUNE_AHASH_MAX(h, multi) \
52 ((h)->ahash_max = tune_ahash_max((h)->ahash_max, multi))
54 #define AHASH_MAX(h) AHASH_MAX_SIZE
55 #define TUNE_AHASH_MAX(h, multi)
60 void *value
; /* the array of the values */
61 u8 size
; /* size of the array */
62 u8 pos
; /* position of the first free entry */
65 /* The hash table: the table size stored here in order to make resizing easy */
67 u8 htable_bits
; /* size of hash table == 2^htable_bits */
68 struct hbucket bucket
[0]; /* hashtable buckets */
71 #define hbucket(h, i) (&((h)->bucket[i]))
73 /* Book-keeping of the prefixes added to the set */
74 struct ip_set_hash_nets
{
75 u8 cidr
; /* the different cidr values in the set */
76 u32 nets
; /* number of elements per cidr */
79 /* The generic ip_set hash structure */
81 struct htable
*table
; /* the hash table */
82 u32 maxelem
; /* max elements in the hash */
83 u32 elements
; /* current element (vs timeout) */
84 u32 initval
; /* random jhash init value */
85 u32 timeout
; /* timeout value, if enabled */
86 struct timer_list gc
; /* garbage collection when timeout enabled */
87 struct type_pf_next next
; /* temporary storage for uadd */
88 #ifdef IP_SET_HASH_WITH_MULTI
89 u8 ahash_max
; /* max elements in an array block */
91 #ifdef IP_SET_HASH_WITH_NETMASK
92 u8 netmask
; /* netmask value for subnets to store */
94 #ifdef IP_SET_HASH_WITH_RBTREE
95 struct rb_root rbtree
;
97 #ifdef IP_SET_HASH_WITH_NETS
98 struct ip_set_hash_nets nets
[0]; /* book-keeping of prefixes */
102 /* Compute htable_bits from the user input parameter hashsize */
104 htable_bits(u32 hashsize
)
106 /* Assume that hashsize == 2^htable_bits */
107 u8 bits
= fls(hashsize
- 1);
108 if (jhash_size(bits
) != hashsize
)
109 /* Round up to the first 2^n value */
110 bits
= fls(hashsize
);
115 #ifdef IP_SET_HASH_WITH_NETS
116 #ifdef IP_SET_HASH_WITH_NETS_PACKED
117 /* When cidr is packed with nomatch, cidr - 1 is stored in the entry */
118 #define CIDR(cidr) (cidr + 1)
120 #define CIDR(cidr) (cidr)
123 #define SET_HOST_MASK(family) (family == AF_INET ? 32 : 128)
125 /* Network cidr size book keeping when the hash stores different
128 add_cidr(struct ip_set_hash
*h
, u8 cidr
, u8 host_mask
)
132 ++h
->nets
[cidr
-1].nets
;
134 pr_debug("add_cidr added %u: %u\n", cidr
, h
->nets
[cidr
-1].nets
);
136 if (h
->nets
[cidr
-1].nets
> 1)
140 for (i
= 0; i
< host_mask
&& h
->nets
[i
].cidr
; i
++) {
141 /* Add in increasing prefix order, so larger cidr first */
142 if (h
->nets
[i
].cidr
< cidr
)
143 swap(h
->nets
[i
].cidr
, cidr
);
146 h
->nets
[i
].cidr
= cidr
;
150 del_cidr(struct ip_set_hash
*h
, u8 cidr
, u8 host_mask
)
154 --h
->nets
[cidr
-1].nets
;
156 pr_debug("del_cidr deleted %u: %u\n", cidr
, h
->nets
[cidr
-1].nets
);
158 if (h
->nets
[cidr
-1].nets
!= 0)
161 /* All entries with this cidr size deleted, so cleanup h->cidr[] */
162 for (i
= 0; i
< host_mask
- 1 && h
->nets
[i
].cidr
; i
++) {
163 if (h
->nets
[i
].cidr
== cidr
)
164 h
->nets
[i
].cidr
= cidr
= h
->nets
[i
+1].cidr
;
166 h
->nets
[i
- 1].cidr
= 0;
170 /* Destroy the hashtable part of the set */
172 ahash_destroy(struct htable
*t
)
177 for (i
= 0; i
< jhash_size(t
->htable_bits
); i
++) {
180 /* FIXME: use slab cache */
187 /* Calculate the actual memory size of the set data */
189 ahash_memsize(const struct ip_set_hash
*h
, size_t dsize
, u8 host_mask
)
192 struct htable
*t
= h
->table
;
193 size_t memsize
= sizeof(*h
)
195 #ifdef IP_SET_HASH_WITH_NETS
196 + sizeof(struct ip_set_hash_nets
) * host_mask
198 + jhash_size(t
->htable_bits
) * sizeof(struct hbucket
);
200 for (i
= 0; i
< jhash_size(t
->htable_bits
); i
++)
201 memsize
+= t
->bucket
[i
].size
* dsize
;
206 /* Flush a hash type of set: destroy all elements */
208 ip_set_hash_flush(struct ip_set
*set
)
210 struct ip_set_hash
*h
= set
->data
;
211 struct htable
*t
= h
->table
;
215 for (i
= 0; i
< jhash_size(t
->htable_bits
); i
++) {
218 n
->size
= n
->pos
= 0;
219 /* FIXME: use slab cache */
223 #ifdef IP_SET_HASH_WITH_NETS
224 memset(h
->nets
, 0, sizeof(struct ip_set_hash_nets
)
225 * SET_HOST_MASK(set
->family
));
230 /* Destroy a hash type of set */
232 ip_set_hash_destroy(struct ip_set
*set
)
234 struct ip_set_hash
*h
= set
->data
;
236 if (with_timeout(h
->timeout
))
237 del_timer_sync(&h
->gc
);
239 ahash_destroy(h
->table
);
240 #ifdef IP_SET_HASH_WITH_RBTREE
241 rbtree_destroy(&h
->rbtree
);
248 #endif /* _IP_SET_AHASH_H */
251 #define HKEY_DATALEN sizeof(struct type_pf_elem)
254 #define HKEY(data, initval, htable_bits) \
255 (jhash2((u32 *)(data), HKEY_DATALEN/sizeof(u32), initval) \
256 & jhash_mask(htable_bits))
258 #define CONCAT(a, b, c) a##b##c
259 #define TOKEN(a, b, c) CONCAT(a, b, c)
261 /* Type/family dependent function prototypes */
263 #define type_pf_data_equal TOKEN(TYPE, PF, _data_equal)
264 #define type_pf_data_isnull TOKEN(TYPE, PF, _data_isnull)
265 #define type_pf_data_copy TOKEN(TYPE, PF, _data_copy)
266 #define type_pf_data_zero_out TOKEN(TYPE, PF, _data_zero_out)
267 #define type_pf_data_netmask TOKEN(TYPE, PF, _data_netmask)
268 #define type_pf_data_list TOKEN(TYPE, PF, _data_list)
269 #define type_pf_data_tlist TOKEN(TYPE, PF, _data_tlist)
270 #define type_pf_data_next TOKEN(TYPE, PF, _data_next)
271 #define type_pf_data_flags TOKEN(TYPE, PF, _data_flags)
272 #ifdef IP_SET_HASH_WITH_NETS
273 #define type_pf_data_match TOKEN(TYPE, PF, _data_match)
275 #define type_pf_data_match(d) 1
278 #define type_pf_elem TOKEN(TYPE, PF, _elem)
279 #define type_pf_telem TOKEN(TYPE, PF, _telem)
280 #define type_pf_data_timeout TOKEN(TYPE, PF, _data_timeout)
281 #define type_pf_data_expired TOKEN(TYPE, PF, _data_expired)
282 #define type_pf_data_timeout_set TOKEN(TYPE, PF, _data_timeout_set)
284 #define type_pf_elem_add TOKEN(TYPE, PF, _elem_add)
285 #define type_pf_add TOKEN(TYPE, PF, _add)
286 #define type_pf_del TOKEN(TYPE, PF, _del)
287 #define type_pf_test_cidrs TOKEN(TYPE, PF, _test_cidrs)
288 #define type_pf_test TOKEN(TYPE, PF, _test)
290 #define type_pf_elem_tadd TOKEN(TYPE, PF, _elem_tadd)
291 #define type_pf_del_telem TOKEN(TYPE, PF, _ahash_del_telem)
292 #define type_pf_expire TOKEN(TYPE, PF, _expire)
293 #define type_pf_tadd TOKEN(TYPE, PF, _tadd)
294 #define type_pf_tdel TOKEN(TYPE, PF, _tdel)
295 #define type_pf_ttest_cidrs TOKEN(TYPE, PF, _ahash_ttest_cidrs)
296 #define type_pf_ttest TOKEN(TYPE, PF, _ahash_ttest)
298 #define type_pf_resize TOKEN(TYPE, PF, _resize)
299 #define type_pf_tresize TOKEN(TYPE, PF, _tresize)
300 #define type_pf_flush ip_set_hash_flush
301 #define type_pf_destroy ip_set_hash_destroy
302 #define type_pf_head TOKEN(TYPE, PF, _head)
303 #define type_pf_list TOKEN(TYPE, PF, _list)
304 #define type_pf_tlist TOKEN(TYPE, PF, _tlist)
305 #define type_pf_same_set TOKEN(TYPE, PF, _same_set)
306 #define type_pf_kadt TOKEN(TYPE, PF, _kadt)
307 #define type_pf_uadt TOKEN(TYPE, PF, _uadt)
308 #define type_pf_gc TOKEN(TYPE, PF, _gc)
309 #define type_pf_gc_init TOKEN(TYPE, PF, _gc_init)
310 #define type_pf_variant TOKEN(TYPE, PF, _variant)
311 #define type_pf_tvariant TOKEN(TYPE, PF, _tvariant)
313 /* Flavour without timeout */
315 /* Get the ith element from the array block n */
316 #define ahash_data(n, i) \
317 ((struct type_pf_elem *)((n)->value) + (i))
319 /* Add an element to the hash table when resizing the set:
320 * we spare the maintenance of the internal counters. */
322 type_pf_elem_add(struct hbucket
*n
, const struct type_pf_elem
*value
,
323 u8 ahash_max
, u32 cadt_flags
)
325 struct type_pf_elem
*data
;
327 if (n
->pos
>= n
->size
) {
330 if (n
->size
>= ahash_max
)
331 /* Trigger rehashing */
334 tmp
= kzalloc((n
->size
+ AHASH_INIT_SIZE
)
335 * sizeof(struct type_pf_elem
),
340 memcpy(tmp
, n
->value
,
341 sizeof(struct type_pf_elem
) * n
->size
);
345 n
->size
+= AHASH_INIT_SIZE
;
347 data
= ahash_data(n
, n
->pos
++);
348 type_pf_data_copy(data
, value
);
349 #ifdef IP_SET_HASH_WITH_NETS
350 /* Resizing won't overwrite stored flags */
352 type_pf_data_flags(data
, cadt_flags
);
357 /* Resize a hash: create a new hash table with doubling the hashsize
358 * and inserting the elements to it. Repeat until we succeed or
359 * fail due to memory pressures. */
361 type_pf_resize(struct ip_set
*set
, bool retried
)
363 struct ip_set_hash
*h
= set
->data
;
364 struct htable
*t
, *orig
= h
->table
;
365 u8 htable_bits
= orig
->htable_bits
;
366 const struct type_pf_elem
*data
;
367 struct hbucket
*n
, *m
;
374 pr_debug("attempt to resize set %s from %u to %u, t %p\n",
375 set
->name
, orig
->htable_bits
, htable_bits
, orig
);
377 /* In case we have plenty of memory :-) */
378 pr_warning("Cannot increase the hashsize of set %s further\n",
380 return -IPSET_ERR_HASH_FULL
;
382 t
= ip_set_alloc(sizeof(*t
)
383 + jhash_size(htable_bits
) * sizeof(struct hbucket
));
386 t
->htable_bits
= htable_bits
;
388 read_lock_bh(&set
->lock
);
389 for (i
= 0; i
< jhash_size(orig
->htable_bits
); i
++) {
390 n
= hbucket(orig
, i
);
391 for (j
= 0; j
< n
->pos
; j
++) {
392 data
= ahash_data(n
, j
);
393 m
= hbucket(t
, HKEY(data
, h
->initval
, htable_bits
));
394 ret
= type_pf_elem_add(m
, data
, AHASH_MAX(h
), 0);
396 read_unlock_bh(&set
->lock
);
405 rcu_assign_pointer(h
->table
, t
);
406 read_unlock_bh(&set
->lock
);
408 /* Give time to other readers of the set */
409 synchronize_rcu_bh();
411 pr_debug("set %s resized from %u (%p) to %u (%p)\n", set
->name
,
412 orig
->htable_bits
, orig
, t
->htable_bits
, t
);
419 type_pf_data_next(struct ip_set_hash
*h
, const struct type_pf_elem
*d
);
421 /* Add an element to a hash and update the internal counters when succeeded,
422 * otherwise report the proper error code. */
424 type_pf_add(struct ip_set
*set
, void *value
, u32 timeout
, u32 flags
)
426 struct ip_set_hash
*h
= set
->data
;
428 const struct type_pf_elem
*d
= value
;
432 u32 cadt_flags
= flags
>> 16;
434 if (h
->elements
>= h
->maxelem
) {
436 pr_warning("Set %s is full, maxelem %u reached\n",
437 set
->name
, h
->maxelem
);
438 return -IPSET_ERR_HASH_FULL
;
442 t
= rcu_dereference_bh(h
->table
);
443 key
= HKEY(value
, h
->initval
, t
->htable_bits
);
445 for (i
= 0; i
< n
->pos
; i
++)
446 if (type_pf_data_equal(ahash_data(n
, i
), d
, &multi
)) {
447 #ifdef IP_SET_HASH_WITH_NETS
448 if (flags
& IPSET_FLAG_EXIST
)
449 /* Support overwriting just the flags */
450 type_pf_data_flags(ahash_data(n
, i
),
453 ret
= -IPSET_ERR_EXIST
;
456 TUNE_AHASH_MAX(h
, multi
);
457 ret
= type_pf_elem_add(n
, value
, AHASH_MAX(h
), cadt_flags
);
460 type_pf_data_next(h
, d
);
464 #ifdef IP_SET_HASH_WITH_NETS
465 add_cidr(h
, CIDR(d
->cidr
), HOST_MASK
);
469 rcu_read_unlock_bh();
473 /* Delete an element from the hash: swap it with the last element
474 * and free up space if possible.
477 type_pf_del(struct ip_set
*set
, void *value
, u32 timeout
, u32 flags
)
479 struct ip_set_hash
*h
= set
->data
;
480 struct htable
*t
= h
->table
;
481 const struct type_pf_elem
*d
= value
;
484 struct type_pf_elem
*data
;
487 key
= HKEY(value
, h
->initval
, t
->htable_bits
);
489 for (i
= 0; i
< n
->pos
; i
++) {
490 data
= ahash_data(n
, i
);
491 if (!type_pf_data_equal(data
, d
, &multi
))
495 type_pf_data_copy(data
, ahash_data(n
, n
->pos
- 1));
499 #ifdef IP_SET_HASH_WITH_NETS
500 del_cidr(h
, CIDR(d
->cidr
), HOST_MASK
);
502 if (n
->pos
+ AHASH_INIT_SIZE
< n
->size
) {
503 void *tmp
= kzalloc((n
->size
- AHASH_INIT_SIZE
)
504 * sizeof(struct type_pf_elem
),
508 n
->size
-= AHASH_INIT_SIZE
;
509 memcpy(tmp
, n
->value
,
510 n
->size
* sizeof(struct type_pf_elem
));
517 return -IPSET_ERR_EXIST
;
#ifdef IP_SET_HASH_WITH_NETS

/* Special test function which takes into account the different network
 * sizes added to the set */
static int
type_pf_test_cidrs(struct ip_set *set, struct type_pf_elem *d, u32 timeout)
{
	struct ip_set_hash *h = set->data;
	struct htable *t = h->table;
	struct hbucket *n;
	const struct type_pf_elem *data;
	int i, j = 0;
	u32 key, multi = 0;
	u8 host_mask = SET_HOST_MASK(set->family);

	pr_debug("test by nets\n");
	for (; j < host_mask && h->nets[j].cidr && !multi; j++) {
		type_pf_data_netmask(d, h->nets[j].cidr);
		key = HKEY(d, h->initval, t->htable_bits);
		n = hbucket(t, key);
		for (i = 0; i < n->pos; i++) {
			data = ahash_data(n, i);
			if (type_pf_data_equal(data, d, &multi))
				return type_pf_data_match(data);
		}
	}
	return 0;
}
#endif
550 /* Test whether the element is added to the set */
552 type_pf_test(struct ip_set
*set
, void *value
, u32 timeout
, u32 flags
)
554 struct ip_set_hash
*h
= set
->data
;
555 struct htable
*t
= h
->table
;
556 struct type_pf_elem
*d
= value
;
558 const struct type_pf_elem
*data
;
562 #ifdef IP_SET_HASH_WITH_NETS
563 /* If we test an IP address and not a network address,
564 * try all possible network sizes */
565 if (CIDR(d
->cidr
) == SET_HOST_MASK(set
->family
))
566 return type_pf_test_cidrs(set
, d
, timeout
);
569 key
= HKEY(d
, h
->initval
, t
->htable_bits
);
571 for (i
= 0; i
< n
->pos
; i
++) {
572 data
= ahash_data(n
, i
);
573 if (type_pf_data_equal(data
, d
, &multi
))
574 return type_pf_data_match(data
);
579 /* Reply a HEADER request: fill out the header part of the set */
581 type_pf_head(struct ip_set
*set
, struct sk_buff
*skb
)
583 const struct ip_set_hash
*h
= set
->data
;
584 struct nlattr
*nested
;
587 read_lock_bh(&set
->lock
);
588 memsize
= ahash_memsize(h
, with_timeout(h
->timeout
)
589 ? sizeof(struct type_pf_telem
)
590 : sizeof(struct type_pf_elem
),
591 set
->family
== AF_INET
? 32 : 128);
592 read_unlock_bh(&set
->lock
);
594 nested
= ipset_nest_start(skb
, IPSET_ATTR_DATA
);
596 goto nla_put_failure
;
597 if (nla_put_net32(skb
, IPSET_ATTR_HASHSIZE
,
598 htonl(jhash_size(h
->table
->htable_bits
))) ||
599 nla_put_net32(skb
, IPSET_ATTR_MAXELEM
, htonl(h
->maxelem
)))
600 goto nla_put_failure
;
601 #ifdef IP_SET_HASH_WITH_NETMASK
602 if (h
->netmask
!= HOST_MASK
&&
603 nla_put_u8(skb
, IPSET_ATTR_NETMASK
, h
->netmask
))
604 goto nla_put_failure
;
606 if (nla_put_net32(skb
, IPSET_ATTR_REFERENCES
, htonl(set
->ref
- 1)) ||
607 nla_put_net32(skb
, IPSET_ATTR_MEMSIZE
, htonl(memsize
)) ||
608 (with_timeout(h
->timeout
) &&
609 nla_put_net32(skb
, IPSET_ATTR_TIMEOUT
, htonl(h
->timeout
))))
610 goto nla_put_failure
;
611 ipset_nest_end(skb
, nested
);
618 /* Reply a LIST/SAVE request: dump the elements of the specified set */
620 type_pf_list(const struct ip_set
*set
,
621 struct sk_buff
*skb
, struct netlink_callback
*cb
)
623 const struct ip_set_hash
*h
= set
->data
;
624 const struct htable
*t
= h
->table
;
625 struct nlattr
*atd
, *nested
;
626 const struct hbucket
*n
;
627 const struct type_pf_elem
*data
;
628 u32 first
= cb
->args
[2];
629 /* We assume that one hash bucket fills into one page */
633 atd
= ipset_nest_start(skb
, IPSET_ATTR_ADT
);
636 pr_debug("list hash set %s\n", set
->name
);
637 for (; cb
->args
[2] < jhash_size(t
->htable_bits
); cb
->args
[2]++) {
638 incomplete
= skb_tail_pointer(skb
);
639 n
= hbucket(t
, cb
->args
[2]);
640 pr_debug("cb->args[2]: %lu, t %p n %p\n", cb
->args
[2], t
, n
);
641 for (i
= 0; i
< n
->pos
; i
++) {
642 data
= ahash_data(n
, i
);
643 pr_debug("list hash %lu hbucket %p i %u, data %p\n",
644 cb
->args
[2], n
, i
, data
);
645 nested
= ipset_nest_start(skb
, IPSET_ATTR_DATA
);
647 if (cb
->args
[2] == first
) {
648 nla_nest_cancel(skb
, atd
);
651 goto nla_put_failure
;
653 if (type_pf_data_list(skb
, data
))
654 goto nla_put_failure
;
655 ipset_nest_end(skb
, nested
);
658 ipset_nest_end(skb
, atd
);
659 /* Set listing finished */
665 nlmsg_trim(skb
, incomplete
);
666 ipset_nest_end(skb
, atd
);
667 if (unlikely(first
== cb
->args
[2])) {
668 pr_warning("Can't list set %s: one bucket does not fit into "
669 "a message. Please report it!\n", set
->name
);
677 type_pf_kadt(struct ip_set
*set
, const struct sk_buff
* skb
,
678 const struct xt_action_param
*par
,
679 enum ipset_adt adt
, const struct ip_set_adt_opt
*opt
);
681 type_pf_uadt(struct ip_set
*set
, struct nlattr
*tb
[],
682 enum ipset_adt adt
, u32
*lineno
, u32 flags
, bool retried
);
684 static const struct ip_set_type_variant type_pf_variant
= {
685 .kadt
= type_pf_kadt
,
686 .uadt
= type_pf_uadt
,
688 [IPSET_ADD
] = type_pf_add
,
689 [IPSET_DEL
] = type_pf_del
,
690 [IPSET_TEST
] = type_pf_test
,
692 .destroy
= type_pf_destroy
,
693 .flush
= type_pf_flush
,
694 .head
= type_pf_head
,
695 .list
= type_pf_list
,
696 .resize
= type_pf_resize
,
697 .same_set
= type_pf_same_set
,
700 /* Flavour with timeout support */
702 #define ahash_tdata(n, i) \
703 (struct type_pf_elem *)((struct type_pf_telem *)((n)->value) + (i))
706 type_pf_data_timeout(const struct type_pf_elem
*data
)
708 const struct type_pf_telem
*tdata
=
709 (const struct type_pf_telem
*) data
;
711 return tdata
->timeout
;
715 type_pf_data_expired(const struct type_pf_elem
*data
)
717 const struct type_pf_telem
*tdata
=
718 (const struct type_pf_telem
*) data
;
720 return ip_set_timeout_expired(tdata
->timeout
);
724 type_pf_data_timeout_set(struct type_pf_elem
*data
, u32 timeout
)
726 struct type_pf_telem
*tdata
= (struct type_pf_telem
*) data
;
728 tdata
->timeout
= ip_set_timeout_set(timeout
);
732 type_pf_elem_tadd(struct hbucket
*n
, const struct type_pf_elem
*value
,
733 u8 ahash_max
, u32 cadt_flags
, u32 timeout
)
735 struct type_pf_elem
*data
;
737 if (n
->pos
>= n
->size
) {
740 if (n
->size
>= ahash_max
)
741 /* Trigger rehashing */
744 tmp
= kzalloc((n
->size
+ AHASH_INIT_SIZE
)
745 * sizeof(struct type_pf_telem
),
750 memcpy(tmp
, n
->value
,
751 sizeof(struct type_pf_telem
) * n
->size
);
755 n
->size
+= AHASH_INIT_SIZE
;
757 data
= ahash_tdata(n
, n
->pos
++);
758 type_pf_data_copy(data
, value
);
759 type_pf_data_timeout_set(data
, timeout
);
760 #ifdef IP_SET_HASH_WITH_NETS
761 /* Resizing won't overwrite stored flags */
763 type_pf_data_flags(data
, cadt_flags
);
768 /* Delete expired elements from the hashtable */
770 type_pf_expire(struct ip_set_hash
*h
)
772 struct htable
*t
= h
->table
;
774 struct type_pf_elem
*data
;
778 for (i
= 0; i
< jhash_size(t
->htable_bits
); i
++) {
780 for (j
= 0; j
< n
->pos
; j
++) {
781 data
= ahash_tdata(n
, j
);
782 if (type_pf_data_expired(data
)) {
783 pr_debug("expired %u/%u\n", i
, j
);
784 #ifdef IP_SET_HASH_WITH_NETS
785 del_cidr(h
, CIDR(data
->cidr
), HOST_MASK
);
789 type_pf_data_copy(data
,
790 ahash_tdata(n
, n
->pos
- 1));
795 if (n
->pos
+ AHASH_INIT_SIZE
< n
->size
) {
796 void *tmp
= kzalloc((n
->size
- AHASH_INIT_SIZE
)
797 * sizeof(struct type_pf_telem
),
800 /* Still try to delete expired elements */
802 n
->size
-= AHASH_INIT_SIZE
;
803 memcpy(tmp
, n
->value
,
804 n
->size
* sizeof(struct type_pf_telem
));
812 type_pf_tresize(struct ip_set
*set
, bool retried
)
814 struct ip_set_hash
*h
= set
->data
;
815 struct htable
*t
, *orig
= h
->table
;
816 u8 htable_bits
= orig
->htable_bits
;
817 const struct type_pf_elem
*data
;
818 struct hbucket
*n
, *m
;
822 /* Try to cleanup once */
825 write_lock_bh(&set
->lock
);
826 type_pf_expire(set
->data
);
827 write_unlock_bh(&set
->lock
);
836 /* In case we have plenty of memory :-) */
837 pr_warning("Cannot increase the hashsize of set %s further\n",
839 return -IPSET_ERR_HASH_FULL
;
841 t
= ip_set_alloc(sizeof(*t
)
842 + jhash_size(htable_bits
) * sizeof(struct hbucket
));
845 t
->htable_bits
= htable_bits
;
847 read_lock_bh(&set
->lock
);
848 for (i
= 0; i
< jhash_size(orig
->htable_bits
); i
++) {
849 n
= hbucket(orig
, i
);
850 for (j
= 0; j
< n
->pos
; j
++) {
851 data
= ahash_tdata(n
, j
);
852 m
= hbucket(t
, HKEY(data
, h
->initval
, htable_bits
));
853 ret
= type_pf_elem_tadd(m
, data
, AHASH_MAX(h
), 0,
854 type_pf_data_timeout(data
));
856 read_unlock_bh(&set
->lock
);
865 rcu_assign_pointer(h
->table
, t
);
866 read_unlock_bh(&set
->lock
);
868 /* Give time to other readers of the set */
869 synchronize_rcu_bh();
877 type_pf_tadd(struct ip_set
*set
, void *value
, u32 timeout
, u32 flags
)
879 struct ip_set_hash
*h
= set
->data
;
880 struct htable
*t
= h
->table
;
881 const struct type_pf_elem
*d
= value
;
883 struct type_pf_elem
*data
;
884 int ret
= 0, i
, j
= AHASH_MAX(h
) + 1;
885 bool flag_exist
= flags
& IPSET_FLAG_EXIST
;
887 u32 cadt_flags
= flags
>> 16;
889 if (h
->elements
>= h
->maxelem
)
890 /* FIXME: when set is full, we slow down here */
892 if (h
->elements
>= h
->maxelem
) {
894 pr_warning("Set %s is full, maxelem %u reached\n",
895 set
->name
, h
->maxelem
);
896 return -IPSET_ERR_HASH_FULL
;
900 t
= rcu_dereference_bh(h
->table
);
901 key
= HKEY(d
, h
->initval
, t
->htable_bits
);
903 for (i
= 0; i
< n
->pos
; i
++) {
904 data
= ahash_tdata(n
, i
);
905 if (type_pf_data_equal(data
, d
, &multi
)) {
906 if (type_pf_data_expired(data
) || flag_exist
)
907 /* Just timeout value may be updated */
910 ret
= -IPSET_ERR_EXIST
;
913 } else if (j
== AHASH_MAX(h
) + 1 &&
914 type_pf_data_expired(data
))
917 if (j
!= AHASH_MAX(h
) + 1) {
918 data
= ahash_tdata(n
, j
);
919 #ifdef IP_SET_HASH_WITH_NETS
920 del_cidr(h
, CIDR(data
->cidr
), HOST_MASK
);
921 add_cidr(h
, CIDR(d
->cidr
), HOST_MASK
);
923 type_pf_data_copy(data
, d
);
924 type_pf_data_timeout_set(data
, timeout
);
925 #ifdef IP_SET_HASH_WITH_NETS
926 type_pf_data_flags(data
, cadt_flags
);
930 TUNE_AHASH_MAX(h
, multi
);
931 ret
= type_pf_elem_tadd(n
, d
, AHASH_MAX(h
), cadt_flags
, timeout
);
934 type_pf_data_next(h
, d
);
938 #ifdef IP_SET_HASH_WITH_NETS
939 add_cidr(h
, CIDR(d
->cidr
), HOST_MASK
);
943 rcu_read_unlock_bh();
948 type_pf_tdel(struct ip_set
*set
, void *value
, u32 timeout
, u32 flags
)
950 struct ip_set_hash
*h
= set
->data
;
951 struct htable
*t
= h
->table
;
952 const struct type_pf_elem
*d
= value
;
955 struct type_pf_elem
*data
;
958 key
= HKEY(value
, h
->initval
, t
->htable_bits
);
960 for (i
= 0; i
< n
->pos
; i
++) {
961 data
= ahash_tdata(n
, i
);
962 if (!type_pf_data_equal(data
, d
, &multi
))
964 if (type_pf_data_expired(data
))
965 return -IPSET_ERR_EXIST
;
968 type_pf_data_copy(data
, ahash_tdata(n
, n
->pos
- 1));
972 #ifdef IP_SET_HASH_WITH_NETS
973 del_cidr(h
, CIDR(d
->cidr
), HOST_MASK
);
975 if (n
->pos
+ AHASH_INIT_SIZE
< n
->size
) {
976 void *tmp
= kzalloc((n
->size
- AHASH_INIT_SIZE
)
977 * sizeof(struct type_pf_telem
),
981 n
->size
-= AHASH_INIT_SIZE
;
982 memcpy(tmp
, n
->value
,
983 n
->size
* sizeof(struct type_pf_telem
));
990 return -IPSET_ERR_EXIST
;
#ifdef IP_SET_HASH_WITH_NETS

/* Timeout variant of the per-cidr test: expired entries do not match */
static int
type_pf_ttest_cidrs(struct ip_set *set, struct type_pf_elem *d, u32 timeout)
{
	struct ip_set_hash *h = set->data;
	struct htable *t = h->table;
	struct type_pf_elem *data;
	struct hbucket *n;
	int i, j = 0;
	u32 key, multi = 0;
	u8 host_mask = SET_HOST_MASK(set->family);

	for (; j < host_mask && h->nets[j].cidr && !multi; j++) {
		type_pf_data_netmask(d, h->nets[j].cidr);
		key = HKEY(d, h->initval, t->htable_bits);
		n = hbucket(t, key);
		for (i = 0; i < n->pos; i++) {
			data = ahash_tdata(n, i);
#ifdef IP_SET_HASH_WITH_MULTI
			if (type_pf_data_equal(data, d, &multi)) {
				if (!type_pf_data_expired(data))
					return type_pf_data_match(data);
				/* Expired: keep searching other cidrs */
				multi = 0;
			}
#else
			if (type_pf_data_equal(data, d, &multi) &&
			    !type_pf_data_expired(data))
				return type_pf_data_match(data);
#endif
		}
	}
	return 0;
}
#endif
1029 type_pf_ttest(struct ip_set
*set
, void *value
, u32 timeout
, u32 flags
)
1031 struct ip_set_hash
*h
= set
->data
;
1032 struct htable
*t
= h
->table
;
1033 struct type_pf_elem
*data
, *d
= value
;
1038 #ifdef IP_SET_HASH_WITH_NETS
1039 if (CIDR(d
->cidr
) == SET_HOST_MASK(set
->family
))
1040 return type_pf_ttest_cidrs(set
, d
, timeout
);
1042 key
= HKEY(d
, h
->initval
, t
->htable_bits
);
1043 n
= hbucket(t
, key
);
1044 for (i
= 0; i
< n
->pos
; i
++) {
1045 data
= ahash_tdata(n
, i
);
1046 if (type_pf_data_equal(data
, d
, &multi
) &&
1047 !type_pf_data_expired(data
))
1048 return type_pf_data_match(data
);
1054 type_pf_tlist(const struct ip_set
*set
,
1055 struct sk_buff
*skb
, struct netlink_callback
*cb
)
1057 const struct ip_set_hash
*h
= set
->data
;
1058 const struct htable
*t
= h
->table
;
1059 struct nlattr
*atd
, *nested
;
1060 const struct hbucket
*n
;
1061 const struct type_pf_elem
*data
;
1062 u32 first
= cb
->args
[2];
1063 /* We assume that one hash bucket fills into one page */
1067 atd
= ipset_nest_start(skb
, IPSET_ATTR_ADT
);
1070 for (; cb
->args
[2] < jhash_size(t
->htable_bits
); cb
->args
[2]++) {
1071 incomplete
= skb_tail_pointer(skb
);
1072 n
= hbucket(t
, cb
->args
[2]);
1073 for (i
= 0; i
< n
->pos
; i
++) {
1074 data
= ahash_tdata(n
, i
);
1075 pr_debug("list %p %u\n", n
, i
);
1076 if (type_pf_data_expired(data
))
1078 pr_debug("do list %p %u\n", n
, i
);
1079 nested
= ipset_nest_start(skb
, IPSET_ATTR_DATA
);
1081 if (cb
->args
[2] == first
) {
1082 nla_nest_cancel(skb
, atd
);
1085 goto nla_put_failure
;
1087 if (type_pf_data_tlist(skb
, data
))
1088 goto nla_put_failure
;
1089 ipset_nest_end(skb
, nested
);
1092 ipset_nest_end(skb
, atd
);
1093 /* Set listing finished */
1099 nlmsg_trim(skb
, incomplete
);
1100 ipset_nest_end(skb
, atd
);
1101 if (unlikely(first
== cb
->args
[2])) {
1102 pr_warning("Can't list set %s: one bucket does not fit into "
1103 "a message. Please report it!\n", set
->name
);
1110 static const struct ip_set_type_variant type_pf_tvariant
= {
1111 .kadt
= type_pf_kadt
,
1112 .uadt
= type_pf_uadt
,
1114 [IPSET_ADD
] = type_pf_tadd
,
1115 [IPSET_DEL
] = type_pf_tdel
,
1116 [IPSET_TEST
] = type_pf_ttest
,
1118 .destroy
= type_pf_destroy
,
1119 .flush
= type_pf_flush
,
1120 .head
= type_pf_head
,
1121 .list
= type_pf_tlist
,
1122 .resize
= type_pf_tresize
,
1123 .same_set
= type_pf_same_set
,
1127 type_pf_gc(unsigned long ul_set
)
1129 struct ip_set
*set
= (struct ip_set
*) ul_set
;
1130 struct ip_set_hash
*h
= set
->data
;
1132 pr_debug("called\n");
1133 write_lock_bh(&set
->lock
);
1135 write_unlock_bh(&set
->lock
);
1137 h
->gc
.expires
= jiffies
+ IPSET_GC_PERIOD(h
->timeout
) * HZ
;
1142 type_pf_gc_init(struct ip_set
*set
)
1144 struct ip_set_hash
*h
= set
->data
;
1147 h
->gc
.data
= (unsigned long) set
;
1148 h
->gc
.function
= type_pf_gc
;
1149 h
->gc
.expires
= jiffies
+ IPSET_GC_PERIOD(h
->timeout
) * HZ
;
1151 pr_debug("gc initialized, run in every %u\n",
1152 IPSET_GC_PERIOD(h
->timeout
));
1157 #undef type_pf_data_equal
1158 #undef type_pf_data_isnull
1159 #undef type_pf_data_copy
1160 #undef type_pf_data_zero_out
1161 #undef type_pf_data_netmask
1162 #undef type_pf_data_list
1163 #undef type_pf_data_tlist
1164 #undef type_pf_data_next
1165 #undef type_pf_data_flags
1166 #undef type_pf_data_match
1169 #undef type_pf_telem
1170 #undef type_pf_data_timeout
1171 #undef type_pf_data_expired
1172 #undef type_pf_data_timeout_set
1174 #undef type_pf_elem_add
1177 #undef type_pf_test_cidrs
1180 #undef type_pf_elem_tadd
1181 #undef type_pf_del_telem
1182 #undef type_pf_expire
1185 #undef type_pf_ttest_cidrs
1186 #undef type_pf_ttest
1188 #undef type_pf_resize
1189 #undef type_pf_tresize
1190 #undef type_pf_flush
1191 #undef type_pf_destroy
1194 #undef type_pf_tlist
1195 #undef type_pf_same_set
1199 #undef type_pf_gc_init
1200 #undef type_pf_variant
1201 #undef type_pf_tvariant