Commit | Line | Data |
---|---|---|
96518518 | 1 | /* |
ce6eb0d7 | 2 | * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net> |
96518518 PM |
3 | * |
4 | * This program is free software; you can redistribute it and/or modify | |
5 | * it under the terms of the GNU General Public License version 2 as | |
6 | * published by the Free Software Foundation. | |
7 | * | |
8 | * Development of this code funded by Astaro AG (http://www.astaro.com/) | |
9 | */ | |
10 | ||
11 | #include <linux/kernel.h> | |
12 | #include <linux/init.h> | |
13 | #include <linux/module.h> | |
14 | #include <linux/list.h> | |
c50b960c | 15 | #include <linux/log2.h> |
96518518 PM |
16 | #include <linux/jhash.h> |
17 | #include <linux/netlink.h> | |
3ab428a4 | 18 | #include <linux/vmalloc.h> |
96518518 PM |
19 | #include <linux/netfilter.h> |
20 | #include <linux/netfilter/nf_tables.h> | |
21 | #include <net/netfilter/nf_tables.h> | |
22 | ||
/* Smallest bucket count used for a table: the default size in nft_hash_init(), the lower bound in nft_hash_tbl_size(), and the size at which nft_hash_remove() stops shrinking. */
c50b960c | 23 | #define NFT_HASH_MIN_SIZE 4UL |
ce6eb0d7 | 24 | |
/*
 * Per-set private data (nft_hash_privsize() reports its size).
 * tbl points at the current bucket table; it is replaced wholesale by the
 * expand/shrink paths via rcu_assign_pointer().
 */
96518518 | 25 | struct nft_hash {
ce6eb0d7 PM |
26 | struct nft_hash_table __rcu *tbl; |
27 | }; | |
28 | ||
/*
 * One generation of the bucket array.
 * size is the number of entries in buckets[]; nft_hash_data() reduces a hash
 * with "& (size - 1)", so size is expected to be a power of two.
 * Each bucket heads a singly linked chain of struct nft_hash_elem.
 */
29 | struct nft_hash_table { | |
30 | unsigned int size; | |
ce6eb0d7 | 31 | struct nft_hash_elem __rcu *buckets[]; |
96518518 PM |
32 | }; |
33 | ||
/*
 * A single set element.
 * next links the element into its bucket chain, key is the lookup key, and
 * data[] holds one trailing nft_data only when the set is a map
 * (nft_hash_insert() allocates room for data[0] only if NFT_SET_MAP is set).
 */
34 | struct nft_hash_elem { | |
ce6eb0d7 PM |
35 | struct nft_hash_elem __rcu *next; |
36 | struct nft_data key; | |
37 | struct nft_data data[]; | |
96518518 PM |
38 | }; |
39 | ||
/*
 * Chain iterators: walk from head along ->next until NULL.
 * nft_hash_for_each_entry() loads pointers with nft_dereference() and is the
 * variant used by the update-side functions in this file;
 * nft_hash_for_each_entry_rcu() uses rcu_dereference() and is used by
 * nft_hash_lookup().
 * NOTE(review): the locking that nft_dereference() expects is defined outside
 * this file - confirm before using the non-_rcu variant in a new context.
 */
ce6eb0d7 PM |
40 | #define nft_hash_for_each_entry(i, head) \ |
41 | for (i = nft_dereference(head); i != NULL; i = nft_dereference(i->next)) | |
42 | #define nft_hash_for_each_entry_rcu(i, head) \ | |
43 | for (i = rcu_dereference(head); i != NULL; i = rcu_dereference(i->next)) | |
44 | ||
/*
 * Seed passed to jhash() by nft_hash_data(). It is filled with random bytes
 * the first time nft_hash_init() runs; nft_hash_rnd_initted records that this
 * has happened.
 */
96518518 PM |
45 | static u32 nft_hash_rnd __read_mostly; |
46 | static bool nft_hash_rnd_initted __read_mostly; | |
47 | ||
48 | static unsigned int nft_hash_data(const struct nft_data *data, | |
49 | unsigned int hsize, unsigned int len) | |
50 | { | |
51 | unsigned int h; | |
52 | ||
20a69341 | 53 | h = jhash(data->data, len, nft_hash_rnd); |
ce6eb0d7 | 54 | return h & (hsize - 1); |
96518518 PM |
55 | } |
56 | ||
20a69341 PM |
57 | static bool nft_hash_lookup(const struct nft_set *set, |
58 | const struct nft_data *key, | |
59 | struct nft_data *data) | |
96518518 | 60 | { |
20a69341 | 61 | const struct nft_hash *priv = nft_set_priv(set); |
ce6eb0d7 | 62 | const struct nft_hash_table *tbl = rcu_dereference(priv->tbl); |
20a69341 | 63 | const struct nft_hash_elem *he; |
96518518 PM |
64 | unsigned int h; |
65 | ||
ce6eb0d7 PM |
66 | h = nft_hash_data(key, tbl->size, set->klen); |
67 | nft_hash_for_each_entry_rcu(he, tbl->buckets[h]) { | |
20a69341 | 68 | if (nft_data_cmp(&he->key, key, set->klen)) |
96518518 | 69 | continue; |
20a69341 PM |
70 | if (set->flags & NFT_SET_MAP) |
71 | nft_data_copy(data, he->data); | |
72 | return true; | |
96518518 | 73 | } |
20a69341 | 74 | return false; |
96518518 PM |
75 | } |
76 | ||
/*
 * Release a bucket table obtained from nft_hash_tbl_alloc().
 *
 * kvfree() is used because the allocation may have come from either
 * kzalloc() or the vzalloc() fallback. Only the table is freed; the elements
 * chained from its buckets are left alone.
 */
static void nft_hash_tbl_free(const struct nft_hash_table *tbl)
{
	kvfree(tbl);
}
81 | ||
c50b960c PM |
82 | static unsigned int nft_hash_tbl_size(unsigned int nelem) |
83 | { | |
84 | return max(roundup_pow_of_two(nelem * 4 / 3), NFT_HASH_MIN_SIZE); | |
85 | } | |
86 | ||
ce6eb0d7 PM |
87 | static struct nft_hash_table *nft_hash_tbl_alloc(unsigned int nbuckets) |
88 | { | |
89 | struct nft_hash_table *tbl; | |
90 | size_t size; | |
91 | ||
92 | size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]); | |
93 | tbl = kzalloc(size, GFP_KERNEL | __GFP_REPEAT | __GFP_NOWARN); | |
94 | if (tbl == NULL) | |
95 | tbl = vzalloc(size); | |
96 | if (tbl == NULL) | |
97 | return NULL; | |
98 | tbl->size = nbuckets; | |
99 | ||
100 | return tbl; | |
101 | } | |
102 | ||
103 | static void nft_hash_chain_unzip(const struct nft_set *set, | |
104 | const struct nft_hash_table *ntbl, | |
105 | struct nft_hash_table *tbl, unsigned int n) | |
106 | { | |
107 | struct nft_hash_elem *he, *last, *next; | |
108 | unsigned int h; | |
109 | ||
110 | he = nft_dereference(tbl->buckets[n]); | |
111 | if (he == NULL) | |
112 | return; | |
113 | h = nft_hash_data(&he->key, ntbl->size, set->klen); | |
114 | ||
115 | /* Find last element of first chain hashing to bucket h */ | |
116 | last = he; | |
117 | nft_hash_for_each_entry(he, he->next) { | |
118 | if (nft_hash_data(&he->key, ntbl->size, set->klen) != h) | |
119 | break; | |
120 | last = he; | |
121 | } | |
122 | ||
123 | /* Unlink first chain from the old table */ | |
124 | RCU_INIT_POINTER(tbl->buckets[n], last->next); | |
125 | ||
126 | /* If end of chain reached, done */ | |
127 | if (he == NULL) | |
128 | return; | |
129 | ||
130 | /* Find first element of second chain hashing to bucket h */ | |
131 | next = NULL; | |
132 | nft_hash_for_each_entry(he, he->next) { | |
133 | if (nft_hash_data(&he->key, ntbl->size, set->klen) != h) | |
134 | continue; | |
135 | next = he; | |
136 | break; | |
137 | } | |
138 | ||
139 | /* Link the two chains */ | |
140 | RCU_INIT_POINTER(last->next, next); | |
141 | } | |
142 | ||
143 | static int nft_hash_tbl_expand(const struct nft_set *set, struct nft_hash *priv) | |
144 | { | |
145 | struct nft_hash_table *tbl = nft_dereference(priv->tbl), *ntbl; | |
146 | struct nft_hash_elem *he; | |
147 | unsigned int i, h; | |
148 | bool complete; | |
149 | ||
150 | ntbl = nft_hash_tbl_alloc(tbl->size * 2); | |
151 | if (ntbl == NULL) | |
152 | return -ENOMEM; | |
153 | ||
154 | /* Link new table's buckets to first element in the old table | |
155 | * hashing to the new bucket. | |
156 | */ | |
157 | for (i = 0; i < ntbl->size; i++) { | |
158 | h = i < tbl->size ? i : i - tbl->size; | |
159 | nft_hash_for_each_entry(he, tbl->buckets[h]) { | |
160 | if (nft_hash_data(&he->key, ntbl->size, set->klen) != i) | |
161 | continue; | |
162 | RCU_INIT_POINTER(ntbl->buckets[i], he); | |
163 | break; | |
164 | } | |
165 | } | |
ce6eb0d7 PM |
166 | |
167 | /* Publish new table */ | |
168 | rcu_assign_pointer(priv->tbl, ntbl); | |
169 | ||
170 | /* Unzip interleaved hash chains */ | |
171 | do { | |
172 | /* Wait for readers to use new table/unzipped chains */ | |
173 | synchronize_rcu(); | |
174 | ||
175 | complete = true; | |
176 | for (i = 0; i < tbl->size; i++) { | |
177 | nft_hash_chain_unzip(set, ntbl, tbl, i); | |
178 | if (tbl->buckets[i] != NULL) | |
179 | complete = false; | |
180 | } | |
181 | } while (!complete); | |
182 | ||
183 | nft_hash_tbl_free(tbl); | |
184 | return 0; | |
185 | } | |
186 | ||
187 | static int nft_hash_tbl_shrink(const struct nft_set *set, struct nft_hash *priv) | |
188 | { | |
189 | struct nft_hash_table *tbl = nft_dereference(priv->tbl), *ntbl; | |
190 | struct nft_hash_elem __rcu **pprev; | |
191 | unsigned int i; | |
192 | ||
193 | ntbl = nft_hash_tbl_alloc(tbl->size / 2); | |
194 | if (ntbl == NULL) | |
195 | return -ENOMEM; | |
196 | ||
197 | for (i = 0; i < ntbl->size; i++) { | |
198 | ntbl->buckets[i] = tbl->buckets[i]; | |
199 | ||
200 | for (pprev = &ntbl->buckets[i]; *pprev != NULL; | |
201 | pprev = &nft_dereference(*pprev)->next) | |
202 | ; | |
203 | RCU_INIT_POINTER(*pprev, tbl->buckets[i + ntbl->size]); | |
204 | } | |
ce6eb0d7 PM |
205 | |
206 | /* Publish new table */ | |
207 | rcu_assign_pointer(priv->tbl, ntbl); | |
208 | synchronize_rcu(); | |
209 | ||
210 | nft_hash_tbl_free(tbl); | |
211 | return 0; | |
96518518 PM |
212 | } |
213 | ||
20a69341 PM |
214 | static int nft_hash_insert(const struct nft_set *set, |
215 | const struct nft_set_elem *elem) | |
96518518 | 216 | { |
20a69341 | 217 | struct nft_hash *priv = nft_set_priv(set); |
ce6eb0d7 | 218 | struct nft_hash_table *tbl = nft_dereference(priv->tbl); |
20a69341 PM |
219 | struct nft_hash_elem *he; |
220 | unsigned int size, h; | |
96518518 | 221 | |
20a69341 | 222 | if (elem->flags != 0) |
96518518 | 223 | return -EINVAL; |
96518518 | 224 | |
20a69341 PM |
225 | size = sizeof(*he); |
226 | if (set->flags & NFT_SET_MAP) | |
227 | size += sizeof(he->data[0]); | |
228 | ||
229 | he = kzalloc(size, GFP_KERNEL); | |
230 | if (he == NULL) | |
96518518 PM |
231 | return -ENOMEM; |
232 | ||
20a69341 PM |
233 | nft_data_copy(&he->key, &elem->key); |
234 | if (set->flags & NFT_SET_MAP) | |
235 | nft_data_copy(he->data, &elem->data); | |
96518518 | 236 | |
ce6eb0d7 PM |
237 | h = nft_hash_data(&he->key, tbl->size, set->klen); |
238 | RCU_INIT_POINTER(he->next, tbl->buckets[h]); | |
239 | rcu_assign_pointer(tbl->buckets[h], he); | |
ce6eb0d7 PM |
240 | |
241 | /* Expand table when exceeding 75% load */ | |
2c96c25d | 242 | if (set->nelems + 1 > tbl->size / 4 * 3) |
ce6eb0d7 PM |
243 | nft_hash_tbl_expand(set, priv); |
244 | ||
96518518 | 245 | return 0; |
96518518 PM |
246 | } |
247 | ||
ce6eb0d7 PM |
248 | static void nft_hash_elem_destroy(const struct nft_set *set, |
249 | struct nft_hash_elem *he) | |
250 | { | |
251 | nft_data_uninit(&he->key, NFT_DATA_VALUE); | |
252 | if (set->flags & NFT_SET_MAP) | |
253 | nft_data_uninit(he->data, set->dtype); | |
254 | kfree(he); | |
255 | } | |
256 | ||
20a69341 PM |
257 | static void nft_hash_remove(const struct nft_set *set, |
258 | const struct nft_set_elem *elem) | |
96518518 | 259 | { |
ce6eb0d7 PM |
260 | struct nft_hash *priv = nft_set_priv(set); |
261 | struct nft_hash_table *tbl = nft_dereference(priv->tbl); | |
262 | struct nft_hash_elem *he, __rcu **pprev; | |
96518518 | 263 | |
ce6eb0d7 PM |
264 | pprev = elem->cookie; |
265 | he = nft_dereference((*pprev)); | |
266 | ||
267 | RCU_INIT_POINTER(*pprev, he->next); | |
268 | synchronize_rcu(); | |
20a69341 | 269 | kfree(he); |
ce6eb0d7 PM |
270 | |
271 | /* Shrink table beneath 30% load */ | |
2c96c25d | 272 | if (set->nelems - 1 < tbl->size * 3 / 10 && |
ce6eb0d7 PM |
273 | tbl->size > NFT_HASH_MIN_SIZE) |
274 | nft_hash_tbl_shrink(set, priv); | |
20a69341 | 275 | } |
96518518 | 276 | |
20a69341 PM |
277 | static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem) |
278 | { | |
279 | const struct nft_hash *priv = nft_set_priv(set); | |
ce6eb0d7 PM |
280 | const struct nft_hash_table *tbl = nft_dereference(priv->tbl); |
281 | struct nft_hash_elem __rcu * const *pprev; | |
20a69341 PM |
282 | struct nft_hash_elem *he; |
283 | unsigned int h; | |
96518518 | 284 | |
ce6eb0d7 PM |
285 | h = nft_hash_data(&elem->key, tbl->size, set->klen); |
286 | pprev = &tbl->buckets[h]; | |
287 | nft_hash_for_each_entry(he, tbl->buckets[h]) { | |
288 | if (nft_data_cmp(&he->key, &elem->key, set->klen)) { | |
289 | pprev = &he->next; | |
20a69341 | 290 | continue; |
ce6eb0d7 | 291 | } |
96518518 | 292 | |
ce6eb0d7 PM |
293 | elem->cookie = (void *)pprev; |
294 | elem->flags = 0; | |
20a69341 PM |
295 | if (set->flags & NFT_SET_MAP) |
296 | nft_data_copy(&elem->data, he->data); | |
297 | return 0; | |
298 | } | |
299 | return -ENOENT; | |
96518518 PM |
300 | } |
301 | ||
20a69341 PM |
302 | static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, |
303 | struct nft_set_iter *iter) | |
96518518 | 304 | { |
20a69341 | 305 | const struct nft_hash *priv = nft_set_priv(set); |
ce6eb0d7 | 306 | const struct nft_hash_table *tbl = nft_dereference(priv->tbl); |
20a69341 PM |
307 | const struct nft_hash_elem *he; |
308 | struct nft_set_elem elem; | |
96518518 PM |
309 | unsigned int i; |
310 | ||
ce6eb0d7 PM |
311 | for (i = 0; i < tbl->size; i++) { |
312 | nft_hash_for_each_entry(he, tbl->buckets[i]) { | |
20a69341 PM |
313 | if (iter->count < iter->skip) |
314 | goto cont; | |
315 | ||
316 | memcpy(&elem.key, &he->key, sizeof(elem.key)); | |
317 | if (set->flags & NFT_SET_MAP) | |
318 | memcpy(&elem.data, he->data, sizeof(elem.data)); | |
319 | elem.flags = 0; | |
320 | ||
321 | iter->err = iter->fn(ctx, set, iter, &elem); | |
322 | if (iter->err < 0) | |
323 | return; | |
324 | cont: | |
325 | iter->count++; | |
96518518 PM |
326 | } |
327 | } | |
96518518 PM |
328 | } |
329 | ||
20a69341 PM |
330 | static unsigned int nft_hash_privsize(const struct nlattr * const nla[]) |
331 | { | |
332 | return sizeof(struct nft_hash); | |
333 | } | |
96518518 | 334 | |
20a69341 | 335 | static int nft_hash_init(const struct nft_set *set, |
c50b960c | 336 | const struct nft_set_desc *desc, |
96518518 PM |
337 | const struct nlattr * const tb[]) |
338 | { | |
20a69341 | 339 | struct nft_hash *priv = nft_set_priv(set); |
ce6eb0d7 | 340 | struct nft_hash_table *tbl; |
c50b960c | 341 | unsigned int size; |
96518518 PM |
342 | |
343 | if (unlikely(!nft_hash_rnd_initted)) { | |
344 | get_random_bytes(&nft_hash_rnd, 4); | |
345 | nft_hash_rnd_initted = true; | |
346 | } | |
347 | ||
c50b960c PM |
348 | size = NFT_HASH_MIN_SIZE; |
349 | if (desc->size) | |
350 | size = nft_hash_tbl_size(desc->size); | |
351 | ||
352 | tbl = nft_hash_tbl_alloc(size); | |
ce6eb0d7 | 353 | if (tbl == NULL) |
96518518 | 354 | return -ENOMEM; |
ce6eb0d7 | 355 | RCU_INIT_POINTER(priv->tbl, tbl); |
96518518 | 356 | return 0; |
96518518 PM |
357 | } |
358 | ||
20a69341 | 359 | static void nft_hash_destroy(const struct nft_set *set) |
96518518 | 360 | { |
20a69341 | 361 | const struct nft_hash *priv = nft_set_priv(set); |
ce6eb0d7 PM |
362 | const struct nft_hash_table *tbl = nft_dereference(priv->tbl); |
363 | struct nft_hash_elem *he, *next; | |
96518518 PM |
364 | unsigned int i; |
365 | ||
ce6eb0d7 PM |
366 | for (i = 0; i < tbl->size; i++) { |
367 | for (he = nft_dereference(tbl->buckets[i]); he != NULL; | |
368 | he = next) { | |
369 | next = nft_dereference(he->next); | |
370 | nft_hash_elem_destroy(set, he); | |
96518518 PM |
371 | } |
372 | } | |
ce6eb0d7 | 373 | kfree(tbl); |
96518518 PM |
374 | } |
375 | ||
c50b960c PM |
376 | static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features, |
377 | struct nft_set_estimate *est) | |
378 | { | |
379 | unsigned int esize; | |
380 | ||
381 | esize = sizeof(struct nft_hash_elem); | |
382 | if (features & NFT_SET_MAP) | |
383 | esize += FIELD_SIZEOF(struct nft_hash_elem, data[0]); | |
384 | ||
385 | if (desc->size) { | |
386 | est->size = sizeof(struct nft_hash) + | |
387 | nft_hash_tbl_size(desc->size) * | |
388 | sizeof(struct nft_hash_elem *) + | |
389 | desc->size * esize; | |
390 | } else { | |
391 | /* Resizing happens when the load drops below 30% or goes | |
392 | * above 75%. The average of 52.5% load (approximated by 50%) | |
393 | * is used for the size estimation of the hash buckets, | |
394 | * meaning we calculate two buckets per element. | |
395 | */ | |
396 | est->size = esize + 2 * sizeof(struct nft_hash_elem *); | |
397 | } | |
398 | ||
399 | est->class = NFT_SET_CLASS_O_1; | |
400 | ||
401 | return true; | |
402 | } | |
403 | ||
20a69341 PM |
404 | static struct nft_set_ops nft_hash_ops __read_mostly = { |
405 | .privsize = nft_hash_privsize, | |
c50b960c | 406 | .estimate = nft_hash_estimate, |
96518518 PM |
407 | .init = nft_hash_init, |
408 | .destroy = nft_hash_destroy, | |
20a69341 PM |
409 | .get = nft_hash_get, |
410 | .insert = nft_hash_insert, | |
411 | .remove = nft_hash_remove, | |
412 | .lookup = nft_hash_lookup, | |
413 | .walk = nft_hash_walk, | |
414 | .features = NFT_SET_MAP, | |
415 | .owner = THIS_MODULE, | |
96518518 PM |
416 | }; |
417 | ||
418 | static int __init nft_hash_module_init(void) | |
419 | { | |
20a69341 | 420 | return nft_register_set(&nft_hash_ops); |
96518518 PM |
421 | } |
422 | ||
423 | static void __exit nft_hash_module_exit(void) | |
424 | { | |
20a69341 | 425 | nft_unregister_set(&nft_hash_ops); |
96518518 PM |
426 | } |
427 | ||
/*
 * Hook the load/unload functions into the module machinery and declare the
 * module metadata.
 * NOTE(review): MODULE_ALIAS_NFT_SET() is defined outside this file;
 * presumably it provides the alias used to auto-load set backends - confirm.
 */
428 | module_init(nft_hash_module_init); | |
429 | module_exit(nft_hash_module_exit); | |
430 | ||
431 | MODULE_LICENSE("GPL"); | |
432 | MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); | |
20a69341 | 433 | MODULE_ALIAS_NFT_SET(); |