Commit | Line | Data |
---|---|---|
96518518 | 1 | /* |
ce6eb0d7 | 2 | * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net> |
96518518 PM |
3 | * |
4 | * This program is free software; you can redistribute it and/or modify | |
5 | * it under the terms of the GNU General Public License version 2 as | |
6 | * published by the Free Software Foundation. | |
7 | * | |
8 | * Development of this code funded by Astaro AG (http://www.astaro.com/) | |
9 | */ | |
10 | ||
11 | #include <linux/kernel.h> | |
12 | #include <linux/init.h> | |
13 | #include <linux/module.h> | |
14 | #include <linux/list.h> | |
15 | #include <linux/jhash.h> | |
16 | #include <linux/netlink.h> | |
3ab428a4 | 17 | #include <linux/vmalloc.h> |
96518518 PM |
18 | #include <linux/netfilter.h> |
19 | #include <linux/netfilter/nf_tables.h> | |
20 | #include <net/netfilter/nf_tables.h> | |
21 | ||
ce6eb0d7 PM |
22 | #define NFT_HASH_MIN_SIZE 4 |
23 | ||
96518518 | 24 | struct nft_hash { |
ce6eb0d7 PM |
25 | struct nft_hash_table __rcu *tbl; |
26 | }; | |
27 | ||
28 | struct nft_hash_table { | |
29 | unsigned int size; | |
30 | unsigned int elements; | |
31 | struct nft_hash_elem __rcu *buckets[]; | |
96518518 PM |
32 | }; |
33 | ||
34 | struct nft_hash_elem { | |
ce6eb0d7 PM |
35 | struct nft_hash_elem __rcu *next; |
36 | struct nft_data key; | |
37 | struct nft_data data[]; | |
96518518 PM |
38 | }; |
39 | ||
ce6eb0d7 PM |
40 | #define nft_hash_for_each_entry(i, head) \ |
41 | for (i = nft_dereference(head); i != NULL; i = nft_dereference(i->next)) | |
42 | #define nft_hash_for_each_entry_rcu(i, head) \ | |
43 | for (i = rcu_dereference(head); i != NULL; i = rcu_dereference(i->next)) | |
44 | ||
96518518 PM |
45 | static u32 nft_hash_rnd __read_mostly; |
46 | static bool nft_hash_rnd_initted __read_mostly; | |
47 | ||
48 | static unsigned int nft_hash_data(const struct nft_data *data, | |
49 | unsigned int hsize, unsigned int len) | |
50 | { | |
51 | unsigned int h; | |
52 | ||
20a69341 | 53 | h = jhash(data->data, len, nft_hash_rnd); |
ce6eb0d7 | 54 | return h & (hsize - 1); |
96518518 PM |
55 | } |
56 | ||
20a69341 PM |
57 | static bool nft_hash_lookup(const struct nft_set *set, |
58 | const struct nft_data *key, | |
59 | struct nft_data *data) | |
96518518 | 60 | { |
20a69341 | 61 | const struct nft_hash *priv = nft_set_priv(set); |
ce6eb0d7 | 62 | const struct nft_hash_table *tbl = rcu_dereference(priv->tbl); |
20a69341 | 63 | const struct nft_hash_elem *he; |
96518518 PM |
64 | unsigned int h; |
65 | ||
ce6eb0d7 PM |
66 | h = nft_hash_data(key, tbl->size, set->klen); |
67 | nft_hash_for_each_entry_rcu(he, tbl->buckets[h]) { | |
20a69341 | 68 | if (nft_data_cmp(&he->key, key, set->klen)) |
96518518 | 69 | continue; |
20a69341 PM |
70 | if (set->flags & NFT_SET_MAP) |
71 | nft_data_copy(data, he->data); | |
72 | return true; | |
96518518 | 73 | } |
20a69341 | 74 | return false; |
96518518 PM |
75 | } |
76 | ||
ce6eb0d7 | 77 | static void nft_hash_tbl_free(const struct nft_hash_table *tbl) |
96518518 | 78 | { |
ce6eb0d7 PM |
79 | if (is_vmalloc_addr(tbl)) |
80 | vfree(tbl); | |
81 | else | |
82 | kfree(tbl); | |
83 | } | |
84 | ||
85 | static struct nft_hash_table *nft_hash_tbl_alloc(unsigned int nbuckets) | |
86 | { | |
87 | struct nft_hash_table *tbl; | |
88 | size_t size; | |
89 | ||
90 | size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]); | |
91 | tbl = kzalloc(size, GFP_KERNEL | __GFP_REPEAT | __GFP_NOWARN); | |
92 | if (tbl == NULL) | |
93 | tbl = vzalloc(size); | |
94 | if (tbl == NULL) | |
95 | return NULL; | |
96 | tbl->size = nbuckets; | |
97 | ||
98 | return tbl; | |
99 | } | |
100 | ||
101 | static void nft_hash_chain_unzip(const struct nft_set *set, | |
102 | const struct nft_hash_table *ntbl, | |
103 | struct nft_hash_table *tbl, unsigned int n) | |
104 | { | |
105 | struct nft_hash_elem *he, *last, *next; | |
106 | unsigned int h; | |
107 | ||
108 | he = nft_dereference(tbl->buckets[n]); | |
109 | if (he == NULL) | |
110 | return; | |
111 | h = nft_hash_data(&he->key, ntbl->size, set->klen); | |
112 | ||
113 | /* Find last element of first chain hashing to bucket h */ | |
114 | last = he; | |
115 | nft_hash_for_each_entry(he, he->next) { | |
116 | if (nft_hash_data(&he->key, ntbl->size, set->klen) != h) | |
117 | break; | |
118 | last = he; | |
119 | } | |
120 | ||
121 | /* Unlink first chain from the old table */ | |
122 | RCU_INIT_POINTER(tbl->buckets[n], last->next); | |
123 | ||
124 | /* If end of chain reached, done */ | |
125 | if (he == NULL) | |
126 | return; | |
127 | ||
128 | /* Find first element of second chain hashing to bucket h */ | |
129 | next = NULL; | |
130 | nft_hash_for_each_entry(he, he->next) { | |
131 | if (nft_hash_data(&he->key, ntbl->size, set->klen) != h) | |
132 | continue; | |
133 | next = he; | |
134 | break; | |
135 | } | |
136 | ||
137 | /* Link the two chains */ | |
138 | RCU_INIT_POINTER(last->next, next); | |
139 | } | |
140 | ||
141 | static int nft_hash_tbl_expand(const struct nft_set *set, struct nft_hash *priv) | |
142 | { | |
143 | struct nft_hash_table *tbl = nft_dereference(priv->tbl), *ntbl; | |
144 | struct nft_hash_elem *he; | |
145 | unsigned int i, h; | |
146 | bool complete; | |
147 | ||
148 | ntbl = nft_hash_tbl_alloc(tbl->size * 2); | |
149 | if (ntbl == NULL) | |
150 | return -ENOMEM; | |
151 | ||
152 | /* Link new table's buckets to first element in the old table | |
153 | * hashing to the new bucket. | |
154 | */ | |
155 | for (i = 0; i < ntbl->size; i++) { | |
156 | h = i < tbl->size ? i : i - tbl->size; | |
157 | nft_hash_for_each_entry(he, tbl->buckets[h]) { | |
158 | if (nft_hash_data(&he->key, ntbl->size, set->klen) != i) | |
159 | continue; | |
160 | RCU_INIT_POINTER(ntbl->buckets[i], he); | |
161 | break; | |
162 | } | |
163 | } | |
164 | ntbl->elements = tbl->elements; | |
165 | ||
166 | /* Publish new table */ | |
167 | rcu_assign_pointer(priv->tbl, ntbl); | |
168 | ||
169 | /* Unzip interleaved hash chains */ | |
170 | do { | |
171 | /* Wait for readers to use new table/unzipped chains */ | |
172 | synchronize_rcu(); | |
173 | ||
174 | complete = true; | |
175 | for (i = 0; i < tbl->size; i++) { | |
176 | nft_hash_chain_unzip(set, ntbl, tbl, i); | |
177 | if (tbl->buckets[i] != NULL) | |
178 | complete = false; | |
179 | } | |
180 | } while (!complete); | |
181 | ||
182 | nft_hash_tbl_free(tbl); | |
183 | return 0; | |
184 | } | |
185 | ||
186 | static int nft_hash_tbl_shrink(const struct nft_set *set, struct nft_hash *priv) | |
187 | { | |
188 | struct nft_hash_table *tbl = nft_dereference(priv->tbl), *ntbl; | |
189 | struct nft_hash_elem __rcu **pprev; | |
190 | unsigned int i; | |
191 | ||
192 | ntbl = nft_hash_tbl_alloc(tbl->size / 2); | |
193 | if (ntbl == NULL) | |
194 | return -ENOMEM; | |
195 | ||
196 | for (i = 0; i < ntbl->size; i++) { | |
197 | ntbl->buckets[i] = tbl->buckets[i]; | |
198 | ||
199 | for (pprev = &ntbl->buckets[i]; *pprev != NULL; | |
200 | pprev = &nft_dereference(*pprev)->next) | |
201 | ; | |
202 | RCU_INIT_POINTER(*pprev, tbl->buckets[i + ntbl->size]); | |
203 | } | |
204 | ntbl->elements = tbl->elements; | |
205 | ||
206 | /* Publish new table */ | |
207 | rcu_assign_pointer(priv->tbl, ntbl); | |
208 | synchronize_rcu(); | |
209 | ||
210 | nft_hash_tbl_free(tbl); | |
211 | return 0; | |
96518518 PM |
212 | } |
213 | ||
20a69341 PM |
214 | static int nft_hash_insert(const struct nft_set *set, |
215 | const struct nft_set_elem *elem) | |
96518518 | 216 | { |
20a69341 | 217 | struct nft_hash *priv = nft_set_priv(set); |
ce6eb0d7 | 218 | struct nft_hash_table *tbl = nft_dereference(priv->tbl); |
20a69341 PM |
219 | struct nft_hash_elem *he; |
220 | unsigned int size, h; | |
96518518 | 221 | |
20a69341 | 222 | if (elem->flags != 0) |
96518518 | 223 | return -EINVAL; |
96518518 | 224 | |
20a69341 PM |
225 | size = sizeof(*he); |
226 | if (set->flags & NFT_SET_MAP) | |
227 | size += sizeof(he->data[0]); | |
228 | ||
229 | he = kzalloc(size, GFP_KERNEL); | |
230 | if (he == NULL) | |
96518518 PM |
231 | return -ENOMEM; |
232 | ||
20a69341 PM |
233 | nft_data_copy(&he->key, &elem->key); |
234 | if (set->flags & NFT_SET_MAP) | |
235 | nft_data_copy(he->data, &elem->data); | |
96518518 | 236 | |
ce6eb0d7 PM |
237 | h = nft_hash_data(&he->key, tbl->size, set->klen); |
238 | RCU_INIT_POINTER(he->next, tbl->buckets[h]); | |
239 | rcu_assign_pointer(tbl->buckets[h], he); | |
240 | tbl->elements++; | |
241 | ||
242 | /* Expand table when exceeding 75% load */ | |
243 | if (tbl->elements > tbl->size / 4 * 3) | |
244 | nft_hash_tbl_expand(set, priv); | |
245 | ||
96518518 | 246 | return 0; |
96518518 PM |
247 | } |
248 | ||
ce6eb0d7 PM |
249 | static void nft_hash_elem_destroy(const struct nft_set *set, |
250 | struct nft_hash_elem *he) | |
251 | { | |
252 | nft_data_uninit(&he->key, NFT_DATA_VALUE); | |
253 | if (set->flags & NFT_SET_MAP) | |
254 | nft_data_uninit(he->data, set->dtype); | |
255 | kfree(he); | |
256 | } | |
257 | ||
20a69341 PM |
258 | static void nft_hash_remove(const struct nft_set *set, |
259 | const struct nft_set_elem *elem) | |
96518518 | 260 | { |
ce6eb0d7 PM |
261 | struct nft_hash *priv = nft_set_priv(set); |
262 | struct nft_hash_table *tbl = nft_dereference(priv->tbl); | |
263 | struct nft_hash_elem *he, __rcu **pprev; | |
96518518 | 264 | |
ce6eb0d7 PM |
265 | pprev = elem->cookie; |
266 | he = nft_dereference((*pprev)); | |
267 | ||
268 | RCU_INIT_POINTER(*pprev, he->next); | |
269 | synchronize_rcu(); | |
20a69341 | 270 | kfree(he); |
ce6eb0d7 PM |
271 | tbl->elements--; |
272 | ||
273 | /* Shrink table beneath 30% load */ | |
274 | if (tbl->elements < tbl->size * 3 / 10 && | |
275 | tbl->size > NFT_HASH_MIN_SIZE) | |
276 | nft_hash_tbl_shrink(set, priv); | |
20a69341 | 277 | } |
96518518 | 278 | |
20a69341 PM |
279 | static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem) |
280 | { | |
281 | const struct nft_hash *priv = nft_set_priv(set); | |
ce6eb0d7 PM |
282 | const struct nft_hash_table *tbl = nft_dereference(priv->tbl); |
283 | struct nft_hash_elem __rcu * const *pprev; | |
20a69341 PM |
284 | struct nft_hash_elem *he; |
285 | unsigned int h; | |
96518518 | 286 | |
ce6eb0d7 PM |
287 | h = nft_hash_data(&elem->key, tbl->size, set->klen); |
288 | pprev = &tbl->buckets[h]; | |
289 | nft_hash_for_each_entry(he, tbl->buckets[h]) { | |
290 | if (nft_data_cmp(&he->key, &elem->key, set->klen)) { | |
291 | pprev = &he->next; | |
20a69341 | 292 | continue; |
ce6eb0d7 | 293 | } |
96518518 | 294 | |
ce6eb0d7 PM |
295 | elem->cookie = (void *)pprev; |
296 | elem->flags = 0; | |
20a69341 PM |
297 | if (set->flags & NFT_SET_MAP) |
298 | nft_data_copy(&elem->data, he->data); | |
299 | return 0; | |
300 | } | |
301 | return -ENOENT; | |
96518518 PM |
302 | } |
303 | ||
20a69341 PM |
304 | static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, |
305 | struct nft_set_iter *iter) | |
96518518 | 306 | { |
20a69341 | 307 | const struct nft_hash *priv = nft_set_priv(set); |
ce6eb0d7 | 308 | const struct nft_hash_table *tbl = nft_dereference(priv->tbl); |
20a69341 PM |
309 | const struct nft_hash_elem *he; |
310 | struct nft_set_elem elem; | |
96518518 PM |
311 | unsigned int i; |
312 | ||
ce6eb0d7 PM |
313 | for (i = 0; i < tbl->size; i++) { |
314 | nft_hash_for_each_entry(he, tbl->buckets[i]) { | |
20a69341 PM |
315 | if (iter->count < iter->skip) |
316 | goto cont; | |
317 | ||
318 | memcpy(&elem.key, &he->key, sizeof(elem.key)); | |
319 | if (set->flags & NFT_SET_MAP) | |
320 | memcpy(&elem.data, he->data, sizeof(elem.data)); | |
321 | elem.flags = 0; | |
322 | ||
323 | iter->err = iter->fn(ctx, set, iter, &elem); | |
324 | if (iter->err < 0) | |
325 | return; | |
326 | cont: | |
327 | iter->count++; | |
96518518 PM |
328 | } |
329 | } | |
96518518 PM |
330 | } |
331 | ||
20a69341 PM |
332 | static unsigned int nft_hash_privsize(const struct nlattr * const nla[]) |
333 | { | |
334 | return sizeof(struct nft_hash); | |
335 | } | |
96518518 | 336 | |
20a69341 | 337 | static int nft_hash_init(const struct nft_set *set, |
96518518 PM |
338 | const struct nlattr * const tb[]) |
339 | { | |
20a69341 | 340 | struct nft_hash *priv = nft_set_priv(set); |
ce6eb0d7 | 341 | struct nft_hash_table *tbl; |
96518518 PM |
342 | |
343 | if (unlikely(!nft_hash_rnd_initted)) { | |
344 | get_random_bytes(&nft_hash_rnd, 4); | |
345 | nft_hash_rnd_initted = true; | |
346 | } | |
347 | ||
ce6eb0d7 PM |
348 | tbl = nft_hash_tbl_alloc(NFT_HASH_MIN_SIZE); |
349 | if (tbl == NULL) | |
96518518 | 350 | return -ENOMEM; |
ce6eb0d7 | 351 | RCU_INIT_POINTER(priv->tbl, tbl); |
96518518 | 352 | return 0; |
96518518 PM |
353 | } |
354 | ||
20a69341 | 355 | static void nft_hash_destroy(const struct nft_set *set) |
96518518 | 356 | { |
20a69341 | 357 | const struct nft_hash *priv = nft_set_priv(set); |
ce6eb0d7 PM |
358 | const struct nft_hash_table *tbl = nft_dereference(priv->tbl); |
359 | struct nft_hash_elem *he, *next; | |
96518518 PM |
360 | unsigned int i; |
361 | ||
ce6eb0d7 PM |
362 | for (i = 0; i < tbl->size; i++) { |
363 | for (he = nft_dereference(tbl->buckets[i]); he != NULL; | |
364 | he = next) { | |
365 | next = nft_dereference(he->next); | |
366 | nft_hash_elem_destroy(set, he); | |
96518518 PM |
367 | } |
368 | } | |
ce6eb0d7 | 369 | kfree(tbl); |
96518518 PM |
370 | } |
371 | ||
20a69341 PM |
372 | static struct nft_set_ops nft_hash_ops __read_mostly = { |
373 | .privsize = nft_hash_privsize, | |
96518518 PM |
374 | .init = nft_hash_init, |
375 | .destroy = nft_hash_destroy, | |
20a69341 PM |
376 | .get = nft_hash_get, |
377 | .insert = nft_hash_insert, | |
378 | .remove = nft_hash_remove, | |
379 | .lookup = nft_hash_lookup, | |
380 | .walk = nft_hash_walk, | |
381 | .features = NFT_SET_MAP, | |
382 | .owner = THIS_MODULE, | |
96518518 PM |
383 | }; |
384 | ||
385 | static int __init nft_hash_module_init(void) | |
386 | { | |
20a69341 | 387 | return nft_register_set(&nft_hash_ops); |
96518518 PM |
388 | } |
389 | ||
390 | static void __exit nft_hash_module_exit(void) | |
391 | { | |
20a69341 | 392 | nft_unregister_set(&nft_hash_ops); |
96518518 PM |
393 | } |
394 | ||
395 | module_init(nft_hash_module_init); | |
396 | module_exit(nft_hash_module_exit); | |
397 | ||
398 | MODULE_LICENSE("GPL"); | |
399 | MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); | |
20a69341 | 400 | MODULE_ALIAS_NFT_SET(); |