/* source extracted at commit d234a012f0462c66bdcaf0126107a7be694d303d */
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
12 #include <linux/bpf.h>
13 #include <linux/jhash.h>
14 #include <linux/filter.h>
15 #include <linux/vmalloc.h>
19 struct hlist_head
*buckets
;
21 u32 count
; /* number of elements in this hashtable */
22 u32 n_buckets
; /* number of hash buckets */
23 u32 elem_size
; /* size of each element in bytes */
26 /* each htab element is struct htab_elem + key + value */
28 struct hlist_node hash_node
;
31 char key
[0] __aligned(8);
34 /* Called from syscall */
35 static struct bpf_map
*htab_map_alloc(union bpf_attr
*attr
)
37 struct bpf_htab
*htab
;
40 htab
= kzalloc(sizeof(*htab
), GFP_USER
);
42 return ERR_PTR(-ENOMEM
);
44 /* mandatory map attributes */
45 htab
->map
.key_size
= attr
->key_size
;
46 htab
->map
.value_size
= attr
->value_size
;
47 htab
->map
.max_entries
= attr
->max_entries
;
49 /* check sanity of attributes.
50 * value_size == 0 may be allowed in the future to use map as a set
53 if (htab
->map
.max_entries
== 0 || htab
->map
.key_size
== 0 ||
54 htab
->map
.value_size
== 0)
57 /* hash table size must be power of 2 */
58 htab
->n_buckets
= roundup_pow_of_two(htab
->map
.max_entries
);
61 if (htab
->map
.key_size
> MAX_BPF_STACK
)
62 /* eBPF programs initialize keys on stack, so they cannot be
63 * larger than max stack size
68 htab
->buckets
= kmalloc_array(htab
->n_buckets
, sizeof(struct hlist_head
),
69 GFP_USER
| __GFP_NOWARN
);
72 htab
->buckets
= vmalloc(htab
->n_buckets
* sizeof(struct hlist_head
));
77 for (i
= 0; i
< htab
->n_buckets
; i
++)
78 INIT_HLIST_HEAD(&htab
->buckets
[i
]);
80 spin_lock_init(&htab
->lock
);
83 htab
->elem_size
= sizeof(struct htab_elem
) +
84 round_up(htab
->map
.key_size
, 8) +
93 static inline u32
htab_map_hash(const void *key
, u32 key_len
)
95 return jhash(key
, key_len
, 0);
98 static inline struct hlist_head
*select_bucket(struct bpf_htab
*htab
, u32 hash
)
100 return &htab
->buckets
[hash
& (htab
->n_buckets
- 1)];
103 static struct htab_elem
*lookup_elem_raw(struct hlist_head
*head
, u32 hash
,
104 void *key
, u32 key_size
)
108 hlist_for_each_entry_rcu(l
, head
, hash_node
)
109 if (l
->hash
== hash
&& !memcmp(&l
->key
, key
, key_size
))
115 /* Called from syscall or from eBPF program */
116 static void *htab_map_lookup_elem(struct bpf_map
*map
, void *key
)
118 struct bpf_htab
*htab
= container_of(map
, struct bpf_htab
, map
);
119 struct hlist_head
*head
;
123 /* Must be called with rcu_read_lock. */
124 WARN_ON_ONCE(!rcu_read_lock_held());
126 key_size
= map
->key_size
;
128 hash
= htab_map_hash(key
, key_size
);
130 head
= select_bucket(htab
, hash
);
132 l
= lookup_elem_raw(head
, hash
, key
, key_size
);
135 return l
->key
+ round_up(map
->key_size
, 8);
140 /* Called from syscall */
141 static int htab_map_get_next_key(struct bpf_map
*map
, void *key
, void *next_key
)
143 struct bpf_htab
*htab
= container_of(map
, struct bpf_htab
, map
);
144 struct hlist_head
*head
;
145 struct htab_elem
*l
, *next_l
;
149 WARN_ON_ONCE(!rcu_read_lock_held());
151 key_size
= map
->key_size
;
153 hash
= htab_map_hash(key
, key_size
);
155 head
= select_bucket(htab
, hash
);
158 l
= lookup_elem_raw(head
, hash
, key
, key_size
);
162 goto find_first_elem
;
165 /* key was found, get next key in the same bucket */
166 next_l
= hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&l
->hash_node
)),
167 struct htab_elem
, hash_node
);
170 /* if next elem in this hash list is non-zero, just return it */
171 memcpy(next_key
, next_l
->key
, key_size
);
175 /* no more elements in this hash list, go to the next bucket */
176 i
= hash
& (htab
->n_buckets
- 1);
180 /* iterate over buckets */
181 for (; i
< htab
->n_buckets
; i
++) {
182 head
= select_bucket(htab
, i
);
184 /* pick first element in the bucket */
185 next_l
= hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head
)),
186 struct htab_elem
, hash_node
);
188 /* if it's not empty, just return it */
189 memcpy(next_key
, next_l
->key
, key_size
);
194 /* itereated over all buckets and all elements */
198 /* Called from syscall or from eBPF program */
199 static int htab_map_update_elem(struct bpf_map
*map
, void *key
, void *value
,
202 struct bpf_htab
*htab
= container_of(map
, struct bpf_htab
, map
);
203 struct htab_elem
*l_new
, *l_old
;
204 struct hlist_head
*head
;
209 if (map_flags
> BPF_EXIST
)
213 WARN_ON_ONCE(!rcu_read_lock_held());
215 /* allocate new element outside of lock */
216 l_new
= kmalloc(htab
->elem_size
, GFP_ATOMIC
);
220 key_size
= map
->key_size
;
222 memcpy(l_new
->key
, key
, key_size
);
223 memcpy(l_new
->key
+ round_up(key_size
, 8), value
, map
->value_size
);
225 l_new
->hash
= htab_map_hash(l_new
->key
, key_size
);
227 /* bpf_map_update_elem() can be called in_irq() */
228 spin_lock_irqsave(&htab
->lock
, flags
);
230 head
= select_bucket(htab
, l_new
->hash
);
232 l_old
= lookup_elem_raw(head
, l_new
->hash
, key
, key_size
);
234 if (!l_old
&& unlikely(htab
->count
>= map
->max_entries
)) {
235 /* if elem with this 'key' doesn't exist and we've reached
236 * max_entries limit, fail insertion of new elem
242 if (l_old
&& map_flags
== BPF_NOEXIST
) {
243 /* elem already exists */
248 if (!l_old
&& map_flags
== BPF_EXIST
) {
249 /* elem doesn't exist, cannot update it */
254 /* add new element to the head of the list, so that concurrent
255 * search will find it before old elem
257 hlist_add_head_rcu(&l_new
->hash_node
, head
);
259 hlist_del_rcu(&l_old
->hash_node
);
260 kfree_rcu(l_old
, rcu
);
264 spin_unlock_irqrestore(&htab
->lock
, flags
);
268 spin_unlock_irqrestore(&htab
->lock
, flags
);
273 /* Called from syscall or from eBPF program */
274 static int htab_map_delete_elem(struct bpf_map
*map
, void *key
)
276 struct bpf_htab
*htab
= container_of(map
, struct bpf_htab
, map
);
277 struct hlist_head
*head
;
283 WARN_ON_ONCE(!rcu_read_lock_held());
285 key_size
= map
->key_size
;
287 hash
= htab_map_hash(key
, key_size
);
289 spin_lock_irqsave(&htab
->lock
, flags
);
291 head
= select_bucket(htab
, hash
);
293 l
= lookup_elem_raw(head
, hash
, key
, key_size
);
296 hlist_del_rcu(&l
->hash_node
);
302 spin_unlock_irqrestore(&htab
->lock
, flags
);
306 static void delete_all_elements(struct bpf_htab
*htab
)
310 for (i
= 0; i
< htab
->n_buckets
; i
++) {
311 struct hlist_head
*head
= select_bucket(htab
, i
);
312 struct hlist_node
*n
;
315 hlist_for_each_entry_safe(l
, n
, head
, hash_node
) {
316 hlist_del_rcu(&l
->hash_node
);
323 /* Called when map->refcnt goes to zero, either from workqueue or from syscall */
324 static void htab_map_free(struct bpf_map
*map
)
326 struct bpf_htab
*htab
= container_of(map
, struct bpf_htab
, map
);
328 /* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
329 * so the programs (can be more than one that used this map) were
330 * disconnected from events. Wait for outstanding critical sections in
331 * these programs to complete
335 /* some of kfree_rcu() callbacks for elements of this map may not have
336 * executed. It's ok. Proceed to free residual elements and map itself
338 delete_all_elements(htab
);
339 kvfree(htab
->buckets
);
343 static struct bpf_map_ops htab_ops
= {
344 .map_alloc
= htab_map_alloc
,
345 .map_free
= htab_map_free
,
346 .map_get_next_key
= htab_map_get_next_key
,
347 .map_lookup_elem
= htab_map_lookup_elem
,
348 .map_update_elem
= htab_map_update_elem
,
349 .map_delete_elem
= htab_map_delete_elem
,
352 static struct bpf_map_type_list tl
= {
354 .type
= BPF_MAP_TYPE_HASH
,
357 static int __init
register_htab_map(void)
359 bpf_register_map_type(&tl
);
362 late_initcall(register_htab_map
);
/* (scrape artifact removed: git-web page-generation footer) */