/*
 * TODO: keys are currently assumed <= sizeof(void *). Key target never freed.
 */
#include <stdlib.h>
#include <stdint.h>
#include <assert.h>
#include <errno.h>
#include <pthread.h>
#include <urcu.h>
#include <compiler.h>
#include <arch.h>
#include <arch_atomic.h>
#include <urcu-defer.h>
#include <urcu-ht.h>
#include <urcu/jhash.h>
/* node flags */
#define NODE_STOLEN	(1 << 0)
struct rcu_ht_node {
	struct rcu_ht_node *next;
	void *key;
	void *data;
	unsigned int flags;		/* NODE_STOLEN */
};

struct rcu_ht {
	struct rcu_ht_node **tbl;
	ht_hash_fct hash_fct;
	void (*free_fct)(void *data);	/* fct to free data */
	uint32_t keylen;
	uint32_t hashseed;
	unsigned long size;
	pthread_mutex_t resize_mutex;	/* resize mutex: add/del mutex */
	int resize_ongoing;		/* fast-path resize check */
};
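/*
 * Layout sketch (illustrative, not a strict invariant): ht->tbl is an array
 * of ht->size bucket heads; each bucket is a singly-linked list of
 * rcu_ht_node, and each node carries the key, the user data pointer and the
 * flags word used for NODE_STOLEN.
 *
 *	tbl[0] --> node --> node --> NULL
 *	tbl[1] --> NULL
 *	tbl[2] --> node --> NULL
 */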
struct rcu_ht *ht_new(ht_hash_fct hash_fct, void (*free_fct)(void *data),
		      unsigned long init_size, uint32_t keylen,
		      uint32_t hashseed)
{
	struct rcu_ht *ht;

	ht = calloc(1, sizeof(struct rcu_ht));
	ht->hash_fct = hash_fct;
	ht->free_fct = free_fct;
	ht->size = init_size;
	ht->keylen = keylen;
	ht->hashseed = hashseed;
	/* this mutex should not nest in read-side C.S. */
	pthread_mutex_init(&ht->resize_mutex, NULL);
	ht->resize_ongoing = 0;
	ht->tbl = calloc(init_size, sizeof(struct rcu_ht_node *));
	return ht;
}
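/*
 * Creation sketch, assuming the ht_jhash helper defined at the end of this
 * file and plain free() for the data; the bucket count, key length and seed
 * below are arbitrary illustration values, not requirements of the API:
 *
 *	struct rcu_ht *ht;
 *
 *	ht = ht_new(ht_jhash, free, 4096, sizeof(unsigned long), 0x42);
 */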
void *ht_lookup(struct rcu_ht *ht, void *key)
{
	unsigned long hash;
	struct rcu_ht_node *node;
	void *ret;

	hash = ht->hash_fct(key, ht->keylen, ht->hashseed) % ht->size;
	smp_read_barrier_depends();	/* read size before links */

	rcu_read_lock();
	node = rcu_dereference(ht->tbl[hash]);
	for (;;) {
		if (likely(!node)) {
			ret = NULL;
			break;
		}
		if (node->key == key) {
			ret = node->data;
			break;
		}
		node = rcu_dereference(node->next);
	}
	rcu_read_unlock();

	return ret;
}
/*
 * Will re-try until either:
 * - The key is already there (-EEXIST)
 * - We successfully add the key at the head of a table bucket.
 */
int ht_add(struct rcu_ht *ht, void *key, void *data)
{
	struct rcu_ht_node *node, *old_head, *new_head;
	unsigned long hash;
	int ret = 0;

	new_head = calloc(1, sizeof(struct rcu_ht_node));
	new_head->key = key;
	new_head->data = data;
	/* here comes the fun and tricky part.
	 * Add at the beginning with a cmpxchg.
	 * Hold a read lock between the moment the first element is read
	 * and the traversal of the nodes (to find duplicates). This ensures
	 * the head pointer has not been reclaimed when cmpxchg is done.
	 * Always adding at the head ensures that we would have to
	 * re-try if a new item has been added concurrently. So we ensure that
	 * we never add duplicates. */
retry:
	rcu_read_lock();

	if (unlikely(ht->resize_ongoing)) {
		rcu_read_unlock();
		/*
		 * Wait for resize to complete before continuing.
		 */
		ret = pthread_mutex_lock(&ht->resize_mutex);
		assert(!ret);
		ret = pthread_mutex_unlock(&ht->resize_mutex);
		assert(!ret);
		goto retry;
	}

	hash = ht->hash_fct(key, ht->keylen, ht->hashseed) % ht->size;

	old_head = node = rcu_dereference(ht->tbl[hash]);
	for (;;) {
		if (likely(!node))
			break;
		if (node->key == key) {
			/* Never published: safe to free the new node here. */
			free(new_head);
			ret = -EEXIST;
			goto end;
		}
		node = rcu_dereference(node->next);
	}
	new_head->next = old_head;
	if (rcu_cmpxchg_pointer(&ht->tbl[hash], old_head, new_head) != old_head)
		goto restart;
end:
	rcu_read_unlock();
	return ret;

	/* restart loop, release and re-take the read lock to be kind to GP */
restart:
	rcu_read_unlock();
	goto retry;
}
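/*
 * Caller-side sketch, assuming the calling thread is registered with RCU and
 * that -EEXIST is the only error ht_add() returns (see the loop above);
 * "key" and "data" stand for caller-owned values:
 *
 *	if (ht_add(ht, key, data) == -EEXIST) {
 *		// another thread published this key first: we still own
 *		// "data" and must dispose of it ourselves.
 *		free(data);
 *	}
 */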
/*
 * Restart until we successfully remove the entry, or no entry is left
 * ((void *)(unsigned long)-ENOENT).
 * Deal with concurrent stealers by doing an extra verification pass to check
 * that no element in the list is still pointing to the element stolen.
 * This could happen if two concurrent steals of consecutive objects are
 * executed: a pointer to an object being stolen could be saved by the
 * concurrent stealer of the previous object.
 * Also, given that in this precise scenario another stealer can also want to
 * delete the doubly-referenced object, use a "stolen" flag to let only one
 * stealer delete the object.
 */
void *ht_steal(struct rcu_ht *ht, void *key)
{
	struct rcu_ht_node **prev, *node, *del_node = NULL;
	unsigned long hash;
	void *data;
	int ret;

retry:
	rcu_read_lock();

	if (unlikely(ht->resize_ongoing)) {
		rcu_read_unlock();
		/*
		 * Wait for resize to complete before continuing.
		 */
		ret = pthread_mutex_lock(&ht->resize_mutex);
		assert(!ret);
		ret = pthread_mutex_unlock(&ht->resize_mutex);
		assert(!ret);
		goto retry;
	}

	hash = ht->hash_fct(key, ht->keylen, ht->hashseed) % ht->size;

	prev = &ht->tbl[hash];
	node = rcu_dereference(*prev);
	for (;;) {
		if (likely(!node)) {
			if (del_node)	/* verification pass found no stale link */
				goto end;
			else
				goto error;
		}
		if (node->key == key) {
			break;
		}
		prev = &node->next;
		node = rcu_dereference(*prev);
	}

	if (!del_node) {
		/*
		 * Another concurrent thread stole it? If so, let it deal with
		 * this. Assume NODE_STOLEN is the only flag. If this changes,
		 * read flags before cmpxchg.
		 */
		if (cmpxchg(&node->flags, 0, NODE_STOLEN) != 0)
			goto error;
		del_node = node;
	}

	/* Found it! pointer to object is in "prev" */
	if (rcu_cmpxchg_pointer(prev, node, node->next) == node) {
		/*
		 * Unlinked here. Restart for the extra verification pass: a
		 * concurrent steal of the previous object may have left a
		 * stale pointer to the node we just stole.
		 */
		goto restart;
	}
	/* The link changed under us: restart and unlink from its new position. */
	goto restart;

end:
	/*
	 * From that point, we own node. Note that there can still be concurrent
	 * RCU readers using it. We can free it outside of read lock after a GP.
	 */
	rcu_read_unlock();

	data = del_node->data;
	call_rcu(free, del_node);
	return data;

error:
	data = (void *)(unsigned long)-ENOENT;
	rcu_read_unlock();
	return data;

	/* restart loop, release and re-take the read lock to be kind to GP */
restart:
	rcu_read_unlock();
	goto retry;
}
int ht_delete(struct rcu_ht *ht, void *key)
{
	void *data;

	data = ht_steal(ht, key);
	if (data && data != (void *)(unsigned long)-ENOENT) {
		call_rcu(ht->free_fct, data);
		return 0;
	} else {
		return -ENOENT;
	}
}
/* Delete all old elements. Allow concurrent writer accesses. */
int ht_delete_all(struct rcu_ht *ht)
{
	unsigned long i;
	struct rcu_ht_node **prev, *node, *inext;
	int ret;

	/*
	 * Mutual exclusion with resize operations, but let add/steal execute
	 * concurrently. This is OK because we operate only on the heads.
	 */
	ret = pthread_mutex_lock(&ht->resize_mutex);
	assert(!ret);

	for (i = 0; i < ht->size; i++) {
		prev = &ht->tbl[i];
		/*
		 * Cut the head. After that, we own the first element.
		 */
		node = rcu_xchg_pointer(prev, NULL);
		if (!node)
			continue;
		/*
		 * We manage a list shared with concurrent writers and readers.
		 * Note that a concurrent add may or may not be deleted by us,
		 * depending on whether it arrives before or after the head is
		 * cut. "node" points to our first node. Remove first elements
		 * iteratively.
		 */
		for (;;) {
			prev = &node->next;
			inext = rcu_xchg_pointer(prev, NULL);
			/*
			 * "node" is the first element of the list we have cut.
			 * We therefore own it, no concurrent writer may delete
			 * it. There can only be concurrent lookups. Concurrent
			 * add can only be done on a bucket head, but we've cut
			 * it already. inext is also owned by us, because we
			 * have exchanged it for "NULL". It will therefore be
			 * safe to use it after a G.P.
			 */
			if (node->data)
				call_rcu(ht->free_fct, node->data);
			call_rcu(free, node);
			if (likely(!inext))
				break;
			node = inext;
		}
	}

	ret = pthread_mutex_unlock(&ht->resize_mutex);
	assert(!ret);
	return 0;
}
/*
 * Should only be called when no more concurrent readers nor writers can
 * possibly access the table.
 */
int ht_destroy(struct rcu_ht *ht)
{
	int ret;

	ret = ht_delete_all(ht);
	free(ht->tbl);
	free(ht);
	return ret;
}
static void ht_resize_grow(struct rcu_ht *ht)
{
	unsigned long i, new_size, old_size;
	struct rcu_ht_node **new_tbl, **old_tbl;
	struct rcu_ht_node *node, *new_node, *tmp;
	unsigned long hash;

	old_size = ht->size;
	new_size = old_size << 1;
	new_tbl = calloc(new_size, sizeof(struct rcu_ht_node *));

	for (i = 0; i < old_size; i++) {
		/*
		 * Re-hash each entry, insert in new table.
		 * It's important that a reader looking for a key _will_ find it
		 * if it's in the table.
		 * Copy each node. (just the node, not ->data)
		 */
		node = ht->tbl[i];
		while (node) {
			hash = ht->hash_fct(node->key, ht->keylen, ht->hashseed)
					% new_size;
			new_node = malloc(sizeof(struct rcu_ht_node));
			new_node->key = node->key;
			new_node->data = node->data;
			new_node->flags = 0;
			new_node->next = new_tbl[hash];	/* add to head of re-hashed bucket */
			new_tbl[hash] = new_node;
			node = node->next;
		}
	}

	old_tbl = ht->tbl;
	rcu_assign_pointer(ht->tbl, new_tbl);
	smp_wmb();	/* write links and table before changing size */
	ht->size = new_size;

	/* Ensure all concurrent lookups use new size and table */
	synchronize_rcu();

	for (i = 0; i < old_size; i++) {
		node = old_tbl[i];
		while (node) {
			tmp = node->next;
			free(node);
			node = tmp;
		}
	}
	free(old_tbl);
}
static void ht_resize_shrink(struct rcu_ht *ht)
{
	unsigned long i, new_size;
	struct rcu_ht_node **new_tbl;
	struct rcu_ht_node **prev, *node;

	if (ht->size == 1)
		return;

	new_size = ht->size >> 1;

	for (i = 0; i < new_size; i++) {
		/*
		 * Link the end of bucket i with the first entry of bucket
		 * i + new_size: keys that hashed to bucket i + new_size hash
		 * to bucket i once the table size is halved.
		 */
		prev = &ht->tbl[i];
		node = *prev;
		while (node) {
			prev = &node->next;
			node = *prev;
		}
		*prev = ht->tbl[i + new_size];
	}
	smp_wmb();	/* write links before changing size */
	ht->size = new_size;

	/* Ensure all concurrent lookups use new size */
	synchronize_rcu();

	new_tbl = realloc(ht->tbl, new_size * sizeof(struct rcu_ht_node *));
	/* shrinking, pointers should not move */
	assert(new_tbl == ht->tbl);
}
/*
 * growth: > 0: *2, < 0: /2
 */
void ht_resize(struct rcu_ht *ht, int growth)
{
	int ret;

	ret = pthread_mutex_lock(&ht->resize_mutex);
	assert(!ret);
	ht->resize_ongoing = 1;
	/* Wait for in-flight add/steal that saw resize_ongoing == 0. */
	synchronize_rcu();
	/* All add/remove are waiting on the mutex. */
	if (growth > 0)
		ht_resize_grow(ht);
	else if (growth < 0)
		ht_resize_shrink(ht);
	ht->resize_ongoing = 0;
	ret = pthread_mutex_unlock(&ht->resize_mutex);
	assert(!ret);
}
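/*
 * Resize is driven explicitly by the caller, using the growth convention
 * documented above; a minimal sketch:
 *
 *	ht_resize(ht, 1);	// double the number of buckets
 *	ht_resize(ht, -1);	// halve the number of buckets
 */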
/*
 * Expects keys of size <= sizeof(void *) to be encoded in the pointer itself.
 */
uint32_t ht_jhash(void *key, uint32_t length, uint32_t initval)
{
	uint32_t ret;
	void *vkey;

	if (length <= sizeof(void *))
		vkey = &key;	/* small key: hash the pointer bits themselves */
	else
		vkey = key;
	ret = jhash(vkey, length, initval);
	return ret;
}
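/*
 * Lookup sketch for keys that fit in a pointer (see the TODO at the top of
 * this file): the key value itself is passed as the "key" argument, and
 * ht_jhash() then hashes the pointer bits through &key. The calling thread
 * is assumed to be registered with RCU; holding the read lock around the
 * lookup and the use of the returned value keeps it safe from a deferred
 * free by a concurrent delete. "use" is a placeholder for caller code.
 *
 *	unsigned long key = 42;
 *	void *data;
 *
 *	rcu_read_lock();
 *	data = ht_lookup(ht, (void *)key);
 *	if (data)
 *		use(data);
 *	rcu_read_unlock();
 */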