Commit | Line | Data |
---|---|---|
f5db4af4 JB |
1 | /* |
2 | * Copyright (C) 2006-2009 Red Hat, Inc. | |
3 | * | |
4 | * This file is released under the LGPL. | |
5 | */ | |
6 | ||
#include <linux/bio.h>
#include <linux/jiffies.h>
#include <linux/slab.h>
#include <linux/dm-dirty-log.h>
#include <linux/device-mapper.h>
#include <linux/dm-log-userspace.h>

#include "dm-log-userspace-transfer.h"

86a54a48 JB |
15 | #define DM_LOG_USERSPACE_VSN "1.1.0" |
16 | ||
f5db4af4 JB |
/*
 * One queued mark/clear request.  Entries are created in
 * userspace_mark_region()/userspace_clear_region() and drained (and
 * returned to flush_entry_pool) by userspace_flush().
 */
struct flush_entry {
	int type;		/* DM_ULOG_MARK_REGION or DM_ULOG_CLEAR_REGION */
	region_t region;	/* region the request applies to */
	struct list_head list;	/* linkage on lc->mark_list or lc->clear_list */
};
22 | ||
085ae065 JB |
/*
 * This limit on the number of mark and clear requests is, to a degree,
 * arbitrary.  However, there is some basis for the choice in the limits
 * imposed on the size of data payload by dm-log-userspace-transfer.c:
 * dm_consult_userspace().
 */
29 | #define MAX_FLUSH_GROUP_COUNT 32 | |
30 | ||
/* Per-log kernel-side context for one userspace-backed dirty log. */
struct log_c {
	struct dm_target *ti;
	uint32_t region_size;	/* cached from the server at ctr time */
	region_t region_count;	/* ti->len / region_size, rounded up */
	uint64_t luid;		/* local unique id sent with every request */
	char uuid[DM_UUID_LEN];	/* log uuid shared with the userspace server */

	char *usr_argv_str;	/* ctr string, resent if the server restarts */
	uint32_t usr_argc;

	/*
	 * in_sync_hint gets set when doing is_remote_recovering.  It
	 * represents the first region that needs recovery.  IOW, the
	 * first zero bit of sync_bits.  This can be useful to limit
	 * traffic for calls like is_remote_recovering and get_resync_work,
	 * but take care in its use for anything else.
	 */
	uint64_t in_sync_hint;

	/*
	 * Mark and clear requests are held until a flush is issued
	 * so that we can group, and thereby limit, the amount of
	 * network traffic between kernel and userspace.  The 'flush_lock'
	 * is used to protect these lists.
	 */
	spinlock_t flush_lock;
	struct list_head mark_list;
	struct list_head clear_list;
};

/* Pool backing the flush_entry allocations made in mark/clear_region. */
static mempool_t *flush_entry_pool;
62 | ||
63 | static void *flush_entry_alloc(gfp_t gfp_mask, void *pool_data) | |
64 | { | |
65 | return kmalloc(sizeof(struct flush_entry), gfp_mask); | |
66 | } | |
67 | ||
/* mempool free callback: release a struct flush_entry. */
static void flush_entry_free(void *element, void *pool_data)
{
	struct flush_entry *fe = element;

	kfree(fe);
}
72 | ||
/*
 * userspace_do_request
 *
 * Forward one request to the userspace log server.  If the server is
 * gone (-ESRCH), poll until it returns, replay the constructor and a
 * resume, then retry the original request.  Note that this loops
 * indefinitely while the server is absent.
 *
 * Returns: result of the request, or -ESRCH if resume after reconnect fails
 */
static int userspace_do_request(struct log_c *lc, const char *uuid,
				int request_type, char *data, size_t data_size,
				char *rdata, size_t *rdata_size)
{
	int r;

	/*
	 * If the server isn't there, -ESRCH is returned,
	 * and we must keep trying until the server is
	 * restored.
	 */
retry:
	r = dm_consult_userspace(uuid, lc->luid, request_type, data,
				 data_size, rdata, rdata_size);

	if (r != -ESRCH)
		return r;

	DMERR(" Userspace log server not found.");
	while (1) {
		/* Sleep ~2s between reconnect attempts. */
		set_current_state(TASK_INTERRUPTIBLE);
		schedule_timeout(2*HZ);
		DMWARN("Attempting to contact userspace log server...");
		/* Re-create the server-side log from the saved ctr string. */
		r = dm_consult_userspace(uuid, lc->luid, DM_ULOG_CTR,
					 lc->usr_argv_str,
					 strlen(lc->usr_argv_str) + 1,
					 NULL, NULL);
		if (!r)
			break;
	}
	DMINFO("Reconnected to userspace log server... DM_ULOG_CTR complete");
	r = dm_consult_userspace(uuid, lc->luid, DM_ULOG_RESUME, NULL,
				 0, NULL, NULL);
	if (!r)
		goto retry;

	DMERR("Error trying to resume userspace log: %d", r);

	return -ESRCH;
}
113 | ||
114 | static int build_constructor_string(struct dm_target *ti, | |
115 | unsigned argc, char **argv, | |
116 | char **ctr_str) | |
117 | { | |
118 | int i, str_size; | |
119 | char *str = NULL; | |
120 | ||
121 | *ctr_str = NULL; | |
122 | ||
123 | for (i = 0, str_size = 0; i < argc; i++) | |
124 | str_size += strlen(argv[i]) + 1; /* +1 for space between args */ | |
125 | ||
126 | str_size += 20; /* Max number of chars in a printed u64 number */ | |
127 | ||
128 | str = kzalloc(str_size, GFP_KERNEL); | |
129 | if (!str) { | |
130 | DMWARN("Unable to allocate memory for constructor string"); | |
131 | return -ENOMEM; | |
132 | } | |
133 | ||
b8313b6d JB |
134 | str_size = sprintf(str, "%llu", (unsigned long long)ti->len); |
135 | for (i = 0; i < argc; i++) | |
136 | str_size += sprintf(str + str_size, " %s", argv[i]); | |
f5db4af4 JB |
137 | |
138 | *ctr_str = str; | |
139 | return str_size; | |
140 | } | |
141 | ||
/*
 * userspace_ctr
 *
 * argv contains:
 *	<UUID> <other args>
 * Where 'other args' is the userspace implementation specific log
 * arguments.  An example might be:
 *	<UUID> clustered_disk <arg count> <log dev> <region_size> [[no]sync]
 *
 * So, this module will strip off the <UUID> for identification purposes
 * when communicating with userspace about a log; but will pass on everything
 * else.
 */
static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
			 unsigned argc, char **argv)
{
	int r = 0;
	int str_size;
	char *ctr_str = NULL;
	struct log_c *lc = NULL;
	uint64_t rdata;
	size_t rdata_size = sizeof(rdata);

	if (argc < 3) {
		DMWARN("Too few arguments to userspace dirty log");
		return -EINVAL;
	}

	lc = kmalloc(sizeof(*lc), GFP_KERNEL);
	if (!lc) {
		DMWARN("Unable to allocate userspace log context.");
		return -ENOMEM;
	}

	/* The ptr value is sufficient for local unique id */
	lc->luid = (unsigned long)lc;

	lc->ti = ti;

	if (strlen(argv[0]) > (DM_UUID_LEN - 1)) {
		DMWARN("UUID argument too long.");
		kfree(lc);
		return -EINVAL;
	}

	/* Length was bounded above, so the copy is NUL-terminated. */
	strncpy(lc->uuid, argv[0], DM_UUID_LEN);
	spin_lock_init(&lc->flush_lock);
	INIT_LIST_HEAD(&lc->mark_list);
	INIT_LIST_HEAD(&lc->clear_list);

	/* "<dev len> <argv[1..]>" - kept for replay on server restart. */
	str_size = build_constructor_string(ti, argc - 1, argv + 1, &ctr_str);
	if (str_size < 0) {
		kfree(lc);
		return str_size;
	}

	/* Send table string */
	r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_CTR,
				 ctr_str, str_size, NULL, NULL);

	if (r < 0) {
		if (r == -ESRCH)
			DMERR("Userspace log server not found");
		else
			DMERR("Userspace log server failed to create log");
		goto out;
	}

	/* Since the region size does not change, get it now */
	rdata_size = sizeof(rdata);
	r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_GET_REGION_SIZE,
				 NULL, 0, (char *)&rdata, &rdata_size);

	if (r) {
		DMERR("Failed to get region size of dirty log");
		goto out;
	}

	lc->region_size = (uint32_t)rdata;
	lc->region_count = dm_sector_div_up(ti->len, lc->region_size);

out:
	/* On failure, everything allocated here is torn down. */
	if (r) {
		kfree(lc);
		kfree(ctr_str);
	} else {
		/* ctr string ownership moves to lc; freed in userspace_dtr. */
		lc->usr_argv_str = ctr_str;
		lc->usr_argc = argc;
		log->context = lc;
	}

	return r;
}
235 | ||
236 | static void userspace_dtr(struct dm_dirty_log *log) | |
237 | { | |
f5db4af4 JB |
238 | struct log_c *lc = log->context; |
239 | ||
4a038677 | 240 | (void) dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_DTR, |
f5db4af4 JB |
241 | NULL, 0, |
242 | NULL, NULL); | |
243 | ||
244 | kfree(lc->usr_argv_str); | |
245 | kfree(lc); | |
246 | ||
247 | return; | |
248 | } | |
249 | ||
250 | static int userspace_presuspend(struct dm_dirty_log *log) | |
251 | { | |
252 | int r; | |
253 | struct log_c *lc = log->context; | |
254 | ||
7ec23d50 | 255 | r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_PRESUSPEND, |
f5db4af4 JB |
256 | NULL, 0, |
257 | NULL, NULL); | |
258 | ||
259 | return r; | |
260 | } | |
261 | ||
262 | static int userspace_postsuspend(struct dm_dirty_log *log) | |
263 | { | |
264 | int r; | |
265 | struct log_c *lc = log->context; | |
266 | ||
7ec23d50 | 267 | r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_POSTSUSPEND, |
f5db4af4 JB |
268 | NULL, 0, |
269 | NULL, NULL); | |
270 | ||
271 | return r; | |
272 | } | |
273 | ||
274 | static int userspace_resume(struct dm_dirty_log *log) | |
275 | { | |
276 | int r; | |
277 | struct log_c *lc = log->context; | |
278 | ||
279 | lc->in_sync_hint = 0; | |
7ec23d50 | 280 | r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_RESUME, |
f5db4af4 JB |
281 | NULL, 0, |
282 | NULL, NULL); | |
283 | ||
284 | return r; | |
285 | } | |
286 | ||
287 | static uint32_t userspace_get_region_size(struct dm_dirty_log *log) | |
288 | { | |
289 | struct log_c *lc = log->context; | |
290 | ||
291 | return lc->region_size; | |
292 | } | |
293 | ||
294 | /* | |
295 | * userspace_is_clean | |
296 | * | |
297 | * Check whether a region is clean. If there is any sort of | |
298 | * failure when consulting the server, we return not clean. | |
299 | * | |
300 | * Returns: 1 if clean, 0 otherwise | |
301 | */ | |
302 | static int userspace_is_clean(struct dm_dirty_log *log, region_t region) | |
303 | { | |
304 | int r; | |
305 | uint64_t region64 = (uint64_t)region; | |
306 | int64_t is_clean; | |
307 | size_t rdata_size; | |
308 | struct log_c *lc = log->context; | |
309 | ||
310 | rdata_size = sizeof(is_clean); | |
311 | r = userspace_do_request(lc, lc->uuid, DM_ULOG_IS_CLEAN, | |
312 | (char *)®ion64, sizeof(region64), | |
313 | (char *)&is_clean, &rdata_size); | |
314 | ||
315 | return (r) ? 0 : (int)is_clean; | |
316 | } | |
317 | ||
318 | /* | |
319 | * userspace_in_sync | |
320 | * | |
321 | * Check if the region is in-sync. If there is any sort | |
322 | * of failure when consulting the server, we assume that | |
323 | * the region is not in sync. | |
324 | * | |
325 | * If 'can_block' is set, return immediately | |
326 | * | |
327 | * Returns: 1 if in-sync, 0 if not-in-sync, -EWOULDBLOCK | |
328 | */ | |
329 | static int userspace_in_sync(struct dm_dirty_log *log, region_t region, | |
330 | int can_block) | |
331 | { | |
332 | int r; | |
333 | uint64_t region64 = region; | |
334 | int64_t in_sync; | |
335 | size_t rdata_size; | |
336 | struct log_c *lc = log->context; | |
337 | ||
338 | /* | |
339 | * We can never respond directly - even if in_sync_hint is | |
340 | * set. This is because another machine could see a device | |
341 | * failure and mark the region out-of-sync. If we don't go | |
342 | * to userspace to ask, we might think the region is in-sync | |
343 | * and allow a read to pick up data that is stale. (This is | |
344 | * very unlikely if a device actually fails; but it is very | |
345 | * likely if a connection to one device from one machine fails.) | |
346 | * | |
347 | * There still might be a problem if the mirror caches the region | |
348 | * state as in-sync... but then this call would not be made. So, | |
349 | * that is a mirror problem. | |
350 | */ | |
351 | if (!can_block) | |
352 | return -EWOULDBLOCK; | |
353 | ||
354 | rdata_size = sizeof(in_sync); | |
355 | r = userspace_do_request(lc, lc->uuid, DM_ULOG_IN_SYNC, | |
356 | (char *)®ion64, sizeof(region64), | |
357 | (char *)&in_sync, &rdata_size); | |
358 | return (r) ? 0 : (int)in_sync; | |
359 | } | |
360 | ||
085ae065 JB |
361 | static int flush_one_by_one(struct log_c *lc, struct list_head *flush_list) |
362 | { | |
363 | int r = 0; | |
364 | struct flush_entry *fe; | |
365 | ||
366 | list_for_each_entry(fe, flush_list, list) { | |
367 | r = userspace_do_request(lc, lc->uuid, fe->type, | |
368 | (char *)&fe->region, | |
369 | sizeof(fe->region), | |
370 | NULL, NULL); | |
371 | if (r) | |
372 | break; | |
373 | } | |
374 | ||
375 | return r; | |
376 | } | |
377 | ||
/*
 * flush_by_group
 *
 * Send the queued requests in batches of up to MAX_FLUSH_GROUP_COUNT
 * regions per message.  If a batched send fails, the remaining entries
 * are retried one at a time.  In all cases every entry ends up back on
 * 'flush_list' so the caller can free them.
 *
 * Returns: 0 on success, userspace_do_request() error otherwise
 */
static int flush_by_group(struct log_c *lc, struct list_head *flush_list)
{
	int r = 0;
	int count;
	uint32_t type = 0;
	struct flush_entry *fe, *tmp_fe;
	LIST_HEAD(tmp_list);
	uint64_t group[MAX_FLUSH_GROUP_COUNT];

	/*
	 * Group process the requests
	 */
	while (!list_empty(flush_list)) {
		count = 0;

		list_for_each_entry_safe(fe, tmp_fe, flush_list, list) {
			group[count] = fe->region;
			count++;

			/* Park processed entries so the caller can free them. */
			list_del(&fe->list);
			list_add(&fe->list, &tmp_list);

			/* Every entry on one list carries the same type. */
			type = fe->type;
			if (count >= MAX_FLUSH_GROUP_COUNT)
				break;
		}

		r = userspace_do_request(lc, lc->uuid, type,
					 (char *)(group),
					 count * sizeof(uint64_t),
					 NULL, NULL);
		if (r) {
			/* Group send failed.  Attempt one-by-one. */
			list_splice_init(&tmp_list, flush_list);
			r = flush_one_by_one(lc, flush_list);
			break;
		}
	}

	/*
	 * Must collect flush_entrys that were successfully processed
	 * as a group so that they will be free'd by the caller.
	 */
	list_splice_init(&tmp_list, flush_list);

	return r;
}
425 | ||
/*
 * userspace_flush
 *
 * This function is ok to block.
 * The flush happens in two stages.  First, it sends all
 * clear/mark requests that are on the list.  Then it
 * tells the server to commit them.  This gives the
 * server a chance to optimise the commit, instead of
 * doing it for every request.
 *
 * Additionally, we could implement another thread that
 * sends the requests up to the server - reducing the
 * load on flush.  Then the flush would have less in
 * the list and be responsible for the finishing commit.
 *
 * Returns: 0 on success, < 0 on failure
 */
static int userspace_flush(struct dm_dirty_log *log)
{
	int r = 0;
	unsigned long flags;
	struct log_c *lc = log->context;
	LIST_HEAD(mark_list);
	LIST_HEAD(clear_list);
	struct flush_entry *fe, *tmp_fe;

	/* Detach both queues under the lock; they are drained unlocked. */
	spin_lock_irqsave(&lc->flush_lock, flags);
	list_splice_init(&lc->mark_list, &mark_list);
	list_splice_init(&lc->clear_list, &clear_list);
	spin_unlock_irqrestore(&lc->flush_lock, flags);

	if (list_empty(&mark_list) && list_empty(&clear_list))
		return 0;

	r = flush_by_group(lc, &mark_list);
	if (r)
		goto fail;

	r = flush_by_group(lc, &clear_list);
	if (r)
		goto fail;

	/* Both queues sent; ask the server to commit them as one unit. */
	r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH,
				 NULL, 0, NULL, NULL);

fail:
	/*
	 * We can safely remove these entries, even if failure.
	 * Calling code will receive an error and will know that
	 * the log facility has failed.
	 */
	list_for_each_entry_safe(fe, tmp_fe, &mark_list, list) {
		list_del(&fe->list);
		mempool_free(fe, flush_entry_pool);
	}
	list_for_each_entry_safe(fe, tmp_fe, &clear_list, list) {
		list_del(&fe->list);
		mempool_free(fe, flush_entry_pool);
	}

	/* Let dm core know the log has failed so the table can react. */
	if (r)
		dm_table_event(lc->ti->table);

	return r;
}
491 | ||
492 | /* | |
493 | * userspace_mark_region | |
494 | * | |
495 | * This function should avoid blocking unless absolutely required. | |
496 | * (Memory allocation is valid for blocking.) | |
497 | */ | |
498 | static void userspace_mark_region(struct dm_dirty_log *log, region_t region) | |
499 | { | |
500 | unsigned long flags; | |
501 | struct log_c *lc = log->context; | |
502 | struct flush_entry *fe; | |
503 | ||
504 | /* Wait for an allocation, but _never_ fail */ | |
505 | fe = mempool_alloc(flush_entry_pool, GFP_NOIO); | |
506 | BUG_ON(!fe); | |
507 | ||
508 | spin_lock_irqsave(&lc->flush_lock, flags); | |
509 | fe->type = DM_ULOG_MARK_REGION; | |
510 | fe->region = region; | |
909cc4fb | 511 | list_add(&fe->list, &lc->mark_list); |
f5db4af4 JB |
512 | spin_unlock_irqrestore(&lc->flush_lock, flags); |
513 | ||
514 | return; | |
515 | } | |
516 | ||
517 | /* | |
518 | * userspace_clear_region | |
519 | * | |
520 | * This function must not block. | |
521 | * So, the alloc can't block. In the worst case, it is ok to | |
522 | * fail. It would simply mean we can't clear the region. | |
523 | * Does nothing to current sync context, but does mean | |
524 | * the region will be re-sync'ed on a reload of the mirror | |
525 | * even though it is in-sync. | |
526 | */ | |
527 | static void userspace_clear_region(struct dm_dirty_log *log, region_t region) | |
528 | { | |
529 | unsigned long flags; | |
530 | struct log_c *lc = log->context; | |
531 | struct flush_entry *fe; | |
532 | ||
533 | /* | |
534 | * If we fail to allocate, we skip the clearing of | |
535 | * the region. This doesn't hurt us in any way, except | |
536 | * to cause the region to be resync'ed when the | |
537 | * device is activated next time. | |
538 | */ | |
539 | fe = mempool_alloc(flush_entry_pool, GFP_ATOMIC); | |
540 | if (!fe) { | |
541 | DMERR("Failed to allocate memory to clear region."); | |
542 | return; | |
543 | } | |
544 | ||
545 | spin_lock_irqsave(&lc->flush_lock, flags); | |
546 | fe->type = DM_ULOG_CLEAR_REGION; | |
547 | fe->region = region; | |
909cc4fb | 548 | list_add(&fe->list, &lc->clear_list); |
f5db4af4 JB |
549 | spin_unlock_irqrestore(&lc->flush_lock, flags); |
550 | ||
551 | return; | |
552 | } | |
553 | ||
554 | /* | |
555 | * userspace_get_resync_work | |
556 | * | |
557 | * Get a region that needs recovery. It is valid to return | |
558 | * an error for this function. | |
559 | * | |
560 | * Returns: 1 if region filled, 0 if no work, <0 on error | |
561 | */ | |
562 | static int userspace_get_resync_work(struct dm_dirty_log *log, region_t *region) | |
563 | { | |
564 | int r; | |
565 | size_t rdata_size; | |
566 | struct log_c *lc = log->context; | |
567 | struct { | |
568 | int64_t i; /* 64-bit for mix arch compatibility */ | |
569 | region_t r; | |
570 | } pkg; | |
571 | ||
572 | if (lc->in_sync_hint >= lc->region_count) | |
573 | return 0; | |
574 | ||
575 | rdata_size = sizeof(pkg); | |
576 | r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_RESYNC_WORK, | |
577 | NULL, 0, | |
578 | (char *)&pkg, &rdata_size); | |
579 | ||
580 | *region = pkg.r; | |
581 | return (r) ? r : (int)pkg.i; | |
582 | } | |
583 | ||
584 | /* | |
585 | * userspace_set_region_sync | |
586 | * | |
587 | * Set the sync status of a given region. This function | |
588 | * must not fail. | |
589 | */ | |
590 | static void userspace_set_region_sync(struct dm_dirty_log *log, | |
591 | region_t region, int in_sync) | |
592 | { | |
593 | int r; | |
594 | struct log_c *lc = log->context; | |
595 | struct { | |
596 | region_t r; | |
597 | int64_t i; | |
598 | } pkg; | |
599 | ||
600 | pkg.r = region; | |
601 | pkg.i = (int64_t)in_sync; | |
602 | ||
603 | r = userspace_do_request(lc, lc->uuid, DM_ULOG_SET_REGION_SYNC, | |
604 | (char *)&pkg, sizeof(pkg), | |
605 | NULL, NULL); | |
606 | ||
607 | /* | |
608 | * It would be nice to be able to report failures. | |
609 | * However, it is easy emough to detect and resolve. | |
610 | */ | |
611 | return; | |
612 | } | |
613 | ||
614 | /* | |
615 | * userspace_get_sync_count | |
616 | * | |
617 | * If there is any sort of failure when consulting the server, | |
618 | * we assume that the sync count is zero. | |
619 | * | |
620 | * Returns: sync count on success, 0 on failure | |
621 | */ | |
622 | static region_t userspace_get_sync_count(struct dm_dirty_log *log) | |
623 | { | |
624 | int r; | |
625 | size_t rdata_size; | |
626 | uint64_t sync_count; | |
627 | struct log_c *lc = log->context; | |
628 | ||
629 | rdata_size = sizeof(sync_count); | |
630 | r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_SYNC_COUNT, | |
631 | NULL, 0, | |
632 | (char *)&sync_count, &rdata_size); | |
633 | ||
634 | if (r) | |
635 | return 0; | |
636 | ||
637 | if (sync_count >= lc->region_count) | |
638 | lc->in_sync_hint = lc->region_count; | |
639 | ||
640 | return (region_t)sync_count; | |
641 | } | |
642 | ||
/*
 * userspace_status
 *
 * Returns: amount of space consumed
 */
static int userspace_status(struct dm_dirty_log *log, status_type_t status_type,
			    char *result, unsigned maxlen)
{
	int r = 0;
	char *table_args;
	size_t sz = (size_t)maxlen;
	struct log_c *lc = log->context;

	switch (status_type) {
	case STATUSTYPE_INFO:
		/* INFO text is produced by the userspace server. */
		r = userspace_do_request(lc, lc->uuid, DM_ULOG_STATUS_INFO,
					 NULL, 0,
					 result, &sz);

		if (r) {
			sz = 0;
			DMEMIT("%s 1 COM_FAILURE", log->type->name);
		}
		break;
	case STATUSTYPE_TABLE:
		sz = 0;
		/* usr_argv_str is "<dev len> <args...>": skip the length. */
		table_args = strchr(lc->usr_argv_str, ' ');
		BUG_ON(!table_args); /* There will always be a ' ' */
		table_args++;

		DMEMIT("%s %u %s %s ", log->type->name, lc->usr_argc,
		       lc->uuid, table_args);
		break;
	}
	return (r) ? 0 : (int)sz;
}
679 | ||
680 | /* | |
681 | * userspace_is_remote_recovering | |
682 | * | |
683 | * Returns: 1 if region recovering, 0 otherwise | |
684 | */ | |
685 | static int userspace_is_remote_recovering(struct dm_dirty_log *log, | |
686 | region_t region) | |
687 | { | |
688 | int r; | |
689 | uint64_t region64 = region; | |
690 | struct log_c *lc = log->context; | |
691 | static unsigned long long limit; | |
692 | struct { | |
693 | int64_t is_recovering; | |
694 | uint64_t in_sync_hint; | |
695 | } pkg; | |
696 | size_t rdata_size = sizeof(pkg); | |
697 | ||
698 | /* | |
699 | * Once the mirror has been reported to be in-sync, | |
700 | * it will never again ask for recovery work. So, | |
701 | * we can safely say there is not a remote machine | |
702 | * recovering if the device is in-sync. (in_sync_hint | |
703 | * must be reset at resume time.) | |
704 | */ | |
705 | if (region < lc->in_sync_hint) | |
706 | return 0; | |
707 | else if (jiffies < limit) | |
708 | return 1; | |
709 | ||
710 | limit = jiffies + (HZ / 4); | |
711 | r = userspace_do_request(lc, lc->uuid, DM_ULOG_IS_REMOTE_RECOVERING, | |
712 | (char *)®ion64, sizeof(region64), | |
713 | (char *)&pkg, &rdata_size); | |
714 | if (r) | |
715 | return 1; | |
716 | ||
717 | lc->in_sync_hint = pkg.in_sync_hint; | |
718 | ||
719 | return (int)pkg.is_recovering; | |
720 | } | |
721 | ||
/* dm-dirty-log operations table registered with the dm log core. */
static struct dm_dirty_log_type _userspace_type = {
	.name = "userspace",
	.module = THIS_MODULE,
	.ctr = userspace_ctr,
	.dtr = userspace_dtr,
	.presuspend = userspace_presuspend,
	.postsuspend = userspace_postsuspend,
	.resume = userspace_resume,
	.get_region_size = userspace_get_region_size,
	.is_clean = userspace_is_clean,
	.in_sync = userspace_in_sync,
	.flush = userspace_flush,
	.mark_region = userspace_mark_region,
	.clear_region = userspace_clear_region,
	.get_resync_work = userspace_get_resync_work,
	.set_region_sync = userspace_set_region_sync,
	.get_sync_count = userspace_get_sync_count,
	.status = userspace_status,
	.is_remote_recovering = userspace_is_remote_recovering,
};
742 | ||
743 | static int __init userspace_dirty_log_init(void) | |
744 | { | |
745 | int r = 0; | |
746 | ||
747 | flush_entry_pool = mempool_create(100, flush_entry_alloc, | |
748 | flush_entry_free, NULL); | |
749 | ||
750 | if (!flush_entry_pool) { | |
751 | DMWARN("Unable to create flush_entry_pool: No memory."); | |
752 | return -ENOMEM; | |
753 | } | |
754 | ||
755 | r = dm_ulog_tfr_init(); | |
756 | if (r) { | |
757 | DMWARN("Unable to initialize userspace log communications"); | |
758 | mempool_destroy(flush_entry_pool); | |
759 | return r; | |
760 | } | |
761 | ||
762 | r = dm_dirty_log_type_register(&_userspace_type); | |
763 | if (r) { | |
764 | DMWARN("Couldn't register userspace dirty log type"); | |
765 | dm_ulog_tfr_exit(); | |
766 | mempool_destroy(flush_entry_pool); | |
767 | return r; | |
768 | } | |
769 | ||
86a54a48 | 770 | DMINFO("version " DM_LOG_USERSPACE_VSN " loaded"); |
f5db4af4 JB |
771 | return 0; |
772 | } | |
773 | ||
774 | static void __exit userspace_dirty_log_exit(void) | |
775 | { | |
776 | dm_dirty_log_type_unregister(&_userspace_type); | |
777 | dm_ulog_tfr_exit(); | |
778 | mempool_destroy(flush_entry_pool); | |
779 | ||
86a54a48 | 780 | DMINFO("version " DM_LOG_USERSPACE_VSN " unloaded"); |
f5db4af4 JB |
781 | return; |
782 | } | |
783 | ||
784 | module_init(userspace_dirty_log_init); | |
785 | module_exit(userspace_dirty_log_exit); | |
786 | ||
787 | MODULE_DESCRIPTION(DM_NAME " userspace dirty log link"); | |
788 | MODULE_AUTHOR("Jonathan Brassow <dm-devel@redhat.com>"); | |
789 | MODULE_LICENSE("GPL"); |