From ed63287dd670f8e9d2412a913de7fdc50a689831 Mon Sep 17 00:00:00 2001 From: Lidong Zhong Date: Wed, 13 May 2015 14:04:10 +0800 Subject: [PATCH] dm raid1: keep issuing IO after leg failure Currently if there is a leg failure, the bio will be put into the hold list until userspace does a remove/replace on the leg. Doing so in a cluster config (clvmd) is problematic because there may be a temporary path failure that results in cluster raid1 remove/replace. Such recovery takes a long time due to a full resync. Update dm-raid1 to optionally ignore these failures so bios continue being issued without interrupton. To enable this feature userspace must pass "keep_log" when creating the dm-raid1 device. Signed-off-by: Lidong Zhong Tested-by: Liuhua Wang Acked-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer --- drivers/md/dm-raid1.c | 75 +++++++++++++++++++++++++++++++++---------- 1 file changed, 58 insertions(+), 17 deletions(-) diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 743fa9bbae9e..d83696bf403b 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -23,8 +23,10 @@ #define MAX_RECOVERY 1 /* Maximum number of regions recovered in parallel. */ -#define DM_RAID1_HANDLE_ERRORS 0x01 +#define DM_RAID1_HANDLE_ERRORS 0x01 +#define DM_RAID1_KEEP_LOG 0x02 #define errors_handled(p) ((p)->features & DM_RAID1_HANDLE_ERRORS) +#define keep_log(p) ((p)->features & DM_RAID1_KEEP_LOG) static DECLARE_WAIT_QUEUE_HEAD(_kmirrord_recovery_stopped); @@ -229,7 +231,7 @@ static void fail_mirror(struct mirror *m, enum dm_raid1_error error_type) if (m != get_default_mirror(ms)) goto out; - if (!ms->in_sync) { + if (!ms->in_sync && !keep_log(ms)) { /* * Better to issue requests to same failing device * than to risk returning corrupt data. @@ -370,6 +372,17 @@ static int recover(struct mirror_set *ms, struct dm_region *reg) return r; } +static void reset_ms_flags(struct mirror_set *ms) +{ + unsigned int m; + + ms->leg_failure = 0; + for (m = 0; m < ms->nr_mirrors; m++) { + atomic_set(&(ms->mirror[m].error_count), 0); + ms->mirror[m].error_type = 0; + } +} + static void do_recovery(struct mirror_set *ms) { struct dm_region *reg; @@ -398,6 +411,7 @@ static void do_recovery(struct mirror_set *ms) /* the sync is complete */ dm_table_event(ms->ti->table); ms->in_sync = 1; + reset_ms_flags(ms); } } @@ -759,7 +773,7 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes) dm_rh_delay(ms->rh, bio); while ((bio = bio_list_pop(&nosync))) { - if (unlikely(ms->leg_failure) && errors_handled(ms)) { + if (unlikely(ms->leg_failure) && errors_handled(ms) && !keep_log(ms)) { spin_lock_irq(&ms->lock); bio_list_add(&ms->failures, bio); spin_unlock_irq(&ms->lock); @@ -803,15 +817,21 @@ static void do_failures(struct mirror_set *ms, struct bio_list *failures) /* * If all the legs are dead, fail the I/O. - * If we have been told to handle errors, hold the bio - * and wait for userspace to deal with the problem. + * If the device has failed and keep_log is enabled, + * fail the I/O. + * + * If we have been told to handle errors, and keep_log + * isn't enabled, hold the bio and wait for userspace to + * deal with the problem. + * * Otherwise pretend that the I/O succeeded. (This would * be wrong if the failed leg returned after reboot and * got replicated back to the good legs.) */ - if (!get_valid_mirror(ms)) + + if (unlikely(!get_valid_mirror(ms) || (keep_log(ms) && ms->log_failure))) bio_endio(bio, -EIO); - else if (errors_handled(ms)) + else if (errors_handled(ms) && !keep_log(ms)) hold_bio(ms, bio); else bio_endio(bio, 0); @@ -987,6 +1007,7 @@ static int parse_features(struct mirror_set *ms, unsigned argc, char **argv, unsigned num_features; struct dm_target *ti = ms->ti; char dummy; + int i; *args_used = 0; @@ -1007,15 +1028,25 @@ static int parse_features(struct mirror_set *ms, unsigned argc, char **argv, return -EINVAL; } - if (!strcmp("handle_errors", argv[0])) - ms->features |= DM_RAID1_HANDLE_ERRORS; - else { - ti->error = "Unrecognised feature requested"; + for (i = 0; i < num_features; i++) { + if (!strcmp("handle_errors", argv[0])) + ms->features |= DM_RAID1_HANDLE_ERRORS; + else if (!strcmp("keep_log", argv[0])) + ms->features |= DM_RAID1_KEEP_LOG; + else { + ti->error = "Unrecognised feature requested"; + return -EINVAL; + } + + argc--; + argv++; + (*args_used)++; + } + if (!errors_handled(ms) && keep_log(ms)) { + ti->error = "keep_log feature requires the handle_errors feature"; return -EINVAL; } - (*args_used)++; - return 0; } @@ -1029,7 +1060,7 @@ static int parse_features(struct mirror_set *ms, unsigned argc, char **argv, * log_type is "core" or "disk" * #log_params is between 1 and 3 * - * If present, features must be "handle_errors". + * If present, supported features are "handle_errors" and "keep_log". */ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) { @@ -1363,6 +1394,7 @@ static void mirror_status(struct dm_target *ti, status_type_t type, unsigned status_flags, char *result, unsigned maxlen) { unsigned int m, sz = 0; + int num_feature_args = 0; struct mirror_set *ms = (struct mirror_set *) ti->private; struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); char buffer[ms->nr_mirrors + 1]; @@ -1392,8 +1424,17 @@ static void mirror_status(struct dm_target *ti, status_type_t type, DMEMIT(" %s %llu", ms->mirror[m].dev->name, (unsigned long long)ms->mirror[m].offset); - if (ms->features & DM_RAID1_HANDLE_ERRORS) - DMEMIT(" 1 handle_errors"); + num_feature_args += !!errors_handled(ms); + num_feature_args += !!keep_log(ms); + if (num_feature_args) { + DMEMIT(" %d", num_feature_args); + if (errors_handled(ms)) + DMEMIT(" handle_errors"); + if (keep_log(ms)) + DMEMIT(" keep_log"); + } + + break; } } @@ -1413,7 +1454,7 @@ static int mirror_iterate_devices(struct dm_target *ti, static struct target_type mirror_target = { .name = "mirror", - .version = {1, 13, 2}, + .version = {1, 14, 0}, .module = THIS_MODULE, .ctr = mirror_ctr, .dtr = mirror_dtr, -- 2.34.1