X-Git-Url: http://drtracing.org/?a=blobdiff_plain;f=drivers%2Fmd%2Fraid5.c;h=061d265ed94ee85cc892e1fb02a379e7437cec42;hb=d6065f7bf8bec170c9c56524a250093ce73ca5d9;hp=1223e98ecd70f538419790c48bfe19f664561480;hpb=b3ce1debe2685383a9ad6ace9c49869c3968c013;p=deliverable%2Flinux.git diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 1223e98ecd70..061d265ed94e 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -293,9 +293,31 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector return sh; } -static int grow_stripes(raid5_conf_t *conf, int num) +static int grow_one_stripe(raid5_conf_t *conf) { struct stripe_head *sh; + sh = kmem_cache_alloc(conf->slab_cache, GFP_KERNEL); + if (!sh) + return 0; + memset(sh, 0, sizeof(*sh) + (conf->raid_disks-1)*sizeof(struct r5dev)); + sh->raid_conf = conf; + spin_lock_init(&sh->lock); + + if (grow_buffers(sh, conf->raid_disks)) { + shrink_buffers(sh, conf->raid_disks); + kmem_cache_free(conf->slab_cache, sh); + return 0; + } + /* we just created an active stripe so... */ + atomic_set(&sh->count, 1); + atomic_inc(&conf->active_stripes); + INIT_LIST_HEAD(&sh->lru); + release_stripe(sh); + return 1; +} + +static int grow_stripes(raid5_conf_t *conf, int num) +{ kmem_cache_t *sc; int devs = conf->raid_disks; @@ -308,48 +330,39 @@ static int grow_stripes(raid5_conf_t *conf, int num) return 1; conf->slab_cache = sc; while (num--) { - sh = kmem_cache_alloc(sc, GFP_KERNEL); - if (!sh) - return 1; - memset(sh, 0, sizeof(*sh) + (devs-1)*sizeof(struct r5dev)); - sh->raid_conf = conf; - spin_lock_init(&sh->lock); - - if (grow_buffers(sh, conf->raid_disks)) { - shrink_buffers(sh, conf->raid_disks); - kmem_cache_free(sc, sh); + if (!grow_one_stripe(conf)) return 1; - } - /* we just created an active stripe so... */ - atomic_set(&sh->count, 1); - atomic_inc(&conf->active_stripes); - INIT_LIST_HEAD(&sh->lru); - release_stripe(sh); } return 0; } -static void shrink_stripes(raid5_conf_t *conf) +static int drop_one_stripe(raid5_conf_t *conf) { struct stripe_head *sh; - while (1) { - spin_lock_irq(&conf->device_lock); - sh = get_free_stripe(conf); - spin_unlock_irq(&conf->device_lock); - if (!sh) - break; - if (atomic_read(&sh->count)) - BUG(); - shrink_buffers(sh, conf->raid_disks); - kmem_cache_free(conf->slab_cache, sh); - atomic_dec(&conf->active_stripes); - } + spin_lock_irq(&conf->device_lock); + sh = get_free_stripe(conf); + spin_unlock_irq(&conf->device_lock); + if (!sh) + return 0; + if (atomic_read(&sh->count)) + BUG(); + shrink_buffers(sh, conf->raid_disks); + kmem_cache_free(conf->slab_cache, sh); + atomic_dec(&conf->active_stripes); + return 1; +} + +static void shrink_stripes(raid5_conf_t *conf) +{ + while (drop_one_stripe(conf)) + ; + kmem_cache_destroy(conf->slab_cache); conf->slab_cache = NULL; } -static int raid5_end_read_request (struct bio * bi, unsigned int bytes_done, +static int raid5_end_read_request(struct bio * bi, unsigned int bytes_done, int error) { struct stripe_head *sh = bi->bi_private; @@ -401,10 +414,27 @@ static int raid5_end_read_request (struct bio * bi, unsigned int bytes_done, } #else set_bit(R5_UPTODATE, &sh->dev[i].flags); -#endif +#endif + if (test_bit(R5_ReadError, &sh->dev[i].flags)) { + printk("R5: read error corrected!!\n"); + clear_bit(R5_ReadError, &sh->dev[i].flags); + clear_bit(R5_ReWrite, &sh->dev[i].flags); + } } else { - md_error(conf->mddev, conf->disks[i].rdev); clear_bit(R5_UPTODATE, &sh->dev[i].flags); + if (conf->mddev->degraded) { + printk("R5: read error not correctable.\n"); + clear_bit(R5_ReadError, &sh->dev[i].flags); + clear_bit(R5_ReWrite, &sh->dev[i].flags); + md_error(conf->mddev, conf->disks[i].rdev); + } else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) { + /* Oh, no!!! */ + printk("R5: read error NOT corrected!!\n"); + clear_bit(R5_ReadError, &sh->dev[i].flags); + clear_bit(R5_ReWrite, &sh->dev[i].flags); + md_error(conf->mddev, conf->disks[i].rdev); + } else + set_bit(R5_ReadError, &sh->dev[i].flags); } rdev_dec_pending(conf->disks[i].rdev, conf->mddev); #if 0 @@ -966,6 +996,12 @@ static void handle_stripe(struct stripe_head *sh) if (dev->written) written++; rdev = conf->disks[i].rdev; /* FIXME, should I be looking rdev */ if (!rdev || !rdev->in_sync) { + /* The ReadError flag wil just be confusing now */ + clear_bit(R5_ReadError, &dev->flags); + clear_bit(R5_ReWrite, &dev->flags); + } + if (!rdev || !rdev->in_sync + || test_bit(R5_ReadError, &dev->flags)) { failed++; failed_num = i; } else @@ -980,6 +1016,14 @@ static void handle_stripe(struct stripe_head *sh) if (failed > 1 && to_read+to_write+written) { for (i=disks; i--; ) { int bitmap_end = 0; + + if (test_bit(R5_ReadError, &sh->dev[i].flags)) { + mdk_rdev_t *rdev = conf->disks[i].rdev; + if (rdev && rdev->in_sync) + /* multiple read failures in one stripe */ + md_error(conf->mddev, rdev); + } + spin_lock_irq(&conf->device_lock); /* fail all writes first */ bi = sh->dev[i].towrite; @@ -1015,7 +1059,8 @@ static void handle_stripe(struct stripe_head *sh) } /* fail any reads if this device is non-operational */ - if (!test_bit(R5_Insync, &sh->dev[i].flags)) { + if (!test_bit(R5_Insync, &sh->dev[i].flags) || + test_bit(R5_ReadError, &sh->dev[i].flags)) { bi = sh->dev[i].toread; sh->dev[i].toread = NULL; if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) @@ -1247,6 +1292,11 @@ static void handle_stripe(struct stripe_head *sh) !memcmp(pagea, pagea+4, STRIPE_SIZE-4)) { /* parity is correct (on disc, not in buffer any more) */ set_bit(STRIPE_INSYNC, &sh->state); + } else { + conf->mddev->resync_mismatches += STRIPE_SECTORS; + if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) + /* don't try to repair!! */ + set_bit(STRIPE_INSYNC, &sh->state); } } if (!test_bit(STRIPE_INSYNC, &sh->state)) { @@ -1274,7 +1324,26 @@ static void handle_stripe(struct stripe_head *sh) md_done_sync(conf->mddev, STRIPE_SECTORS,1); clear_bit(STRIPE_SYNCING, &sh->state); } - + + /* If the failed drive is just a ReadError, then we might need to progress + * the repair/check process + */ + if (failed == 1 && test_bit(R5_ReadError, &sh->dev[failed_num].flags) + && !test_bit(R5_LOCKED, &sh->dev[failed_num].flags) + && test_bit(R5_UPTODATE, &sh->dev[failed_num].flags) + ) { + dev = &sh->dev[failed_num]; + if (!test_bit(R5_ReWrite, &dev->flags)) { + set_bit(R5_Wantwrite, &dev->flags); + set_bit(R5_ReWrite, &dev->flags); + set_bit(R5_LOCKED, &dev->flags); + } else { + /* let's read it back */ + set_bit(R5_Wantread, &dev->flags); + set_bit(R5_LOCKED, &dev->flags); + } + } + spin_unlock(&sh->lock); while ((bi=return_bi)) { @@ -1305,7 +1374,7 @@ static void handle_stripe(struct stripe_head *sh) bi->bi_end_io = raid5_end_read_request; rcu_read_lock(); - rdev = conf->disks[i].rdev; + rdev = rcu_dereference(conf->disks[i].rdev); if (rdev && rdev->faulty) rdev = NULL; if (rdev) @@ -1379,7 +1448,7 @@ static void unplug_slaves(mddev_t *mddev) rcu_read_lock(); for (i=0; iraid_disks; i++) { - mdk_rdev_t *rdev = conf->disks[i].rdev; + mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev); if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) { request_queue_t *r_queue = bdev_get_queue(rdev->bdev); @@ -1424,7 +1493,7 @@ static int raid5_issue_flush(request_queue_t *q, struct gendisk *disk, rcu_read_lock(); for (i=0; iraid_disks && ret == 0; i++) { - mdk_rdev_t *rdev = conf->disks[i].rdev; + mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev); if (rdev && !rdev->faulty) { struct block_device *bdev = rdev->bdev; request_queue_t *r_queue = bdev_get_queue(bdev); @@ -1663,6 +1732,108 @@ static void raid5d (mddev_t *mddev) PRINTK("--- raid5d inactive\n"); } +struct raid5_sysfs_entry { + struct attribute attr; + ssize_t (*show)(raid5_conf_t *, char *); + ssize_t (*store)(raid5_conf_t *, const char *, ssize_t); +}; + +static ssize_t +raid5_show_stripe_cache_size(raid5_conf_t *conf, char *page) +{ + return sprintf(page, "%d\n", conf->max_nr_stripes); +} + +static ssize_t +raid5_store_stripe_cache_size(raid5_conf_t *conf, const char *page, ssize_t len) +{ + char *end; + int new; + if (len >= PAGE_SIZE) + return -EINVAL; + + new = simple_strtoul(page, &end, 10); + if (!*page || (*end && *end != '\n') ) + return -EINVAL; + if (new <= 16 || new > 32768) + return -EINVAL; + while (new < conf->max_nr_stripes) { + if (drop_one_stripe(conf)) + conf->max_nr_stripes--; + else + break; + } + while (new > conf->max_nr_stripes) { + if (grow_one_stripe(conf)) + conf->max_nr_stripes++; + else break; + } + return len; +} +static struct raid5_sysfs_entry raid5_stripecache_size = { + .attr = {.name = "stripe_cache_size", .mode = S_IRUGO | S_IWUSR }, + .show = raid5_show_stripe_cache_size, + .store = raid5_store_stripe_cache_size, +}; + +static ssize_t +raid5_show_stripe_cache_active(raid5_conf_t *conf, char *page) +{ + return sprintf(page, "%d\n", atomic_read(&conf->active_stripes)); +} + +static struct raid5_sysfs_entry raid5_stripecache_active = { + .attr = {.name = "stripe_cache_active", .mode = S_IRUGO}, + .show = raid5_show_stripe_cache_active, +}; + +static struct attribute *raid5_default_attrs[] = { + &raid5_stripecache_size.attr, + &raid5_stripecache_active.attr, + NULL, +}; + +static ssize_t +raid5_attr_show(struct kobject *kobj, struct attribute *attr, char *page) +{ + struct raid5_sysfs_entry *entry = container_of(attr, struct raid5_sysfs_entry, attr); + raid5_conf_t *conf = container_of(kobj, raid5_conf_t, kobj); + + if (!entry->show) + return -EIO; + return entry->show(conf, page); +} + +static ssize_t +raid5_attr_store(struct kobject *kobj, struct attribute *attr, + const char *page, size_t length) +{ + struct raid5_sysfs_entry *entry = container_of(attr, struct raid5_sysfs_entry, attr); + raid5_conf_t *conf = container_of(kobj, raid5_conf_t, kobj); + + if (!entry->store) + return -EIO; + return entry->store(conf, page, length); +} + +static void raid5_free(struct kobject *ko) +{ + raid5_conf_t *conf = container_of(ko, raid5_conf_t, kobj); + kfree(conf); +} + + +static struct sysfs_ops raid5_sysfs_ops = { + .show = raid5_attr_show, + .store = raid5_attr_store, +}; + +static struct kobj_type raid5_ktype = { + .release = raid5_free, + .sysfs_ops = &raid5_sysfs_ops, + .default_attrs = raid5_default_attrs, +}; + static int run(mddev_t *mddev) { raid5_conf_t *conf; @@ -1804,6 +1975,10 @@ memory = conf->max_nr_stripes * (sizeof(struct stripe_head) + } /* Ok, everything is just fine now */ + conf->kobj.parent = kobject_get(&mddev->kobj); + strcpy(conf->kobj.name, "raid5"); + conf->kobj.ktype = &raid5_ktype; + kobject_register(&conf->kobj); if (mddev->bitmap) mddev->thread->timeout = mddev->bitmap->daemon_sleep * HZ; @@ -1828,7 +2003,7 @@ abort: -static int stop (mddev_t *mddev) +static int stop(mddev_t *mddev) { raid5_conf_t *conf = (raid5_conf_t *) mddev->private; @@ -1837,7 +2012,7 @@ static int stop (mddev_t *mddev) shrink_stripes(conf); free_pages((unsigned long) conf->stripe_hashtbl, HASH_PAGES_ORDER); blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ - kfree(conf); + kobject_unregister(&conf->kobj); mddev->private = NULL; return 0; } @@ -1990,7 +2165,7 @@ static int raid5_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) found = 1; if (rdev->saved_raid_disk != disk) conf->fullsync = 1; - p->rdev = rdev; + rcu_assign_pointer(p->rdev, rdev); break; } print_raid5_conf(conf);