md/raid5: activate raid6 rmw feature
diff --git a/drivers/md/md.c b/drivers/md/md.c
index d406a79f914007883eb347875334e005fe1d6d3a..d4f31e195e26ebcc4233c9b333624d8b73191826 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -255,6 +255,7 @@ static void md_make_request(struct request_queue *q, struct bio *bio)
        const int rw = bio_data_dir(bio);
        struct mddev *mddev = q->queuedata;
        unsigned int sectors;
+       int cpu;
 
        if (mddev == NULL || mddev->pers == NULL
            || !mddev->ready) {
@@ -290,7 +291,10 @@ static void md_make_request(struct request_queue *q, struct bio *bio)
        sectors = bio_sectors(bio);
        mddev->pers->make_request(mddev, bio);
 
-       generic_start_io_acct(rw, sectors, &mddev->gendisk->part0);
+       cpu = part_stat_lock();
+       part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
+       part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors);
+       part_stat_unlock();
 
        if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
                wake_up(&mddev->sb_wait);
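
Note: the hunk above open-codes the per-cpu statistics that generic_start_io_acct() provided. At the time, the generic helper also bumped the partition's in_flight count and io_ticks, and since md has no completion hook to decrement them, iostat reported a permanently climbing in-flight count and ~100% utilization. For comparison, the block-layer helper of that era looked roughly like this (paraphrased from block/bio.c; a sketch, not the exact source):

        void generic_start_io_acct(int rw, unsigned long sectors,
                                   struct hd_struct *part)
        {
                int cpu = part_stat_lock();     /* per-cpu stats, preemption off */

                part_round_stats(cpu, part);            /* advances io_ticks */
                part_stat_inc(cpu, part, ios[rw]);      /* request count */
                part_stat_add(cpu, part, sectors[rw], sectors);
                part_inc_in_flight(part, rw);           /* md never decrements this */

                part_stat_unlock();
        }

The replacement keeps only the ios[] and sectors[] counters, the two that md can maintain correctly without seeing completions.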
@@ -2375,6 +2379,36 @@ repeat:
 }
 EXPORT_SYMBOL(md_update_sb);
 
+static int add_bound_rdev(struct md_rdev *rdev)
+{
+       struct mddev *mddev = rdev->mddev;
+       int err = 0;
+
+       if (!mddev->pers->hot_remove_disk) {
+               /* If there is hot_add_disk but no hot_remove_disk,
+                * then added disks are for geometry changes
+                * and should be added immediately.
+                */
+               super_types[mddev->major_version].
+                       validate_super(mddev, rdev);
+               err = mddev->pers->hot_add_disk(mddev, rdev);
+               if (err) {
+                       unbind_rdev_from_array(rdev);
+                       export_rdev(rdev);
+                       return err;
+               }
+       }
+       sysfs_notify_dirent_safe(rdev->sysfs_state);
+
+       set_bit(MD_CHANGE_DEVS, &mddev->flags);
+       if (mddev->degraded)
+               set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
+       set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+       md_new_event(mddev);
+       md_wakeup_thread(mddev->thread);
+       return 0;
+}
+
 /* words written to sysfs files may, or may not, be \n terminated.
  * We want to accept either case. For this we use cmd_match.
  */
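
Note: add_bound_rdev() consolidates the post-bind bookkeeping that was previously duplicated inline in add_new_disk() (see the removals in the hunk at -5873 below): validate and hot-add when the personality lacks hot_remove_disk, notify sysfs, flag the superblock dirty, and kick recovery. One detail worth noting: on hot_add_disk() failure the helper now unbinds and exports the rdev itself, so callers reduce to the simple pattern used later on this page:

        err = bind_rdev_to_array(rdev, mddev);
        if (err)
                export_rdev(rdev);              /* bind failed: rdev never joined */
        else
                err = add_bound_rdev(rdev);     /* unbinds + exports on failure */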
@@ -2477,8 +2511,10 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
                else {
                        struct mddev *mddev = rdev->mddev;
                        if (mddev_is_clustered(mddev))
-                               md_cluster_ops->metadata_update_start(mddev);
+                               md_cluster_ops->remove_disk(mddev, rdev);
                        md_kick_rdev_from_array(rdev);
+                       if (mddev_is_clustered(mddev))
+                               md_cluster_ops->metadata_update_start(mddev);
                        if (mddev->pers)
                                md_update_sb(mddev, 1);
                        md_new_event(mddev);
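
Note: for clustered arrays, "remove" now asks the other nodes to drop the device (remove_disk) before it is kicked from the local array, and only then takes the cluster-wide metadata lock for the superblock write. The cluster hooks used on this page belong to struct md_cluster_operations from drivers/md/md-cluster.h; the relevant subset looked approximately like this around v4.1 (reconstructed from memory, so verify against your tree):

        struct md_cluster_operations {
                /* ... join/leave, slot_number, resync handshakes ... */
                int (*metadata_update_start)(struct mddev *mddev);
                int (*metadata_update_finish)(struct mddev *mddev);
                int (*add_new_disk_finish)(struct mddev *mddev);
                int (*remove_disk)(struct mddev *mddev, struct md_rdev *rdev);
                int (*gather_bitmaps)(struct md_rdev *rdev);
        };

gather_bitmaps(), used by the new "re-add" branch in the next hunk, pulls the other nodes' write-intent bitmaps into this node so that any writes made while the device was faulty get resynced here.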
@@ -2562,13 +2598,28 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
                        clear_bit(Replacement, &rdev->flags);
                        err = 0;
                }
+       } else if (cmd_match(buf, "re-add")) {
+               if (test_bit(Faulty, &rdev->flags) && (rdev->raid_disk == -1)) {
+                       /* clear_bit is performed _after_ all the devices
+                        * have their local Faulty bit cleared. If any writes
+                        * happen in the meantime in the local node, they
+                        * will land in the local bitmap, which will be synced
+                        * by this node eventually
+                        */
+                       if (!mddev_is_clustered(rdev->mddev) ||
+                           (err = md_cluster_ops->gather_bitmaps(rdev)) == 0) {
+                               clear_bit(Faulty, &rdev->flags);
+                               err = add_bound_rdev(rdev);
+                       }
+               } else
+                       err = -EBUSY;
        }
        if (!err)
                sysfs_notify_dirent_safe(rdev->sysfs_state);
        return err ? err : len;
 }
 static struct rdev_sysfs_entry rdev_state =
-__ATTR(state, S_IRUGO|S_IWUSR, state_show, state_store);
+__ATTR_PREALLOC(state, S_IRUGO|S_IWUSR, state_show, state_store);
 
 static ssize_t
 errors_show(struct md_rdev *rdev, char *page)
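
Note: this hunk and several below switch md's control files (state, resync_start, array_state, metadata_version, sync_action, sync_completed) from __ATTR to __ATTR_PREALLOC. The PREALLOC variant ORs SYSFS_PREALLOC into the mode, which tells kernfs to allocate the file's buffer at open() rather than at each read or write, so these attributes stay usable under memory pressure, for instance to unstick an array that writeback is blocked on. For reference, the macro as defined in include/linux/sysfs.h of this era (quoted from memory; check your tree):

        #define __ATTR_PREALLOC(_name, _mode, _show, _store) {                 \
                .attr = { .name = __stringify(_name),                          \
                          .mode = SYSFS_PREALLOC | VERIFY_OCTAL_PERMISSIONS(_mode) },\
                .show   = _show,                                               \
                .store  = _store,                                              \
        }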
@@ -3660,7 +3711,8 @@ resync_start_store(struct mddev *mddev, const char *buf, size_t len)
        return err ?: len;
 }
 static struct md_sysfs_entry md_resync_start =
-__ATTR(resync_start, S_IRUGO|S_IWUSR, resync_start_show, resync_start_store);
+__ATTR_PREALLOC(resync_start, S_IRUGO|S_IWUSR,
+               resync_start_show, resync_start_store);
 
 /*
  * The array state can be:
@@ -3873,7 +3925,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
        return err ?: len;
 }
 static struct md_sysfs_entry md_array_state =
-__ATTR(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
+__ATTR_PREALLOC(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
 
 static ssize_t
 max_corrected_read_errors_show(struct mddev *mddev, char *page) {
@@ -4127,7 +4179,7 @@ out_unlock:
 }
 
 static struct md_sysfs_entry md_metadata =
-__ATTR(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
+__ATTR_PREALLOC(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
 
 static ssize_t
 action_show(struct mddev *mddev, char *page)
@@ -4215,7 +4267,7 @@ action_store(struct mddev *mddev, const char *page, size_t len)
 }
 
 static struct md_sysfs_entry md_scan_mode =
-__ATTR(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
+__ATTR_PREALLOC(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
 
 static ssize_t
 last_sync_action_show(struct mddev *mddev, char *page)
@@ -4361,7 +4413,8 @@ sync_completed_show(struct mddev *mddev, char *page)
        return sprintf(page, "%llu / %llu\n", resync, max_sectors);
 }
 
-static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed);
+static struct md_sysfs_entry md_sync_completed =
+       __ATTR_PREALLOC(sync_completed, S_IRUGO, sync_completed_show, NULL);
 
 static ssize_t
 min_sync_show(struct mddev *mddev, char *page)
@@ -4374,7 +4427,6 @@ min_sync_store(struct mddev *mddev, const char *buf, size_t len)
 {
        unsigned long long min;
        int err;
-       int chunk;
 
        if (kstrtoull(buf, 10, &min))
                return -EINVAL;
@@ -4388,16 +4440,8 @@ min_sync_store(struct mddev *mddev, const char *buf, size_t len)
        if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
                goto out_unlock;
 
-       /* Must be a multiple of chunk_size */
-       chunk = mddev->chunk_sectors;
-       if (chunk) {
-               sector_t temp = min;
-
-               err = -EINVAL;
-               if (sector_div(temp, chunk))
-                       goto out_unlock;
-       }
-       mddev->resync_min = min;
+       /* Round down to multiple of 4K for safety */
+       mddev->resync_min = round_down(min, 8);
        err = 0;
 
 out_unlock:
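
Note: the old code rejected any resync_min that was not a multiple of the chunk size; the new code silently rounds down to a multiple of 8 sectors (8 * 512 B = 4 KiB), which is safe for any layout. A quick worked example, assuming the standard round_down() macro (x & ~(y - 1) for a power-of-two y):

        /* e.g. "echo 12345 > .../md/sync_min" ends up doing roughly: */
        mddev->resync_min = round_down(12345, 8);  /* 12345 & ~7 == 12344 */
        /* 12344 sectors == 6320128 bytes, an exact 4 KiB multiple      */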
@@ -5110,7 +5154,8 @@ int md_run(struct mddev *mddev)
        }
        if (err) {
                mddev_detach(mddev);
-               pers->free(mddev, mddev->private);
+               if (mddev->private)
+                       pers->free(mddev, mddev->private);
                module_put(pers->owner);
                bitmap_destroy(mddev);
                return err;
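
Note: the new NULL check matters because pers->run() can fail before it ever assigns mddev->private, and a personality's free() routine typically dereferences its conf unconditionally. A hypothetical personality showing the crash the guard avoids (all names invented for illustration):

        struct fake_conf {
                void *stripe_buf;
        };

        static void fake_free(struct mddev *mddev, void *priv)
        {
                struct fake_conf *conf = priv;

                kfree(conf->stripe_buf);        /* NULL deref if run() bailed early */
                kfree(conf);
        }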
@@ -5873,29 +5918,10 @@ static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info)
 
                rdev->raid_disk = -1;
                err = bind_rdev_to_array(rdev, mddev);
-               if (!err && !mddev->pers->hot_remove_disk) {
-                       /* If there is hot_add_disk but no hot_remove_disk
-                        * then added disks for geometry changes,
-                        * and should be added immediately.
-                        */
-                       super_types[mddev->major_version].
-                               validate_super(mddev, rdev);
-                       err = mddev->pers->hot_add_disk(mddev, rdev);
-                       if (err)
-                               unbind_rdev_from_array(rdev);
-               }
                if (err)
                        export_rdev(rdev);
                else
-                       sysfs_notify_dirent_safe(rdev->sysfs_state);
-
-               set_bit(MD_CHANGE_DEVS, &mddev->flags);
-               if (mddev->degraded)
-                       set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
-               set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
-               if (!err)
-                       md_new_event(mddev);
-               md_wakeup_thread(mddev->thread);
+                       err = add_bound_rdev(rdev);
                if (mddev_is_clustered(mddev) &&
                                (info->state & (1 << MD_DISK_CLUSTER_ADD)))
                        md_cluster_ops->add_new_disk_finish(mddev);
@@ -5968,6 +5994,9 @@ static int hot_remove_disk(struct mddev *mddev, dev_t dev)
        if (rdev->raid_disk >= 0)
                goto busy;
 
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->remove_disk(mddev, rdev);
+
        md_kick_rdev_from_array(rdev);
        md_update_sb(mddev, 1);
        md_new_event(mddev);
@@ -7791,8 +7820,7 @@ void md_do_sync(struct md_thread *thread)
                if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
                        break;
 
-               sectors = mddev->pers->sync_request(mddev, j, &skipped,
-                                                 currspeed < speed_min(mddev));
+               sectors = mddev->pers->sync_request(mddev, j, &skipped);
                if (sectors == 0) {
                        set_bit(MD_RECOVERY_INTR, &mddev->recovery);
                        break;
@@ -7852,11 +7880,18 @@ void md_do_sync(struct md_thread *thread)
                        /((jiffies-mddev->resync_mark)/HZ +1) +1;
 
                if (currspeed > speed_min(mddev)) {
-                       if ((currspeed > speed_max(mddev)) ||
-                                       !is_mddev_idle(mddev, 0)) {
+                       if (currspeed > speed_max(mddev)) {
                                msleep(500);
                                goto repeat;
                        }
+                       if (!is_mddev_idle(mddev, 0)) {
+                               /*
+                                * Give other IO more of a chance.
+                                * The faster the devices, the less we wait.
+                                */
+                               wait_event(mddev->recovery_wait,
+                                          !atomic_read(&mddev->recovery_active));
+                       }
                }
        }
        printk(KERN_INFO "md: %s: %s %s.\n",mdname(mddev), desc,
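
Note: currspeed here is in KB/s, i.e. sectors resynced since the last mark, halved to KiB, divided by elapsed seconds, with +1 terms guarding against division by zero:

        /* currspeed (KB/s) = ((j - resync_mark_cnt) / 2)
         *                  / ((jiffies - resync_mark)/HZ + 1) + 1
         * e.g. 20480 sectors in ~4 s -> 10240 / 5 + 1 = 2049 KB/s
         */

Above speed_min the behavior now splits: past speed_max we still back off with a flat msleep(500), but when the array is merely busy with other I/O we only wait for our own in-flight resync requests (mddev->recovery_active) to drain, so fast devices get back to work sooner than the old unconditional 500 ms nap allowed.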
@@ -7869,7 +7904,7 @@ void md_do_sync(struct md_thread *thread)
        wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
 
        /* tell personality that we are finished */
-       mddev->pers->sync_request(mddev, max_sectors, &skipped, 1);
+       mddev->pers->sync_request(mddev, max_sectors, &skipped);
 
        if (mddev_is_clustered(mddev))
                md_cluster_ops->resync_finish(mddev);