block, drivers, fs: rename REQ_FLUSH to REQ_PREFLUSH
[deliverable/linux.git] / drivers / md / md.c
index 14d3b37944df031214c2c6951ed15c46da104842..1f123f5a29da2cb4859060fab44d69e32c388606 100644 (file)
@@ -307,7 +307,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
  */
 void mddev_suspend(struct mddev *mddev)
 {
-       WARN_ON_ONCE(current == mddev->thread->tsk);
+       WARN_ON_ONCE(mddev->thread && current == mddev->thread->tsk);
        if (mddev->suspended++)
                return;
        synchronize_rcu();
@@ -394,8 +394,9 @@ static void submit_flushes(struct work_struct *ws)
                        bi->bi_end_io = md_end_flush;
                        bi->bi_private = rdev;
                        bi->bi_bdev = rdev->bdev;
+                       bio_set_op_attrs(bi, REQ_OP_WRITE, WRITE_FLUSH);
                        atomic_inc(&mddev->flush_pending);
-                       submit_bio(WRITE_FLUSH, bi);
+                       submit_bio(bi);
                        rcu_read_lock();
                        rdev_dec_pending(rdev, mddev);
                }
@@ -413,7 +414,7 @@ static void md_submit_flush_data(struct work_struct *ws)
                /* an empty barrier - all done */
                bio_endio(bio);
        else {
-               bio->bi_rw &= ~REQ_FLUSH;
+               bio->bi_rw &= ~REQ_PREFLUSH;
                mddev->pers->make_request(mddev, bio);
        }
 
@@ -742,9 +743,10 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
        bio_add_page(bio, page, size, 0);
        bio->bi_private = rdev;
        bio->bi_end_io = super_written;
+       bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH_FUA);
 
        atomic_inc(&mddev->pending_writes);
-       submit_bio(WRITE_FLUSH_FUA, bio);
+       submit_bio(bio);
 }
 
 void md_super_wait(struct mddev *mddev)
@@ -754,13 +756,14 @@ void md_super_wait(struct mddev *mddev)
 }
 
 int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
-                struct page *page, int rw, bool metadata_op)
+                struct page *page, int op, int op_flags, bool metadata_op)
 {
        struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev);
        int ret;
 
        bio->bi_bdev = (metadata_op && rdev->meta_bdev) ?
                rdev->meta_bdev : rdev->bdev;
+       bio_set_op_attrs(bio, op, op_flags);
        if (metadata_op)
                bio->bi_iter.bi_sector = sector + rdev->sb_start;
        else if (rdev->mddev->reshape_position != MaxSector &&
@@ -770,7 +773,8 @@ int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
        else
                bio->bi_iter.bi_sector = sector + rdev->data_offset;
        bio_add_page(bio, page, size, 0);
-       submit_bio_wait(rw, bio);
+
+       submit_bio_wait(bio);
 
        ret = !bio->bi_error;
        bio_put(bio);
@@ -785,7 +789,7 @@ static int read_disk_sb(struct md_rdev *rdev, int size)
        if (rdev->sb_loaded)
                return 0;
 
-       if (!sync_page_io(rdev, 0, size, rdev->sb_page, READ, true))
+       if (!sync_page_io(rdev, 0, size, rdev->sb_page, REQ_OP_READ, 0, true))
                goto fail;
        rdev->sb_loaded = 1;
        return 0;
@@ -1471,7 +1475,7 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
                        return -EINVAL;
                bb_sector = (long long)offset;
                if (!sync_page_io(rdev, bb_sector, sectors << 9,
-                                 rdev->bb_page, READ, true))
+                                 rdev->bb_page, REQ_OP_READ, 0, true))
                        return -EIO;
                bbp = (u64 *)page_address(rdev->bb_page);
                rdev->badblocks.shift = sb->bblog_shift;
@@ -2291,19 +2295,24 @@ void md_update_sb(struct mddev *mddev, int force_change)
                return;
        }
 
+repeat:
        if (mddev_is_clustered(mddev)) {
                if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags))
                        force_change = 1;
+               if (test_and_clear_bit(MD_CHANGE_CLEAN, &mddev->flags))
+                       nospares = 1;
                ret = md_cluster_ops->metadata_update_start(mddev);
                /* Has someone else updated the sb? */
                if (!does_sb_need_changing(mddev)) {
                        if (ret == 0)
                                md_cluster_ops->metadata_update_cancel(mddev);
-                       clear_bit(MD_CHANGE_PENDING, &mddev->flags);
+                       bit_clear_unless(&mddev->flags, BIT(MD_CHANGE_PENDING),
+                                                        BIT(MD_CHANGE_DEVS) |
+                                                        BIT(MD_CHANGE_CLEAN));
                        return;
                }
        }
-repeat:
+
        /* First make sure individual recovery_offsets are correct */
        rdev_for_each(rdev, mddev) {
                if (rdev->raid_disk >= 0 &&
@@ -2430,15 +2439,14 @@ repeat:
        md_super_wait(mddev);
        /* if there was a failure, MD_CHANGE_DEVS was set, and we re-write super */
 
-       spin_lock(&mddev->lock);
+       if (mddev_is_clustered(mddev) && ret == 0)
+               md_cluster_ops->metadata_update_finish(mddev);
+
        if (mddev->in_sync != sync_req ||
-           test_bit(MD_CHANGE_DEVS, &mddev->flags)) {
+           !bit_clear_unless(&mddev->flags, BIT(MD_CHANGE_PENDING),
+                              BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_CLEAN)))
                /* have to write it out again */
-               spin_unlock(&mddev->lock);
                goto repeat;
-       }
-       clear_bit(MD_CHANGE_PENDING, &mddev->flags);
-       spin_unlock(&mddev->lock);
        wake_up(&mddev->sb_wait);
        if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
                sysfs_notify(&mddev->kobj, NULL, "sync_completed");
@@ -2452,9 +2460,6 @@ repeat:
                clear_bit(BlockedBadBlocks, &rdev->flags);
                wake_up(&rdev->blocked_wait);
        }
-
-       if (mddev_is_clustered(mddev) && ret == 0)
-               md_cluster_ops->metadata_update_finish(mddev);
 }
 EXPORT_SYMBOL(md_update_sb);
 
@@ -4816,6 +4821,10 @@ array_size_store(struct mddev *mddev, const char *buf, size_t len)
        if (err)
                return err;
 
+       /* cluster raid doesn't support change array_sectors */
+       if (mddev_is_clustered(mddev))
+               return -EINVAL;
+
        if (strncmp(buf, "default", 7) == 0) {
                if (mddev->pers)
                        sectors = mddev->pers->size(mddev, 0, 0);
@@ -5039,7 +5048,7 @@ static int md_alloc(dev_t dev, char *name)
        disk->fops = &md_fops;
        disk->private_data = mddev;
        disk->queue = mddev->queue;
-       blk_queue_flush(mddev->queue, REQ_FLUSH | REQ_FUA);
+       blk_queue_write_cache(mddev->queue, true, true);
        /* Allow extended partitions.  This makes the
         * 'mdp' device redundant, but we can't really
         * remove it now.
@@ -6437,6 +6446,10 @@ static int update_size(struct mddev *mddev, sector_t num_sectors)
        int rv;
        int fit = (num_sectors == 0);
 
+       /* cluster raid doesn't support update size */
+       if (mddev_is_clustered(mddev))
+               return -EINVAL;
+
        if (mddev->pers->resize == NULL)
                return -EINVAL;
        /* The "num_sectors" is the number of sectors of each device that
@@ -7785,7 +7798,7 @@ void md_do_sync(struct md_thread *thread)
        struct md_rdev *rdev;
        char *desc, *action = NULL;
        struct blk_plug plug;
-       bool cluster_resync_finished = false;
+       int ret;
 
        /* just incase thread restarts... */
        if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
@@ -7795,6 +7808,19 @@ void md_do_sync(struct md_thread *thread)
                return;
        }
 
+       if (mddev_is_clustered(mddev)) {
+               ret = md_cluster_ops->resync_start(mddev);
+               if (ret)
+                       goto skip;
+
+               if (!(test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
+                       test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ||
+                       test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
+                    && ((unsigned long long)mddev->curr_resync_completed
+                        < (unsigned long long)mddev->resync_max_sectors))
+                       goto skip;
+       }
+
        if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
                if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
                        desc = "data-check";
@@ -8089,11 +8115,6 @@ void md_do_sync(struct md_thread *thread)
                mddev->curr_resync_completed = mddev->curr_resync;
                sysfs_notify(&mddev->kobj, NULL, "sync_completed");
        }
-       /* tell personality and other nodes that we are finished */
-       if (mddev_is_clustered(mddev)) {
-               md_cluster_ops->resync_finish(mddev);
-               cluster_resync_finished = true;
-       }
        mddev->pers->sync_request(mddev, max_sectors, &skipped);
 
        if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
@@ -8130,12 +8151,18 @@ void md_do_sync(struct md_thread *thread)
                }
        }
  skip:
-       set_bit(MD_CHANGE_DEVS, &mddev->flags);
-
        if (mddev_is_clustered(mddev) &&
-           test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
-           !cluster_resync_finished)
+           ret == 0) {
+               /* set CHANGE_PENDING here since maybe another
+                * update is needed, so other nodes are informed */
+               set_mask_bits(&mddev->flags, 0,
+                             BIT(MD_CHANGE_PENDING) | BIT(MD_CHANGE_DEVS));
+               md_wakeup_thread(mddev->thread);
+               wait_event(mddev->sb_wait,
+                          !test_bit(MD_CHANGE_PENDING, &mddev->flags));
                md_cluster_ops->resync_finish(mddev);
+       } else
+               set_bit(MD_CHANGE_DEVS, &mddev->flags);
 
        spin_lock(&mddev->lock);
        if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
@@ -8226,18 +8253,9 @@ static void md_start_sync(struct work_struct *ws)
        struct mddev *mddev = container_of(ws, struct mddev, del_work);
        int ret = 0;
 
-       if (mddev_is_clustered(mddev)) {
-               ret = md_cluster_ops->resync_start(mddev);
-               if (ret) {
-                       mddev->sync_thread = NULL;
-                       goto out;
-               }
-       }
-
        mddev->sync_thread = md_register_thread(md_do_sync,
                                                mddev,
                                                "resync");
-out:
        if (!mddev->sync_thread) {
                if (!(mddev_is_clustered(mddev) && ret == -EAGAIN))
                        printk(KERN_ERR "%s: could not start resync"
@@ -8536,6 +8554,7 @@ EXPORT_SYMBOL(md_finish_reshape);
 int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
                       int is_new)
 {
+       struct mddev *mddev = rdev->mddev;
        int rv;
        if (is_new)
                s += rdev->new_data_offset;
@@ -8545,8 +8564,8 @@ int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
        if (rv == 0) {
                /* Make sure they get written out promptly */
                sysfs_notify_dirent_safe(rdev->sysfs_state);
-               set_bit(MD_CHANGE_CLEAN, &rdev->mddev->flags);
-               set_bit(MD_CHANGE_PENDING, &rdev->mddev->flags);
+               set_mask_bits(&mddev->flags, 0,
+                             BIT(MD_CHANGE_CLEAN) | BIT(MD_CHANGE_PENDING));
                md_wakeup_thread(rdev->mddev->thread);
                return 1;
        } else
@@ -8680,6 +8699,11 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
                                ret = remove_and_add_spares(mddev, rdev2);
                                pr_info("Activated spare: %s\n",
                                                bdevname(rdev2->bdev,b));
+                               /* wake up mddev->thread here, so the array can
+                                * perform a resync with the newly activated disk */
+                               set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+                               md_wakeup_thread(mddev->thread);
+
                        }
                        /* device faulty
                         * We just want to do the minimum to mark the disk
This page took 0.035562 seconds and 5 git commands to generate.