metadata_update sends message to other nodes
[deliverable/linux.git] / drivers / md / md.c
index c8d2bac4e28be4a65edb63d613b0336b7ff35783..0052e433d8a67907198602dd0292548654efb1cc 100644 (file)
@@ -53,6 +53,7 @@
 #include <linux/slab.h>
 #include "md.h"
 #include "bitmap.h"
+#include "md-cluster.h"
 
 #ifndef MODULE
 static void autostart_arrays(int part);
@@ -66,6 +67,10 @@ static void autostart_arrays(int part);
 static LIST_HEAD(pers_list);
 static DEFINE_SPINLOCK(pers_lock);
 
+struct md_cluster_operations *md_cluster_ops;
+struct module *md_cluster_mod;
+EXPORT_SYMBOL(md_cluster_mod);
+
 static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
 static struct workqueue_struct *md_wq;
 static struct workqueue_struct *md_misc_wq;
@@ -2467,10 +2472,14 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
                        err = -EBUSY;
                else {
                        struct mddev *mddev = rdev->mddev;
+                       if (mddev_is_clustered(mddev))
+                               md_cluster_ops->metadata_update_start(mddev);
                        kick_rdev_from_array(rdev);
                        if (mddev->pers)
                                md_update_sb(mddev, 1);
                        md_new_event(mddev);
+                       if (mddev_is_clustered(mddev))
+                               md_cluster_ops->metadata_update_finish(mddev);
                        err = 0;
                }
        } else if (cmd_match(buf, "writemostly")) {
@@ -4003,8 +4012,12 @@ size_store(struct mddev *mddev, const char *buf, size_t len)
        if (err)
                return err;
        if (mddev->pers) {
+               if (mddev_is_clustered(mddev))
+                       md_cluster_ops->metadata_update_start(mddev);
                err = update_size(mddev, sectors);
                md_update_sb(mddev, 1);
+               if (mddev_is_clustered(mddev))
+                       md_cluster_ops->metadata_update_finish(mddev);
        } else {
                if (mddev->dev_sectors == 0 ||
                    mddev->dev_sectors > sectors)
@@ -5071,10 +5084,16 @@ int md_run(struct mddev *mddev)
        }
        if (err == 0 && pers->sync_request &&
            (mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
-               err = bitmap_create(mddev);
-               if (err)
+               struct bitmap *bitmap;
+
+               bitmap = bitmap_create(mddev, -1);
+               if (IS_ERR(bitmap)) {
+                       err = PTR_ERR(bitmap);
                        printk(KERN_ERR "%s: failed to create bitmap (%d)\n",
                               mdname(mddev), err);
+               } else
+                       mddev->bitmap = bitmap;
+
        }
        if (err) {
                mddev_detach(mddev);
@@ -5225,6 +5244,8 @@ static void md_clean(struct mddev *mddev)
 
 static void __md_stop_writes(struct mddev *mddev)
 {
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->metadata_update_start(mddev);
        set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
        flush_workqueue(md_misc_wq);
        if (mddev->sync_thread) {
@@ -5243,6 +5264,8 @@ static void __md_stop_writes(struct mddev *mddev)
                mddev->in_sync = 1;
                md_update_sb(mddev, 1);
        }
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->metadata_update_finish(mddev);
 }
 
 void md_stop_writes(struct mddev *mddev)
@@ -5629,6 +5652,9 @@ static int get_array_info(struct mddev *mddev, void __user *arg)
                info.state = (1<<MD_SB_CLEAN);
        if (mddev->bitmap && mddev->bitmap_info.offset)
                info.state |= (1<<MD_SB_BITMAP_PRESENT);
+       if (mddev_is_clustered(mddev))
+               info.state |= (1<<MD_SB_CLUSTERED);
+
        info.active_disks  = insync;
        info.working_disks = working;
        info.failed_disks  = failed;
@@ -5888,6 +5914,9 @@ static int hot_remove_disk(struct mddev *mddev, dev_t dev)
        if (!rdev)
                return -ENXIO;
 
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->metadata_update_start(mddev);
+
        clear_bit(Blocked, &rdev->flags);
        remove_and_add_spares(mddev, rdev);
 
@@ -5898,8 +5927,13 @@ static int hot_remove_disk(struct mddev *mddev, dev_t dev)
        md_update_sb(mddev, 1);
        md_new_event(mddev);
 
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->metadata_update_finish(mddev);
+
        return 0;
 busy:
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->metadata_update_cancel(mddev);
        printk(KERN_WARNING "md: cannot remove active disk %s from %s ...\n",
                bdevname(rdev->bdev,b), mdname(mddev));
        return -EBUSY;
@@ -5949,12 +5983,15 @@ static int hot_add_disk(struct mddev *mddev, dev_t dev)
                err = -EINVAL;
                goto abort_export;
        }
+
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->metadata_update_start(mddev);
        clear_bit(In_sync, &rdev->flags);
        rdev->desc_nr = -1;
        rdev->saved_raid_disk = -1;
        err = bind_rdev_to_array(rdev, mddev);
        if (err)
-               goto abort_export;
+               goto abort_clustered;
 
        /*
         * The rest should better be atomic, we can have disk failures
@@ -5965,6 +6002,8 @@ static int hot_add_disk(struct mddev *mddev, dev_t dev)
 
        md_update_sb(mddev, 1);
 
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->metadata_update_finish(mddev);
        /*
         * Kick recovery, maybe this spare has to be added to the
         * array immediately.
@@ -5974,6 +6013,9 @@ static int hot_add_disk(struct mddev *mddev, dev_t dev)
        md_new_event(mddev);
        return 0;
 
+abort_clustered:
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->metadata_update_cancel(mddev);
 abort_export:
        export_rdev(rdev);
        return err;
@@ -6031,9 +6073,13 @@ static int set_bitmap_file(struct mddev *mddev, int fd)
        if (mddev->pers) {
                mddev->pers->quiesce(mddev, 1);
                if (fd >= 0) {
-                       err = bitmap_create(mddev);
-                       if (!err)
+                       struct bitmap *bitmap;
+
+                       bitmap = bitmap_create(mddev, -1);
+                       if (!IS_ERR(bitmap)) {
+                               mddev->bitmap = bitmap;
                                err = bitmap_load(mddev);
+                       }
                }
                if (fd < 0 || err) {
                        bitmap_destroy(mddev);
@@ -6286,6 +6332,8 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
                        return rv;
                }
        }
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->metadata_update_start(mddev);
        if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
                rv = update_size(mddev, (sector_t)info->size * 2);
 
@@ -6293,33 +6341,48 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
                rv = update_raid_disks(mddev, info->raid_disks);
 
        if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
-               if (mddev->pers->quiesce == NULL || mddev->thread == NULL)
-                       return -EINVAL;
-               if (mddev->recovery || mddev->sync_thread)
-                       return -EBUSY;
+               if (mddev->pers->quiesce == NULL || mddev->thread == NULL) {
+                       rv = -EINVAL;
+                       goto err;
+               }
+               if (mddev->recovery || mddev->sync_thread) {
+                       rv = -EBUSY;
+                       goto err;
+               }
                if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
+                       struct bitmap *bitmap;
                        /* add the bitmap */
-                       if (mddev->bitmap)
-                               return -EEXIST;
-                       if (mddev->bitmap_info.default_offset == 0)
-                               return -EINVAL;
+                       if (mddev->bitmap) {
+                               rv = -EEXIST;
+                               goto err;
+                       }
+                       if (mddev->bitmap_info.default_offset == 0) {
+                               rv = -EINVAL;
+                               goto err;
+                       }
                        mddev->bitmap_info.offset =
                                mddev->bitmap_info.default_offset;
                        mddev->bitmap_info.space =
                                mddev->bitmap_info.default_space;
                        mddev->pers->quiesce(mddev, 1);
-                       rv = bitmap_create(mddev);
-                       if (!rv)
+                       bitmap = bitmap_create(mddev, -1);
+                       if (!IS_ERR(bitmap)) {
+                               mddev->bitmap = bitmap;
                                rv = bitmap_load(mddev);
+                       }
                        if (rv)
                                bitmap_destroy(mddev);
                        mddev->pers->quiesce(mddev, 0);
                } else {
                        /* remove the bitmap */
-                       if (!mddev->bitmap)
-                               return -ENOENT;
-                       if (mddev->bitmap->storage.file)
-                               return -EINVAL;
+                       if (!mddev->bitmap) {
+                               rv = -ENOENT;
+                               goto err;
+                       }
+                       if (mddev->bitmap->storage.file) {
+                               rv = -EINVAL;
+                               goto err;
+                       }
                        mddev->pers->quiesce(mddev, 1);
                        bitmap_destroy(mddev);
                        mddev->pers->quiesce(mddev, 0);
@@ -6327,6 +6390,12 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
                }
        }
        md_update_sb(mddev, 1);
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->metadata_update_finish(mddev);
+       return rv;
+err:
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->metadata_update_cancel(mddev);
        return rv;
 }
 
@@ -7231,6 +7300,55 @@ int unregister_md_personality(struct md_personality *p)
 }
 EXPORT_SYMBOL(unregister_md_personality);
 
+int register_md_cluster_operations(struct md_cluster_operations *ops, struct module *module)
+{
+       if (md_cluster_ops != NULL)
+               return -EALREADY;
+       spin_lock(&pers_lock);
+       md_cluster_ops = ops;
+       md_cluster_mod = module;
+       spin_unlock(&pers_lock);
+       return 0;
+}
+EXPORT_SYMBOL(register_md_cluster_operations);
+
+int unregister_md_cluster_operations(void)
+{
+       spin_lock(&pers_lock);
+       md_cluster_ops = NULL;
+       spin_unlock(&pers_lock);
+       return 0;
+}
+EXPORT_SYMBOL(unregister_md_cluster_operations);
+
+int md_setup_cluster(struct mddev *mddev, int nodes)
+{
+       int err;
+
+       err = request_module("md-cluster");
+       if (err) {
+               pr_err("md-cluster module not found.\n");
+               return err;
+       }
+
+       spin_lock(&pers_lock);
+       if (!md_cluster_ops || !try_module_get(md_cluster_mod)) {
+               spin_unlock(&pers_lock);
+               return -ENOENT;
+       }
+       spin_unlock(&pers_lock);
+
+       return md_cluster_ops->join(mddev, nodes);
+}
+
+void md_cluster_stop(struct mddev *mddev)
+{
+       if (!md_cluster_ops)
+               return;
+       md_cluster_ops->leave(mddev);
+       module_put(md_cluster_mod);
+}
+
 static int is_mddev_idle(struct mddev *mddev, int init)
 {
        struct md_rdev *rdev;
@@ -7368,7 +7486,11 @@ int md_allow_write(struct mddev *mddev)
                    mddev->safemode == 0)
                        mddev->safemode = 1;
                spin_unlock(&mddev->lock);
+               if (mddev_is_clustered(mddev))
+                       md_cluster_ops->metadata_update_start(mddev);
                md_update_sb(mddev, 0);
+               if (mddev_is_clustered(mddev))
+                       md_cluster_ops->metadata_update_finish(mddev);
                sysfs_notify_dirent_safe(mddev->sysfs_state);
        } else
                spin_unlock(&mddev->lock);
@@ -7569,6 +7691,9 @@ void md_do_sync(struct md_thread *thread)
        md_new_event(mddev);
        update_time = jiffies;
 
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->resync_info_update(mddev, j, max_sectors);
+
        blk_start_plug(&plug);
        while (j < max_sectors) {
                sector_t sectors;
@@ -7629,6 +7754,8 @@ void md_do_sync(struct md_thread *thread)
                j += sectors;
                if (j > 2)
                        mddev->curr_resync = j;
+               if (mddev_is_clustered(mddev))
+                       md_cluster_ops->resync_info_update(mddev, j, max_sectors);
                mddev->curr_mark_cnt = io_sectors;
                if (last_check == 0)
                        /* this is the earliest that rebuild will be
@@ -7689,6 +7816,9 @@ void md_do_sync(struct md_thread *thread)
        /* tell personality that we are finished */
        mddev->pers->sync_request(mddev, max_sectors, &skipped, 1);
 
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->resync_info_update(mddev, 0, 0);
+
        if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
            mddev->curr_resync > 2) {
                if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
@@ -7918,8 +8048,13 @@ void md_check_recovery(struct mddev *mddev)
                                sysfs_notify_dirent_safe(mddev->sysfs_state);
                }
 
-               if (mddev->flags & MD_UPDATE_SB_FLAGS)
+               if (mddev->flags & MD_UPDATE_SB_FLAGS) {
+                       if (mddev_is_clustered(mddev))
+                               md_cluster_ops->metadata_update_start(mddev);
                        md_update_sb(mddev, 0);
+                       if (mddev_is_clustered(mddev))
+                               md_cluster_ops->metadata_update_finish(mddev);
+               }
 
                if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
                    !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
@@ -8017,6 +8152,8 @@ void md_reap_sync_thread(struct mddev *mddev)
                        set_bit(MD_CHANGE_DEVS, &mddev->flags);
                }
        }
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->metadata_update_start(mddev);
        if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
            mddev->pers->finish_reshape)
                mddev->pers->finish_reshape(mddev);
@@ -8029,6 +8166,8 @@ void md_reap_sync_thread(struct mddev *mddev)
                        rdev->saved_raid_disk = -1;
 
        md_update_sb(mddev, 1);
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->metadata_update_finish(mddev);
        clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
        clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
        clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
This page took 0.032778 seconds and 5 git commands to generate.