drbd: fix potential deadlock on detach
[deliverable/linux.git] / drivers / block / drbd / drbd_nl.c
index 6b35d41706e43929099b9b5186750b7899b99d1c..0cba7d3d2b5d28b0d7afb3152701e3f089f5aa35 100644 (file)
@@ -172,6 +172,10 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd)
                put_net_conf(mdev);
        }
 
+       /* The helper may take some time.
+        * write out any unsynced meta data changes now */
+       drbd_md_sync(mdev);
+
        dev_info(DEV, "helper command: %s %s %s\n", usermode_helper, cmd, mb);
 
        drbd_bcast_ev_helper(mdev, cmd);
@@ -205,7 +209,8 @@ enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev)
                put_ldev(mdev);
        } else {
                dev_warn(DEV, "Not fencing peer, I'm not even Consistent myself.\n");
-               return mdev->state.pdsk;
+               nps = mdev->state.pdsk;
+               goto out;
        }
 
        r = drbd_khelper(mdev, "fence-peer");
@@ -252,6 +257,14 @@ enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev)
 
        dev_info(DEV, "fence-peer helper returned %d (%s)\n",
                        (r>>8) & 0xff, ex_to_string);
+
+out:
+       if (mdev->state.susp_fen && nps >= D_UNKNOWN) {
+               /* The handler was not successful... unfreeze here, the
+                  state engine can not unfreeze... */
+               _drbd_request_state(mdev, NS(susp_fen, 0), CS_VERBOSE);
+       }
+
        return nps;
 }
 
@@ -413,6 +426,39 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
        return r;
 }
 
+/* Map a minor number to its drbd_conf.
+ * If no device exists for @minor and @create is set, allocate and register
+ * a new one; a concurrent creator is tolerated via drbd_pp_lock, and the
+ * loser of that race frees its freshly allocated device again.
+ * Returns NULL if @minor is out of range, or if the device does not exist
+ * and was not created. */
+static struct drbd_conf *ensure_mdev(int minor, int create)
+{
+       struct drbd_conf *mdev;
+
+       if (minor >= minor_count)
+               return NULL;
+
+       mdev = minor_to_mdev(minor);
+
+       if (!mdev && create) {
+               struct gendisk *disk = NULL;
+               /* NOTE(review): the drbd_new_device() result is not checked
+                * for NULL before mdev->vdisk is read below -- confirm it
+                * cannot fail, or add a check. */
+               mdev = drbd_new_device(minor);
+
+               spin_lock_irq(&drbd_pp_lock);
+               if (minor_table[minor] == NULL) {
+                       minor_table[minor] = mdev;
+                       disk = mdev->vdisk;
+                       mdev = NULL;
+               } /* else: we lost the race */
+               spin_unlock_irq(&drbd_pp_lock);
+
+               if (disk) /* we won the race above */
+                       /* in case we ever add a drbd_delete_device(),
+                        * don't forget the del_gendisk! */
+                       add_disk(disk);
+               else /* we lost the race above */
+                       drbd_free_mdev(mdev);
+
+               /* re-read: either our freshly registered device, or the
+                * concurrent winner's */
+               mdev = minor_to_mdev(minor);
+       }
+
+       return mdev;
+}
 
 static int drbd_nl_primary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
                           struct drbd_nl_cfg_reply *reply)
@@ -513,7 +559,7 @@ char *ppsize(char *buf, unsigned long long size)
+/* Block new application IO on @mdev and wait until in-flight application
+ * requests (ap_bio_cnt) have drained.  If the device state is already
+ * suspended in some way, return without waiting -- presumably because
+ * ap_bio_cnt cannot drain while IO is frozen (TODO confirm). */
 void drbd_suspend_io(struct drbd_conf *mdev)
 {
        set_bit(SUSPEND_IO, &mdev->flags);
-       if (mdev->state.susp)
+       if (is_susp(mdev->state))
                return;
        wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt));
 }
@@ -734,9 +780,6 @@ void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_seg_s) __mu
        blk_queue_segment_boundary(q, PAGE_SIZE-1);
        blk_stack_limits(&q->limits, &b->limits, 0);
 
-       if (b->merge_bvec_fn)
-               dev_warn(DEV, "Backing device's merge_bvec_fn() = %p\n",
-                    b->merge_bvec_fn);
        dev_info(DEV, "max_segment_size ( = BIO size ) = %u\n", queue_max_segment_size(q));
 
        if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) {
@@ -750,14 +793,16 @@ void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_seg_s) __mu
 /* serialize deconfig (worker exiting, doing cleanup)
  * and reconfig (drbdsetup disk, drbdsetup net)
  *
- * wait for a potentially exiting worker, then restart it,
- * or start a new one.
+ * Wait for a potentially exiting worker, then restart it,
+ * or start a new one.  Flush any pending work, there may still be an
+ * after_state_change queued.
  */
 static void drbd_reconfig_start(struct drbd_conf *mdev)
 {
+       /* the winner of the test_and_set owns the config transaction;
+        * losers sleep on state_wait until drbd_reconfig_done() wakes them */
        wait_event(mdev->state_wait, !test_and_set_bit(CONFIG_PENDING, &mdev->flags));
        wait_event(mdev->state_wait, !test_bit(DEVICE_DYING, &mdev->flags));
        drbd_thread_start(&mdev->worker);
+       /* drain work still queued from before, e.g. a pending
+        * after_state_change (see comment above this function) */
        drbd_flush_workqueue(mdev);
 }
 
 /* if still unconfigured, stops worker again.
@@ -777,6 +822,29 @@ static void drbd_reconfig_done(struct drbd_conf *mdev)
        wake_up(&mdev->state_wait);
 }
 
+/* Make sure IO is suspended before calling this function(). */
+static void drbd_suspend_al(struct drbd_conf *mdev)
+{
+       int s = 0; /* set iff we performed the actual 0 -> 1 flag transition */
+
+       /* try_lock only: with IO suspended this should succeed; if someone
+        * still holds the activity log, warn and give up rather than block */
+       if (lc_try_lock(mdev->act_log)) {
+               drbd_al_shrink(mdev);
+               lc_unlock(mdev->act_log);
+       } else {
+               dev_warn(DEV, "Failed to lock al in drbd_suspend_al()\n");
+               return;
+       }
+
+       /* take req_lock so the connection-state check and the flag update
+        * happen atomically with respect to state changes */
+       spin_lock_irq(&mdev->req_lock);
+       if (mdev->state.conn < C_CONNECTED)
+               s = !test_and_set_bit(AL_SUSPENDED, &mdev->flags);
+
+       spin_unlock_irq(&mdev->req_lock);
+
+       /* log only when we actually made the transition */
+       if (s)
+               dev_info(DEV, "Suspended AL updates\n");
+}
+
 /* does always return 0;
  * interesting return code is in reply->ret_code */
 static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
@@ -790,6 +858,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
        struct inode *inode, *inode2;
        struct lru_cache *resync_lru = NULL;
        union drbd_state ns, os;
+       unsigned int max_seg_s;
        int rv;
        int cp_discovered = 0;
        int logical_block_size;
@@ -801,6 +870,11 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
                retcode = ERR_DISK_CONFIGURED;
                goto fail;
        }
+       /* It may just now have detached because of IO error.  Make sure
+        * drbd_ldev_destroy is done already, we may end up here very fast,
+        * e.g. if someone calls attach from the on-io-error handler,
+        * to realize a "hot spare" feature (not that I'd recommend that) */
+       wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt));
 
        /* allocation not in the IO path, cqueue thread context */
        nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL);
@@ -954,7 +1028,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 
        drbd_suspend_io(mdev);
        /* also wait for the last barrier ack. */
-       wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_pending_cnt) || mdev->state.susp);
+       wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_pending_cnt) || is_susp(mdev->state));
        /* and for any other previously queued work */
        drbd_flush_workqueue(mdev);
 
@@ -1052,8 +1126,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
                clear_bit(CRASHED_PRIMARY, &mdev->flags);
 
        if (drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND) &&
-           !(mdev->state.role == R_PRIMARY && mdev->state.susp &&
-             mdev->sync_conf.on_no_data == OND_SUSPEND_IO)) {
+           !(mdev->state.role == R_PRIMARY && mdev->state.susp_nod)) {
                set_bit(CRASHED_PRIMARY, &mdev->flags);
                cp_discovered = 1;
        }
@@ -1063,7 +1136,20 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
        mdev->read_cnt = 0;
        mdev->writ_cnt = 0;
 
-       drbd_setup_queue_param(mdev, DRBD_MAX_SEGMENT_SIZE);
+       max_seg_s = DRBD_MAX_SEGMENT_SIZE;
+       if (mdev->state.conn == C_CONNECTED) {
+               /* We are Primary, Connected, and now attach a new local
+                * backing store. We must not increase the user visible maximum
+                * bio size on this device to something the peer may not be
+                * able to handle. */
+               if (mdev->agreed_pro_version < 94)
+                       max_seg_s = queue_max_segment_size(mdev->rq_queue);
+               else if (mdev->agreed_pro_version == 94)
+                       max_seg_s = DRBD_MAX_SIZE_H80_PACKET;
+               /* else: drbd 8.3.9 and later, stay with default */
+       }
+
+       drbd_setup_queue_param(mdev, max_seg_s);
 
        /* If I am currently not R_PRIMARY,
         * but meta data primary indicator is set,
@@ -1111,6 +1197,9 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
                drbd_al_to_on_disk_bm(mdev);
        }
 
+       if (_drbd_bm_total_weight(mdev) == drbd_bm_bits(mdev))
+               drbd_suspend_al(mdev); /* IO is still suspended here... */
+
        spin_lock_irq(&mdev->req_lock);
        os = mdev->state;
        ns.i = os.i;
@@ -1178,7 +1267,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
  force_diskless_dec:
        put_ldev(mdev);
  force_diskless:
-       drbd_force_state(mdev, NS(disk, D_DISKLESS));
+       drbd_force_state(mdev, NS(disk, D_FAILED));
        drbd_md_sync(mdev);
  release_bdev2_fail:
        if (nbc)
@@ -1201,10 +1290,19 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
        return 0;
 }
 
+/* Detaching the disk is a process in multiple stages.  First we need to lock
+ * out application IO, in-flight IO, IO stuck in drbd_al_begin_io.
+ * Then we transition to D_DISKLESS, and wait for put_ldev() to return all
+ * internal references as well.
+ * Only then we have finally detached. */
 static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
                          struct drbd_nl_cfg_reply *reply)
 {
+       drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */
        reply->ret_code = drbd_request_state(mdev, NS(disk, D_DISKLESS));
+       /* wait for all internal references (local_cnt) to be returned via
+        * put_ldev() -- only then is the detach really complete */
+       if (mdev->state.disk == D_DISKLESS)
+               wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt));
+       drbd_resume_io(mdev);
        return 0;
 }
 
@@ -1683,6 +1781,12 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
        }
 #undef AL_MAX
 
+       /* to avoid spurious errors when configuring minors before configuring
+        * the minors they depend on: if necessary, first create the minor we
+        * depend on */
+       if (sc.after >= 0)
+               ensure_mdev(sc.after, 1);
+
        /* most sanity checks done, try to assign the new sync-after
         * dependency.  need to hold the global lock in there,
         * to avoid a race in the dependency loop check. */
@@ -1790,12 +1894,38 @@ static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl
        return 0;
 }
 
+/* Bitmap-io worker: set all bits and write them out via
+ * drbd_bmio_set_n_write(), then suspend activity-log updates.
+ * Returns the result of drbd_bmio_set_n_write(). */
+static int drbd_bmio_set_susp_al(struct drbd_conf *mdev)
+{
+       int rv;
+
+       rv = drbd_bmio_set_n_write(mdev);
+       drbd_suspend_al(mdev);
+       return rv;
+}
+
 static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
                                   struct drbd_nl_cfg_reply *reply)
 {
+       int retcode;
 
+       /* first try an ordered state change to start a sync towards the peer */
-       reply->ret_code = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S));
+       retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED);
+
+       if (retcode < SS_SUCCESS) {
+               if (retcode == SS_NEED_CONNECTION && mdev->state.role == R_PRIMARY) {
+                       /* The peer will get a resync upon connect anyways. Just make that
+                          into a full resync. */
+                       retcode = drbd_request_state(mdev, NS(pdsk, D_INCONSISTENT));
+                       if (retcode >= SS_SUCCESS) {
+                               /* open coded drbd_bitmap_io() */
+                               if (drbd_bitmap_io(mdev, &drbd_bmio_set_susp_al,
+                                                  "set_n_write from invalidate_peer"))
+                                       retcode = ERR_IO_MD_DISK;
+                       }
+               } else
+                       /* any other failure: retry without CS_ORDERED */
+                       retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S));
+       }
 
+       reply->ret_code = retcode;
        return 0;
 }
 
@@ -1837,10 +1967,9 @@ static int drbd_nl_resume_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
        if (test_bit(NEW_CUR_UUID, &mdev->flags)) {
                drbd_uuid_new_current(mdev);
                clear_bit(NEW_CUR_UUID, &mdev->flags);
-               drbd_md_sync(mdev);
        }
        drbd_suspend_io(mdev);
-       reply->ret_code = drbd_request_state(mdev, NS(susp, 0));
+       reply->ret_code = drbd_request_state(mdev, NS3(susp, 0, susp_nod, 0, susp_fen, 0));
        if (reply->ret_code == SS_SUCCESS) {
                if (mdev->state.conn < C_CONNECTED)
                        tl_clear(mdev);
@@ -2024,40 +2153,6 @@ out:
        return 0;
 }
 
-static struct drbd_conf *ensure_mdev(struct drbd_nl_cfg_req *nlp)
-{
-       struct drbd_conf *mdev;
-
-       if (nlp->drbd_minor >= minor_count)
-               return NULL;
-
-       mdev = minor_to_mdev(nlp->drbd_minor);
-
-       if (!mdev && (nlp->flags & DRBD_NL_CREATE_DEVICE)) {
-               struct gendisk *disk = NULL;
-               mdev = drbd_new_device(nlp->drbd_minor);
-
-               spin_lock_irq(&drbd_pp_lock);
-               if (minor_table[nlp->drbd_minor] == NULL) {
-                       minor_table[nlp->drbd_minor] = mdev;
-                       disk = mdev->vdisk;
-                       mdev = NULL;
-               } /* else: we lost the race */
-               spin_unlock_irq(&drbd_pp_lock);
-
-               if (disk) /* we won the race above */
-                       /* in case we ever add a drbd_delete_device(),
-                        * don't forget the del_gendisk! */
-                       add_disk(disk);
-               else /* we lost the race above */
-                       drbd_free_mdev(mdev);
-
-               mdev = minor_to_mdev(nlp->drbd_minor);
-       }
-
-       return mdev;
-}
-
 struct cn_handler_struct {
        int (*function)(struct drbd_conf *,
                         struct drbd_nl_cfg_req *,
@@ -2118,7 +2213,8 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms
                goto fail;
        }
 
-       mdev = ensure_mdev(nlp);
+       mdev = ensure_mdev(nlp->drbd_minor,
+                       (nlp->flags & DRBD_NL_CREATE_DEVICE));
        if (!mdev) {
                retcode = ERR_MINOR_INVALID;
                goto fail;
This page took 0.03722 seconds and 5 git commands to generate.