drbd: always write bitmap on detach

author Lars Ellenberg <lars.ellenberg@linbit.com>

Sat, 22 Sep 2012 10:26:57 +0000 (12:26 +0200)

committer Jens Axboe <axboe@kernel.dk>

Tue, 30 Oct 2012 07:39:18 +0000 (08:39 +0100)
author Lars Ellenberg <lars.ellenberg@linbit.com>
Sat, 22 Sep 2012 10:26:57 +0000 (12:26 +0200)
committer Jens Axboe <axboe@kernel.dk>
Tue, 30 Oct 2012 07:39:18 +0000 (08:39 +0100)
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h

index 125fe1481ca256ffe7efbfed022a64b174730223..277c69c9465b946b9a987e195f4823223710c558 100644 (file)
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -831,7 +831,8 @@ enum drbd_flag {
                                    once no more io in flight, start bitmap io */
         BITMAP_IO_QUEUED,       /* Started bitmap IO */
         GO_DISKLESS,            /* Disk is being detached, on io-error or admin request. */
-       WAS_IO_ERROR,           /* Local disk failed returned IO error */
+       WAS_IO_ERROR,           /* Local disk failed, returned IO error */
+       WAS_READ_ERROR,         /* Local disk READ failed (set additionally to the above) */
         FORCE_DETACH,           /* Force-detach from local disk, aborting any pending local IO */
         RESYNC_AFTER_NEG,       /* Resync after online grow after the attach&negotiate finished. */
         NET_CONGESTED,          /* The data socket is congested */
@@ -1879,30 +1880,53 @@ static inline int drbd_request_state(struct drbd_conf *mdev,
  }
  
  enum drbd_force_detach_flags {
-       DRBD_IO_ERROR,
+       DRBD_READ_ERROR,
+       DRBD_WRITE_ERROR,
         DRBD_META_IO_ERROR,
         DRBD_FORCE_DETACH,
  };
  
  #define __drbd_chk_io_error(m,f) __drbd_chk_io_error_(m,f, __func__)
  static inline void __drbd_chk_io_error_(struct drbd_conf *mdev,
-               enum drbd_force_detach_flags forcedetach,
+               enum drbd_force_detach_flags df,
                 const char *where)
  {
         switch (mdev->ldev->dc.on_io_error) {
         case EP_PASS_ON:
-               if (forcedetach == DRBD_IO_ERROR) {
+               if (df == DRBD_READ_ERROR || df == DRBD_WRITE_ERROR) {
                         if (__ratelimit(&drbd_ratelimit_state))
                                 dev_err(DEV, "Local IO failed in %s.\n", where);
                         if (mdev->state.disk > D_INCONSISTENT)
                                 _drbd_set_state(_NS(mdev, disk, D_INCONSISTENT), CS_HARD, NULL);
                         break;
                 }
-               /* NOTE fall through to detach case if forcedetach set */
+               /* NOTE fall through for DRBD_META_IO_ERROR or DRBD_FORCE_DETACH */
         case EP_DETACH:
         case EP_CALL_HELPER:
+               /* Remember whether we saw a READ or WRITE error.
+                *
+                * Recovery of the affected area for WRITE failure is covered
+                * by the activity log.
+                * READ errors may fall outside that area though. Certain READ
+                * errors can be "healed" by writing good data to the affected
+                * blocks, which triggers block re-allocation in lower layers.
+                *
+                * If we cannot write the bitmap after a READ error,
+                * we may need to trigger a full sync (see w_go_diskless()).
+                *
+                * Force-detach is not really an IO error, but rather a
+                * desperate measure to try to deal with a completely
+                * unresponsive lower level IO stack.
+                * Still it should be treated as a WRITE error.
+                *
+                * A meta IO error is always a WRITE error:
+                * we read meta data only once during attach,
+                * which will fail in case of errors.
+                */
                 drbd_set_flag(mdev, WAS_IO_ERROR);
-               if (forcedetach == DRBD_FORCE_DETACH)
+               if (df == DRBD_READ_ERROR)
+                       drbd_set_flag(mdev, WAS_READ_ERROR);
+               if (df == DRBD_FORCE_DETACH)
                         drbd_set_flag(mdev, FORCE_DETACH);
                 if (mdev->state.disk > D_FAILED) {
                         _drbd_set_state(_NS(mdev, disk, D_FAILED), CS_HARD, NULL);
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c

index d8ba5c42670f1268cac30963816237ccae069bd2..9b833e0fb4409d4dd03ad25bbb7495aae7a928d0 100644 (file)
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1617,17 +1617,20 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
         /* first half of local IO error, failure to attach,
          * or administrative detach */
         if (os.disk != D_FAILED && ns.disk == D_FAILED) {
-               enum drbd_io_error_p eh = EP_PASS_ON;
-               int was_io_error = 0;
                 /* corresponding get_ldev was in __drbd_set_state, to serialize
                  * our cleanup here with the transition to D_DISKLESS.
-                * But is is still not save to dreference ldev here, since
-                * we might come from an failed Attach before ldev was set. */
+                * But it is still not safe to dereference ldev here, we may end
+                * up here from a failed attach, before ldev was even set.  */
                 if (mdev->ldev) {
-                       eh = mdev->ldev->dc.on_io_error;
-                       was_io_error = drbd_test_and_clear_flag(mdev, WAS_IO_ERROR);
-
-                       if (was_io_error && eh == EP_CALL_HELPER)
+                       enum drbd_io_error_p eh = mdev->ldev->dc.on_io_error;
+
+                       /* In some setups, this handler triggers a suicide,
+                        * basically mapping IO error to node failure, to
+                        * reduce the number of different failure scenarios.
+                        *
+                        * This handler intentionally runs before we abort IO,
+                        * notify the peer, or try to update our meta data. */
+                       if (eh == EP_CALL_HELPER && drbd_test_flag(mdev, WAS_IO_ERROR))
                                 drbd_khelper(mdev, "local-io-error");
  
                         /* Immediately allow completion of all application IO,
@@ -1643,7 +1646,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
                          * So aborting local requests may cause crashes,
                          * or even worse, silent data corruption.
                          */
-                       if (drbd_test_and_clear_flag(mdev, FORCE_DETACH))
+                       if (drbd_test_flag(mdev, FORCE_DETACH))
                                 tl_abort_disk_io(mdev);
  
                         /* current state still has to be D_FAILED,
@@ -4220,6 +4223,26 @@ static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused
          * inc/dec it frequently. Once we are D_DISKLESS, no one will touch
          * the protected members anymore, though, so once put_ldev reaches zero
          * again, it will be safe to free them. */
+
+       /* Try to write changed bitmap pages, read errors may have just
+        * set some bits outside the area covered by the activity log.
+        *
+        * If we have an IO error during the bitmap writeout,
+        * we will want a full sync next time, just in case.
+        * (Do we want a specific meta data flag for this?)
+        *
+        * If that does not make it to stable storage either,
+        * we cannot do anything about that anymore.  */
+       if (mdev->bitmap) {
+               if (drbd_bitmap_io_from_worker(mdev, drbd_bm_write,
+                                       "detach", BM_LOCKED_MASK)) {
+                       if (drbd_test_flag(mdev, WAS_READ_ERROR)) {
+                               drbd_md_set_flag(mdev, MDF_FULL_SYNC);
+                               drbd_md_sync(mdev);
+                       }
+               }
+       }
+
         drbd_force_state(mdev, NS(disk, D_DISKLESS));
         return 1;
  }
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c

index 42d172877aea825543db7da7bd72425d99a7fe61..c8dda4e8dfce0303d787bccad29fbe193bf8e43f 100644 (file)
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -959,6 +959,8 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
  
         /* make sure there is no leftover from previous force-detach attempts */
         drbd_clear_flag(mdev, FORCE_DETACH);
+       drbd_clear_flag(mdev, WAS_IO_ERROR);
+       drbd_clear_flag(mdev, WAS_READ_ERROR);
  
         /* and no leftover from previously aborted resync or verify, either */
         mdev->rs_total = 0;
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c

index 9220d9f9d6cda097c589107891d971e424fdbdf4..d9e5962a9a8c0aa9ec7b81e3629e83fe3e54ed6c 100644 (file)
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -455,7 +455,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
                 req->rq_state |= RQ_LOCAL_COMPLETED;
                 req->rq_state &= ~RQ_LOCAL_PENDING;
  
-               __drbd_chk_io_error(mdev, DRBD_IO_ERROR);
+               __drbd_chk_io_error(mdev, DRBD_WRITE_ERROR);
                 _req_may_be_done_not_susp(req, m);
                 break;
  
@@ -477,7 +477,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
                         break;
                 }
  
-               __drbd_chk_io_error(mdev, DRBD_IO_ERROR);
+               __drbd_chk_io_error(mdev, DRBD_READ_ERROR);
  
         goto_queue_for_net_read:
  
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c

index acb614ac9fe1f4973a30cf1d2254a1327174305e..7cd32e73b0165bcbcf054ba637db7623f2befdf8 100644 (file)
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -111,7 +111,7 @@ void drbd_endio_read_sec_final(struct drbd_epoch_entry *e) __releases(local)
         if (list_empty(&mdev->read_ee))
                 wake_up(&mdev->ee_wait);
         if (test_bit(__EE_WAS_ERROR, &e->flags))
-               __drbd_chk_io_error(mdev, DRBD_IO_ERROR);
+               __drbd_chk_io_error(mdev, DRBD_READ_ERROR);
         spin_unlock_irqrestore(&mdev->req_lock, flags);
  
         drbd_queue_work(&mdev->data.work, &e->w);
@@ -154,7 +154,7 @@ static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(lo
                 : list_empty(&mdev->active_ee);
  
         if (test_bit(__EE_WAS_ERROR, &e->flags))
-               __drbd_chk_io_error(mdev, DRBD_IO_ERROR);
+               __drbd_chk_io_error(mdev, DRBD_WRITE_ERROR);
         spin_unlock_irqrestore(&mdev->req_lock, flags);
  
         if (is_syncer_req)
author	Lars Ellenberg <lars.ellenberg@linbit.com>
	Sat, 22 Sep 2012 10:26:57 +0000 (12:26 +0200)
committer	Jens Axboe <axboe@kernel.dk>
	Tue, 30 Oct 2012 07:39:18 +0000 (08:39 +0100)
drivers/block/drbd/drbd_int.h		patch \| blob \| blame \| history
drivers/block/drbd/drbd_main.c		patch \| blob \| blame \| history
drivers/block/drbd/drbd_nl.c		patch \| blob \| blame \| history
drivers/block/drbd/drbd_req.c		patch \| blob \| blame \| history
drivers/block/drbd/drbd_worker.c		patch \| blob \| blame \| history