drbd: tag a few error messages with "assert failed"
[deliverable/linux.git] / drivers / block / drbd / drbd_receiver.c
index 45a2d610ca1db81e3d924ca9c946fe5d0152ac15..6ec922c623a1f18bc50d1068742970985dd84f8e 100644 (file)
@@ -1573,6 +1573,13 @@ static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_si
        if (drbd_submit_ee(mdev, e, WRITE, DRBD_FAULT_RS_WR) == 0)
                return TRUE;
 
+       /* drbd_submit_ee currently fails for one reason only:
+        * not being able to allocate enough bios.
+        * Is dropping the connection going to help? */
+       spin_lock_irq(&mdev->req_lock);
+       list_del(&e->w.list);
+       spin_unlock_irq(&mdev->req_lock);
+
        drbd_free_ee(mdev, e);
 fail:
        put_ldev(mdev);
@@ -1998,6 +2005,16 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
        if (drbd_submit_ee(mdev, e, rw, DRBD_FAULT_DT_WR) == 0)
                return TRUE;
 
+       /* drbd_submit_ee currently fails for one reason only:
+        * not being able to allocate enough bios.
+        * Is dropping the connection going to help? */
+       spin_lock_irq(&mdev->req_lock);
+       list_del(&e->w.list);
+       hlist_del_init(&e->colision);
+       spin_unlock_irq(&mdev->req_lock);
+       if (e->flags & EE_CALL_AL_COMPLETE_IO)
+               drbd_al_complete_io(mdev, e->sector);
+
 out_interrupted:
        /* yes, the epoch_size now is imbalanced.
         * but we drop the connection anyways, so we don't have a chance to
@@ -2062,7 +2079,7 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, un
        const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
        struct drbd_epoch_entry *e;
        struct digest_info *di = NULL;
-       int size;
+       int size, verb;
        unsigned int fault_type;
        struct p_block_req *p = &mdev->data.rbuf.block_req;
 
@@ -2081,11 +2098,29 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, un
        }
 
        if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) {
-               if (__ratelimit(&drbd_ratelimit_state))
+               verb = 1;
+               switch (cmd) {
+               case P_DATA_REQUEST:
+                       drbd_send_ack_rp(mdev, P_NEG_DREPLY, p);
+                       break;
+               case P_RS_DATA_REQUEST:
+               case P_CSUM_RS_REQUEST:
+               case P_OV_REQUEST:
+                       drbd_send_ack_rp(mdev, P_NEG_RS_DREPLY , p);
+                       break;
+               case P_OV_REPLY:
+                       verb = 0;
+                       dec_rs_pending(mdev);
+                       drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, ID_IN_SYNC);
+                       break;
+               default:
+                       dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
+                               cmdname(cmd));
+               }
+               if (verb && __ratelimit(&drbd_ratelimit_state))
                        dev_err(DEV, "Can not satisfy peer's read request, "
                            "no local data.\n");
-               drbd_send_ack_rp(mdev, cmd == P_DATA_REQUEST ? P_NEG_DREPLY :
-                                P_NEG_RS_DREPLY , p);
+
                /* drain possible payload */
                return drbd_drain_block(mdev, digest_size);
        }
@@ -2140,10 +2175,6 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, un
                break;
 
        case P_OV_REQUEST:
-               if (mdev->state.conn >= C_CONNECTED &&
-                   mdev->state.conn != C_VERIFY_T)
-                       dev_warn(DEV, "ASSERT FAILED: got P_OV_REQUEST while being %s\n",
-                               drbd_conn_str(mdev->state.conn));
                if (mdev->ov_start_sector == ~(sector_t)0 &&
                    mdev->agreed_pro_version >= 90) {
                        mdev->ov_start_sector = sector;
@@ -2202,6 +2233,14 @@ submit:
        if (drbd_submit_ee(mdev, e, READ, fault_type) == 0)
                return TRUE;
 
+       /* drbd_submit_ee currently fails for one reason only:
+        * not being able to allocate enough bios.
+        * Is dropping the connection going to help? */
+       spin_lock_irq(&mdev->req_lock);
+       list_del(&e->w.list);
+       spin_unlock_irq(&mdev->req_lock);
+       /* no drbd_rs_complete_io(), we are dropping the connection anyways */
+
 out_free_e:
        put_ldev(mdev);
        drbd_free_ee(mdev, e);
@@ -3242,8 +3281,7 @@ static int receive_req_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsi
 static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
 {
        struct p_state *p = &mdev->data.rbuf.state;
-       enum drbd_conns nconn, oconn;
-       union drbd_state ns, peer_state;
+       union drbd_state os, ns, peer_state;
        enum drbd_disk_state real_peer_disk;
        enum chg_state_flags cs_flags;
        int rv;
@@ -3258,38 +3296,72 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
 
        spin_lock_irq(&mdev->req_lock);
  retry:
-       oconn = nconn = mdev->state.conn;
+       os = ns = mdev->state;
        spin_unlock_irq(&mdev->req_lock);
 
-       if (nconn == C_WF_REPORT_PARAMS)
-               nconn = C_CONNECTED;
+       /* peer says his disk is uptodate, while we think it is inconsistent,
+        * and this happens while we think we have a sync going on. */
+       if (os.pdsk == D_INCONSISTENT && real_peer_disk == D_UP_TO_DATE &&
+           os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
+               /* If we are (becoming) SyncSource, but peer is still in sync
+                * preparation, ignore its uptodate-ness to avoid flapping, it
+                * will change to inconsistent once the peer reaches active
+                * syncing states.
+                * It may have changed syncer-paused flags, however, so we
+                * cannot ignore this completely. */
+               if (peer_state.conn > C_CONNECTED &&
+                   peer_state.conn < C_SYNC_SOURCE)
+                       real_peer_disk = D_INCONSISTENT;
+
+               /* if peer_state changes to connected at the same time,
+                * it explicitly notifies us that it finished resync.
+                * Maybe we should finish it up, too? */
+               else if (os.conn >= C_SYNC_SOURCE &&
+                        peer_state.conn == C_CONNECTED) {
+                       if (drbd_bm_total_weight(mdev) <= mdev->rs_failed)
+                               drbd_resync_finished(mdev);
+                       return TRUE;
+               }
+       }
+
+       /* peer says his disk is inconsistent, while we think it is uptodate,
+        * and this happens while the peer still thinks we have a sync going on,
+        * but we think we are already done with the sync.
+        * We ignore this to avoid flapping pdsk.
+        * This should not happen, if the peer is a recent version of drbd. */
+       if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
+           os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
+               real_peer_disk = D_UP_TO_DATE;
+
+       if (ns.conn == C_WF_REPORT_PARAMS)
+               ns.conn = C_CONNECTED;
 
        if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING &&
            get_ldev_if_state(mdev, D_NEGOTIATING)) {
                int cr; /* consider resync */
 
                /* if we established a new connection */
-               cr  = (oconn < C_CONNECTED);
+               cr  = (os.conn < C_CONNECTED);
                /* if we had an established connection
                 * and one of the nodes newly attaches a disk */
-               cr |= (oconn == C_CONNECTED &&
+               cr |= (os.conn == C_CONNECTED &&
                       (peer_state.disk == D_NEGOTIATING ||
-                       mdev->state.disk == D_NEGOTIATING));
+                       os.disk == D_NEGOTIATING));
                /* if we have both been inconsistent, and the peer has been
                 * forced to be UpToDate with --overwrite-data */
                cr |= test_bit(CONSIDER_RESYNC, &mdev->flags);
                /* if we had been plain connected, and the admin requested to
                 * start a sync by "invalidate" or "invalidate-remote" */
-               cr |= (oconn == C_CONNECTED &&
+               cr |= (os.conn == C_CONNECTED &&
                                (peer_state.conn >= C_STARTING_SYNC_S &&
                                 peer_state.conn <= C_WF_BITMAP_T));
 
                if (cr)
-                       nconn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk);
+                       ns.conn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk);
 
                put_ldev(mdev);
-               if (nconn == C_MASK) {
-                       nconn = C_CONNECTED;
+               if (ns.conn == C_MASK) {
+                       ns.conn = C_CONNECTED;
                        if (mdev->state.disk == D_NEGOTIATING) {
                                drbd_force_state(mdev, NS(disk, D_DISKLESS));
                        } else if (peer_state.disk == D_NEGOTIATING) {
@@ -3299,7 +3371,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
                        } else {
                                if (test_and_clear_bit(CONN_DRY_RUN, &mdev->flags))
                                        return FALSE;
-                               D_ASSERT(oconn == C_WF_REPORT_PARAMS);
+                               D_ASSERT(os.conn == C_WF_REPORT_PARAMS);
                                drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
                                return FALSE;
                        }
@@ -3307,18 +3379,16 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
        }
 
        spin_lock_irq(&mdev->req_lock);
-       if (mdev->state.conn != oconn)
+       if (mdev->state.i != os.i)
                goto retry;
        clear_bit(CONSIDER_RESYNC, &mdev->flags);
-       ns.i = mdev->state.i;
-       ns.conn = nconn;
        ns.peer = peer_state.role;
        ns.pdsk = real_peer_disk;
        ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
-       if ((nconn == C_CONNECTED || nconn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
+       if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
                ns.disk = mdev->new_state_tmp.disk;
-       cs_flags = CS_VERBOSE + (oconn < C_CONNECTED && nconn >= C_CONNECTED ? 0 : CS_HARD);
-       if (ns.pdsk == D_CONSISTENT && is_susp(ns) && nconn == C_CONNECTED && oconn < C_CONNECTED &&
+       cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
+       if (ns.pdsk == D_CONSISTENT && is_susp(ns) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
            test_bit(NEW_CUR_UUID, &mdev->flags)) {
                /* Do not allow tl_restart(resend) for a rebooted peer. We can only allow this
                   for temporary network outages! */
@@ -3339,8 +3409,8 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
                return FALSE;
        }
 
-       if (oconn > C_WF_REPORT_PARAMS) {
-               if (nconn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
+       if (os.conn > C_WF_REPORT_PARAMS) {
+               if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
                    peer_state.disk != D_NEGOTIATING ) {
                        /* we want resync, peer has not yet decided to sync... */
                        /* Nowadays only used when forcing a node into primary role and
@@ -3734,6 +3804,9 @@ static void drbdd(struct drbd_conf *mdev)
        err_out:
                drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
        }
+       /* If we leave here, we probably want to update at least the
+        * "Connected" indicator on stable storage. Do so explicitly here. */
+       drbd_md_sync(mdev);
 }
 
 void drbd_flush_workqueue(struct drbd_conf *mdev)
@@ -4521,10 +4594,8 @@ int drbd_asender(struct drbd_thread *thi)
                while (1) {
                        clear_bit(SIGNAL_ASENDER, &mdev->flags);
                        flush_signals(current);
-                       if (!drbd_process_done_ee(mdev)) {
-                               dev_err(DEV, "process_done_ee() = NOT_OK\n");
+                       if (!drbd_process_done_ee(mdev))
                                goto reconnect;
-                       }
                        /* to avoid race with newly queued ACKs */
                        set_bit(SIGNAL_ASENDER, &mdev->flags);
                        spin_lock_irq(&mdev->req_lock);
@@ -4617,10 +4688,12 @@ int drbd_asender(struct drbd_thread *thi)
        if (0) {
 reconnect:
                drbd_force_state(mdev, NS(conn, C_NETWORK_FAILURE));
+               drbd_md_sync(mdev);
        }
        if (0) {
 disconnect:
                drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+               drbd_md_sync(mdev);
        }
        clear_bit(SIGNAL_ASENDER, &mdev->flags);
 
This page took 0.030794 seconds and 5 git commands to generate.