4 This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
6 Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7 Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8 Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
10 Thanks to Carter Burden, Bart Grantham and Gennadiy Nerubayev
11 from Logicworks, Inc. for making SDP replication support possible.
13 drbd is free software; you can redistribute it and/or modify
14 it under the terms of the GNU General Public License as published by
15 the Free Software Foundation; either version 2, or (at your option)
16 any later version.
18 drbd is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with drbd; see the file COPYING. If not, write to
25 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
28 #include <linux/drbd_limits.h>
/* Declared here for use in this file; the definition is not in this file's visible part. */
extern void tl_abort_disk_io(struct drbd_conf *mdev);
35 struct after_state_chg_work
{
39 enum chg_state_flags flags
;
40 struct completion
*done
;
/*
 * Warning codes that sanitize_state() reports through its @warn out-parameter.
 * print_sanitize_warnings() uses them as indices into its message table and
 * prints nothing for NO_WARNING.
 */
enum sanitize_state_warnings {
	NO_WARNING,			/* nothing to report */
	ABORTED_ONLINE_VERIFY,		/* "Online-verify aborted." */
	ABORTED_RESYNC,			/* "Resync aborted." */
	CONNECTION_LOST_NEGOTIATING,	/* "Connection lost while negotiating, no data!" */
	IMPLICITLY_UPGRADED_DISK,	/* "Implicitly upgraded disk" */
	IMPLICITLY_UPGRADED_PDSK,	/* "Implicitly upgraded pdsk" */
};
/* Forward declarations of file-local helpers that are used before they are defined. */
static int w_after_state_ch(struct drbd_work *w, int unused);
static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
			   union drbd_state ns, enum chg_state_flags flags);
static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state);
static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state);
static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_state ns);
static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns,
				       enum sanitize_state_warnings *warn);
61 static inline bool is_susp(union drbd_state s
)
63 return s
.susp
|| s
.susp_nod
|| s
.susp_fen
;
66 bool conn_all_vols_unconf(struct drbd_tconn
*tconn
)
68 struct drbd_conf
*mdev
;
73 idr_for_each_entry(&tconn
->volumes
, mdev
, vnr
) {
74 if (mdev
->state
.disk
!= D_DISKLESS
||
75 mdev
->state
.conn
!= C_STANDALONE
||
76 mdev
->state
.role
!= R_SECONDARY
) {
86 /* Unfortunately the states were not correctly ordered when
87 they were defined; therefore we cannot use max_t() here. */
88 static enum drbd_role
max_role(enum drbd_role role1
, enum drbd_role role2
)
90 if (role1
== R_PRIMARY
|| role2
== R_PRIMARY
)
92 if (role1
== R_SECONDARY
|| role2
== R_SECONDARY
)
96 static enum drbd_role
min_role(enum drbd_role role1
, enum drbd_role role2
)
98 if (role1
== R_UNKNOWN
|| role2
== R_UNKNOWN
)
100 if (role1
== R_SECONDARY
|| role2
== R_SECONDARY
)
105 enum drbd_role
conn_highest_role(struct drbd_tconn
*tconn
)
107 enum drbd_role role
= R_UNKNOWN
;
108 struct drbd_conf
*mdev
;
112 idr_for_each_entry(&tconn
->volumes
, mdev
, vnr
)
113 role
= max_role(role
, mdev
->state
.role
);
119 enum drbd_role
conn_highest_peer(struct drbd_tconn
*tconn
)
121 enum drbd_role peer
= R_UNKNOWN
;
122 struct drbd_conf
*mdev
;
126 idr_for_each_entry(&tconn
->volumes
, mdev
, vnr
)
127 peer
= max_role(peer
, mdev
->state
.peer
);
133 enum drbd_disk_state
conn_highest_disk(struct drbd_tconn
*tconn
)
135 enum drbd_disk_state ds
= D_DISKLESS
;
136 struct drbd_conf
*mdev
;
140 idr_for_each_entry(&tconn
->volumes
, mdev
, vnr
)
141 ds
= max_t(enum drbd_disk_state
, ds
, mdev
->state
.disk
);
147 enum drbd_disk_state
conn_lowest_disk(struct drbd_tconn
*tconn
)
149 enum drbd_disk_state ds
= D_MASK
;
150 struct drbd_conf
*mdev
;
154 idr_for_each_entry(&tconn
->volumes
, mdev
, vnr
)
155 ds
= min_t(enum drbd_disk_state
, ds
, mdev
->state
.disk
);
161 enum drbd_disk_state
conn_highest_pdsk(struct drbd_tconn
*tconn
)
163 enum drbd_disk_state ds
= D_DISKLESS
;
164 struct drbd_conf
*mdev
;
168 idr_for_each_entry(&tconn
->volumes
, mdev
, vnr
)
169 ds
= max_t(enum drbd_disk_state
, ds
, mdev
->state
.pdsk
);
175 enum drbd_conns
conn_lowest_conn(struct drbd_tconn
*tconn
)
177 enum drbd_conns conn
= C_MASK
;
178 struct drbd_conf
*mdev
;
182 idr_for_each_entry(&tconn
->volumes
, mdev
, vnr
)
183 conn
= min_t(enum drbd_conns
, conn
, mdev
->state
.conn
);
190 * cl_wide_st_chg() - true if the state change is a cluster wide one
191 * @mdev: DRBD device.
192 * @os: old (current) state.
193 * @ns: new (wanted) state.
195 static int cl_wide_st_chg(struct drbd_conf
*mdev
,
196 union drbd_state os
, union drbd_state ns
)
198 return (os
.conn
>= C_CONNECTED
&& ns
.conn
>= C_CONNECTED
&&
199 ((os
.role
!= R_PRIMARY
&& ns
.role
== R_PRIMARY
) ||
200 (os
.conn
!= C_STARTING_SYNC_T
&& ns
.conn
== C_STARTING_SYNC_T
) ||
201 (os
.conn
!= C_STARTING_SYNC_S
&& ns
.conn
== C_STARTING_SYNC_S
) ||
202 (os
.disk
!= D_DISKLESS
&& ns
.disk
== D_DISKLESS
))) ||
203 (os
.conn
>= C_CONNECTED
&& ns
.conn
== C_DISCONNECTING
) ||
204 (os
.conn
== C_CONNECTED
&& ns
.conn
== C_VERIFY_S
) ||
205 (os
.conn
== C_CONNECTED
&& ns
.conn
== C_WF_REPORT_PARAMS
);
208 static union drbd_state
209 apply_mask_val(union drbd_state os
, union drbd_state mask
, union drbd_state val
)
212 ns
.i
= (os
.i
& ~mask
.i
) | val
.i
;
217 drbd_change_state(struct drbd_conf
*mdev
, enum chg_state_flags f
,
218 union drbd_state mask
, union drbd_state val
)
222 enum drbd_state_rv rv
;
224 spin_lock_irqsave(&mdev
->tconn
->req_lock
, flags
);
225 ns
= apply_mask_val(drbd_read_state(mdev
), mask
, val
);
226 rv
= _drbd_set_state(mdev
, ns
, f
, NULL
);
227 spin_unlock_irqrestore(&mdev
->tconn
->req_lock
, flags
);
233 * drbd_force_state() - Impose a change which happens outside our control on our state
234 * @mdev: DRBD device.
235 * @mask: mask of state bits to change.
236 * @val: value of new state bits.
238 void drbd_force_state(struct drbd_conf
*mdev
,
239 union drbd_state mask
, union drbd_state val
)
241 drbd_change_state(mdev
, CS_HARD
, mask
, val
);
244 static enum drbd_state_rv
245 _req_st_cond(struct drbd_conf
*mdev
, union drbd_state mask
,
246 union drbd_state val
)
248 union drbd_state os
, ns
;
250 enum drbd_state_rv rv
;
252 if (test_and_clear_bit(CL_ST_CHG_SUCCESS
, &mdev
->flags
))
253 return SS_CW_SUCCESS
;
255 if (test_and_clear_bit(CL_ST_CHG_FAIL
, &mdev
->flags
))
256 return SS_CW_FAILED_BY_PEER
;
258 spin_lock_irqsave(&mdev
->tconn
->req_lock
, flags
);
259 os
= drbd_read_state(mdev
);
260 ns
= sanitize_state(mdev
, apply_mask_val(os
, mask
, val
), NULL
);
261 rv
= is_valid_transition(os
, ns
);
262 if (rv
== SS_SUCCESS
)
263 rv
= SS_UNKNOWN_ERROR
; /* cont waiting, otherwise fail. */
265 if (!cl_wide_st_chg(mdev
, os
, ns
))
267 if (rv
== SS_UNKNOWN_ERROR
) {
268 rv
= is_valid_state(mdev
, ns
);
269 if (rv
== SS_SUCCESS
) {
270 rv
= is_valid_soft_transition(os
, ns
);
271 if (rv
== SS_SUCCESS
)
272 rv
= SS_UNKNOWN_ERROR
; /* cont waiting, otherwise fail. */
275 spin_unlock_irqrestore(&mdev
->tconn
->req_lock
, flags
);
281 * drbd_req_state() - Perform an eventually cluster wide state change
282 * @mdev: DRBD device.
283 * @mask: mask of state bits to change.
284 * @val: value of new state bits.
287 * Should not be called directly, use drbd_request_state() or
288 * _drbd_request_state().
290 static enum drbd_state_rv
291 drbd_req_state(struct drbd_conf
*mdev
, union drbd_state mask
,
292 union drbd_state val
, enum chg_state_flags f
)
294 struct completion done
;
296 union drbd_state os
, ns
;
297 enum drbd_state_rv rv
;
299 init_completion(&done
);
301 if (f
& CS_SERIALIZE
)
302 mutex_lock(mdev
->state_mutex
);
304 spin_lock_irqsave(&mdev
->tconn
->req_lock
, flags
);
305 os
= drbd_read_state(mdev
);
306 ns
= sanitize_state(mdev
, apply_mask_val(os
, mask
, val
), NULL
);
307 rv
= is_valid_transition(os
, ns
);
308 if (rv
< SS_SUCCESS
) {
309 spin_unlock_irqrestore(&mdev
->tconn
->req_lock
, flags
);
313 if (cl_wide_st_chg(mdev
, os
, ns
)) {
314 rv
= is_valid_state(mdev
, ns
);
315 if (rv
== SS_SUCCESS
)
316 rv
= is_valid_soft_transition(os
, ns
);
317 spin_unlock_irqrestore(&mdev
->tconn
->req_lock
, flags
);
319 if (rv
< SS_SUCCESS
) {
321 print_st_err(mdev
, os
, ns
, rv
);
325 if (drbd_send_state_req(mdev
, mask
, val
)) {
326 rv
= SS_CW_FAILED_BY_PEER
;
328 print_st_err(mdev
, os
, ns
, rv
);
332 wait_event(mdev
->state_wait
,
333 (rv
= _req_st_cond(mdev
, mask
, val
)));
335 if (rv
< SS_SUCCESS
) {
337 print_st_err(mdev
, os
, ns
, rv
);
340 spin_lock_irqsave(&mdev
->tconn
->req_lock
, flags
);
341 ns
= apply_mask_val(drbd_read_state(mdev
), mask
, val
);
342 rv
= _drbd_set_state(mdev
, ns
, f
, &done
);
344 rv
= _drbd_set_state(mdev
, ns
, f
, &done
);
347 spin_unlock_irqrestore(&mdev
->tconn
->req_lock
, flags
);
349 if (f
& CS_WAIT_COMPLETE
&& rv
== SS_SUCCESS
) {
350 D_ASSERT(current
!= mdev
->tconn
->worker
.task
);
351 wait_for_completion(&done
);
355 if (f
& CS_SERIALIZE
)
356 mutex_unlock(mdev
->state_mutex
);
362 * _drbd_request_state() - Request a state change (with flags)
363 * @mdev: DRBD device.
364 * @mask: mask of state bits to change.
365 * @val: value of new state bits.
368 * Cousin of drbd_request_state(), useful with the CS_WAIT_COMPLETE
369 * flag, or when logging of failed state change requests is not desired.
372 _drbd_request_state(struct drbd_conf
*mdev
, union drbd_state mask
,
373 union drbd_state val
, enum chg_state_flags f
)
375 enum drbd_state_rv rv
;
377 wait_event(mdev
->state_wait
,
378 (rv
= drbd_req_state(mdev
, mask
, val
, f
)) != SS_IN_TRANSIENT_STATE
);
383 static void print_st(struct drbd_conf
*mdev
, char *name
, union drbd_state ns
)
385 dev_err(DEV
, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c%c%c }\n",
387 drbd_conn_str(ns
.conn
),
388 drbd_role_str(ns
.role
),
389 drbd_role_str(ns
.peer
),
390 drbd_disk_str(ns
.disk
),
391 drbd_disk_str(ns
.pdsk
),
392 is_susp(ns
) ? 's' : 'r',
393 ns
.aftr_isp
? 'a' : '-',
394 ns
.peer_isp
? 'p' : '-',
395 ns
.user_isp
? 'u' : '-',
396 ns
.susp_fen
? 'F' : '-',
397 ns
.susp_nod
? 'N' : '-'
401 void print_st_err(struct drbd_conf
*mdev
, union drbd_state os
,
402 union drbd_state ns
, enum drbd_state_rv err
)
404 if (err
== SS_IN_TRANSIENT_STATE
)
406 dev_err(DEV
, "State change failed: %s\n", drbd_set_st_err_str(err
));
407 print_st(mdev
, " state", os
);
408 print_st(mdev
, "wanted", ns
);
411 static long print_state_change(char *pb
, union drbd_state os
, union drbd_state ns
,
412 enum chg_state_flags flags
)
418 if (ns
.role
!= os
.role
&& flags
& CS_DC_ROLE
)
419 pbp
+= sprintf(pbp
, "role( %s -> %s ) ",
420 drbd_role_str(os
.role
),
421 drbd_role_str(ns
.role
));
422 if (ns
.peer
!= os
.peer
&& flags
& CS_DC_PEER
)
423 pbp
+= sprintf(pbp
, "peer( %s -> %s ) ",
424 drbd_role_str(os
.peer
),
425 drbd_role_str(ns
.peer
));
426 if (ns
.conn
!= os
.conn
&& flags
& CS_DC_CONN
)
427 pbp
+= sprintf(pbp
, "conn( %s -> %s ) ",
428 drbd_conn_str(os
.conn
),
429 drbd_conn_str(ns
.conn
));
430 if (ns
.disk
!= os
.disk
&& flags
& CS_DC_DISK
)
431 pbp
+= sprintf(pbp
, "disk( %s -> %s ) ",
432 drbd_disk_str(os
.disk
),
433 drbd_disk_str(ns
.disk
));
434 if (ns
.pdsk
!= os
.pdsk
&& flags
& CS_DC_PDSK
)
435 pbp
+= sprintf(pbp
, "pdsk( %s -> %s ) ",
436 drbd_disk_str(os
.pdsk
),
437 drbd_disk_str(ns
.pdsk
));
442 static void drbd_pr_state_change(struct drbd_conf
*mdev
, union drbd_state os
, union drbd_state ns
,
443 enum chg_state_flags flags
)
448 pbp
+= print_state_change(pbp
, os
, ns
, flags
^ CS_DC_MASK
);
450 if (ns
.aftr_isp
!= os
.aftr_isp
)
451 pbp
+= sprintf(pbp
, "aftr_isp( %d -> %d ) ",
454 if (ns
.peer_isp
!= os
.peer_isp
)
455 pbp
+= sprintf(pbp
, "peer_isp( %d -> %d ) ",
458 if (ns
.user_isp
!= os
.user_isp
)
459 pbp
+= sprintf(pbp
, "user_isp( %d -> %d ) ",
464 dev_info(DEV
, "%s\n", pb
);
467 static void conn_pr_state_change(struct drbd_tconn
*tconn
, union drbd_state os
, union drbd_state ns
,
468 enum chg_state_flags flags
)
473 pbp
+= print_state_change(pbp
, os
, ns
, flags
);
475 if (is_susp(ns
) != is_susp(os
) && flags
& CS_DC_SUSP
)
476 pbp
+= sprintf(pbp
, "susp( %d -> %d ) ",
481 conn_info(tconn
, "%s\n", pb
);
486 * is_valid_state() - Returns an SS_ error code if ns is not valid
487 * @mdev: DRBD device.
488 * @ns: State to consider.
490 static enum drbd_state_rv
491 is_valid_state(struct drbd_conf
*mdev
, union drbd_state ns
)
493 /* See drbd_state_sw_errors in drbd_strings.c */
495 enum drbd_fencing_p fp
;
496 enum drbd_state_rv rv
= SS_SUCCESS
;
501 if (get_ldev(mdev
)) {
502 fp
= rcu_dereference(mdev
->ldev
->disk_conf
)->fencing
;
506 nc
= rcu_dereference(mdev
->tconn
->net_conf
);
508 if (!nc
->two_primaries
&& ns
.role
== R_PRIMARY
) {
509 if (ns
.peer
== R_PRIMARY
)
510 rv
= SS_TWO_PRIMARIES
;
511 else if (conn_highest_peer(mdev
->tconn
) == R_PRIMARY
)
512 rv
= SS_O_VOL_PEER_PRI
;
517 /* already found a reason to abort */;
518 else if (ns
.role
== R_SECONDARY
&& mdev
->open_cnt
)
519 rv
= SS_DEVICE_IN_USE
;
521 else if (ns
.role
== R_PRIMARY
&& ns
.conn
< C_CONNECTED
&& ns
.disk
< D_UP_TO_DATE
)
522 rv
= SS_NO_UP_TO_DATE_DISK
;
524 else if (fp
>= FP_RESOURCE
&&
525 ns
.role
== R_PRIMARY
&& ns
.conn
< C_CONNECTED
&& ns
.pdsk
>= D_UNKNOWN
)
528 else if (ns
.role
== R_PRIMARY
&& ns
.disk
<= D_INCONSISTENT
&& ns
.pdsk
<= D_INCONSISTENT
)
529 rv
= SS_NO_UP_TO_DATE_DISK
;
531 else if (ns
.conn
> C_CONNECTED
&& ns
.disk
< D_INCONSISTENT
)
532 rv
= SS_NO_LOCAL_DISK
;
534 else if (ns
.conn
> C_CONNECTED
&& ns
.pdsk
< D_INCONSISTENT
)
535 rv
= SS_NO_REMOTE_DISK
;
537 else if (ns
.conn
> C_CONNECTED
&& ns
.disk
< D_UP_TO_DATE
&& ns
.pdsk
< D_UP_TO_DATE
)
538 rv
= SS_NO_UP_TO_DATE_DISK
;
540 else if ((ns
.conn
== C_CONNECTED
||
541 ns
.conn
== C_WF_BITMAP_S
||
542 ns
.conn
== C_SYNC_SOURCE
||
543 ns
.conn
== C_PAUSED_SYNC_S
) &&
544 ns
.disk
== D_OUTDATED
)
545 rv
= SS_CONNECTED_OUTDATES
;
547 else if ((ns
.conn
== C_VERIFY_S
|| ns
.conn
== C_VERIFY_T
) &&
548 (nc
->verify_alg
[0] == 0))
549 rv
= SS_NO_VERIFY_ALG
;
551 else if ((ns
.conn
== C_VERIFY_S
|| ns
.conn
== C_VERIFY_T
) &&
552 mdev
->tconn
->agreed_pro_version
< 88)
553 rv
= SS_NOT_SUPPORTED
;
555 else if (ns
.conn
>= C_CONNECTED
&& ns
.pdsk
== D_UNKNOWN
)
556 rv
= SS_CONNECTED_OUTDATES
;
564 * is_valid_soft_transition() - Returns an SS_ error code if the state transition is not possible
565 * This function limits state transitions that may be declined by DRBD. I.e.
566 * user requests (aka soft transitions).
567 * @mdev: DRBD device.
571 static enum drbd_state_rv
572 is_valid_soft_transition(union drbd_state os
, union drbd_state ns
)
574 enum drbd_state_rv rv
= SS_SUCCESS
;
576 if ((ns
.conn
== C_STARTING_SYNC_T
|| ns
.conn
== C_STARTING_SYNC_S
) &&
577 os
.conn
> C_CONNECTED
)
578 rv
= SS_RESYNC_RUNNING
;
580 if (ns
.conn
== C_DISCONNECTING
&& os
.conn
== C_STANDALONE
)
581 rv
= SS_ALREADY_STANDALONE
;
583 if (ns
.disk
> D_ATTACHING
&& os
.disk
== D_DISKLESS
)
586 if (ns
.conn
== C_WF_CONNECTION
&& os
.conn
< C_UNCONNECTED
)
587 rv
= SS_NO_NET_CONFIG
;
589 if (ns
.disk
== D_OUTDATED
&& os
.disk
< D_OUTDATED
&& os
.disk
!= D_ATTACHING
)
590 rv
= SS_LOWER_THAN_OUTDATED
;
592 if (ns
.conn
== C_DISCONNECTING
&& os
.conn
== C_UNCONNECTED
)
593 rv
= SS_IN_TRANSIENT_STATE
;
595 /* if (ns.conn == os.conn && ns.conn == C_WF_REPORT_PARAMS)
596 rv = SS_IN_TRANSIENT_STATE; */
598 if ((ns
.conn
== C_VERIFY_S
|| ns
.conn
== C_VERIFY_T
) && os
.conn
< C_CONNECTED
)
599 rv
= SS_NEED_CONNECTION
;
601 if ((ns
.conn
== C_VERIFY_S
|| ns
.conn
== C_VERIFY_T
) &&
602 ns
.conn
!= os
.conn
&& os
.conn
> C_CONNECTED
)
603 rv
= SS_RESYNC_RUNNING
;
605 if ((ns
.conn
== C_STARTING_SYNC_S
|| ns
.conn
== C_STARTING_SYNC_T
) &&
606 os
.conn
< C_CONNECTED
)
607 rv
= SS_NEED_CONNECTION
;
609 if ((ns
.conn
== C_SYNC_TARGET
|| ns
.conn
== C_SYNC_SOURCE
)
610 && os
.conn
< C_WF_REPORT_PARAMS
)
611 rv
= SS_NEED_CONNECTION
; /* No NetworkFailure -> SyncTarget etc... */
616 static enum drbd_state_rv
617 is_valid_conn_transition(enum drbd_conns oc
, enum drbd_conns nc
)
619 /* no change -> nothing to do, at least for the connection part */
621 return SS_NOTHING_TO_DO
;
623 /* disconnect of an unconfigured connection does not make sense */
624 if (oc
== C_STANDALONE
&& nc
== C_DISCONNECTING
)
625 return SS_ALREADY_STANDALONE
;
627 /* from C_STANDALONE, we start with C_UNCONNECTED */
628 if (oc
== C_STANDALONE
&& nc
!= C_UNCONNECTED
)
629 return SS_NEED_CONNECTION
;
631 /* After a network error only C_UNCONNECTED or C_DISCONNECTING may follow. */
632 if (oc
>= C_TIMEOUT
&& oc
<= C_TEAR_DOWN
&& nc
!= C_UNCONNECTED
&& nc
!= C_DISCONNECTING
)
633 return SS_IN_TRANSIENT_STATE
;
635 /* After C_DISCONNECTING only C_STANDALONE may follow */
636 if (oc
== C_DISCONNECTING
&& nc
!= C_STANDALONE
)
637 return SS_IN_TRANSIENT_STATE
;
644 * is_valid_transition() - Returns an SS_ error code if the state transition is not possible
645 * This limits hard state transitions. Hard state transitions are facts that are
646 * imposed on DRBD by the environment. E.g. disk broke or network broke down.
647 * But those hard state transitions are still not allowed to do everything.
651 static enum drbd_state_rv
652 is_valid_transition(union drbd_state os
, union drbd_state ns
)
654 enum drbd_state_rv rv
;
656 rv
= is_valid_conn_transition(os
.conn
, ns
.conn
);
658 /* we cannot fail (again) if we already detached */
659 if (ns
.disk
== D_FAILED
&& os
.disk
== D_DISKLESS
)
665 static void print_sanitize_warnings(struct drbd_conf
*mdev
, enum sanitize_state_warnings warn
)
667 static const char *msg_table
[] = {
669 [ABORTED_ONLINE_VERIFY
] = "Online-verify aborted.",
670 [ABORTED_RESYNC
] = "Resync aborted.",
671 [CONNECTION_LOST_NEGOTIATING
] = "Connection lost while negotiating, no data!",
672 [IMPLICITLY_UPGRADED_DISK
] = "Implicitly upgraded disk",
673 [IMPLICITLY_UPGRADED_PDSK
] = "Implicitly upgraded pdsk",
676 if (warn
!= NO_WARNING
)
677 dev_warn(DEV
, "%s\n", msg_table
[warn
]);
681 * sanitize_state() - Resolves implicitly necessary additional changes to a state transition
682 * @mdev: DRBD device.
687 * When we lose connection, we have to set the state of the peer's disk (pdsk)
688 * to D_UNKNOWN. This rule and many more along those lines are in this function.
690 static union drbd_state
sanitize_state(struct drbd_conf
*mdev
, union drbd_state ns
,
691 enum sanitize_state_warnings
*warn
)
693 enum drbd_fencing_p fp
;
694 enum drbd_disk_state disk_min
, disk_max
, pdsk_min
, pdsk_max
;
700 if (get_ldev(mdev
)) {
702 fp
= rcu_dereference(mdev
->ldev
->disk_conf
)->fencing
;
707 /* Implications from connection to peer and peer_isp */
708 if (ns
.conn
< C_CONNECTED
) {
711 if (ns
.pdsk
> D_UNKNOWN
|| ns
.pdsk
< D_INCONSISTENT
)
715 /* Clear the aftr_isp when becoming unconfigured */
716 if (ns
.conn
== C_STANDALONE
&& ns
.disk
== D_DISKLESS
&& ns
.role
== R_SECONDARY
)
719 /* An implication of the disk states onto the connection state */
720 /* Abort resync if a disk fails/detaches */
721 if (ns
.conn
> C_CONNECTED
&& (ns
.disk
<= D_FAILED
|| ns
.pdsk
<= D_FAILED
)) {
723 *warn
= ns
.conn
== C_VERIFY_S
|| ns
.conn
== C_VERIFY_T
?
724 ABORTED_ONLINE_VERIFY
: ABORTED_RESYNC
;
725 ns
.conn
= C_CONNECTED
;
728 /* Connection breaks down before we finished "Negotiating" */
729 if (ns
.conn
< C_CONNECTED
&& ns
.disk
== D_NEGOTIATING
&&
730 get_ldev_if_state(mdev
, D_NEGOTIATING
)) {
731 if (mdev
->ed_uuid
== mdev
->ldev
->md
.uuid
[UI_CURRENT
]) {
732 ns
.disk
= mdev
->new_state_tmp
.disk
;
733 ns
.pdsk
= mdev
->new_state_tmp
.pdsk
;
736 *warn
= CONNECTION_LOST_NEGOTIATING
;
737 ns
.disk
= D_DISKLESS
;
743 /* D_CONSISTENT and D_OUTDATED vanish when we get connected */
744 if (ns
.conn
>= C_CONNECTED
&& ns
.conn
< C_AHEAD
) {
745 if (ns
.disk
== D_CONSISTENT
|| ns
.disk
== D_OUTDATED
)
746 ns
.disk
= D_UP_TO_DATE
;
747 if (ns
.pdsk
== D_CONSISTENT
|| ns
.pdsk
== D_OUTDATED
)
748 ns
.pdsk
= D_UP_TO_DATE
;
751 /* Implications of the connection state on the disk states */
752 disk_min
= D_DISKLESS
;
753 disk_max
= D_UP_TO_DATE
;
754 pdsk_min
= D_INCONSISTENT
;
755 pdsk_max
= D_UNKNOWN
;
756 switch ((enum drbd_conns
)ns
.conn
) {
758 case C_PAUSED_SYNC_T
:
759 case C_STARTING_SYNC_T
:
762 disk_min
= D_INCONSISTENT
;
763 disk_max
= D_OUTDATED
;
764 pdsk_min
= D_UP_TO_DATE
;
765 pdsk_max
= D_UP_TO_DATE
;
769 disk_min
= D_UP_TO_DATE
;
770 disk_max
= D_UP_TO_DATE
;
771 pdsk_min
= D_UP_TO_DATE
;
772 pdsk_max
= D_UP_TO_DATE
;
775 disk_min
= D_DISKLESS
;
776 disk_max
= D_UP_TO_DATE
;
777 pdsk_min
= D_DISKLESS
;
778 pdsk_max
= D_UP_TO_DATE
;
781 case C_PAUSED_SYNC_S
:
782 case C_STARTING_SYNC_S
:
784 disk_min
= D_UP_TO_DATE
;
785 disk_max
= D_UP_TO_DATE
;
786 pdsk_min
= D_INCONSISTENT
;
787 pdsk_max
= D_CONSISTENT
; /* D_OUTDATED would be nice. But explicit outdate necessary*/
790 disk_min
= D_INCONSISTENT
;
791 disk_max
= D_INCONSISTENT
;
792 pdsk_min
= D_UP_TO_DATE
;
793 pdsk_max
= D_UP_TO_DATE
;
796 disk_min
= D_UP_TO_DATE
;
797 disk_max
= D_UP_TO_DATE
;
798 pdsk_min
= D_INCONSISTENT
;
799 pdsk_max
= D_INCONSISTENT
;
802 case C_DISCONNECTING
:
806 case C_NETWORK_FAILURE
:
807 case C_PROTOCOL_ERROR
:
809 case C_WF_CONNECTION
:
810 case C_WF_REPORT_PARAMS
:
814 if (ns
.disk
> disk_max
)
817 if (ns
.disk
< disk_min
) {
819 *warn
= IMPLICITLY_UPGRADED_DISK
;
822 if (ns
.pdsk
> pdsk_max
)
825 if (ns
.pdsk
< pdsk_min
) {
827 *warn
= IMPLICITLY_UPGRADED_PDSK
;
831 if (fp
== FP_STONITH
&&
832 (ns
.role
== R_PRIMARY
&& ns
.conn
< C_CONNECTED
&& ns
.pdsk
> D_OUTDATED
))
833 ns
.susp_fen
= 1; /* Suspend IO while fence-peer handler runs (peer lost) */
835 if (mdev
->tconn
->res_opts
.on_no_data
== OND_SUSPEND_IO
&&
836 (ns
.role
== R_PRIMARY
&& ns
.disk
< D_UP_TO_DATE
&& ns
.pdsk
< D_UP_TO_DATE
))
837 ns
.susp_nod
= 1; /* Suspend IO while no data available (no accessible data available) */
839 if (ns
.aftr_isp
|| ns
.peer_isp
|| ns
.user_isp
) {
840 if (ns
.conn
== C_SYNC_SOURCE
)
841 ns
.conn
= C_PAUSED_SYNC_S
;
842 if (ns
.conn
== C_SYNC_TARGET
)
843 ns
.conn
= C_PAUSED_SYNC_T
;
845 if (ns
.conn
== C_PAUSED_SYNC_S
)
846 ns
.conn
= C_SYNC_SOURCE
;
847 if (ns
.conn
== C_PAUSED_SYNC_T
)
848 ns
.conn
= C_SYNC_TARGET
;
854 void drbd_resume_al(struct drbd_conf
*mdev
)
856 if (test_and_clear_bit(AL_SUSPENDED
, &mdev
->flags
))
857 dev_info(DEV
, "Resumed AL updates\n");
860 /* helper for __drbd_set_state */
861 static void set_ov_position(struct drbd_conf
*mdev
, enum drbd_conns cs
)
863 if (mdev
->tconn
->agreed_pro_version
< 90)
864 mdev
->ov_start_sector
= 0;
865 mdev
->rs_total
= drbd_bm_bits(mdev
);
866 mdev
->ov_position
= 0;
867 if (cs
== C_VERIFY_T
) {
868 /* starting online verify from an arbitrary position
869 * does not fit well into the existing protocol.
870 * on C_VERIFY_T, we initialize ov_left and friends
871 * implicitly in receive_DataRequest once the
872 * first P_OV_REQUEST is received */
873 mdev
->ov_start_sector
= ~(sector_t
)0;
875 unsigned long bit
= BM_SECT_TO_BIT(mdev
->ov_start_sector
);
876 if (bit
>= mdev
->rs_total
) {
877 mdev
->ov_start_sector
=
878 BM_BIT_TO_SECT(mdev
->rs_total
- 1);
881 mdev
->rs_total
-= bit
;
882 mdev
->ov_position
= mdev
->ov_start_sector
;
884 mdev
->ov_left
= mdev
->rs_total
;
888 * __drbd_set_state() - Set a new DRBD state
889 * @mdev: DRBD device.
892 * @done: Optional completion, that will get completed after the after_state_ch() finished
894 * Caller needs to hold req_lock, and global_state_lock. Do not call directly.
897 __drbd_set_state(struct drbd_conf
*mdev
, union drbd_state ns
,
898 enum chg_state_flags flags
, struct completion
*done
)
901 enum drbd_state_rv rv
= SS_SUCCESS
;
902 enum sanitize_state_warnings ssw
;
903 struct after_state_chg_work
*ascw
;
905 os
= drbd_read_state(mdev
);
907 ns
= sanitize_state(mdev
, ns
, &ssw
);
909 return SS_NOTHING_TO_DO
;
911 rv
= is_valid_transition(os
, ns
);
915 if (!(flags
& CS_HARD
)) {
916 /* pre-state-change checks ; only look at ns */
917 /* See drbd_state_sw_errors in drbd_strings.c */
919 rv
= is_valid_state(mdev
, ns
);
920 if (rv
< SS_SUCCESS
) {
921 /* If the old state was illegal as well, then let
924 if (is_valid_state(mdev
, os
) == rv
)
925 rv
= is_valid_soft_transition(os
, ns
);
927 rv
= is_valid_soft_transition(os
, ns
);
930 if (rv
< SS_SUCCESS
) {
931 if (flags
& CS_VERBOSE
)
932 print_st_err(mdev
, os
, ns
, rv
);
936 print_sanitize_warnings(mdev
, ssw
);
938 drbd_pr_state_change(mdev
, os
, ns
, flags
);
940 /* Display changes to the susp* flags that were caused by the call to
941 sanitize_state(). Only display it here if we were not called from
942 _conn_request_state() */
943 if (!(flags
& CS_DC_SUSP
))
944 conn_pr_state_change(mdev
->tconn
, os
, ns
, (flags
& ~CS_DC_MASK
) | CS_DC_SUSP
);
946 /* if we are going -> D_FAILED or D_DISKLESS, grab one extra reference
947 * on the ldev here, to be sure the transition -> D_DISKLESS resp.
948 * drbd_ldev_destroy() won't happen before our corresponding
949 * after_state_ch works run, where we put_ldev again. */
950 if ((os
.disk
!= D_FAILED
&& ns
.disk
== D_FAILED
) ||
951 (os
.disk
!= D_DISKLESS
&& ns
.disk
== D_DISKLESS
))
952 atomic_inc(&mdev
->local_cnt
);
954 mdev
->state
.i
= ns
.i
;
955 mdev
->tconn
->susp
= ns
.susp
;
956 mdev
->tconn
->susp_nod
= ns
.susp_nod
;
957 mdev
->tconn
->susp_fen
= ns
.susp_fen
;
959 if (os
.disk
== D_ATTACHING
&& ns
.disk
>= D_NEGOTIATING
)
960 drbd_print_uuids(mdev
, "attached to UUIDs");
962 wake_up(&mdev
->misc_wait
);
963 wake_up(&mdev
->state_wait
);
964 wake_up(&mdev
->tconn
->ping_wait
);
966 /* aborted verify run. log the last position */
967 if ((os
.conn
== C_VERIFY_S
|| os
.conn
== C_VERIFY_T
) &&
968 ns
.conn
< C_CONNECTED
) {
969 mdev
->ov_start_sector
=
970 BM_BIT_TO_SECT(drbd_bm_bits(mdev
) - mdev
->ov_left
);
971 dev_info(DEV
, "Online Verify reached sector %llu\n",
972 (unsigned long long)mdev
->ov_start_sector
);
975 if ((os
.conn
== C_PAUSED_SYNC_T
|| os
.conn
== C_PAUSED_SYNC_S
) &&
976 (ns
.conn
== C_SYNC_TARGET
|| ns
.conn
== C_SYNC_SOURCE
)) {
977 dev_info(DEV
, "Syncer continues.\n");
978 mdev
->rs_paused
+= (long)jiffies
979 -(long)mdev
->rs_mark_time
[mdev
->rs_last_mark
];
980 if (ns
.conn
== C_SYNC_TARGET
)
981 mod_timer(&mdev
->resync_timer
, jiffies
);
984 if ((os
.conn
== C_SYNC_TARGET
|| os
.conn
== C_SYNC_SOURCE
) &&
985 (ns
.conn
== C_PAUSED_SYNC_T
|| ns
.conn
== C_PAUSED_SYNC_S
)) {
986 dev_info(DEV
, "Resync suspended\n");
987 mdev
->rs_mark_time
[mdev
->rs_last_mark
] = jiffies
;
990 if (os
.conn
== C_CONNECTED
&&
991 (ns
.conn
== C_VERIFY_S
|| ns
.conn
== C_VERIFY_T
)) {
992 unsigned long now
= jiffies
;
995 set_ov_position(mdev
, ns
.conn
);
996 mdev
->rs_start
= now
;
997 mdev
->rs_last_events
= 0;
998 mdev
->rs_last_sect_ev
= 0;
999 mdev
->ov_last_oos_size
= 0;
1000 mdev
->ov_last_oos_start
= 0;
1002 for (i
= 0; i
< DRBD_SYNC_MARKS
; i
++) {
1003 mdev
->rs_mark_left
[i
] = mdev
->ov_left
;
1004 mdev
->rs_mark_time
[i
] = now
;
1007 drbd_rs_controller_reset(mdev
);
1009 if (ns
.conn
== C_VERIFY_S
) {
1010 dev_info(DEV
, "Starting Online Verify from sector %llu\n",
1011 (unsigned long long)mdev
->ov_position
);
1012 mod_timer(&mdev
->resync_timer
, jiffies
);
1016 if (get_ldev(mdev
)) {
1017 u32 mdf
= mdev
->ldev
->md
.flags
& ~(MDF_CONSISTENT
|MDF_PRIMARY_IND
|
1018 MDF_CONNECTED_IND
|MDF_WAS_UP_TO_DATE
|
1019 MDF_PEER_OUT_DATED
|MDF_CRASHED_PRIMARY
);
1021 mdf
&= ~MDF_AL_CLEAN
;
1022 if (test_bit(CRASHED_PRIMARY
, &mdev
->flags
))
1023 mdf
|= MDF_CRASHED_PRIMARY
;
1024 if (mdev
->state
.role
== R_PRIMARY
||
1025 (mdev
->state
.pdsk
< D_INCONSISTENT
&& mdev
->state
.peer
== R_PRIMARY
))
1026 mdf
|= MDF_PRIMARY_IND
;
1027 if (mdev
->state
.conn
> C_WF_REPORT_PARAMS
)
1028 mdf
|= MDF_CONNECTED_IND
;
1029 if (mdev
->state
.disk
> D_INCONSISTENT
)
1030 mdf
|= MDF_CONSISTENT
;
1031 if (mdev
->state
.disk
> D_OUTDATED
)
1032 mdf
|= MDF_WAS_UP_TO_DATE
;
1033 if (mdev
->state
.pdsk
<= D_OUTDATED
&& mdev
->state
.pdsk
>= D_INCONSISTENT
)
1034 mdf
|= MDF_PEER_OUT_DATED
;
1035 if (mdf
!= mdev
->ldev
->md
.flags
) {
1036 mdev
->ldev
->md
.flags
= mdf
;
1037 drbd_md_mark_dirty(mdev
);
1039 if (os
.disk
< D_CONSISTENT
&& ns
.disk
>= D_CONSISTENT
)
1040 drbd_set_ed_uuid(mdev
, mdev
->ldev
->md
.uuid
[UI_CURRENT
]);
1044 /* Peer was forced D_UP_TO_DATE & R_PRIMARY, consider to resync */
1045 if (os
.disk
== D_INCONSISTENT
&& os
.pdsk
== D_INCONSISTENT
&&
1046 os
.peer
== R_SECONDARY
&& ns
.peer
== R_PRIMARY
)
1047 set_bit(CONSIDER_RESYNC
, &mdev
->flags
);
1049 /* Receiver should clean up itself */
1050 if (os
.conn
!= C_DISCONNECTING
&& ns
.conn
== C_DISCONNECTING
)
1051 drbd_thread_stop_nowait(&mdev
->tconn
->receiver
);
1053 /* Now the receiver finished cleaning up itself, it should die */
1054 if (os
.conn
!= C_STANDALONE
&& ns
.conn
== C_STANDALONE
)
1055 drbd_thread_stop_nowait(&mdev
->tconn
->receiver
);
1057 /* Upon network failure, we need to restart the receiver. */
1058 if (os
.conn
> C_TEAR_DOWN
&&
1059 ns
.conn
<= C_TEAR_DOWN
&& ns
.conn
>= C_TIMEOUT
)
1060 drbd_thread_restart_nowait(&mdev
->tconn
->receiver
);
1062 /* Resume AL writing if we get a connection */
1063 if (os
.conn
< C_CONNECTED
&& ns
.conn
>= C_CONNECTED
)
1064 drbd_resume_al(mdev
);
1066 ascw
= kmalloc(sizeof(*ascw
), GFP_ATOMIC
);
1070 ascw
->flags
= flags
;
1071 ascw
->w
.cb
= w_after_state_ch
;
1072 ascw
->w
.mdev
= mdev
;
1074 drbd_queue_work(&mdev
->tconn
->data
.work
, &ascw
->w
);
1076 dev_err(DEV
, "Could not kmalloc an ascw\n");
1082 static int w_after_state_ch(struct drbd_work
*w
, int unused
)
1084 struct after_state_chg_work
*ascw
=
1085 container_of(w
, struct after_state_chg_work
, w
);
1086 struct drbd_conf
*mdev
= w
->mdev
;
1088 after_state_ch(mdev
, ascw
->os
, ascw
->ns
, ascw
->flags
);
1089 if (ascw
->flags
& CS_WAIT_COMPLETE
) {
1090 D_ASSERT(ascw
->done
!= NULL
);
1091 complete(ascw
->done
);
1098 static void abw_start_sync(struct drbd_conf
*mdev
, int rv
)
1101 dev_err(DEV
, "Writing the bitmap failed not starting resync.\n");
1102 _drbd_request_state(mdev
, NS(conn
, C_CONNECTED
), CS_VERBOSE
);
1106 switch (mdev
->state
.conn
) {
1107 case C_STARTING_SYNC_T
:
1108 _drbd_request_state(mdev
, NS(conn
, C_WF_SYNC_UUID
), CS_VERBOSE
);
1110 case C_STARTING_SYNC_S
:
1111 drbd_start_resync(mdev
, C_SYNC_SOURCE
);
1116 int drbd_bitmap_io_from_worker(struct drbd_conf
*mdev
,
1117 int (*io_fn
)(struct drbd_conf
*),
1118 char *why
, enum bm_flag flags
)
1122 D_ASSERT(current
== mdev
->tconn
->worker
.task
);
1124 /* open coded non-blocking drbd_suspend_io(mdev); */
1125 set_bit(SUSPEND_IO
, &mdev
->flags
);
1127 drbd_bm_lock(mdev
, why
, flags
);
1129 drbd_bm_unlock(mdev
);
1131 drbd_resume_io(mdev
);
1137 * after_state_ch() - Perform after state change actions that may sleep
1138 * @mdev: DRBD device.
/*
 * after_state_ch() compares the state before the change (os) with the state
 * after it (ns) and triggers the follow-up work for each transition it
 * recognizes: notifying userspace and the peer, queuing bitmap IO, calling
 * helpers, restarting frozen IO and pausing/resuming other devices.
 * flags are the flags of the state change that got us here.
 * Several checks also look at mdev->state, i.e. the current state, because
 * this runs some time after the change was made.
 */
1143 static void after_state_ch(struct drbd_conf
*mdev
, union drbd_state os
,
1144 union drbd_state ns
, enum chg_state_flags flags
)
1146 enum drbd_fencing_p fp
;
1147 struct sib_info sib
;
1149 sib
.sib_reason
= SIB_STATE_CHANGE
;
/* Just became C_CONNECTED: forget CRASHED_PRIMARY, and clear the
 * bit with value 2 in the peer's UI_FLAGS uuid word. */
1153 if (os
.conn
!= C_CONNECTED
&& ns
.conn
== C_CONNECTED
) {
1154 clear_bit(CRASHED_PRIMARY
, &mdev
->flags
);
1156 mdev
->p_uuid
[UI_FLAGS
] &= ~((u64
)2);
/* With a local disk, read the fencing policy from its disk_conf. */
1160 if (get_ldev(mdev
)) {
1162 fp
= rcu_dereference(mdev
->ldev
->disk_conf
)->fencing
;
1167 /* Inform userspace about the change... */
1168 drbd_bcast_event(mdev
, &sib
);
/* Newly became "Primary with neither local nor peer disk D_UP_TO_DATE"
 * (was not in that situation before): call the matching helper. */
1170 if (!(os
.role
== R_PRIMARY
&& os
.disk
< D_UP_TO_DATE
&& os
.pdsk
< D_UP_TO_DATE
) &&
1171 (ns
.role
== R_PRIMARY
&& ns
.disk
< D_UP_TO_DATE
&& ns
.pdsk
< D_UP_TO_DATE
))
1172 drbd_khelper(mdev
, "pri-on-incon-degr");
1174 /* Here we have the actions that are performed after a
1175 state change. This function might sleep */
/* Pick the transfer log restart action, if any; NOTHING means none. */
1178 enum drbd_req_event what
= NOTHING
;
1180 if (os
.conn
< C_CONNECTED
&& conn_lowest_conn(mdev
->tconn
) >= C_CONNECTED
)
/* We were attaching/negotiating, and by now the lowest disk state
 * on this connection is beyond D_NEGOTIATING. */
1183 if ((os
.disk
== D_ATTACHING
|| os
.disk
== D_NEGOTIATING
) &&
1184 conn_lowest_disk(mdev
->tconn
) > D_NEGOTIATING
)
1185 what
= RESTART_FROZEN_DISK_IO
;
/* Restart the transfer log and clear susp_nod, both under req_lock. */
1187 if (what
!= NOTHING
) {
1188 spin_lock_irq(&mdev
->tconn
->req_lock
);
1189 _tl_restart(mdev
->tconn
, what
);
1190 _drbd_set_state(_NS(mdev
, susp_nod
, 0), CS_VERBOSE
, NULL
);
1191 spin_unlock_irq(&mdev
->tconn
->req_lock
);
1195 /* Became sync source. With protocol >= 96, we still need to send out
1196 * the sync uuid now. Need to do that before any drbd_send_state, or
1197 * the other side may go "paused sync" before receiving the sync uuids,
1198 * which is unexpected. */
1199 if ((os
.conn
!= C_SYNC_SOURCE
&& os
.conn
!= C_PAUSED_SYNC_S
) &&
1200 (ns
.conn
== C_SYNC_SOURCE
|| ns
.conn
== C_PAUSED_SYNC_S
) &&
1201 mdev
->tconn
->agreed_pro_version
>= 96 && get_ldev(mdev
)) {
1202 drbd_gen_and_send_sync_uuid(mdev
);
1206 /* Do not change the order of the if above and the two below... */
1207 if (os
.pdsk
== D_DISKLESS
&&
1208 ns
.pdsk
> D_DISKLESS
&& ns
.pdsk
!= D_UNKNOWN
) { /* attach on the peer */
1209 drbd_send_uuids(mdev
);
1210 drbd_send_state(mdev
);
1212 /* No point in queuing send_bitmap if we don't have a connection
1213 * anymore, so check also the _current_ state, not only the new state
1214 * at the time this work was queued. */
1215 if (os
.conn
!= C_WF_BITMAP_S
&& ns
.conn
== C_WF_BITMAP_S
&&
1216 mdev
->state
.conn
== C_WF_BITMAP_S
)
1217 drbd_queue_bitmap_io(mdev
, &drbd_send_bitmap
, NULL
,
1218 "send_bitmap (WFBitMapS)",
1219 BM_LOCKED_TEST_ALLOWED
);
1221 /* Lost contact to peer's copy of the data */
1222 if ((os
.pdsk
>= D_INCONSISTENT
&&
1223 os
.pdsk
!= D_UNKNOWN
&&
1224 os
.pdsk
!= D_OUTDATED
)
1225 && (ns
.pdsk
< D_INCONSISTENT
||
1226 ns
.pdsk
== D_UNKNOWN
||
1227 ns
.pdsk
== D_OUTDATED
)) {
1228 if (get_ldev(mdev
)) {
/* Some node is Primary, no bitmap uuid is set yet, and our disk is
 * at least D_UP_TO_DATE: a new current uuid is due.  While IO is
 * suspended only NEW_CUR_UUID is flagged. */
1229 if ((ns
.role
== R_PRIMARY
|| ns
.peer
== R_PRIMARY
) &&
1230 mdev
->ldev
->md
.uuid
[UI_BITMAP
] == 0 && ns
.disk
>= D_UP_TO_DATE
) {
1231 if (drbd_suspended(mdev
)) {
1232 set_bit(NEW_CUR_UUID
, &mdev
->flags
);
1234 drbd_uuid_new_current(mdev
);
1235 drbd_send_uuids(mdev
);
1242 if (ns
.pdsk
< D_INCONSISTENT
&& get_ldev(mdev
)) {
1243 /* D_DISKLESS Peer becomes secondary */
1244 if (os
.peer
== R_PRIMARY
&& ns
.peer
== R_SECONDARY
)
1245 /* We may still be Primary ourselves.
1246 * No harm done if the bitmap still changes,
1247 * redirtied pages will follow later. */
1248 drbd_bitmap_io_from_worker(mdev
, &drbd_bm_write
,
1249 "demote diskless peer", BM_LOCKED_SET_ALLOWED
);
1253 /* Write out all changed bits on demote.
1254 * Though, no need to do that just yet
1255 * if there is a resync going on still */
1256 if (os
.role
== R_PRIMARY
&& ns
.role
== R_SECONDARY
&&
1257 mdev
->state
.conn
<= C_CONNECTED
&& get_ldev(mdev
)) {
1258 /* No changes to the bitmap expected this time, so assert that,
1259 * even though no harm was done if it did change. */
1260 drbd_bitmap_io_from_worker(mdev
, &drbd_bm_write
,
1261 "demote", BM_LOCKED_TEST_ALLOWED
);
1265 /* Last part of the attaching process ... */
1266 if (ns
.conn
>= C_CONNECTED
&&
1267 os
.disk
== D_ATTACHING
&& ns
.disk
== D_NEGOTIATING
) {
1268 drbd_send_sizes(mdev
, 0, 0); /* to start sync... */
1269 drbd_send_uuids(mdev
);
1270 drbd_send_state(mdev
);
1273 /* We want to pause/continue resync, tell peer. */
1274 if (ns
.conn
>= C_CONNECTED
&&
1275 ((os
.aftr_isp
!= ns
.aftr_isp
) ||
1276 (os
.user_isp
!= ns
.user_isp
)))
1277 drbd_send_state(mdev
);
1279 /* In case one of the isp bits got set, suspend other devices. */
1280 if ((!os
.aftr_isp
&& !os
.peer_isp
&& !os
.user_isp
) &&
1281 (ns
.aftr_isp
|| ns
.peer_isp
|| ns
.user_isp
))
1282 suspend_other_sg(mdev
);
1284 /* Make sure the peer gets informed about eventual state
1285 changes (ISP bits) while we were in WFReportParams. */
1286 if (os
.conn
== C_WF_REPORT_PARAMS
&& ns
.conn
>= C_CONNECTED
)
1287 drbd_send_state(mdev
);
/* Entering C_AHEAD is announced to the peer as well. */
1289 if (os
.conn
!= C_AHEAD
&& ns
.conn
== C_AHEAD
)
1290 drbd_send_state(mdev
);
1292 /* We are in the process of starting a full sync... */
1293 if ((os
.conn
!= C_STARTING_SYNC_T
&& ns
.conn
== C_STARTING_SYNC_T
) ||
1294 (os
.conn
!= C_STARTING_SYNC_S
&& ns
.conn
== C_STARTING_SYNC_S
))
1295 /* no other bitmap changes expected during this phase */
1296 drbd_queue_bitmap_io(mdev
,
1297 &drbd_bmio_set_n_write
, &abw_start_sync
,
1298 "set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED
);
1300 /* We are invalidating ourselves... */
1301 if (os
.conn
< C_CONNECTED
&& ns
.conn
< C_CONNECTED
&&
1302 os
.disk
> D_INCONSISTENT
&& ns
.disk
== D_INCONSISTENT
)
1303 /* other bitmap operation expected during this phase */
1304 drbd_queue_bitmap_io(mdev
, &drbd_bmio_set_n_write
, NULL
,
1305 "set_n_write from invalidate", BM_LOCKED_MASK
);
1307 /* first half of local IO error, failure to attach,
1308 * or administrative detach */
1309 if (os
.disk
!= D_FAILED
&& ns
.disk
== D_FAILED
) {
1310 enum drbd_io_error_p eh
;
1312 /* corresponding get_ldev was in __drbd_set_state, to serialize
1313 * our cleanup here with the transition to D_DISKLESS,
1314 * so it is safe to dereference ldev here. */
1316 eh
= rcu_dereference(mdev
->ldev
->disk_conf
)->on_io_error
;
/* Remember, and reset, whether an IO error brought us here;
 * the helper call below depends on it. */
1318 was_io_error
= test_and_clear_bit(WAS_IO_ERROR
, &mdev
->flags
);
1320 /* Immediately allow completion of all application IO, that waits
1321 for completion from the local disk. */
1322 tl_abort_disk_io(mdev
);
1324 /* current state still has to be D_FAILED,
1325 * there is only one way out: to D_DISKLESS,
1326 * and that may only happen after our put_ldev below. */
1327 if (mdev
->state
.disk
!= D_FAILED
)
1329 "ASSERT FAILED: disk is %s during detach\n",
1330 drbd_disk_str(mdev
->state
.disk
));
1332 drbd_send_state(mdev
);
1333 drbd_rs_cancel_all(mdev
);
1335 /* In case we want to get something to stable storage still,
1336 * this may be the last chance.
1337 * Following put_ldev may transition to D_DISKLESS. */
/* The helper is only called for real IO errors, and only if the
 * on_io_error policy asks for it. */
1341 if (was_io_error
&& eh
== EP_CALL_HELPER
)
1342 drbd_khelper(mdev
, "local-io-error");
1345 /* second half of local IO error, failure to attach,
1346 * or administrative detach,
1347 * after local_cnt references have reached zero again */
1348 if (os
.disk
!= D_DISKLESS
&& ns
.disk
== D_DISKLESS
) {
1349 /* We must still be diskless,
1350 * re-attach has to be serialized with this! */
1351 if (mdev
->state
.disk
!= D_DISKLESS
)
1353 "ASSERT FAILED: disk is %s while going diskless\n",
1354 drbd_disk_str(mdev
->state
.disk
));
/* Without a disk, reset the resync failure and pending counters. */
1357 mdev
->rs_failed
= 0;
1358 atomic_set(&mdev
->rs_pending_cnt
, 0);
1360 drbd_send_state(mdev
);
1361 /* corresponding get_ldev in __drbd_set_state
1362 * this may finally trigger drbd_ldev_destroy. */
1366 /* Notify peer that I had a local IO error, and did not detach. */
1367 if (os
.disk
== D_UP_TO_DATE
&& ns
.disk
== D_INCONSISTENT
)
1368 drbd_send_state(mdev
);
1370 /* Disks got bigger while they were detached */
1371 if (ns
.disk
> D_NEGOTIATING
&& ns
.pdsk
> D_NEGOTIATING
&&
1372 test_and_clear_bit(RESYNC_AFTER_NEG
, &mdev
->flags
)) {
1373 if (ns
.conn
== C_CONNECTED
)
1374 resync_after_online_grow(mdev
);
1377 /* A resync finished or aborted, wake paused devices... */
1378 if ((os
.conn
> C_CONNECTED
&& ns
.conn
<= C_CONNECTED
) ||
1379 (os
.peer_isp
&& !ns
.peer_isp
) ||
1380 (os
.user_isp
&& !ns
.user_isp
))
1381 resume_next_sg(mdev
);
1383 /* sync target done with resync. Explicitly notify peer, even though
1384 * it should (at least for non-empty resyncs) already know itself. */
1385 if (os
.disk
< D_UP_TO_DATE
&& os
.conn
>= C_SYNC_SOURCE
&& ns
.conn
== C_CONNECTED
)
1386 drbd_send_state(mdev
);
1388 /* This triggers bitmap writeout of potentially still unwritten pages
1389 * if the resync finished cleanly, or aborted because of peer disk
1390 * failure, or because of connection loss.
1391 * For resync aborted because of local disk failure, we cannot do
1392 * any bitmap writeout anymore.
1393 * No harm done if some bits change during this phase.
1395 if (os
.conn
> C_CONNECTED
&& ns
.conn
<= C_CONNECTED
&& get_ldev(mdev
)) {
1396 drbd_queue_bitmap_io(mdev
, &drbd_bm_write
, NULL
,
1397 "write from resync_finished", BM_LOCKED_SET_ALLOWED
);
1401 if (ns
.disk
== D_DISKLESS
&&
1402 ns
.conn
== C_STANDALONE
&&
1403 ns
.role
== R_SECONDARY
) {
1404 if (os
.aftr_isp
!= ns
.aftr_isp
)
1405 resume_next_sg(mdev
);
1411 struct after_conn_state_chg_work
{
1414 union drbd_state ns_min
;
1415 union drbd_state ns_max
; /* new, max state, over all mdevs; ns_min is the matching minimum */
1416 enum chg_state_flags flags
; /* flags of the connection state change */
/* Context of a deferred connection state change: filled in and queued by
 * _conn_request_state(), read by the worker in w_after_conn_state_ch(). */
/**
 * w_after_conn_state_ch() - Work callback run after a connection state change
 * @w:		work item embedded in a struct after_conn_state_chg_work.
 * @unused:	not referenced by the code visible here.
 *
 * Starts the receiver after network configuration, detaches the network
 * configuration once disconnecting reached C_STANDALONE, and clears a
 * fencing related suspension (susp_fen) when one of its two exit conditions
 * is met.  Finally drops the tconn reference that was taken when the work
 * was queued.
 */
1419 static int w_after_conn_state_ch(struct drbd_work
*w
, int unused
)
1421 struct after_conn_state_chg_work
*acscw
=
1422 container_of(w
, struct after_conn_state_chg_work
, w
);
1423 struct drbd_tconn
*tconn
= w
->tconn
;
/* Local copies of what _conn_request_state() recorded. */
1424 enum drbd_conns oc
= acscw
->oc
;
1425 union drbd_state ns_max
= acscw
->ns_max
;
1426 union drbd_state ns_min
= acscw
->ns_min
;
1427 struct drbd_conf
*mdev
;
1432 /* Upon network configuration, we need to start the receiver */
1433 if (oc
== C_STANDALONE
&& ns_max
.conn
== C_UNCONNECTED
)
1434 drbd_thread_start(&tconn
->receiver
);
/* Disconnect finished: unpublish the network configuration and reset
 * the address lengths, serialized by conf_update.  old_conf keeps the
 * previous net_conf pointer. */
1436 if (oc
== C_DISCONNECTING
&& ns_max
.conn
== C_STANDALONE
) {
1437 struct net_conf
*old_conf
;
1439 mutex_lock(&tconn
->conf_update
);
1440 old_conf
= tconn
->net_conf
;
1441 tconn
->my_addr_len
= 0;
1442 tconn
->peer_addr_len
= 0;
1443 rcu_assign_pointer(tconn
->net_conf
, NULL
);
1444 conn_free_crypto(tconn
);
1445 mutex_unlock(&tconn
->conf_update
);
/* susp_fen is set: both cases below request susp_fen 1 -> 0. */
1451 if (ns_max
.susp_fen
) {
1452 /* case1: The outdate peer handler is successful: */
1453 if (ns_max
.pdsk
<= D_OUTDATED
) {
/* Create the new current uuid on every volume that has
 * NEW_CUR_UUID flagged, and clear that flag. */
1456 idr_for_each_entry(&tconn
->volumes
, mdev
, vnr
) {
1457 if (test_bit(NEW_CUR_UUID
, &mdev
->flags
)) {
1458 drbd_uuid_new_current(mdev
);
1459 clear_bit(NEW_CUR_UUID
, &mdev
->flags
);
1463 conn_request_state(tconn
,
1464 (union drbd_state
) { { .susp_fen
= 1 } },
1465 (union drbd_state
) { { .susp_fen
= 0 } },
1468 /* case2: The connection was established again: */
1469 if (ns_min
.conn
>= C_CONNECTED
) {
/* Connected again on all volumes: no new current uuid needed. */
1471 idr_for_each_entry(&tconn
->volumes
, mdev
, vnr
)
1472 clear_bit(NEW_CUR_UUID
, &mdev
->flags
);
/* Resend via the transfer log and clear susp_fen in one req_lock
 * section, hence the variant that expects the lock to be held. */
1474 spin_lock_irq(&tconn
->req_lock
);
1475 _tl_restart(tconn
, RESEND
);
1476 _conn_request_state(tconn
,
1477 (union drbd_state
) { { .susp_fen
= 1 } },
1478 (union drbd_state
) { { .susp_fen
= 0 } },
1480 spin_unlock_irq(&tconn
->req_lock
);
/* Pairs with the kref_get() in _conn_request_state(). */
1483 kref_put(&tconn
->kref
, &conn_destroy
);
/**
 * conn_old_common_state() - Find what all volumes of a connection have in common
 * @tconn:	DRBD connection.
 * @pcs:	common state; NOTE(review): presumably an output, the store
 *		is not visible here -- confirm.
 * @pf:		state change flags; NOTE(review): presumably adjusted by the
 *		CS_DC_* bits computed here -- confirm.
 *
 * Starts with all flag bits set and, walking over the volumes, removes the
 * CS_DC_* bit of every state field in which two compared states differ.
 */
1487 void conn_old_common_state(struct drbd_tconn
*tconn
, union drbd_state
*pcs
, enum chg_state_flags
*pf
)
/* All bits set; the CS_DC_* bits are cleared below on differences. */
1489 enum chg_state_flags flags
= ~0;
1490 struct drbd_conf
*mdev
;
1491 int vnr
, first_vol
= 1;
/* cs starts from defaults, with the connection state of the tconn. */
1492 union drbd_dev_state os
, cs
= {
1493 { .role
= R_SECONDARY
,
1495 .conn
= tconn
->cstate
,
1501 idr_for_each_entry(&tconn
->volumes
, mdev
, vnr
) {
/* A field that is not the same everywhere loses its CS_DC_* bit. */
1510 if (cs
.role
!= os
.role
)
1511 flags
&= ~CS_DC_ROLE
;
1513 if (cs
.peer
!= os
.peer
)
1514 flags
&= ~CS_DC_PEER
;
1516 if (cs
.conn
!= os
.conn
)
1517 flags
&= ~CS_DC_CONN
;
1519 if (cs
.disk
!= os
.disk
)
1520 flags
&= ~CS_DC_DISK
;
1522 if (cs
.pdsk
!= os
.pdsk
)
1523 flags
&= ~CS_DC_PDSK
;
/**
 * conn_is_valid_transition() - Check a state change against all volumes
 * @tconn:	DRBD connection.
 * @mask:	which state fields to change.
 * @val:	new values for the masked fields.
 * @flags:	CS_IGN_OUTD_FAIL, CS_HARD and CS_VERBOSE are evaluated here.
 *
 * For every volume, computes the sanitized state that applying @mask/@val
 * would give and validates the transition from the current state to it.
 * With CS_VERBOSE a failure is reported through print_st_err().
 */
1532 static enum drbd_state_rv
1533 conn_is_valid_transition(struct drbd_tconn
*tconn
, union drbd_state mask
, union drbd_state val
,
1534 enum chg_state_flags flags
)
1536 enum drbd_state_rv rv
= SS_SUCCESS
;
1537 union drbd_state ns
, os
;
1538 struct drbd_conf
*mdev
;
1542 idr_for_each_entry(&tconn
->volumes
, mdev
, vnr
) {
/* os: current state of this volume; ns: what it would become. */
1543 os
= drbd_read_state(mdev
);
1544 ns
= sanitize_state(mdev
, apply_mask_val(os
, mask
, val
), NULL
);
/* Special case: the change would raise a disk below D_OUTDATED to
 * D_OUTDATED, and the caller asked to ignore that failure. */
1546 if (flags
& CS_IGN_OUTD_FAIL
&& ns
.disk
== D_OUTDATED
&& os
.disk
< D_OUTDATED
)
1552 rv
= is_valid_transition(os
, ns
);
1553 if (rv
< SS_SUCCESS
)
/* The remaining checks are skipped for CS_HARD state changes. */
1556 if (!(flags
& CS_HARD
)) {
1557 rv
= is_valid_state(mdev
, ns
);
1558 if (rv
< SS_SUCCESS
) {
/* The old state fails validation in the very same way: judge the
 * transition by the soft transition rules instead. */
1559 if (is_valid_state(mdev
, os
) == rv
)
1560 rv
= is_valid_soft_transition(os
, ns
);
1562 rv
= is_valid_soft_transition(os
, ns
);
1564 if (rv
< SS_SUCCESS
)
/* Report the failure for the volume it was detected on. */
1569 if (rv
< SS_SUCCESS
&& flags
& CS_VERBOSE
)
1570 print_st_err(mdev
, os
, ns
, rv
);
/**
 * conn_set_state() - Apply a state change to all volumes of a connection
 * @tconn:	DRBD connection.
 * @mask:	which state fields to change.
 * @val:	new values for the masked fields.
 * @pns_min:	minimum of the resulting states over all volumes;
 *		NOTE(review): presumably an output, the store is not visible
 *		here -- confirm.
 * @pns_max:	maximum of the resulting states over all volumes; same note.
 * @flags:	handed on to __drbd_set_state().
 *
 * Updates tconn->cstate when the whole conn field is masked, sets the new
 * state on every volume and accumulates the per-field minimum and maximum
 * of the states that are in effect afterwards.
 */
1576 conn_set_state(struct drbd_tconn
*tconn
, union drbd_state mask
, union drbd_state val
,
1577 union drbd_state
*pns_min
, union drbd_state
*pns_max
, enum chg_state_flags flags
)
1579 union drbd_state ns
, os
, ns_max
= {
1580 { .role
= R_SECONDARY
,
1586 union drbd_state ns_min
= {
1593 struct drbd_conf
*mdev
;
1594 enum drbd_state_rv rv
;
/* The connection state itself is part of this change. */
1597 if (mask
.conn
== C_MASK
)
1598 tconn
->cstate
= val
.conn
;
1601 idr_for_each_entry(&tconn
->volumes
, mdev
, vnr
) {
/* Same computation of the new state as in conn_is_valid_transition(). */
1602 os
= drbd_read_state(mdev
);
1603 ns
= apply_mask_val(os
, mask
, val
);
1604 ns
= sanitize_state(mdev
, ns
, NULL
);
1606 if (flags
& CS_IGN_OUTD_FAIL
&& ns
.disk
== D_OUTDATED
&& os
.disk
< D_OUTDATED
)
1609 rv
= __drbd_set_state(mdev
, ns
, flags
, NULL
);
1610 if (rv
< SS_SUCCESS
)
/* Continue with the state that is actually in effect now. */
1613 ns
.i
= mdev
->state
.i
;
/* Fold this volume into the running per-field maximum ... */
1614 ns_max
.role
= max_role(ns
.role
, ns_max
.role
);
1615 ns_max
.peer
= max_role(ns
.peer
, ns_max
.peer
);
1616 ns_max
.conn
= max_t(enum drbd_conns
, ns
.conn
, ns_max
.conn
);
1617 ns_max
.disk
= max_t(enum drbd_disk_state
, ns
.disk
, ns_max
.disk
);
1618 ns_max
.pdsk
= max_t(enum drbd_disk_state
, ns
.pdsk
, ns_max
.pdsk
);
/* ... and into the running per-field minimum. */
1620 ns_min
.role
= min_role(ns
.role
, ns_min
.role
);
1621 ns_min
.peer
= min_role(ns
.peer
, ns_min
.peer
);
1622 ns_min
.conn
= min_t(enum drbd_conns
, ns
.conn
, ns_min
.conn
);
1623 ns_min
.disk
= min_t(enum drbd_disk_state
, ns
.disk
, ns_min
.disk
);
1624 ns_min
.pdsk
= min_t(enum drbd_disk_state
, ns
.pdsk
, ns_min
.pdsk
);
/* The suspend bits are taken from the connection, identically for
 * minimum and maximum. */
1628 ns_min
.susp
= ns_max
.susp
= tconn
->susp
;
1629 ns_min
.susp_nod
= ns_max
.susp_nod
= tconn
->susp_nod
;
1630 ns_min
.susp_fen
= ns_max
.susp_fen
= tconn
->susp_fen
;
/**
 * _conn_rq_cond() - Wait condition of a cluster wide connection state change
 * @tconn:	DRBD connection.
 * @mask:	which state fields to change.
 * @val:	new values for the masked fields.
 *
 * Used as the condition of the wait_event() in conn_cl_wide().
 * SS_UNKNOWN_ERROR stands for "continue waiting" (see the comment at the
 * end); NOTE(review): this relies on SS_UNKNOWN_ERROR evaluating to 0 --
 * confirm against the enum definition.
 */
1636 static enum drbd_state_rv
1637 _conn_rq_cond(struct drbd_tconn
*tconn
, union drbd_state mask
, union drbd_state val
)
1639 enum drbd_state_rv rv
;
/* A verdict flag is pending: consume it, it decides the outcome. */
1641 if (test_and_clear_bit(CONN_WD_ST_CHG_OKAY
, &tconn
->flags
))
1642 return SS_CW_SUCCESS
;
1644 if (test_and_clear_bit(CONN_WD_ST_CHG_FAIL
, &tconn
->flags
))
1645 return SS_CW_FAILED_BY_PEER
;
/* No verdict yet.  Outside of C_WF_REPORT_PARAMS there is no need to
 * wait; otherwise check that the transition is still valid at all. */
1647 spin_lock_irq(&tconn
->req_lock
);
1648 rv
= tconn
->cstate
!= C_WF_REPORT_PARAMS
? SS_CW_NO_NEED
: SS_UNKNOWN_ERROR
;
1650 if (rv
== SS_UNKNOWN_ERROR
)
1651 rv
= conn_is_valid_transition(tconn
, mask
, val
, 0);
1653 if (rv
== SS_SUCCESS
)
1654 rv
= SS_UNKNOWN_ERROR
; /* cont waiting, otherwise fail. */
1656 spin_unlock_irq(&tconn
->req_lock
);
/**
 * conn_cl_wide() - Cluster wide connection state change
 * @tconn:	DRBD connection.
 * @mask:	which state fields to change.
 * @val:	new values for the masked fields.
 * @f:		state change flags; only referenced by the commented-out
 *		code below.
 *
 * Sends the state change request to the peer and waits on ping_wait until
 * _conn_rq_cond() reports an outcome.  Entered with req_lock held; the
 * lock is released for the duration and re-acquired at the end.
 */
1661 static enum drbd_state_rv
1662 conn_cl_wide(struct drbd_tconn
*tconn
, union drbd_state mask
, union drbd_state val
,
1663 enum chg_state_flags f
)
1665 enum drbd_state_rv rv
;
/* Trade the spinlock for cstate_mutex before the mutex and the
 * wait below. */
1667 spin_unlock_irq(&tconn
->req_lock
);
1668 mutex_lock(&tconn
->cstate_mutex
);
/* A non-zero result of conn_send_state_req() is treated as failure. */
1670 if (conn_send_state_req(tconn
, mask
, val
)) {
1671 rv
= SS_CW_FAILED_BY_PEER
;
1672 /* if (f & CS_VERBOSE)
1673 print_st_err(mdev, os, ns, rv); */
/* Wait until _conn_rq_cond() yields a non-zero result; rv keeps it. */
1677 wait_event(tconn
->ping_wait
, (rv
= _conn_rq_cond(tconn
, mask
, val
)));
/* Restore the locking state the caller expects. */
1680 mutex_unlock(&tconn
->cstate_mutex
);
1681 spin_lock_irq(&tconn
->req_lock
);
/**
 * _conn_request_state() - Request a connection state change, req_lock held
 * @tconn:	DRBD connection.
 * @mask:	which state fields to change.
 * @val:	new values for the masked fields.
 * @flags:	state change flags.
 *
 * The callers visible in this file invoke this with tconn->req_lock held.
 * Validates the change for the connection and all of its volumes,
 * optionally negotiates it with the peer, applies it, and queues a
 * struct after_conn_state_chg_work for the follow-up actions.
 */
1687 _conn_request_state(struct drbd_tconn
*tconn
, union drbd_state mask
, union drbd_state val
,
1688 enum chg_state_flags flags
)
1690 enum drbd_state_rv rv
= SS_SUCCESS
;
1691 struct after_conn_state_chg_work
*acscw
;
1692 enum drbd_conns oc
= tconn
->cstate
;
1693 union drbd_state ns_max
, ns_min
, os
;
/* First the connection state on its own, then every volume. */
1695 rv
= is_valid_conn_transition(oc
, val
.conn
);
1696 if (rv
< SS_SUCCESS
)
1699 rv
= conn_is_valid_transition(tconn
, mask
, val
, flags
);
1700 if (rv
< SS_SUCCESS
)
/* Disconnecting out of C_WF_REPORT_PARAMS is done cluster wide,
 * unless the request is local only or hard. */
1703 if (oc
== C_WF_REPORT_PARAMS
&& val
.conn
== C_DISCONNECTING
&&
1704 !(flags
& (CS_LOCAL_ONLY
| CS_HARD
))) {
1705 rv
= conn_cl_wide(tconn
, mask
, val
, flags
);
1706 if (rv
< SS_SUCCESS
)
/* Capture the old common state, apply the change, log it. */
1710 conn_old_common_state(tconn
, &os
, &flags
);
1711 flags
|= CS_DC_SUSP
;
1712 conn_set_state(tconn
, mask
, val
, &ns_min
, &ns_max
, flags
);
1713 conn_pr_state_change(tconn
, os
, ns_max
, flags
);
/* Allocated with GFP_ATOMIC; the visible callers hold req_lock with
 * interrupts disabled around this function. */
1715 acscw
= kmalloc(sizeof(*acscw
), GFP_ATOMIC
);
1717 acscw
->oc
= os
.conn
;
1718 acscw
->ns_min
= ns_min
;
1719 acscw
->ns_max
= ns_max
;
1720 acscw
->flags
= flags
;
1721 acscw
->w
.cb
= w_after_conn_state_ch
;
/* This reference is dropped by w_after_conn_state_ch(). */
1722 kref_get(&tconn
->kref
);
1723 acscw
->w
.tconn
= tconn
;
1724 drbd_queue_work(&tconn
->data
.work
, &acscw
->w
);
/* Allocation failure is only logged. */
1726 conn_err(tconn
, "Could not kmalloc an acscw\n");
/**
 * conn_request_state() - Request a connection state change
 * @tconn:	DRBD connection.
 * @mask:	which state fields to change.
 * @val:	new values for the masked fields.
 * @flags:	state change flags.
 *
 * Locking wrapper: runs _conn_request_state() with tconn->req_lock taken
 * and interrupts disabled.
 */
1734 conn_request_state(struct drbd_tconn
*tconn
, union drbd_state mask
, union drbd_state val
,
1735 enum chg_state_flags flags
)
1737 enum drbd_state_rv rv
;
1739 spin_lock_irq(&tconn
->req_lock
);
1740 rv
= _conn_request_state(tconn
, mask
, val
, flags
);
1741 spin_unlock_irq(&tconn
->req_lock
);