/*
   drbd_state.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   Thanks to Carter Burden, Bart Grantham and Gennadiy Nerubayev
   from Logicworks, Inc. for making SDP replication support possible.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 */
#include <linux/drbd_limits.h>
#include "drbd_int.h"
#include "drbd_req.h"

extern void tl_apply(struct drbd_conf *mdev, enum drbd_req_event what);
struct after_state_chg_work {
	struct drbd_work w;
	union drbd_state os;
	union drbd_state ns;
	enum chg_state_flags flags;
	struct completion *done;
};
enum sanitize_state_warnings {
	NO_WARNING,
	ABORTED_ONLINE_VERIFY,
	ABORTED_RESYNC,
	CONNECTION_LOST_NEGOTIATING,
	IMPLICITLY_UPGRADED_DISK,
	IMPLICITLY_UPGRADED_PDSK,
};
static int w_after_state_ch(struct drbd_work *w, int unused);
static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
			   union drbd_state ns, enum chg_state_flags flags);
static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state);
static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state);
static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_state ns);
static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns,
				       enum sanitize_state_warnings *warn);
static inline bool is_susp(union drbd_state s)
{
	return s.susp || s.susp_nod || s.susp_fen;
}
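
/*
 * union drbd_state (include/linux/drbd.h) packs role, peer, conn, disk,
 * pdsk and the susp* bits as bitfields sharing one 32 bit word that is
 * also reachable as ".i".  This is what makes the mask/val arithmetic in
 * apply_mask_val() below possible.
 */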
bool conn_all_vols_unconf(struct drbd_tconn *tconn)
{
	struct drbd_conf *mdev;
	bool rv = true;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
		if (mdev->state.disk != D_DISKLESS ||
		    mdev->state.conn != C_STANDALONE ||
		    mdev->state.role != R_SECONDARY) {
			rv = false;
			break;
		}
	}
	rcu_read_unlock();

	return rv;
}
/* Unfortunately the states were not correctly ordered when they were
   defined, therefore we cannot use max_t() here. */
static enum drbd_role max_role(enum drbd_role role1, enum drbd_role role2)
{
	if (role1 == R_PRIMARY || role2 == R_PRIMARY)
		return R_PRIMARY;
	if (role1 == R_SECONDARY || role2 == R_SECONDARY)
		return R_SECONDARY;
	return R_UNKNOWN;
}

static enum drbd_role min_role(enum drbd_role role1, enum drbd_role role2)
{
	if (role1 == R_UNKNOWN || role2 == R_UNKNOWN)
		return R_UNKNOWN;
	if (role1 == R_SECONDARY || role2 == R_SECONDARY)
		return R_SECONDARY;
	return R_PRIMARY;
}
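
/*
 * Example: with a plain max_t() over the raw enum values, R_SECONDARY would
 * compare higher than R_PRIMARY; max_role(R_SECONDARY, R_PRIMARY) instead
 * returns R_PRIMARY, which is the intended "highest" role.
 */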
enum drbd_role conn_highest_role(struct drbd_tconn *tconn)
{
	enum drbd_role role = R_UNKNOWN;
	struct drbd_conf *mdev;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&tconn->volumes, mdev, vnr)
		role = max_role(role, mdev->state.role);
	rcu_read_unlock();

	return role;
}
enum drbd_role conn_highest_peer(struct drbd_tconn *tconn)
{
	enum drbd_role peer = R_UNKNOWN;
	struct drbd_conf *mdev;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&tconn->volumes, mdev, vnr)
		peer = max_role(peer, mdev->state.peer);
	rcu_read_unlock();

	return peer;
}
enum drbd_disk_state conn_highest_disk(struct drbd_tconn *tconn)
{
	enum drbd_disk_state ds = D_DISKLESS;
	struct drbd_conf *mdev;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&tconn->volumes, mdev, vnr)
		ds = max_t(enum drbd_disk_state, ds, mdev->state.disk);
	rcu_read_unlock();

	return ds;
}
enum drbd_disk_state conn_lowest_disk(struct drbd_tconn *tconn)
{
	enum drbd_disk_state ds = D_MASK;
	struct drbd_conf *mdev;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&tconn->volumes, mdev, vnr)
		ds = min_t(enum drbd_disk_state, ds, mdev->state.disk);
	rcu_read_unlock();

	return ds;
}
enum drbd_disk_state conn_highest_pdsk(struct drbd_tconn *tconn)
{
	enum drbd_disk_state ds = D_DISKLESS;
	struct drbd_conf *mdev;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&tconn->volumes, mdev, vnr)
		ds = max_t(enum drbd_disk_state, ds, mdev->state.pdsk);
	rcu_read_unlock();

	return ds;
}
enum drbd_conns conn_lowest_conn(struct drbd_tconn *tconn)
{
	enum drbd_conns conn = C_MASK;
	struct drbd_conf *mdev;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&tconn->volumes, mdev, vnr)
		conn = min_t(enum drbd_conns, conn, mdev->state.conn);
	rcu_read_unlock();

	return conn;
}
/**
 * cl_wide_st_chg() - true if the state change is a cluster wide one
 * @mdev:	DRBD device.
 * @os:		old (current) state.
 * @ns:		new (wanted) state.
 */
static int cl_wide_st_chg(struct drbd_conf *mdev,
			  union drbd_state os, union drbd_state ns)
{
	return (os.conn >= C_CONNECTED && ns.conn >= C_CONNECTED &&
		 ((os.role != R_PRIMARY && ns.role == R_PRIMARY) ||
		  (os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
		  (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S) ||
		  (os.disk != D_DISKLESS && ns.disk == D_DISKLESS))) ||
		(os.conn >= C_CONNECTED && ns.conn == C_DISCONNECTING) ||
		(os.conn == C_CONNECTED && ns.conn == C_VERIFY_S);
}
static union drbd_state
apply_mask_val(union drbd_state os, union drbd_state mask, union drbd_state val)
{
	union drbd_state ns;
	ns.i = (os.i & ~mask.i) | val.i;
	return ns;
}
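
/*
 * Example: the NS() macro from drbd_int.h, used further down (e.g. in
 * abw_start_sync()), produces exactly such a mask/val pair for a single
 * field.  NS(conn, C_CONNECTED) is mask = { .conn = C_MASK } together with
 * val = { .conn = C_CONNECTED }; apply_mask_val() then rewrites only the
 * conn bits of the state word and leaves everything else untouched.
 */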
enum drbd_state_rv
drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f,
		  union drbd_state mask, union drbd_state val)
{
	unsigned long flags;
	union drbd_state ns;
	enum drbd_state_rv rv;

	spin_lock_irqsave(&mdev->tconn->req_lock, flags);
	ns = apply_mask_val(drbd_read_state(mdev), mask, val);
	rv = _drbd_set_state(mdev, ns, f, NULL);
	spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);

	return rv;
}
/**
 * drbd_force_state() - Impose a change which happens outside our control on our state
 * @mdev:	DRBD device.
 * @mask:	mask of state bits to change.
 * @val:	value of new state bits.
 */
void drbd_force_state(struct drbd_conf *mdev,
	union drbd_state mask, union drbd_state val)
{
	drbd_change_state(mdev, CS_HARD, mask, val);
}
static enum drbd_state_rv
_req_st_cond(struct drbd_conf *mdev, union drbd_state mask,
	     union drbd_state val)
{
	union drbd_state os, ns;
	unsigned long flags;
	enum drbd_state_rv rv;

	if (test_and_clear_bit(CL_ST_CHG_SUCCESS, &mdev->flags))
		return SS_CW_SUCCESS;

	if (test_and_clear_bit(CL_ST_CHG_FAIL, &mdev->flags))
		return SS_CW_FAILED_BY_PEER;

	spin_lock_irqsave(&mdev->tconn->req_lock, flags);
	os = drbd_read_state(mdev);
	ns = sanitize_state(mdev, apply_mask_val(os, mask, val), NULL);
	rv = is_valid_transition(os, ns);
	if (rv == SS_SUCCESS)
		rv = SS_UNKNOWN_ERROR;  /* cont waiting, otherwise fail. */

	if (!cl_wide_st_chg(mdev, os, ns))
		rv = SS_CW_NO_NEED;
	if (rv == SS_UNKNOWN_ERROR) {
		rv = is_valid_state(mdev, ns);
		if (rv == SS_SUCCESS) {
			rv = is_valid_soft_transition(os, ns);
			if (rv == SS_SUCCESS)
				rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */
		}
	}
	spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);

	return rv;
}
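
/*
 * _req_st_cond() doubles as the wait_event() condition in drbd_req_state():
 * SS_UNKNOWN_ERROR evaluates to 0, so returning it keeps the caller
 * sleeping; any other code (the peer's verdict, SS_CW_NO_NEED, or a
 * transition that became invalid) is non-zero and terminates the wait.
 */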
/**
 * drbd_req_state() - Perform an eventually cluster wide state change
 * @mdev:	DRBD device.
 * @mask:	mask of state bits to change.
 * @val:	value of new state bits.
 * @f:		flags
 *
 * Should not be called directly, use drbd_request_state() or
 * _drbd_request_state().
 */
static enum drbd_state_rv
drbd_req_state(struct drbd_conf *mdev, union drbd_state mask,
	       union drbd_state val, enum chg_state_flags f)
{
	struct completion done;
	unsigned long flags;
	union drbd_state os, ns;
	enum drbd_state_rv rv;

	init_completion(&done);

	if (f & CS_SERIALIZE)
		mutex_lock(mdev->state_mutex);

	spin_lock_irqsave(&mdev->tconn->req_lock, flags);
	os = drbd_read_state(mdev);
	ns = sanitize_state(mdev, apply_mask_val(os, mask, val), NULL);
	rv = is_valid_transition(os, ns);
	if (rv < SS_SUCCESS) {
		spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
		goto abort;
	}

	if (cl_wide_st_chg(mdev, os, ns)) {
		rv = is_valid_state(mdev, ns);
		if (rv == SS_SUCCESS)
			rv = is_valid_soft_transition(os, ns);
		spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);

		if (rv < SS_SUCCESS) {
			if (f & CS_VERBOSE)
				print_st_err(mdev, os, ns, rv);
			goto abort;
		}

		if (drbd_send_state_req(mdev, mask, val)) {
			rv = SS_CW_FAILED_BY_PEER;
			if (f & CS_VERBOSE)
				print_st_err(mdev, os, ns, rv);
			goto abort;
		}

		wait_event(mdev->state_wait,
			(rv = _req_st_cond(mdev, mask, val)));

		if (rv < SS_SUCCESS) {
			if (f & CS_VERBOSE)
				print_st_err(mdev, os, ns, rv);
			goto abort;
		}
		spin_lock_irqsave(&mdev->tconn->req_lock, flags);
		ns = apply_mask_val(drbd_read_state(mdev), mask, val);
		rv = _drbd_set_state(mdev, ns, f, &done);
	} else {
		rv = _drbd_set_state(mdev, ns, f, &done);
	}

	spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);

	if (f & CS_WAIT_COMPLETE && rv == SS_SUCCESS) {
		D_ASSERT(current != mdev->tconn->worker.task);
		wait_for_completion(&done);
	}

abort:
	if (f & CS_SERIALIZE)
		mutex_unlock(mdev->state_mutex);

	return rv;
}
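
/*
 * The cluster-wide branch above is effectively a two-phase commit:
 * propose the change to the peer with drbd_send_state_req(), sleep on
 * state_wait until _req_st_cond() reports the peer's verdict, and only
 * then apply the change locally via _drbd_set_state() under req_lock.
 */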
/**
 * _drbd_request_state() - Request a state change (with flags)
 * @mdev:	DRBD device.
 * @mask:	mask of state bits to change.
 * @val:	value of new state bits.
 * @f:		flags
 *
 * Cousin of drbd_request_state(), useful with the CS_WAIT_COMPLETE
 * flag, or when logging of failed state change requests is not desired.
 */
enum drbd_state_rv
_drbd_request_state(struct drbd_conf *mdev, union drbd_state mask,
		    union drbd_state val, enum chg_state_flags f)
{
	enum drbd_state_rv rv;

	wait_event(mdev->state_wait,
		   (rv = drbd_req_state(mdev, mask, val, f)) != SS_IN_TRANSIENT_STATE);

	return rv;
}
static void print_st(struct drbd_conf *mdev, char *name, union drbd_state ns)
{
	dev_err(DEV, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c%c%c }\n",
	    name,
	    drbd_conn_str(ns.conn),
	    drbd_role_str(ns.role),
	    drbd_role_str(ns.peer),
	    drbd_disk_str(ns.disk),
	    drbd_disk_str(ns.pdsk),
	    is_susp(ns) ? 's' : 'r',
	    ns.aftr_isp ? 'a' : '-',
	    ns.peer_isp ? 'p' : '-',
	    ns.user_isp ? 'u' : '-',
	    ns.susp_fen ? 'F' : '-',
	    ns.susp_nod ? 'N' : '-'
	    );
}
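
/*
 * This produces lines such as
 *   state = { cs:Connected ro:Primary/Secondary ds:UpToDate/UpToDate r----- }
 * where the six flag characters stand for susp, aftr_isp, peer_isp,
 * user_isp, susp_fen and susp_nod, in that order.
 */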
void print_st_err(struct drbd_conf *mdev, union drbd_state os,
	          union drbd_state ns, enum drbd_state_rv err)
{
	if (err == SS_IN_TRANSIENT_STATE)
		return;
	dev_err(DEV, "State change failed: %s\n", drbd_set_st_err_str(err));
	print_st(mdev, " state", os);
	print_st(mdev, "wanted", ns);
}
static long print_state_change(char *pb, union drbd_state os, union drbd_state ns,
			       enum chg_state_flags flags)
{
	char *pbp;
	pbp = pb;
	*pbp = 0;

	if (ns.role != os.role && flags & CS_DC_ROLE)
		pbp += sprintf(pbp, "role( %s -> %s ) ",
			       drbd_role_str(os.role),
			       drbd_role_str(ns.role));
	if (ns.peer != os.peer && flags & CS_DC_PEER)
		pbp += sprintf(pbp, "peer( %s -> %s ) ",
			       drbd_role_str(os.peer),
			       drbd_role_str(ns.peer));
	if (ns.conn != os.conn && flags & CS_DC_CONN)
		pbp += sprintf(pbp, "conn( %s -> %s ) ",
			       drbd_conn_str(os.conn),
			       drbd_conn_str(ns.conn));
	if (ns.disk != os.disk && flags & CS_DC_DISK)
		pbp += sprintf(pbp, "disk( %s -> %s ) ",
			       drbd_disk_str(os.disk),
			       drbd_disk_str(ns.disk));
	if (ns.pdsk != os.pdsk && flags & CS_DC_PDSK)
		pbp += sprintf(pbp, "pdsk( %s -> %s ) ",
			       drbd_disk_str(os.pdsk),
			       drbd_disk_str(ns.pdsk));

	return pbp - pb;
}
static void drbd_pr_state_change(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns,
				 enum chg_state_flags flags)
{
	char pb[300];
	char *pbp = pb;

	pbp += print_state_change(pbp, os, ns, flags ^ CS_DC_MASK);

	if (ns.aftr_isp != os.aftr_isp)
		pbp += sprintf(pbp, "aftr_isp( %d -> %d ) ",
			       os.aftr_isp,
			       ns.aftr_isp);
	if (ns.peer_isp != os.peer_isp)
		pbp += sprintf(pbp, "peer_isp( %d -> %d ) ",
			       os.peer_isp,
			       ns.peer_isp);
	if (ns.user_isp != os.user_isp)
		pbp += sprintf(pbp, "user_isp( %d -> %d ) ",
			       os.user_isp,
			       ns.user_isp);

	if (pbp != pb)
		dev_info(DEV, "%s\n", pb);
}
static void conn_pr_state_change(struct drbd_tconn *tconn, union drbd_state os, union drbd_state ns,
				 enum chg_state_flags flags)
{
	char pb[300];
	char *pbp = pb;

	pbp += print_state_change(pbp, os, ns, flags);

	if (is_susp(ns) != is_susp(os) && flags & CS_DC_SUSP)
		pbp += sprintf(pbp, "susp( %d -> %d ) ",
			       is_susp(os),
			       is_susp(ns));

	if (pbp != pb)
		conn_info(tconn, "%s\n", pb);
}
/**
 * is_valid_state() - Returns an SS_ error code if ns is not valid
 * @mdev:	DRBD device.
 * @ns:		State to consider.
 */
static enum drbd_state_rv
is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
{
	/* See drbd_state_sw_errors in drbd_strings.c */

	enum drbd_fencing_p fp;
	enum drbd_state_rv rv = SS_SUCCESS;
	struct net_conf *nc;

	rcu_read_lock();
	fp = FP_DONT_CARE;
	if (get_ldev(mdev)) {
		fp = rcu_dereference(mdev->ldev->disk_conf)->fencing;
		put_ldev(mdev);
	}

	nc = rcu_dereference(mdev->tconn->net_conf);
	if (nc) {
		if (!nc->two_primaries && ns.role == R_PRIMARY) {
			if (ns.peer == R_PRIMARY)
				rv = SS_TWO_PRIMARIES;
			else if (conn_highest_peer(mdev->tconn) == R_PRIMARY)
				rv = SS_O_VOL_PEER_PRI;
		}
	}

	if (rv <= 0)
		/* already found a reason to abort */;
	else if (ns.role == R_SECONDARY && mdev->open_cnt)
		rv = SS_DEVICE_IN_USE;

	else if (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.disk < D_UP_TO_DATE)
		rv = SS_NO_UP_TO_DATE_DISK;

	else if (fp >= FP_RESOURCE &&
		 ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk >= D_UNKNOWN)
		rv = SS_PRIMARY_NOP;

	else if (ns.role == R_PRIMARY && ns.disk <= D_INCONSISTENT && ns.pdsk <= D_INCONSISTENT)
		rv = SS_NO_UP_TO_DATE_DISK;

	else if (ns.conn > C_CONNECTED && ns.disk < D_INCONSISTENT)
		rv = SS_NO_LOCAL_DISK;

	else if (ns.conn > C_CONNECTED && ns.pdsk < D_INCONSISTENT)
		rv = SS_NO_REMOTE_DISK;

	else if (ns.conn > C_CONNECTED && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)
		rv = SS_NO_UP_TO_DATE_DISK;

	else if ((ns.conn == C_CONNECTED ||
		  ns.conn == C_WF_BITMAP_S ||
		  ns.conn == C_SYNC_SOURCE ||
		  ns.conn == C_PAUSED_SYNC_S) &&
		  ns.disk == D_OUTDATED)
		rv = SS_CONNECTED_OUTDATES;

	else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
		 (nc->verify_alg[0] == 0))
		rv = SS_NO_VERIFY_ALG;

	else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
		  mdev->tconn->agreed_pro_version < 88)
		rv = SS_NOT_SUPPORTED;

	else if (ns.conn >= C_CONNECTED && ns.pdsk == D_UNKNOWN)
		rv = SS_CONNECTED_OUTDATES;

	rcu_read_unlock();

	return rv;
}
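
/*
 * Return value convention used throughout this file: SS_SUCCESS is
 * positive, SS_UNKNOWN_ERROR is 0, and the concrete SS_* failure codes
 * are negative; callers therefore test "rv < SS_SUCCESS" for failure.
 */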
/**
 * is_valid_soft_transition() - Returns an SS_ error code if the state transition is not possible
 * This function limits state transitions that may be declined by DRBD. I.e.
 * user requests (aka soft transitions).
 * @mdev:	DRBD device.
 * @ns:		new state.
 * @os:		old state.
 */
static enum drbd_state_rv
is_valid_soft_transition(union drbd_state os, union drbd_state ns)
{
	enum drbd_state_rv rv = SS_SUCCESS;

	if ((ns.conn == C_STARTING_SYNC_T || ns.conn == C_STARTING_SYNC_S) &&
	    os.conn > C_CONNECTED)
		rv = SS_RESYNC_RUNNING;

	if (ns.conn == C_DISCONNECTING && os.conn == C_STANDALONE)
		rv = SS_ALREADY_STANDALONE;

	if (ns.disk > D_ATTACHING && os.disk == D_DISKLESS)
		rv = SS_IS_DISKLESS;

	if (ns.conn == C_WF_CONNECTION && os.conn < C_UNCONNECTED)
		rv = SS_NO_NET_CONFIG;

	if (ns.disk == D_OUTDATED && os.disk < D_OUTDATED && os.disk != D_ATTACHING)
		rv = SS_LOWER_THAN_OUTDATED;

	if (ns.conn == C_DISCONNECTING && os.conn == C_UNCONNECTED)
		rv = SS_IN_TRANSIENT_STATE;

	/* if (ns.conn == os.conn && ns.conn == C_WF_REPORT_PARAMS)
	   rv = SS_IN_TRANSIENT_STATE; */

	if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED)
		rv = SS_NEED_CONNECTION;

	if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
	    ns.conn != os.conn && os.conn > C_CONNECTED)
		rv = SS_RESYNC_RUNNING;

	if ((ns.conn == C_STARTING_SYNC_S || ns.conn == C_STARTING_SYNC_T) &&
	    os.conn < C_CONNECTED)
		rv = SS_NEED_CONNECTION;

	if ((ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE)
	    && os.conn < C_WF_REPORT_PARAMS)
		rv = SS_NEED_CONNECTION; /* No NetworkFailure -> SyncTarget etc... */

	return rv;
}
static enum drbd_state_rv
is_valid_conn_transition(enum drbd_conns oc, enum drbd_conns nc)
{
	/* no change -> nothing to do, at least for the connection part */
	if (oc == nc)
		return SS_NOTHING_TO_DO;

	/* disconnect of an unconfigured connection does not make sense */
	if (oc == C_STANDALONE && nc == C_DISCONNECTING)
		return SS_ALREADY_STANDALONE;

	/* from C_STANDALONE, we start with C_UNCONNECTED */
	if (oc == C_STANDALONE && nc != C_UNCONNECTED)
		return SS_NEED_CONNECTION;

	/* After a network error only C_UNCONNECTED or C_DISCONNECTING may follow. */
	if (oc >= C_TIMEOUT && oc <= C_TEAR_DOWN && nc != C_UNCONNECTED && nc != C_DISCONNECTING)
		return SS_IN_TRANSIENT_STATE;

	/* After C_DISCONNECTING only C_STANDALONE may follow */
	if (oc == C_DISCONNECTING && nc != C_STANDALONE)
		return SS_IN_TRANSIENT_STATE;

	return SS_SUCCESS;
}
/**
 * is_valid_transition() - Returns an SS_ error code if the state transition is not possible
 * This limits hard state transitions. Hard state transitions are facts that are
 * imposed on DRBD by the environment. E.g. disk broke or network broke down.
 * But those hard state transitions are still not allowed to do everything.
 * @ns:		new state.
 * @os:		old state.
 */
static enum drbd_state_rv
is_valid_transition(union drbd_state os, union drbd_state ns)
{
	enum drbd_state_rv rv;

	rv = is_valid_conn_transition(os.conn, ns.conn);

	/* we cannot fail (again) if we already detached */
	if (ns.disk == D_FAILED && os.disk == D_DISKLESS)
		rv = SS_IS_DISKLESS;

	return rv;
}
static void print_sanitize_warnings(struct drbd_conf *mdev, enum sanitize_state_warnings warn)
{
	static const char *msg_table[] = {
		[NO_WARNING] = "",
		[ABORTED_ONLINE_VERIFY] = "Online-verify aborted.",
		[ABORTED_RESYNC] = "Resync aborted.",
		[CONNECTION_LOST_NEGOTIATING] = "Connection lost while negotiating, no data!",
		[IMPLICITLY_UPGRADED_DISK] = "Implicitly upgraded disk",
		[IMPLICITLY_UPGRADED_PDSK] = "Implicitly upgraded pdsk",
	};

	if (warn != NO_WARNING)
		dev_warn(DEV, "%s\n", msg_table[warn]);
}
/**
 * sanitize_state() - Resolves implicitly necessary additional changes to a state transition
 * @mdev:	DRBD device.
 * @ns:		new state.
 * @warn:	placed warnings, if any.
 *
 * When we lose connection, we have to set the state of the peer's disk (pdsk)
 * to D_UNKNOWN. This rule and many more along those lines are in this function.
 */
static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns,
				       enum sanitize_state_warnings *warn)
{
	enum drbd_fencing_p fp;
	enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max;

	if (warn)
		*warn = NO_WARNING;

	fp = FP_DONT_CARE;
	if (get_ldev(mdev)) {
		rcu_read_lock();
		fp = rcu_dereference(mdev->ldev->disk_conf)->fencing;
		rcu_read_unlock();
		put_ldev(mdev);
	}

	/* Implications from connection to peer and peer_isp */
	if (ns.conn < C_CONNECTED) {
		ns.peer_isp = 0;
		ns.peer = R_UNKNOWN;
		if (ns.pdsk > D_UNKNOWN || ns.pdsk < D_INCONSISTENT)
			ns.pdsk = D_UNKNOWN;
	}

	/* Clear the aftr_isp when becoming unconfigured */
	if (ns.conn == C_STANDALONE && ns.disk == D_DISKLESS && ns.role == R_SECONDARY)
		ns.aftr_isp = 0;

	/* An implication of the disk states onto the connection state */
	/* Abort resync if a disk fails/detaches */
	if (ns.conn > C_CONNECTED && (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) {
		if (warn)
			*warn = ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T ?
				ABORTED_ONLINE_VERIFY : ABORTED_RESYNC;
		ns.conn = C_CONNECTED;
	}

	/* Connection breaks down before we finished "Negotiating" */
	if (ns.conn < C_CONNECTED && ns.disk == D_NEGOTIATING &&
	    get_ldev_if_state(mdev, D_NEGOTIATING)) {
		if (mdev->ed_uuid == mdev->ldev->md.uuid[UI_CURRENT]) {
			ns.disk = mdev->new_state_tmp.disk;
			ns.pdsk = mdev->new_state_tmp.pdsk;
		} else {
			if (warn)
				*warn = CONNECTION_LOST_NEGOTIATING;
			ns.disk = D_DISKLESS;
			ns.pdsk = D_UNKNOWN;
		}
		put_ldev(mdev);
	}

	/* D_CONSISTENT and D_OUTDATED vanish when we get connected */
	if (ns.conn >= C_CONNECTED && ns.conn < C_AHEAD) {
		if (ns.disk == D_CONSISTENT || ns.disk == D_OUTDATED)
			ns.disk = D_UP_TO_DATE;
		if (ns.pdsk == D_CONSISTENT || ns.pdsk == D_OUTDATED)
			ns.pdsk = D_UP_TO_DATE;
	}

	/* Implications of the connection state on the disk states */
	disk_min = D_DISKLESS;
	disk_max = D_UP_TO_DATE;
	pdsk_min = D_INCONSISTENT;
	pdsk_max = D_UNKNOWN;
	switch ((enum drbd_conns)ns.conn) {
	case C_WF_BITMAP_T:
	case C_PAUSED_SYNC_T:
	case C_STARTING_SYNC_T:
	case C_WF_SYNC_UUID:
	case C_BEHIND:
		disk_min = D_INCONSISTENT;
		disk_max = D_OUTDATED;
		pdsk_min = D_UP_TO_DATE;
		pdsk_max = D_UP_TO_DATE;
		break;
	case C_VERIFY_S:
	case C_VERIFY_T:
		disk_min = D_UP_TO_DATE;
		disk_max = D_UP_TO_DATE;
		pdsk_min = D_UP_TO_DATE;
		pdsk_max = D_UP_TO_DATE;
		break;
	case C_CONNECTED:
		disk_min = D_DISKLESS;
		disk_max = D_UP_TO_DATE;
		pdsk_min = D_DISKLESS;
		pdsk_max = D_UP_TO_DATE;
		break;
	case C_WF_BITMAP_S:
	case C_PAUSED_SYNC_S:
	case C_STARTING_SYNC_S:
	case C_AHEAD:
		disk_min = D_UP_TO_DATE;
		disk_max = D_UP_TO_DATE;
		pdsk_min = D_INCONSISTENT;
		pdsk_max = D_CONSISTENT; /* D_OUTDATED would be nice. But explicit outdate necessary*/
		break;
	case C_SYNC_TARGET:
		disk_min = D_INCONSISTENT;
		disk_max = D_INCONSISTENT;
		pdsk_min = D_UP_TO_DATE;
		pdsk_max = D_UP_TO_DATE;
		break;
	case C_SYNC_SOURCE:
		disk_min = D_UP_TO_DATE;
		disk_max = D_UP_TO_DATE;
		pdsk_min = D_INCONSISTENT;
		pdsk_max = D_INCONSISTENT;
		break;
	case C_STANDALONE:
	case C_DISCONNECTING:
	case C_UNCONNECTED:
	case C_TIMEOUT:
	case C_BROKEN_PIPE:
	case C_NETWORK_FAILURE:
	case C_PROTOCOL_ERROR:
	case C_TEAR_DOWN:
	case C_WF_CONNECTION:
	case C_WF_REPORT_PARAMS:
	case C_MASK:
		break;
	}
	if (ns.disk > disk_max)
		ns.disk = disk_max;

	if (ns.disk < disk_min) {
		if (warn)
			*warn = IMPLICITLY_UPGRADED_DISK;
		ns.disk = disk_min;
	}
	if (ns.pdsk > pdsk_max)
		ns.pdsk = pdsk_max;

	if (ns.pdsk < pdsk_min) {
		if (warn)
			*warn = IMPLICITLY_UPGRADED_PDSK;
		ns.pdsk = pdsk_min;
	}

	if (fp == FP_STONITH &&
	    (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED))
		ns.susp_fen = 1; /* Suspend IO while fence-peer handler runs (peer lost) */

	if (mdev->tconn->res_opts.on_no_data == OND_SUSPEND_IO &&
	    (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE))
		ns.susp_nod = 1; /* Suspend IO while no data available (no accessible data available) */

	if (ns.aftr_isp || ns.peer_isp || ns.user_isp) {
		if (ns.conn == C_SYNC_SOURCE)
			ns.conn = C_PAUSED_SYNC_S;
		if (ns.conn == C_SYNC_TARGET)
			ns.conn = C_PAUSED_SYNC_T;
	} else {
		if (ns.conn == C_PAUSED_SYNC_S)
			ns.conn = C_SYNC_SOURCE;
		if (ns.conn == C_PAUSED_SYNC_T)
			ns.conn = C_SYNC_TARGET;
	}

	return ns;
}
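
/*
 * Example of the clamping above: a request that would end with
 * conn == C_SYNC_TARGET but disk == D_UP_TO_DATE gets its disk state
 * silently reduced to D_INCONSISTENT (disk_max for a sync target);
 * only the implicit *upgrades* of disk/pdsk trigger a warning.
 */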
void drbd_resume_al(struct drbd_conf *mdev)
{
	if (test_and_clear_bit(AL_SUSPENDED, &mdev->flags))
		dev_info(DEV, "Resumed AL updates\n");
}
/* helper for __drbd_set_state */
static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs)
{
	if (mdev->tconn->agreed_pro_version < 90)
		mdev->ov_start_sector = 0;
	mdev->rs_total = drbd_bm_bits(mdev);
	mdev->ov_position = 0;
	if (cs == C_VERIFY_T) {
		/* starting online verify from an arbitrary position
		 * does not fit well into the existing protocol.
		 * on C_VERIFY_T, we initialize ov_left and friends
		 * implicitly in receive_DataRequest once the
		 * first P_OV_REQUEST is received */
		mdev->ov_start_sector = ~(sector_t)0;
	} else {
		unsigned long bit = BM_SECT_TO_BIT(mdev->ov_start_sector);
		if (bit >= mdev->rs_total) {
			mdev->ov_start_sector =
				BM_BIT_TO_SECT(mdev->rs_total - 1);
			mdev->rs_total = 1;
		} else
			mdev->rs_total -= bit;
		mdev->ov_position = mdev->ov_start_sector;
	}
	mdev->ov_left = mdev->rs_total;
}
/**
 * __drbd_set_state() - Set a new DRBD state
 * @mdev:	DRBD device.
 * @ns:		new state.
 * @flags:	Flags
 * @done:	Optional completion, that will get completed after the after_state_ch() finished
 *
 * Caller needs to hold req_lock, and global_state_lock. Do not call directly.
 */
enum drbd_state_rv
__drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
	         enum chg_state_flags flags, struct completion *done)
{
	union drbd_state os;
	enum drbd_state_rv rv = SS_SUCCESS;
	enum sanitize_state_warnings ssw;
	struct after_state_chg_work *ascw;

	os = drbd_read_state(mdev);

	ns = sanitize_state(mdev, ns, &ssw);
	if (ns.i == os.i)
		return SS_NOTHING_TO_DO;

	rv = is_valid_transition(os, ns);
	if (rv < SS_SUCCESS)
		return rv;

	if (!(flags & CS_HARD)) {
		/*  pre-state-change checks ; only look at ns  */
		/* See drbd_state_sw_errors in drbd_strings.c */

		rv = is_valid_state(mdev, ns);
		if (rv < SS_SUCCESS) {
			/* If the old state was illegal as well, then let
			   this happen...*/

			if (is_valid_state(mdev, os) == rv)
				rv = is_valid_soft_transition(os, ns);
		} else
			rv = is_valid_soft_transition(os, ns);
	}

	if (rv < SS_SUCCESS) {
		if (flags & CS_VERBOSE)
			print_st_err(mdev, os, ns, rv);
		return rv;
	}

	print_sanitize_warnings(mdev, ssw);

	drbd_pr_state_change(mdev, os, ns, flags);

	/* Display changes to the susp* flags that were caused by the call to
	   sanitize_state(). Only display it here if we were not called from
	   _conn_request_state() */
	if (!(flags & CS_DC_SUSP))
		conn_pr_state_change(mdev->tconn, os, ns, (flags & ~CS_DC_MASK) | CS_DC_SUSP);

	/* if we are going -> D_FAILED or D_DISKLESS, grab one extra reference
	 * on the ldev here, to be sure the transition -> D_DISKLESS resp.
	 * drbd_ldev_destroy() won't happen before our corresponding
	 * after_state_ch works run, where we put_ldev again. */
	if ((os.disk != D_FAILED && ns.disk == D_FAILED) ||
	    (os.disk != D_DISKLESS && ns.disk == D_DISKLESS))
		atomic_inc(&mdev->local_cnt);

	mdev->state.i = ns.i;
	mdev->tconn->susp = ns.susp;
	mdev->tconn->susp_nod = ns.susp_nod;
	mdev->tconn->susp_fen = ns.susp_fen;

	if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING)
		drbd_print_uuids(mdev, "attached to UUIDs");

	wake_up(&mdev->misc_wait);
	wake_up(&mdev->state_wait);
	wake_up(&mdev->tconn->ping_wait);

	/* aborted verify run. log the last position */
	if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) &&
	    ns.conn < C_CONNECTED) {
		mdev->ov_start_sector =
			BM_BIT_TO_SECT(drbd_bm_bits(mdev) - mdev->ov_left);
		dev_info(DEV, "Online Verify reached sector %llu\n",
			(unsigned long long)mdev->ov_start_sector);
	}

	if ((os.conn == C_PAUSED_SYNC_T || os.conn == C_PAUSED_SYNC_S) &&
	    (ns.conn == C_SYNC_TARGET  || ns.conn == C_SYNC_SOURCE)) {
		dev_info(DEV, "Syncer continues.\n");
		mdev->rs_paused += (long)jiffies
				  -(long)mdev->rs_mark_time[mdev->rs_last_mark];
		if (ns.conn == C_SYNC_TARGET)
			mod_timer(&mdev->resync_timer, jiffies);
	}

	if ((os.conn == C_SYNC_TARGET  || os.conn == C_SYNC_SOURCE) &&
	    (ns.conn == C_PAUSED_SYNC_T || ns.conn == C_PAUSED_SYNC_S)) {
		dev_info(DEV, "Resync suspended\n");
		mdev->rs_mark_time[mdev->rs_last_mark] = jiffies;
	}

	if (os.conn == C_CONNECTED &&
	    (ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T)) {
		unsigned long now = jiffies;
		int i;

		set_ov_position(mdev, ns.conn);
		mdev->rs_start = now;
		mdev->rs_last_events = 0;
		mdev->rs_last_sect_ev = 0;
		mdev->ov_last_oos_size = 0;
		mdev->ov_last_oos_start = 0;

		for (i = 0; i < DRBD_SYNC_MARKS; i++) {
			mdev->rs_mark_left[i] = mdev->ov_left;
			mdev->rs_mark_time[i] = now;
		}

		drbd_rs_controller_reset(mdev);

		if (ns.conn == C_VERIFY_S) {
			dev_info(DEV, "Starting Online Verify from sector %llu\n",
					(unsigned long long)mdev->ov_position);
			mod_timer(&mdev->resync_timer, jiffies);
		}
	}

	if (get_ldev(mdev)) {
		u32 mdf = mdev->ldev->md.flags & ~(MDF_CONSISTENT|MDF_PRIMARY_IND|
						 MDF_CONNECTED_IND|MDF_WAS_UP_TO_DATE|
						 MDF_PEER_OUT_DATED|MDF_CRASHED_PRIMARY);

		if (test_bit(CRASHED_PRIMARY, &mdev->flags))
			mdf |= MDF_CRASHED_PRIMARY;
		if (mdev->state.role == R_PRIMARY ||
		    (mdev->state.pdsk < D_INCONSISTENT && mdev->state.peer == R_PRIMARY))
			mdf |= MDF_PRIMARY_IND;
		if (mdev->state.conn > C_WF_REPORT_PARAMS)
			mdf |= MDF_CONNECTED_IND;
		if (mdev->state.disk > D_INCONSISTENT)
			mdf |= MDF_CONSISTENT;
		if (mdev->state.disk > D_OUTDATED)
			mdf |= MDF_WAS_UP_TO_DATE;
		if (mdev->state.pdsk <= D_OUTDATED && mdev->state.pdsk >= D_INCONSISTENT)
			mdf |= MDF_PEER_OUT_DATED;
		if (mdf != mdev->ldev->md.flags) {
			mdev->ldev->md.flags = mdf;
			drbd_md_mark_dirty(mdev);
		}
		if (os.disk < D_CONSISTENT && ns.disk >= D_CONSISTENT)
			drbd_set_ed_uuid(mdev, mdev->ldev->md.uuid[UI_CURRENT]);
		put_ldev(mdev);
	}

	/* Peer was forced D_UP_TO_DATE & R_PRIMARY, consider to resync */
	if (os.disk == D_INCONSISTENT && os.pdsk == D_INCONSISTENT &&
	    os.peer == R_SECONDARY && ns.peer == R_PRIMARY)
		set_bit(CONSIDER_RESYNC, &mdev->flags);

	/* Receiver should clean up itself */
	if (os.conn != C_DISCONNECTING && ns.conn == C_DISCONNECTING)
		drbd_thread_stop_nowait(&mdev->tconn->receiver);

	/* Now the receiver finished cleaning up itself, it should die */
	if (os.conn != C_STANDALONE && ns.conn == C_STANDALONE)
		drbd_thread_stop_nowait(&mdev->tconn->receiver);

	/* Upon network failure, we need to restart the receiver. */
	if (os.conn > C_TEAR_DOWN &&
	    ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT)
		drbd_thread_restart_nowait(&mdev->tconn->receiver);

	/* Resume AL writing if we get a connection */
	if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
		drbd_resume_al(mdev);

	ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC);
	if (ascw) {
		ascw->os = os;
		ascw->ns = ns;
		ascw->flags = flags;
		ascw->w.cb = w_after_state_ch;
		ascw->w.mdev = mdev;
		ascw->done = done;
		drbd_queue_work(&mdev->tconn->data.work, &ascw->w);
	} else {
		dev_err(DEV, "Could not kmalloc an ascw\n");
	}

	return rv;
}
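
/*
 * In short, the commit sequence above is: sanitize the requested state,
 * validate the transition (hard checks always, soft checks unless CS_HARD),
 * write the new state word and susp* bits, update the on-disk meta-data
 * flags, and queue an after_state_chg_work so that after_state_ch() can
 * run the actions that may sleep from worker context.
 */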
static int w_after_state_ch(struct drbd_work *w, int unused)
{
	struct after_state_chg_work *ascw =
		container_of(w, struct after_state_chg_work, w);
	struct drbd_conf *mdev = w->mdev;

	after_state_ch(mdev, ascw->os, ascw->ns, ascw->flags);
	if (ascw->flags & CS_WAIT_COMPLETE) {
		D_ASSERT(ascw->done != NULL);
		complete(ascw->done);
	}
	kfree(ascw);

	return 0;
}
static void abw_start_sync(struct drbd_conf *mdev, int rv)
{
	if (rv) {
		dev_err(DEV, "Writing the bitmap failed not starting resync.\n");
		_drbd_request_state(mdev, NS(conn, C_CONNECTED), CS_VERBOSE);
		return;
	}

	switch (mdev->state.conn) {
	case C_STARTING_SYNC_T:
		_drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
		break;
	case C_STARTING_SYNC_S:
		drbd_start_resync(mdev, C_SYNC_SOURCE);
		break;
	}
}
int drbd_bitmap_io_from_worker(struct drbd_conf *mdev,
		int (*io_fn)(struct drbd_conf *),
		char *why, enum bm_flag flags)
{
	int rv;

	D_ASSERT(current == mdev->tconn->worker.task);

	/* open coded non-blocking drbd_suspend_io(mdev); */
	set_bit(SUSPEND_IO, &mdev->flags);

	drbd_bm_lock(mdev, why, flags);
	rv = io_fn(mdev);
	drbd_bm_unlock(mdev);

	drbd_resume_io(mdev);

	return rv;
}
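
/*
 * Typical use, as in after_state_ch() below:
 *
 *	drbd_bitmap_io_from_worker(mdev, &drbd_bm_write,
 *		"demote", BM_LOCKED_TEST_ALLOWED);
 *
 * i.e. synchronously write out the bitmap from worker context while
 * application IO is suspended for the duration of the operation.
 */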
/**
 * after_state_ch() - Perform after state change actions that may sleep
 * @mdev:	DRBD device.
 * @os:		old state.
 * @ns:		new state.
 * @flags:	Flags
 */
static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
			   union drbd_state ns, enum chg_state_flags flags)
{
	enum drbd_fencing_p fp;
	struct sib_info sib;

	sib.sib_reason = SIB_STATE_CHANGE;
	sib.os = os;
	sib.ns = ns;

	if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) {
		clear_bit(CRASHED_PRIMARY, &mdev->flags);
		if (mdev->p_uuid)
			mdev->p_uuid[UI_FLAGS] &= ~((u64)2);
	}

	fp = FP_DONT_CARE;
	if (get_ldev(mdev)) {
		rcu_read_lock();
		fp = rcu_dereference(mdev->ldev->disk_conf)->fencing;
		rcu_read_unlock();
		put_ldev(mdev);
	}

	/* Inform userspace about the change... */
	drbd_bcast_event(mdev, &sib);

	if (!(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE) &&
	    (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE))
		drbd_khelper(mdev, "pri-on-incon-degr");

	/* Here we have the actions that are performed after a
	   state change. This function might sleep */

	if (ns.susp_nod) {
		enum drbd_req_event what = NOTHING;

		if (os.conn < C_CONNECTED && conn_lowest_conn(mdev->tconn) >= C_CONNECTED)
			what = RESEND;

		if (os.disk == D_ATTACHING && conn_lowest_disk(mdev->tconn) > D_ATTACHING)
			what = RESTART_FROZEN_DISK_IO;

		if (what != NOTHING) {
			spin_lock_irq(&mdev->tconn->req_lock);
			_tl_restart(mdev->tconn, what);
			_drbd_set_state(_NS(mdev, susp_nod, 0), CS_VERBOSE, NULL);
			spin_unlock_irq(&mdev->tconn->req_lock);
		}
	}

	/* Became sync source.  With protocol >= 96, we still need to send out
	 * the sync uuid now. Need to do that before any drbd_send_state, or
	 * the other side may go "paused sync" before receiving the sync uuids,
	 * which is unexpected. */
	if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) &&
	    (ns.conn == C_SYNC_SOURCE || ns.conn == C_PAUSED_SYNC_S) &&
	    mdev->tconn->agreed_pro_version >= 96 && get_ldev(mdev)) {
		drbd_gen_and_send_sync_uuid(mdev);
		put_ldev(mdev);
	}

	/* Do not change the order of the if above and the two below... */
	if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) {      /* attach on the peer */
		drbd_send_uuids(mdev);
		drbd_send_state(mdev);
	}
	/* No point in queuing send_bitmap if we don't have a connection
	 * anymore, so check also the _current_ state, not only the new state
	 * at the time this work was queued. */
	if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S &&
	    mdev->state.conn == C_WF_BITMAP_S)
		drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL,
				"send_bitmap (WFBitMapS)",
				BM_LOCKED_TEST_ALLOWED);

	/* Lost contact to peer's copy of the data */
	if ((os.pdsk >= D_INCONSISTENT &&
	     os.pdsk != D_UNKNOWN &&
	     os.pdsk != D_OUTDATED)
	&&  (ns.pdsk < D_INCONSISTENT ||
	     ns.pdsk == D_UNKNOWN ||
	     ns.pdsk == D_OUTDATED)) {
		if (get_ldev(mdev)) {
			if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) &&
			    mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
				if (drbd_suspended(mdev)) {
					set_bit(NEW_CUR_UUID, &mdev->flags);
				} else {
					drbd_uuid_new_current(mdev);
					drbd_send_uuids(mdev);
				}
			}
			put_ldev(mdev);
		}
	}

	if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) {
		if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0) {
			drbd_uuid_new_current(mdev);
			drbd_send_uuids(mdev);
		}
		/* D_DISKLESS Peer becomes secondary */
		if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)
			/* We may still be Primary ourselves.
			 * No harm done if the bitmap still changes,
			 * redirtied pages will follow later. */
			drbd_bitmap_io_from_worker(mdev, &drbd_bm_write,
				"demote diskless peer", BM_LOCKED_SET_ALLOWED);
		put_ldev(mdev);
	}

	/* Write out all changed bits on demote.
	 * Though, no need to do that just yet
	 * if there is a resync going on still */
	if (os.role == R_PRIMARY && ns.role == R_SECONDARY &&
		mdev->state.conn <= C_CONNECTED && get_ldev(mdev)) {
		/* No changes to the bitmap expected this time, so assert that,
		 * even though no harm was done if it did change. */
		drbd_bitmap_io_from_worker(mdev, &drbd_bm_write,
				"demote", BM_LOCKED_TEST_ALLOWED);
		put_ldev(mdev);
	}

	/* Last part of the attaching process ... */
	if (ns.conn >= C_CONNECTED &&
	    os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) {
		drbd_send_sizes(mdev, 0, 0);  /* to start sync... */
		drbd_send_uuids(mdev);
		drbd_send_state(mdev);
	}

	/* We want to pause/continue resync, tell peer. */
	if (ns.conn >= C_CONNECTED &&
	     ((os.aftr_isp != ns.aftr_isp) ||
	      (os.user_isp != ns.user_isp)))
		drbd_send_state(mdev);

	/* In case one of the isp bits got set, suspend other devices. */
	if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) &&
	    (ns.aftr_isp || ns.peer_isp || ns.user_isp))
		suspend_other_sg(mdev);

	/* Make sure the peer gets informed about eventual state
	   changes (ISP bits) while we were in WFReportParams. */
	if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED)
		drbd_send_state(mdev);

	if (os.conn != C_AHEAD && ns.conn == C_AHEAD)
		drbd_send_state(mdev);

	/* We are in the progress to start a full sync... */
	if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
	    (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S))
		/* no other bitmap changes expected during this phase */
		drbd_queue_bitmap_io(mdev,
			&drbd_bmio_set_n_write, &abw_start_sync,
			"set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED);

	/* We are invalidating our self... */
	if (os.conn < C_CONNECTED && ns.conn < C_CONNECTED &&
	    os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT)
		/* other bitmap operation expected during this phase */
		drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL,
			"set_n_write from invalidate", BM_LOCKED_MASK);

	/* first half of local IO error, failure to attach,
	 * or administrative detach */
	if (os.disk != D_FAILED && ns.disk == D_FAILED) {
		enum drbd_io_error_p eh;
		int was_io_error;
		/* corresponding get_ldev was in __drbd_set_state, to serialize
		 * our cleanup here with the transition to D_DISKLESS,
		 * so it is safe to dereference ldev here. */
		rcu_read_lock();
		eh = rcu_dereference(mdev->ldev->disk_conf)->on_io_error;
		rcu_read_unlock();
		was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags);

		/* Immediately allow completion of all application IO, that waits
		   for completion from the local disk. */
		tl_apply(mdev, ABORT_DISK_IO);

		/* current state still has to be D_FAILED,
		 * there is only one way out: to D_DISKLESS,
		 * and that may only happen after our put_ldev below. */
		if (mdev->state.disk != D_FAILED)
			dev_err(DEV,
				"ASSERT FAILED: disk is %s during detach\n",
				drbd_disk_str(mdev->state.disk));

		if (!drbd_send_state(mdev))
			dev_info(DEV, "Notified peer that I am detaching my disk\n");

		drbd_rs_cancel_all(mdev);

		/* In case we want to get something to stable storage still,
		 * this may be the last chance.
		 * Following put_ldev may transition to D_DISKLESS. */
		drbd_md_sync(mdev);
		put_ldev(mdev);

		if (was_io_error && eh == EP_CALL_HELPER)
			drbd_khelper(mdev, "local-io-error");
	}

	/* second half of local IO error, failure to attach,
	 * or administrative detach,
	 * after local_cnt references have reached zero again */
	if (os.disk != D_DISKLESS && ns.disk == D_DISKLESS) {
		/* We must still be diskless,
		 * re-attach has to be serialized with this! */
		if (mdev->state.disk != D_DISKLESS)
			dev_err(DEV,
				"ASSERT FAILED: disk is %s while going diskless\n",
				drbd_disk_str(mdev->state.disk));

		mdev->rs_total = 0;
		mdev->rs_failed = 0;
		atomic_set(&mdev->rs_pending_cnt, 0);

		if (!drbd_send_state(mdev))
			dev_info(DEV, "Notified peer that I'm now diskless.\n");
		/* corresponding get_ldev in __drbd_set_state
		 * this may finally trigger drbd_ldev_destroy. */
		put_ldev(mdev);
	}

	/* Notify peer that I had a local IO error and did not detach. */
	if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT)
		drbd_send_state(mdev);

	/* Disks got bigger while they were detached */
	if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING &&
	    test_and_clear_bit(RESYNC_AFTER_NEG, &mdev->flags)) {
		if (ns.conn == C_CONNECTED)
			resync_after_online_grow(mdev);
	}

	/* A resync finished or aborted, wake paused devices... */
	if ((os.conn > C_CONNECTED && ns.conn <= C_CONNECTED) ||
	    (os.peer_isp && !ns.peer_isp) ||
	    (os.user_isp && !ns.user_isp))
		resume_next_sg(mdev);

	/* sync target done with resync.  Explicitly notify peer, even though
	 * it should (at least for non-empty resyncs) already know itself. */
	if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED)
		drbd_send_state(mdev);

	/* This triggers bitmap writeout of potentially still unwritten pages
	 * if the resync finished cleanly, or aborted because of peer disk
	 * failure, or because of connection loss.
	 * For resync aborted because of local disk failure, we cannot do
	 * any bitmap writeout anymore.
	 * No harm done if some bits change during this phase.
	 */
	if (os.conn > C_CONNECTED && ns.conn <= C_CONNECTED && get_ldev(mdev)) {
		drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL,
			"write from resync_finished", BM_LOCKED_SET_ALLOWED);
		put_ldev(mdev);
	}

	if (ns.disk == D_DISKLESS &&
	    ns.conn == C_STANDALONE &&
	    ns.role == R_SECONDARY) {
		if (os.aftr_isp != ns.aftr_isp)
			resume_next_sg(mdev);
	}

	drbd_md_sync(mdev);
}
struct after_conn_state_chg_work {
	struct drbd_work w;
	enum drbd_conns oc;
	union drbd_state ns_min;
	union drbd_state ns_max; /* new, max state, over all mdevs */
	enum chg_state_flags flags;
};
static int w_after_conn_state_ch(struct drbd_work *w, int unused)
{
	struct after_conn_state_chg_work *acscw =
		container_of(w, struct after_conn_state_chg_work, w);
	struct drbd_tconn *tconn = w->tconn;
	enum drbd_conns oc = acscw->oc;
	union drbd_state ns_max = acscw->ns_max;
	union drbd_state ns_min = acscw->ns_min;
	struct drbd_conf *mdev;
	int vnr;

	kfree(acscw);

	/* Upon network configuration, we need to start the receiver */
	if (oc == C_STANDALONE && ns_max.conn == C_UNCONNECTED)
		drbd_thread_start(&tconn->receiver);

	if (oc == C_DISCONNECTING && ns_max.conn == C_STANDALONE) {
		struct net_conf *old_conf;

		mutex_lock(&tconn->conf_update);
		old_conf = tconn->net_conf;
		tconn->my_addr_len = 0;
		tconn->peer_addr_len = 0;
		rcu_assign_pointer(tconn->net_conf, NULL);
		conn_free_crypto(tconn);
		mutex_unlock(&tconn->conf_update);

		synchronize_rcu();
		kfree(old_conf);
	}

	if (ns_max.susp_fen) {
		/* case1: The outdate peer handler is successful: */
		if (ns_max.pdsk <= D_OUTDATED) {
			rcu_read_lock();
			idr_for_each_entry(&tconn->volumes, mdev, vnr) {
				if (test_bit(NEW_CUR_UUID, &mdev->flags)) {
					drbd_uuid_new_current(mdev);
					clear_bit(NEW_CUR_UUID, &mdev->flags);
				}
			}
			rcu_read_unlock();
			conn_request_state(tconn,
					   (union drbd_state) { { .susp_fen = 1 } },
					   (union drbd_state) { { .susp_fen = 0 } },
					   CS_VERBOSE);
		}
		/* case2: The connection was established again: */
		if (ns_min.conn >= C_CONNECTED) {
			rcu_read_lock();
			idr_for_each_entry(&tconn->volumes, mdev, vnr)
				clear_bit(NEW_CUR_UUID, &mdev->flags);
			rcu_read_unlock();
			spin_lock_irq(&tconn->req_lock);
			_tl_restart(tconn, RESEND);
			_conn_request_state(tconn,
					    (union drbd_state) { { .susp_fen = 1 } },
					    (union drbd_state) { { .susp_fen = 0 } },
					    CS_VERBOSE);
			spin_unlock_irq(&tconn->req_lock);
		}
	}

	kref_put(&tconn->kref, &conn_destroy);
	return 0;
}
void conn_old_common_state(struct drbd_tconn *tconn, union drbd_state *pcs, enum chg_state_flags *pf)
{
	enum chg_state_flags flags = ~0;
	union drbd_dev_state os, cs = {}; /* old_state, common_state */
	struct drbd_conf *mdev;
	int vnr, first_vol = 1;

	rcu_read_lock();
	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
		os = mdev->state;

		if (first_vol) {
			cs = os;
			first_vol = 0;
			continue;
		}

		if (cs.role != os.role)
			flags &= ~CS_DC_ROLE;

		if (cs.peer != os.peer)
			flags &= ~CS_DC_PEER;

		if (cs.conn != os.conn)
			flags &= ~CS_DC_CONN;

		if (cs.disk != os.disk)
			flags &= ~CS_DC_DISK;

		if (cs.pdsk != os.pdsk)
			flags &= ~CS_DC_PDSK;
	}
	rcu_read_unlock();

	*pf |= CS_DC_MASK;
	*pf &= flags;
	(*pcs).i = cs.i;
}
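
/*
 * On return, *pf keeps a CS_DC_* bit set only for fields that are common
 * to all volumes of this connection; conn_pr_state_change() and
 * print_state_change() then use those bits to report a field change once
 * at the connection level instead of once per volume.
 */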
static enum drbd_state_rv
conn_is_valid_transition(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val,
			 enum chg_state_flags flags)
{
	enum drbd_state_rv rv = SS_SUCCESS;
	union drbd_state ns, os;
	struct drbd_conf *mdev;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
		os = drbd_read_state(mdev);
		ns = sanitize_state(mdev, apply_mask_val(os, mask, val), NULL);

		if (flags & CS_IGN_OUTD_FAIL && ns.disk == D_OUTDATED && os.disk < D_OUTDATED)
			ns.disk = os.disk;

		if (ns.i == os.i)
			continue;

		rv = is_valid_transition(os, ns);
		if (rv < SS_SUCCESS)
			break;

		if (!(flags & CS_HARD)) {
			rv = is_valid_state(mdev, ns);
			if (rv < SS_SUCCESS) {
				if (is_valid_state(mdev, os) == rv)
					rv = is_valid_soft_transition(os, ns);
			} else
				rv = is_valid_soft_transition(os, ns);
		}
		if (rv < SS_SUCCESS)
			break;
	}
	rcu_read_unlock();

	if (rv < SS_SUCCESS && flags & CS_VERBOSE)
		print_st_err(mdev, os, ns, rv);

	return rv;
}
static void
conn_set_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val,
	       union drbd_state *pns_min, union drbd_state *pns_max, enum chg_state_flags flags)
{
	union drbd_state ns, os, ns_max = { };
	union drbd_state ns_min = {
		{ .role = R_MASK,
		  .peer = R_MASK,
		  .conn = val.conn,
		  .disk = D_MASK,
		  .pdsk = D_MASK
		} };
	struct drbd_conf *mdev;
	enum drbd_state_rv rv;
	int vnr;

	if (mask.conn == C_MASK)
		tconn->cstate = val.conn;

	rcu_read_lock();
	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
		os = drbd_read_state(mdev);
		ns = apply_mask_val(os, mask, val);
		ns = sanitize_state(mdev, ns, NULL);

		if (flags & CS_IGN_OUTD_FAIL && ns.disk == D_OUTDATED && os.disk < D_OUTDATED)
			ns.disk = os.disk;

		rv = __drbd_set_state(mdev, ns, flags, NULL);
		if (rv < SS_SUCCESS)
			BUG();

		ns.i = mdev->state.i;
		ns_max.role = max_role(ns.role, ns_max.role);
		ns_max.peer = max_role(ns.peer, ns_max.peer);
		ns_max.conn = max_t(enum drbd_conns, ns.conn, ns_max.conn);
		ns_max.disk = max_t(enum drbd_disk_state, ns.disk, ns_max.disk);
		ns_max.pdsk = max_t(enum drbd_disk_state, ns.pdsk, ns_max.pdsk);

		ns_min.role = min_role(ns.role, ns_min.role);
		ns_min.peer = min_role(ns.peer, ns_min.peer);
		ns_min.conn = min_t(enum drbd_conns, ns.conn, ns_min.conn);
		ns_min.disk = min_t(enum drbd_disk_state, ns.disk, ns_min.disk);
		ns_min.pdsk = min_t(enum drbd_disk_state, ns.pdsk, ns_min.pdsk);
	}
	rcu_read_unlock();

	ns_min.susp = ns_max.susp = tconn->susp;
	ns_min.susp_nod = ns_max.susp_nod = tconn->susp_nod;
	ns_min.susp_fen = ns_max.susp_fen = tconn->susp_fen;

	*pns_min = ns_min;
	*pns_max = ns_max;
}
static enum drbd_state_rv
_conn_rq_cond(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val)
{
	enum drbd_state_rv rv;

	if (test_and_clear_bit(CONN_WD_ST_CHG_OKAY, &tconn->flags))
		return SS_CW_SUCCESS;

	if (test_and_clear_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags))
		return SS_CW_FAILED_BY_PEER;

	spin_lock_irq(&tconn->req_lock);
	rv = tconn->cstate != C_WF_REPORT_PARAMS ? SS_CW_NO_NEED : SS_UNKNOWN_ERROR;

	if (rv == SS_UNKNOWN_ERROR)
		rv = conn_is_valid_transition(tconn, mask, val, 0);

	if (rv == SS_SUCCESS)
		rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */

	spin_unlock_irq(&tconn->req_lock);

	return rv;
}
static enum drbd_state_rv
conn_cl_wide(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val,
	     enum chg_state_flags f)
{
	enum drbd_state_rv rv;

	spin_unlock_irq(&tconn->req_lock);
	mutex_lock(&tconn->cstate_mutex);

	if (conn_send_state_req(tconn, mask, val)) {
		rv = SS_CW_FAILED_BY_PEER;
		/* if (f & CS_VERBOSE)
		   print_st_err(mdev, os, ns, rv); */
		goto abort;
	}

	wait_event(tconn->ping_wait, (rv = _conn_rq_cond(tconn, mask, val)));

abort:
	mutex_unlock(&tconn->cstate_mutex);
	spin_lock_irq(&tconn->req_lock);

	return rv;
}
enum drbd_state_rv
_conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val,
		    enum chg_state_flags flags)
{
	enum drbd_state_rv rv = SS_SUCCESS;
	struct after_conn_state_chg_work *acscw;
	enum drbd_conns oc = tconn->cstate;
	union drbd_state ns_max, ns_min, os;

	rv = is_valid_conn_transition(oc, val.conn);
	if (rv < SS_SUCCESS)
		goto abort;

	rv = conn_is_valid_transition(tconn, mask, val, flags);
	if (rv < SS_SUCCESS)
		goto abort;

	if (oc == C_WF_REPORT_PARAMS && val.conn == C_DISCONNECTING &&
	    !(flags & (CS_LOCAL_ONLY | CS_HARD))) {
		rv = conn_cl_wide(tconn, mask, val, flags);
		if (rv < SS_SUCCESS)
			goto abort;
	}

	conn_old_common_state(tconn, &os, &flags);
	flags |= CS_DC_SUSP;
	conn_set_state(tconn, mask, val, &ns_min, &ns_max, flags);
	conn_pr_state_change(tconn, os, ns_max, flags);

	acscw = kmalloc(sizeof(*acscw), GFP_ATOMIC);
	if (acscw) {
		acscw->oc = os.conn;
		acscw->ns_min = ns_min;
		acscw->ns_max = ns_max;
		acscw->flags = flags;
		acscw->w.cb = w_after_conn_state_ch;
		kref_get(&tconn->kref);
		acscw->w.tconn = tconn;
		drbd_queue_work(&tconn->data.work, &acscw->w);
	} else {
		conn_err(tconn, "Could not kmalloc an acscw\n");
	}

abort:
	return rv;
}
enum drbd_state_rv
conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val,
		   enum chg_state_flags flags)
{
	enum drbd_state_rv rv;

	spin_lock_irq(&tconn->req_lock);
	rv = _conn_request_state(tconn, mask, val, flags);
	spin_unlock_irq(&tconn->req_lock);

	return rv;
}
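
/*
 * Usage sketch (not a call site in this file): a caller elsewhere in drbd
 * that wants to tear a connection down cluster-wide would request e.g.
 *
 *	rv = conn_request_state(tconn, NS(conn, C_DISCONNECTING), flags);
 *
 * where NS() builds the mask/val pair for the conn field and "flags" are
 * whatever chg_state_flags the call site needs (CS_VERBOSE, CS_HARD, ...).
 */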