drbd: improvements to activate/deactivate multiple activity log extents
[deliverable/linux.git] / drivers / block / drbd / drbd_nl.c
CommitLineData
b411b363
PR
1/*
2 drbd_nl.c
3
4 This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6 Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7 Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8 Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9
10 drbd is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
14
15 drbd is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with drbd; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
24 */
25
b411b363
PR
26#include <linux/module.h>
27#include <linux/drbd.h>
28#include <linux/in.h>
29#include <linux/fs.h>
30#include <linux/file.h>
31#include <linux/slab.h>
b411b363
PR
32#include <linux/blkpg.h>
33#include <linux/cpumask.h>
34#include "drbd_int.h"
265be2d0 35#include "drbd_req.h"
b411b363
PR
36#include "drbd_wrappers.h"
37#include <asm/unaligned.h>
b411b363 38#include <linux/drbd_limits.h>
87f7be4c 39#include <linux/kthread.h>
b411b363 40
3b98c0c2
LE
41#include <net/genetlink.h>
42
/*
 * Generic netlink request handlers (.doit).
 * Not declared here: drbd_adm_create_resource(), drbd_adm_delete_resource().
 */

/* minors and connections: creation and removal */
int drbd_adm_add_minor(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_create_connection(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_down(struct sk_buff *skb, struct genl_info *info);

/* role, disk and network configuration */
int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info);

/* status queries */
int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info);

/* Generic netlink dump handler (.dumpit). */
int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb);
76
77#include <linux/drbd_genl_api.h>
78#include <linux/genl_magic_func.h>
79
/* Holder used with blkdev_get_by_path() when claiming our meta data device(s). */
static char *drbd_m_holder = "Hands off! this is DRBD's meta data device.";
82
3b98c0c2
LE
/*
 * Generic netlink message processing is strictly serialized by the
 * genl_lock(), and so is configuration.  That is why a single static
 * drbd_config_context is sufficient for all requests.
 */
#define VOLUME_UNSPECIFIED		(-1U)
static struct drbd_config_context {
	unsigned int minor;	/* assigned from the drbd_genlmsghdr */
	unsigned int volume;	/* assigned from request attributes, if present */
	char *conn_name;	/* points into the request skb: limited lifetime! */

	struct sk_buff *reply_skb;		/* reply buffer */
	struct drbd_genlmsghdr *reply_dh;	/* points into the reply buffer */

	/* resolved from the attributes, if possible */
	struct drbd_conf *mdev;
	struct drbd_tconn *tconn;
} adm_ctx;
105
106static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info)
107{
108 genlmsg_end(skb, genlmsg_data(nlmsg_data(nlmsg_hdr(skb))));
109 if (genlmsg_reply(skb, info))
110 printk(KERN_ERR "drbd: error sending genl reply\n");
b411b363 111}
3b98c0c2
LE
112
113/* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: The only
114 * reason it could fail was no space in skb, and there are 4k available. */
8432b314 115int drbd_msg_put_info(const char *info)
3b98c0c2
LE
116{
117 struct sk_buff *skb = adm_ctx.reply_skb;
118 struct nlattr *nla;
119 int err = -EMSGSIZE;
120
121 if (!info || !info[0])
122 return 0;
123
124 nla = nla_nest_start(skb, DRBD_NLA_CFG_REPLY);
125 if (!nla)
126 return err;
127
128 err = nla_put_string(skb, T_info_text, info);
129 if (err) {
130 nla_nest_cancel(skb, nla);
131 return err;
132 } else
133 nla_nest_end(skb, nla);
134 return 0;
b411b363
PR
135}
136
3b98c0c2
LE
137/* This would be a good candidate for a "pre_doit" hook,
138 * and per-family private info->pointers.
139 * But we need to stay compatible with older kernels.
140 * If it returns successfully, adm_ctx members are valid.
141 */
142#define DRBD_ADM_NEED_MINOR 1
143#define DRBD_ADM_NEED_CONN 2
144static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info,
145 unsigned flags)
146{
147 struct drbd_genlmsghdr *d_in = info->userhdr;
148 const u8 cmd = info->genlhdr->cmd;
149 int err;
150
151 memset(&adm_ctx, 0, sizeof(adm_ctx));
152
153 /* genl_rcv_msg only checks for CAP_NET_ADMIN on "GENL_ADMIN_PERM" :( */
154 if (cmd != DRBD_ADM_GET_STATUS
155 && security_netlink_recv(skb, CAP_SYS_ADMIN))
156 return -EPERM;
157
158 adm_ctx.reply_skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
159 if (!adm_ctx.reply_skb)
160 goto fail;
161
162 adm_ctx.reply_dh = genlmsg_put_reply(adm_ctx.reply_skb,
163 info, &drbd_genl_family, 0, cmd);
164 /* put of a few bytes into a fresh skb of >= 4k will always succeed.
165 * but anyways */
166 if (!adm_ctx.reply_dh)
167 goto fail;
168
169 adm_ctx.reply_dh->minor = d_in->minor;
170 adm_ctx.reply_dh->ret_code = NO_ERROR;
171
172 if (info->attrs[DRBD_NLA_CFG_CONTEXT]) {
173 struct nlattr *nla;
174 /* parse and validate only */
f399002e 175 err = drbd_cfg_context_from_attrs(NULL, info);
3b98c0c2
LE
176 if (err)
177 goto fail;
178
179 /* It was present, and valid,
180 * copy it over to the reply skb. */
181 err = nla_put_nohdr(adm_ctx.reply_skb,
182 info->attrs[DRBD_NLA_CFG_CONTEXT]->nla_len,
183 info->attrs[DRBD_NLA_CFG_CONTEXT]);
184 if (err)
185 goto fail;
186
187 /* and assign stuff to the global adm_ctx */
188 nla = nested_attr_tb[__nla_type(T_ctx_volume)];
189 adm_ctx.volume = nla ? nla_get_u32(nla) : VOLUME_UNSPECIFIED;
190 nla = nested_attr_tb[__nla_type(T_ctx_conn_name)];
191 if (nla)
192 adm_ctx.conn_name = nla_data(nla);
193 } else
194 adm_ctx.volume = VOLUME_UNSPECIFIED;
195
196 adm_ctx.minor = d_in->minor;
197 adm_ctx.mdev = minor_to_mdev(d_in->minor);
198 adm_ctx.tconn = conn_by_name(adm_ctx.conn_name);
199
200 if (!adm_ctx.mdev && (flags & DRBD_ADM_NEED_MINOR)) {
201 drbd_msg_put_info("unknown minor");
202 return ERR_MINOR_INVALID;
203 }
204 if (!adm_ctx.tconn && (flags & DRBD_ADM_NEED_CONN)) {
205 drbd_msg_put_info("unknown connection");
206 return ERR_INVALID_REQUEST;
207 }
208
209 /* some more paranoia, if the request was over-determined */
527f4b24
LE
210 if (adm_ctx.mdev && adm_ctx.tconn &&
211 adm_ctx.mdev->tconn != adm_ctx.tconn) {
212 pr_warning("request: minor=%u, conn=%s; but that minor belongs to connection %s\n",
213 adm_ctx.minor, adm_ctx.conn_name, adm_ctx.mdev->tconn->name);
214 drbd_msg_put_info("minor exists in different connection");
215 return ERR_INVALID_REQUEST;
216 }
3b98c0c2
LE
217 if (adm_ctx.mdev &&
218 adm_ctx.volume != VOLUME_UNSPECIFIED &&
219 adm_ctx.volume != adm_ctx.mdev->vnr) {
220 pr_warning("request: minor=%u, volume=%u; but that minor is volume %u in %s\n",
221 adm_ctx.minor, adm_ctx.volume,
222 adm_ctx.mdev->vnr, adm_ctx.mdev->tconn->name);
527f4b24 223 drbd_msg_put_info("minor exists as different volume");
3b98c0c2
LE
224 return ERR_INVALID_REQUEST;
225 }
cffec5b2
LE
226 if (adm_ctx.mdev && !adm_ctx.tconn)
227 adm_ctx.tconn = adm_ctx.mdev->tconn;
3b98c0c2
LE
228 return NO_ERROR;
229
230fail:
231 nlmsg_free(adm_ctx.reply_skb);
232 adm_ctx.reply_skb = NULL;
233 return -ENOMEM;
234}
235
236static int drbd_adm_finish(struct genl_info *info, int retcode)
237{
238 struct nlattr *nla;
239 const char *conn_name = NULL;
240
241 if (!adm_ctx.reply_skb)
242 return -ENOMEM;
243
244 adm_ctx.reply_dh->ret_code = retcode;
245
246 nla = info->attrs[DRBD_NLA_CFG_CONTEXT];
247 if (nla) {
248 nla = nla_find_nested(nla, __nla_type(T_ctx_conn_name));
249 if (nla)
250 conn_name = nla_data(nla);
251 }
252
253 drbd_adm_send_reply(adm_ctx.reply_skb, info);
254 return 0;
255}
b411b363 256
6b75dced 257static void setup_khelper_env(struct drbd_tconn *tconn, char **envp)
b411b363 258{
6b75dced 259 char *afs;
b411b363 260
6b75dced
PR
261 if (get_net_conf(tconn)) {
262 switch (((struct sockaddr *)tconn->net_conf->peer_addr)->sa_family) {
b411b363
PR
263 case AF_INET6:
264 afs = "ipv6";
6b75dced
PR
265 snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI6",
266 &((struct sockaddr_in6 *)tconn->net_conf->peer_addr)->sin6_addr);
b411b363
PR
267 break;
268 case AF_INET:
269 afs = "ipv4";
6b75dced
PR
270 snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
271 &((struct sockaddr_in *)tconn->net_conf->peer_addr)->sin_addr);
b411b363
PR
272 break;
273 default:
274 afs = "ssocks";
6b75dced
PR
275 snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
276 &((struct sockaddr_in *)tconn->net_conf->peer_addr)->sin_addr);
b411b363 277 }
6b75dced
PR
278 snprintf(envp[3], 20, "DRBD_PEER_AF=%s", afs);
279 put_net_conf(tconn);
b411b363 280 }
6b75dced
PR
281}
282
283int drbd_khelper(struct drbd_conf *mdev, char *cmd)
284{
285 char *envp[] = { "HOME=/",
286 "TERM=linux",
287 "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
288 (char[20]) { }, /* address family */
289 (char[60]) { }, /* address */
290 NULL };
291 char mb[12];
292 char *argv[] = {usermode_helper, cmd, mb, NULL };
293 struct sib_info sib;
294 int ret;
295
296 snprintf(mb, 12, "minor-%d", mdev_to_minor(mdev));
297 setup_khelper_env(mdev->tconn, envp);
b411b363 298
1090c056
LE
299 /* The helper may take some time.
300 * write out any unsynced meta data changes now */
301 drbd_md_sync(mdev);
302
b411b363 303 dev_info(DEV, "helper command: %s %s %s\n", usermode_helper, cmd, mb);
3b98c0c2
LE
304 sib.sib_reason = SIB_HELPER_PRE;
305 sib.helper_name = cmd;
306 drbd_bcast_event(mdev, &sib);
b411b363
PR
307 ret = call_usermodehelper(usermode_helper, argv, envp, 1);
308 if (ret)
309 dev_warn(DEV, "helper command: %s %s %s exit code %u (0x%x)\n",
310 usermode_helper, cmd, mb,
311 (ret >> 8) & 0xff, ret);
312 else
313 dev_info(DEV, "helper command: %s %s %s exit code %u (0x%x)\n",
314 usermode_helper, cmd, mb,
315 (ret >> 8) & 0xff, ret);
3b98c0c2
LE
316 sib.sib_reason = SIB_HELPER_POST;
317 sib.helper_exit_code = ret;
318 drbd_bcast_event(mdev, &sib);
b411b363
PR
319
320 if (ret < 0) /* Ignore any ERRNOs we got. */
321 ret = 0;
322
323 return ret;
324}
325
6b75dced
PR
326static void conn_md_sync(struct drbd_tconn *tconn)
327{
328 struct drbd_conf *mdev;
e90285e0 329 int vnr;
6b75dced 330
e90285e0 331 idr_for_each_entry(&tconn->volumes, mdev, vnr)
6b75dced
PR
332 drbd_md_sync(mdev);
333}
334
335int conn_khelper(struct drbd_tconn *tconn, char *cmd)
336{
337 char *envp[] = { "HOME=/",
338 "TERM=linux",
339 "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
340 (char[20]) { }, /* address family */
341 (char[60]) { }, /* address */
342 NULL };
343 char *argv[] = {usermode_helper, cmd, tconn->name, NULL };
344 int ret;
345
346 setup_khelper_env(tconn, envp);
347 conn_md_sync(tconn);
348
349 conn_info(tconn, "helper command: %s %s %s\n", usermode_helper, cmd, tconn->name);
350 /* TODO: conn_bcast_event() ?? */
351
352 ret = call_usermodehelper(usermode_helper, argv, envp, 1);
353 if (ret)
354 conn_warn(tconn, "helper command: %s %s %s exit code %u (0x%x)\n",
355 usermode_helper, cmd, tconn->name,
356 (ret >> 8) & 0xff, ret);
357 else
358 conn_info(tconn, "helper command: %s %s %s exit code %u (0x%x)\n",
359 usermode_helper, cmd, tconn->name,
360 (ret >> 8) & 0xff, ret);
361 /* TODO: conn_bcast_event() ?? */
362
363 if (ret < 0) /* Ignore any ERRNOs we got. */
364 ret = 0;
365
366 return ret;
367}
368
cb703454 369static enum drbd_fencing_p highest_fencing_policy(struct drbd_tconn *tconn)
b411b363 370{
cb703454
PR
371 enum drbd_fencing_p fp = FP_NOT_AVAIL;
372 struct drbd_conf *mdev;
373 int vnr;
374
375 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
376 if (get_ldev_if_state(mdev, D_CONSISTENT)) {
377 fp = max_t(enum drbd_fencing_p, fp, mdev->ldev->dc.fencing);
378 put_ldev(mdev);
379 }
380 }
381
382 return fp;
383}
384
385bool conn_try_outdate_peer(struct drbd_tconn *tconn)
386{
387 union drbd_state mask = { };
388 union drbd_state val = { };
389 enum drbd_fencing_p fp;
b411b363
PR
390 char *ex_to_string;
391 int r;
b411b363 392
cb703454
PR
393 if (tconn->cstate >= C_WF_REPORT_PARAMS) {
394 conn_err(tconn, "Expected cstate < C_WF_REPORT_PARAMS\n");
395 return false;
396 }
b411b363 397
cb703454
PR
398 fp = highest_fencing_policy(tconn);
399 switch (fp) {
400 case FP_NOT_AVAIL:
401 conn_warn(tconn, "Not fencing peer, I'm not even Consistent myself.\n");
fb22c402 402 goto out;
cb703454
PR
403 case FP_DONT_CARE:
404 return true;
405 default: ;
b411b363
PR
406 }
407
cb703454 408 r = conn_khelper(tconn, "fence-peer");
b411b363
PR
409
410 switch ((r>>8) & 0xff) {
411 case 3: /* peer is inconsistent */
412 ex_to_string = "peer is inconsistent or worse";
cb703454
PR
413 mask.pdsk = D_MASK;
414 val.pdsk = D_INCONSISTENT;
b411b363
PR
415 break;
416 case 4: /* peer got outdated, or was already outdated */
417 ex_to_string = "peer was fenced";
cb703454
PR
418 mask.pdsk = D_MASK;
419 val.pdsk = D_OUTDATED;
b411b363
PR
420 break;
421 case 5: /* peer was down */
cb703454 422 if (conn_highest_disk(tconn) == D_UP_TO_DATE) {
b411b363
PR
423 /* we will(have) create(d) a new UUID anyways... */
424 ex_to_string = "peer is unreachable, assumed to be dead";
cb703454
PR
425 mask.pdsk = D_MASK;
426 val.pdsk = D_OUTDATED;
b411b363
PR
427 } else {
428 ex_to_string = "peer unreachable, doing nothing since disk != UpToDate";
b411b363
PR
429 }
430 break;
431 case 6: /* Peer is primary, voluntarily outdate myself.
432 * This is useful when an unconnected R_SECONDARY is asked to
433 * become R_PRIMARY, but finds the other peer being active. */
434 ex_to_string = "peer is active";
cb703454
PR
435 conn_warn(tconn, "Peer is primary, outdating myself.\n");
436 mask.disk = D_MASK;
437 val.disk = D_OUTDATED;
b411b363
PR
438 break;
439 case 7:
440 if (fp != FP_STONITH)
cb703454 441 conn_err(tconn, "fence-peer() = 7 && fencing != Stonith !!!\n");
b411b363 442 ex_to_string = "peer was stonithed";
cb703454
PR
443 mask.pdsk = D_MASK;
444 val.pdsk = D_OUTDATED;
b411b363
PR
445 break;
446 default:
447 /* The script is broken ... */
cb703454
PR
448 conn_err(tconn, "fence-peer helper broken, returned %d\n", (r>>8)&0xff);
449 return false; /* Eventually leave IO frozen */
b411b363
PR
450 }
451
cb703454
PR
452 conn_info(tconn, "fence-peer helper returned %d (%s)\n",
453 (r>>8) & 0xff, ex_to_string);
fb22c402 454
cb703454 455 out:
fb22c402 456
cb703454
PR
457 /* Not using
458 conn_request_state(tconn, mask, val, CS_VERBOSE);
459 here, because we might were able to re-establish the connection in the
460 meantime. */
461 spin_lock_irq(&tconn->req_lock);
462 if (tconn->cstate < C_WF_REPORT_PARAMS)
463 _conn_request_state(tconn, mask, val, CS_VERBOSE);
464 spin_unlock_irq(&tconn->req_lock);
465
466 return conn_highest_pdsk(tconn) <= D_OUTDATED;
b411b363
PR
467}
468
87f7be4c
PR
/* Thread function for conn_try_outdate_peer_async(); @data is the
 * struct drbd_tconn to fence.  Always returns 0. */
static int _try_outdate_peer_async(void *data)
{
	struct drbd_tconn *connection = data;

	conn_try_outdate_peer(connection);
	return 0;
}
477
cb703454 478void conn_try_outdate_peer_async(struct drbd_tconn *tconn)
87f7be4c
PR
479{
480 struct task_struct *opa;
481
cb703454 482 opa = kthread_run(_try_outdate_peer_async, tconn, "drbd_async_h");
87f7be4c 483 if (IS_ERR(opa))
cb703454 484 conn_err(tconn, "out of mem, failed to invoke fence-peer helper\n");
87f7be4c 485}
b411b363 486
bf885f8a
AG
487enum drbd_state_rv
488drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
b411b363
PR
489{
490 const int max_tries = 4;
bf885f8a 491 enum drbd_state_rv rv = SS_UNKNOWN_ERROR;
b411b363
PR
492 int try = 0;
493 int forced = 0;
494 union drbd_state mask, val;
b411b363
PR
495
496 if (new_role == R_PRIMARY)
0625ac19 497 request_ping(mdev->tconn); /* Detect a dead peer ASAP */
b411b363 498
8410da8f 499 mutex_lock(mdev->state_mutex);
b411b363
PR
500
501 mask.i = 0; mask.role = R_MASK;
502 val.i = 0; val.role = new_role;
503
504 while (try++ < max_tries) {
bf885f8a 505 rv = _drbd_request_state(mdev, mask, val, CS_WAIT_COMPLETE);
b411b363
PR
506
507 /* in case we first succeeded to outdate,
508 * but now suddenly could establish a connection */
bf885f8a 509 if (rv == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) {
b411b363
PR
510 val.pdsk = 0;
511 mask.pdsk = 0;
512 continue;
513 }
514
bf885f8a 515 if (rv == SS_NO_UP_TO_DATE_DISK && force &&
d10a33c6
PR
516 (mdev->state.disk < D_UP_TO_DATE &&
517 mdev->state.disk >= D_INCONSISTENT)) {
b411b363
PR
518 mask.disk = D_MASK;
519 val.disk = D_UP_TO_DATE;
520 forced = 1;
521 continue;
522 }
523
bf885f8a 524 if (rv == SS_NO_UP_TO_DATE_DISK &&
b411b363
PR
525 mdev->state.disk == D_CONSISTENT && mask.pdsk == 0) {
526 D_ASSERT(mdev->state.pdsk == D_UNKNOWN);
b411b363 527
cb703454 528 if (conn_try_outdate_peer(mdev->tconn)) {
b411b363
PR
529 val.disk = D_UP_TO_DATE;
530 mask.disk = D_MASK;
531 }
b411b363
PR
532 continue;
533 }
534
bf885f8a 535 if (rv == SS_NOTHING_TO_DO)
3b98c0c2 536 goto out;
bf885f8a 537 if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) {
cb703454 538 if (!conn_try_outdate_peer(mdev->tconn) && force) {
b411b363 539 dev_warn(DEV, "Forced into split brain situation!\n");
cb703454
PR
540 mask.pdsk = D_MASK;
541 val.pdsk = D_OUTDATED;
b411b363 542
cb703454 543 }
b411b363
PR
544 continue;
545 }
bf885f8a 546 if (rv == SS_TWO_PRIMARIES) {
b411b363
PR
547 /* Maybe the peer is detected as dead very soon...
548 retry at most once more in this case. */
89e58e75 549 schedule_timeout_interruptible((mdev->tconn->net_conf->ping_timeo+1)*HZ/10);
b411b363
PR
550 if (try < max_tries)
551 try = max_tries - 1;
552 continue;
553 }
bf885f8a
AG
554 if (rv < SS_SUCCESS) {
555 rv = _drbd_request_state(mdev, mask, val,
b411b363 556 CS_VERBOSE + CS_WAIT_COMPLETE);
bf885f8a 557 if (rv < SS_SUCCESS)
3b98c0c2 558 goto out;
b411b363
PR
559 }
560 break;
561 }
562
bf885f8a 563 if (rv < SS_SUCCESS)
3b98c0c2 564 goto out;
b411b363
PR
565
566 if (forced)
567 dev_warn(DEV, "Forced to consider local data as UpToDate!\n");
568
569 /* Wait until nothing is on the fly :) */
570 wait_event(mdev->misc_wait, atomic_read(&mdev->ap_pending_cnt) == 0);
571
572 if (new_role == R_SECONDARY) {
81e84650 573 set_disk_ro(mdev->vdisk, true);
b411b363
PR
574 if (get_ldev(mdev)) {
575 mdev->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
576 put_ldev(mdev);
577 }
578 } else {
b2fb6dbe 579 if (get_net_conf(mdev->tconn)) {
89e58e75 580 mdev->tconn->net_conf->want_lose = 0;
b2fb6dbe 581 put_net_conf(mdev->tconn);
b411b363 582 }
81e84650 583 set_disk_ro(mdev->vdisk, false);
b411b363
PR
584 if (get_ldev(mdev)) {
585 if (((mdev->state.conn < C_CONNECTED ||
586 mdev->state.pdsk <= D_FAILED)
587 && mdev->ldev->md.uuid[UI_BITMAP] == 0) || forced)
588 drbd_uuid_new_current(mdev);
589
590 mdev->ldev->md.uuid[UI_CURRENT] |= (u64)1;
591 put_ldev(mdev);
592 }
593 }
594
19f843aa
LE
595 /* writeout of activity log covered areas of the bitmap
596 * to stable storage done in after state change already */
b411b363
PR
597
598 if (mdev->state.conn >= C_WF_REPORT_PARAMS) {
599 /* if this was forced, we should consider sync */
600 if (forced)
601 drbd_send_uuids(mdev);
602 drbd_send_state(mdev);
603 }
604
605 drbd_md_sync(mdev);
606
607 kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
3b98c0c2 608out:
8410da8f 609 mutex_unlock(mdev->state_mutex);
bf885f8a 610 return rv;
b411b363
PR
611}
612
/* Map the error code of a *_from_attrs() helper to a text suitable for the
 * info text of the netlink reply.  Unknown codes yield a generic message. */
static const char *from_attrs_err_to_txt(int err)
{
	switch (err) {
	case -ENOMSG:
		return "required attribute missing";
	case -EOPNOTSUPP:
		return "unknown mandatory attribute";
	case -EEXIST:
		return "can not change invariant setting";
	default:
		return "invalid attribute value";
	}
}
620
3b98c0c2 621int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
b411b363 622{
3b98c0c2
LE
623 struct set_role_parms parms;
624 int err;
625 enum drbd_ret_code retcode;
b411b363 626
3b98c0c2
LE
627 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
628 if (!adm_ctx.reply_skb)
629 return retcode;
630 if (retcode != NO_ERROR)
631 goto out;
632
633 memset(&parms, 0, sizeof(parms));
634 if (info->attrs[DRBD_NLA_SET_ROLE_PARMS]) {
f399002e 635 err = set_role_parms_from_attrs(&parms, info);
3b98c0c2
LE
636 if (err) {
637 retcode = ERR_MANDATORY_TAG;
638 drbd_msg_put_info(from_attrs_err_to_txt(err));
639 goto out;
640 }
641 }
642
643 if (info->genlhdr->cmd == DRBD_ADM_PRIMARY)
644 retcode = drbd_set_role(adm_ctx.mdev, R_PRIMARY, parms.assume_uptodate);
645 else
646 retcode = drbd_set_role(adm_ctx.mdev, R_SECONDARY, 0);
647out:
648 drbd_adm_finish(info, retcode);
b411b363
PR
649 return 0;
650}
651
652/* initializes the md.*_offset members, so we are able to find
653 * the on disk meta data */
654static void drbd_md_set_sector_offsets(struct drbd_conf *mdev,
655 struct drbd_backing_dev *bdev)
656{
657 sector_t md_size_sect = 0;
658 switch (bdev->dc.meta_dev_idx) {
659 default:
660 /* v07 style fixed size indexed meta data */
661 bdev->md.md_size_sect = MD_RESERVED_SECT;
662 bdev->md.md_offset = drbd_md_ss__(mdev, bdev);
663 bdev->md.al_offset = MD_AL_OFFSET;
664 bdev->md.bm_offset = MD_BM_OFFSET;
665 break;
666 case DRBD_MD_INDEX_FLEX_EXT:
667 /* just occupy the full device; unit: sectors */
668 bdev->md.md_size_sect = drbd_get_capacity(bdev->md_bdev);
669 bdev->md.md_offset = 0;
670 bdev->md.al_offset = MD_AL_OFFSET;
671 bdev->md.bm_offset = MD_BM_OFFSET;
672 break;
673 case DRBD_MD_INDEX_INTERNAL:
674 case DRBD_MD_INDEX_FLEX_INT:
675 bdev->md.md_offset = drbd_md_ss__(mdev, bdev);
676 /* al size is still fixed */
7ad651b5 677 bdev->md.al_offset = -MD_AL_SECTORS;
b411b363
PR
678 /* we need (slightly less than) ~ this much bitmap sectors: */
679 md_size_sect = drbd_get_capacity(bdev->backing_bdev);
680 md_size_sect = ALIGN(md_size_sect, BM_SECT_PER_EXT);
681 md_size_sect = BM_SECT_TO_EXT(md_size_sect);
682 md_size_sect = ALIGN(md_size_sect, 8);
683
684 /* plus the "drbd meta data super block",
685 * and the activity log; */
686 md_size_sect += MD_BM_OFFSET;
687
688 bdev->md.md_size_sect = md_size_sect;
689 /* bitmap offset is adjusted by 'super' block size */
690 bdev->md.bm_offset = -md_size_sect + MD_AL_OFFSET;
691 break;
692 }
693}
694
/*
 * Pretty print a size into @buf as "<number> <unit>B" and return @buf.
 *
 * @size is expected to be in KB.  While it has five or more digits it is
 * divided by 1024 (rounding half up) and the next larger unit is used,
 * up to 'E'.
 *
 * @buf needs room for at most 9 bytes including the trailing NUL:
 * -1ULL ==> "16384 EB"
 */
char *ppsize(char *buf, unsigned long long size)
{
	static char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' };
	const int last_unit = sizeof(units) - 1;
	int unit;

	for (unit = 0; unit < last_unit && size >= 10000; unit++) {
		/* bit 9 is the most significant bit shifted out */
		unsigned long long round_up = (size & (1 << 9)) ? 1 : 0;

		size = (size >> 10) + round_up;
	}

	sprintf(buf, "%u %cB", (unsigned)size, units[unit]);
	return buf;
}
711
712/* there is still a theoretical deadlock when called from receiver
713 * on an D_INCONSISTENT R_PRIMARY:
714 * remote READ does inc_ap_bio, receiver would need to receive answer
715 * packet from remote to dec_ap_bio again.
716 * receiver receive_sizes(), comes here,
717 * waits for ap_bio_cnt == 0. -> deadlock.
718 * but this cannot happen, actually, because:
719 * R_PRIMARY D_INCONSISTENT, and peer's disk is unreachable
720 * (not connected, or bad/no disk on peer):
721 * see drbd_fail_request_early, ap_bio_cnt is zero.
722 * R_PRIMARY D_INCONSISTENT, and C_SYNC_TARGET:
723 * peer may not initiate a resize.
724 */
3b98c0c2
LE
725/* Note these are not to be confused with
726 * drbd_adm_suspend_io/drbd_adm_resume_io,
727 * which are (sub) state changes triggered by admin (drbdsetup),
728 * and can be long lived.
729 * This changes an mdev->flag, is triggered by drbd internals,
730 * and should be short-lived. */
b411b363
PR
731void drbd_suspend_io(struct drbd_conf *mdev)
732{
733 set_bit(SUSPEND_IO, &mdev->flags);
fb22c402 734 if (is_susp(mdev->state))
265be2d0 735 return;
b411b363
PR
736 wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt));
737}
738
739void drbd_resume_io(struct drbd_conf *mdev)
740{
741 clear_bit(SUSPEND_IO, &mdev->flags);
742 wake_up(&mdev->misc_wait);
743}
744
745/**
746 * drbd_determine_dev_size() - Sets the right device size obeying all constraints
747 * @mdev: DRBD device.
748 *
749 * Returns 0 on success, negative return values indicate errors.
750 * You should call drbd_md_sync() after calling this function.
751 */
24c4830c 752enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags) __must_hold(local)
b411b363
PR
753{
754 sector_t prev_first_sect, prev_size; /* previous meta location */
755 sector_t la_size;
756 sector_t size;
757 char ppb[10];
758
759 int md_moved, la_size_changed;
760 enum determine_dev_size rv = unchanged;
761
762 /* race:
763 * application request passes inc_ap_bio,
764 * but then cannot get an AL-reference.
765 * this function later may wait on ap_bio_cnt == 0. -> deadlock.
766 *
767 * to avoid that:
768 * Suspend IO right here.
769 * still lock the act_log to not trigger ASSERTs there.
770 */
771 drbd_suspend_io(mdev);
772
773 /* no wait necessary anymore, actually we could assert that */
774 wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
775
776 prev_first_sect = drbd_md_first_sector(mdev->ldev);
777 prev_size = mdev->ldev->md.md_size_sect;
778 la_size = mdev->ldev->md.la_size_sect;
779
780 /* TODO: should only be some assert here, not (re)init... */
781 drbd_md_set_sector_offsets(mdev, mdev->ldev);
782
d845030f 783 size = drbd_new_dev_size(mdev, mdev->ldev, flags & DDSF_FORCED);
b411b363
PR
784
785 if (drbd_get_capacity(mdev->this_bdev) != size ||
786 drbd_bm_capacity(mdev) != size) {
787 int err;
02d9a94b 788 err = drbd_bm_resize(mdev, size, !(flags & DDSF_NO_RESYNC));
b411b363
PR
789 if (unlikely(err)) {
790 /* currently there is only one error: ENOMEM! */
791 size = drbd_bm_capacity(mdev)>>1;
792 if (size == 0) {
793 dev_err(DEV, "OUT OF MEMORY! "
794 "Could not allocate bitmap!\n");
795 } else {
796 dev_err(DEV, "BM resizing failed. "
797 "Leaving size unchanged at size = %lu KB\n",
798 (unsigned long)size);
799 }
800 rv = dev_size_error;
801 }
802 /* racy, see comments above. */
803 drbd_set_my_capacity(mdev, size);
804 mdev->ldev->md.la_size_sect = size;
805 dev_info(DEV, "size = %s (%llu KB)\n", ppsize(ppb, size>>1),
806 (unsigned long long)size>>1);
807 }
808 if (rv == dev_size_error)
809 goto out;
810
811 la_size_changed = (la_size != mdev->ldev->md.la_size_sect);
812
813 md_moved = prev_first_sect != drbd_md_first_sector(mdev->ldev)
814 || prev_size != mdev->ldev->md.md_size_sect;
815
816 if (la_size_changed || md_moved) {
24dccabb
AG
817 int err;
818
b411b363
PR
819 drbd_al_shrink(mdev); /* All extents inactive. */
820 dev_info(DEV, "Writing the whole bitmap, %s\n",
821 la_size_changed && md_moved ? "size changed and md moved" :
822 la_size_changed ? "size changed" : "md moved");
20ceb2b2
LE
823 /* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
824 err = drbd_bitmap_io(mdev, &drbd_bm_write,
825 "size changed", BM_LOCKED_MASK);
24dccabb
AG
826 if (err) {
827 rv = dev_size_error;
828 goto out;
829 }
b411b363
PR
830 drbd_md_mark_dirty(mdev);
831 }
832
833 if (size > la_size)
834 rv = grew;
835 if (size < la_size)
836 rv = shrunk;
837out:
838 lc_unlock(mdev->act_log);
839 wake_up(&mdev->al_wait);
840 drbd_resume_io(mdev);
841
842 return rv;
843}
844
845sector_t
a393db6f 846drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, int assume_peer_has_space)
b411b363
PR
847{
848 sector_t p_size = mdev->p_size; /* partner's disk size. */
849 sector_t la_size = bdev->md.la_size_sect; /* last agreed size. */
850 sector_t m_size; /* my size */
851 sector_t u_size = bdev->dc.disk_size; /* size requested by user. */
852 sector_t size = 0;
853
854 m_size = drbd_get_max_capacity(bdev);
855
a393db6f
PR
856 if (mdev->state.conn < C_CONNECTED && assume_peer_has_space) {
857 dev_warn(DEV, "Resize while not connected was forced by the user!\n");
858 p_size = m_size;
859 }
860
b411b363
PR
861 if (p_size && m_size) {
862 size = min_t(sector_t, p_size, m_size);
863 } else {
864 if (la_size) {
865 size = la_size;
866 if (m_size && m_size < size)
867 size = m_size;
868 if (p_size && p_size < size)
869 size = p_size;
870 } else {
871 if (m_size)
872 size = m_size;
873 if (p_size)
874 size = p_size;
875 }
876 }
877
878 if (size == 0)
879 dev_err(DEV, "Both nodes diskless!\n");
880
881 if (u_size) {
882 if (u_size > size)
883 dev_err(DEV, "Requested disk size is too big (%lu > %lu)\n",
884 (unsigned long)u_size>>1, (unsigned long)size>>1);
885 else
886 size = u_size;
887 }
888
889 return size;
890}
891
892/**
893 * drbd_check_al_size() - Ensures that the AL is of the right size
894 * @mdev: DRBD device.
895 *
896 * Returns -EBUSY if current al lru is still used, -ENOMEM when allocation
897 * failed, and 0 on success. You should call drbd_md_sync() after you called
898 * this function.
899 */
f399002e 900static int drbd_check_al_size(struct drbd_conf *mdev, struct disk_conf *dc)
b411b363
PR
901{
902 struct lru_cache *n, *t;
903 struct lc_element *e;
904 unsigned int in_use;
905 int i;
906
f399002e
LE
907 if (!expect(dc->al_extents >= DRBD_AL_EXTENTS_MIN))
908 dc->al_extents = DRBD_AL_EXTENTS_MIN;
b411b363
PR
909
910 if (mdev->act_log &&
f399002e 911 mdev->act_log->nr_elements == dc->al_extents)
b411b363
PR
912 return 0;
913
914 in_use = 0;
915 t = mdev->act_log;
7ad651b5 916 n = lc_create("act_log", drbd_al_ext_cache, AL_UPDATES_PER_TRANSACTION,
f399002e 917 dc->al_extents, sizeof(struct lc_element), 0);
b411b363
PR
918
919 if (n == NULL) {
920 dev_err(DEV, "Cannot allocate act_log lru!\n");
921 return -ENOMEM;
922 }
923 spin_lock_irq(&mdev->al_lock);
924 if (t) {
925 for (i = 0; i < t->nr_elements; i++) {
926 e = lc_element_by_index(t, i);
927 if (e->refcnt)
928 dev_err(DEV, "refcnt(%d)==%d\n",
929 e->lc_number, e->refcnt);
930 in_use += e->refcnt;
931 }
932 }
933 if (!in_use)
934 mdev->act_log = n;
935 spin_unlock_irq(&mdev->al_lock);
936 if (in_use) {
937 dev_err(DEV, "Activity log still in use!\n");
938 lc_destroy(n);
939 return -EBUSY;
940 } else {
941 if (t)
942 lc_destroy(t);
943 }
944 drbd_md_mark_dirty(mdev); /* we changed mdev->act_log->nr_elemens */
945 return 0;
946}
947
99432fcc 948static void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_bio_size)
b411b363
PR
949{
950 struct request_queue * const q = mdev->rq_queue;
99432fcc
PR
951 int max_hw_sectors = max_bio_size >> 9;
952 int max_segments = 0;
953
954 if (get_ldev_if_state(mdev, D_ATTACHING)) {
955 struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue;
956
957 max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
958 max_segments = mdev->ldev->dc.max_bio_bvecs;
959 put_ldev(mdev);
960 }
b411b363 961
b411b363 962 blk_queue_logical_block_size(q, 512);
1816a2b4
LE
963 blk_queue_max_hw_sectors(q, max_hw_sectors);
964 /* This is the workaround for "bio would need to, but cannot, be split" */
965 blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
966 blk_queue_segment_boundary(q, PAGE_CACHE_SIZE-1);
b411b363 967
99432fcc
PR
968 if (get_ldev_if_state(mdev, D_ATTACHING)) {
969 struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue;
970
971 blk_queue_stack_limits(q, b);
972
973 if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) {
974 dev_info(DEV, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n",
975 q->backing_dev_info.ra_pages,
976 b->backing_dev_info.ra_pages);
977 q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages;
978 }
979 put_ldev(mdev);
980 }
981}
982
983void drbd_reconsider_max_bio_size(struct drbd_conf *mdev)
984{
985 int now, new, local, peer;
986
987 now = queue_max_hw_sectors(mdev->rq_queue) << 9;
988 local = mdev->local_max_bio_size; /* Eventually last known value, from volatile memory */
989 peer = mdev->peer_max_bio_size; /* Eventually last known value, from meta data */
b411b363 990
99432fcc
PR
991 if (get_ldev_if_state(mdev, D_ATTACHING)) {
992 local = queue_max_hw_sectors(mdev->ldev->backing_bdev->bd_disk->queue) << 9;
993 mdev->local_max_bio_size = local;
994 put_ldev(mdev);
b411b363 995 }
99432fcc
PR
996
997 /* We may ignore peer limits if the peer is modern enough.
998 Because new from 8.3.8 onwards the peer can use multiple
999 BIOs for a single peer_request */
1000 if (mdev->state.conn >= C_CONNECTED) {
31890f4a 1001 if (mdev->tconn->agreed_pro_version < 94)
99432fcc 1002 peer = mdev->peer_max_bio_size;
31890f4a 1003 else if (mdev->tconn->agreed_pro_version == 94)
99432fcc
PR
1004 peer = DRBD_MAX_SIZE_H80_PACKET;
1005 else /* drbd 8.3.8 onwards */
1006 peer = DRBD_MAX_BIO_SIZE;
1007 }
1008
1009 new = min_t(int, local, peer);
1010
1011 if (mdev->state.role == R_PRIMARY && new < now)
1012 dev_err(DEV, "ASSERT FAILED new < now; (%d < %d)\n", new, now);
1013
1014 if (new != now)
1015 dev_info(DEV, "max BIO size = %u\n", new);
1016
1017 drbd_setup_queue_param(mdev, new);
b411b363
PR
1018}
1019
1020/* serialize deconfig (worker exiting, doing cleanup)
1021 * and reconfig (drbdsetup disk, drbdsetup net)
1022 *
c518d04f
LE
1023 * Wait for a potentially exiting worker, then restart it,
1024 * or start a new one. Flush any pending work, there may still be an
1025 * after_state_change queued.
b411b363 1026 */
0e29d163 1027static void conn_reconfig_start(struct drbd_tconn *tconn)
b411b363 1028{
0e29d163
PR
1029 wait_event(tconn->ping_wait, !test_and_set_bit(CONFIG_PENDING, &tconn->flags));
1030 wait_event(tconn->ping_wait, !test_bit(OBJECT_DYING, &tconn->flags));
1031 drbd_thread_start(&tconn->worker);
1032 conn_flush_workqueue(tconn);
b411b363
PR
1033}
1034
1035/* if still unconfigured, stops worker again.
1036 * if configured now, clears CONFIG_PENDING.
1037 * wakes potential waiters */
0e29d163 1038static void conn_reconfig_done(struct drbd_tconn *tconn)
b411b363 1039{
0e29d163
PR
1040 spin_lock_irq(&tconn->req_lock);
1041 if (conn_all_vols_unconf(tconn)) {
1042 set_bit(OBJECT_DYING, &tconn->flags);
1043 drbd_thread_stop_nowait(&tconn->worker);
b411b363 1044 } else
0e29d163
PR
1045 clear_bit(CONFIG_PENDING, &tconn->flags);
1046 spin_unlock_irq(&tconn->req_lock);
1047 wake_up(&tconn->ping_wait);
b411b363
PR
1048}
1049
0778286a
PR
1050/* Make sure IO is suspended before calling this function(). */
1051static void drbd_suspend_al(struct drbd_conf *mdev)
1052{
1053 int s = 0;
1054
61610420 1055 if (!lc_try_lock(mdev->act_log)) {
0778286a
PR
1056 dev_warn(DEV, "Failed to lock al in drbd_suspend_al()\n");
1057 return;
1058 }
1059
61610420 1060 drbd_al_shrink(mdev);
87eeee41 1061 spin_lock_irq(&mdev->tconn->req_lock);
0778286a
PR
1062 if (mdev->state.conn < C_CONNECTED)
1063 s = !test_and_set_bit(AL_SUSPENDED, &mdev->flags);
87eeee41 1064 spin_unlock_irq(&mdev->tconn->req_lock);
61610420 1065 lc_unlock(mdev->act_log);
0778286a
PR
1066
1067 if (s)
1068 dev_info(DEV, "Suspended AL updates\n");
1069}
1070
f399002e
LE
1071int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
1072{
1073 enum drbd_ret_code retcode;
1074 struct drbd_conf *mdev;
1075 struct disk_conf *ndc; /* new disk conf */
1076 int err, fifo_size;
1077 int *rs_plan_s = NULL;
1078
1079 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
1080 if (!adm_ctx.reply_skb)
1081 return retcode;
1082 if (retcode != NO_ERROR)
1083 goto out;
1084
1085 mdev = adm_ctx.mdev;
1086
1087 /* we also need a disk
1088 * to change the options on */
1089 if (!get_ldev(mdev)) {
1090 retcode = ERR_NO_DISK;
1091 goto out;
1092 }
1093
1094/* FIXME freeze IO, cluster wide.
1095 *
1096 * We should make sure no-one uses
1097 * some half-updated struct when we
1098 * assign it later. */
1099
1100 ndc = kmalloc(sizeof(*ndc), GFP_KERNEL);
1101 if (!ndc) {
1102 retcode = ERR_NOMEM;
1103 goto fail;
1104 }
1105
1106 memcpy(ndc, &mdev->ldev->dc, sizeof(*ndc));
1107 err = disk_conf_from_attrs_for_change(ndc, info);
1108 if (err) {
1109 retcode = ERR_MANDATORY_TAG;
1110 drbd_msg_put_info(from_attrs_err_to_txt(err));
1111 }
1112
1113 if (!expect(ndc->resync_rate >= 1))
1114 ndc->resync_rate = 1;
1115
1116 /* clip to allowed range */
1117 if (!expect(ndc->al_extents >= DRBD_AL_EXTENTS_MIN))
1118 ndc->al_extents = DRBD_AL_EXTENTS_MIN;
1119 if (!expect(ndc->al_extents <= DRBD_AL_EXTENTS_MAX))
1120 ndc->al_extents = DRBD_AL_EXTENTS_MAX;
1121
1122 /* most sanity checks done, try to assign the new sync-after
1123 * dependency. need to hold the global lock in there,
1124 * to avoid a race in the dependency loop check. */
1125 retcode = drbd_alter_sa(mdev, ndc->resync_after);
1126 if (retcode != NO_ERROR)
1127 goto fail;
1128
1129 fifo_size = (ndc->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
1130 if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
1131 rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
1132 if (!rs_plan_s) {
1133 dev_err(DEV, "kmalloc of fifo_buffer failed");
1134 retcode = ERR_NOMEM;
1135 goto fail;
1136 }
1137 }
1138
1139 if (fifo_size != mdev->rs_plan_s.size) {
1140 kfree(mdev->rs_plan_s.values);
1141 mdev->rs_plan_s.values = rs_plan_s;
1142 mdev->rs_plan_s.size = fifo_size;
1143 mdev->rs_planed = 0;
1144 rs_plan_s = NULL;
1145 }
1146
1147 wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
1148 drbd_al_shrink(mdev);
1149 err = drbd_check_al_size(mdev, ndc);
1150 lc_unlock(mdev->act_log);
1151 wake_up(&mdev->al_wait);
1152
1153 if (err) {
1154 retcode = ERR_NOMEM;
1155 goto fail;
1156 }
1157
1158 /* FIXME
1159 * To avoid someone looking at a half-updated struct, we probably
1160 * should have a rw-semaphor on net_conf and disk_conf.
1161 */
1162 mdev->ldev->dc = *ndc;
1163
1164 drbd_md_sync(mdev);
1165
1166
1167 if (mdev->state.conn >= C_CONNECTED)
1168 drbd_send_sync_param(mdev);
1169
1170 fail:
1171 put_ldev(mdev);
1172 kfree(ndc);
1173 kfree(rs_plan_s);
1174 out:
1175 drbd_adm_finish(info, retcode);
1176 return 0;
1177}
1178
/*
 * drbd_adm_attach() - netlink .doit handler: attach a backing device
 *
 * Parses the disk_conf attributes of the request into a freshly allocated
 * struct drbd_backing_dev, opens the backing and meta-data block devices
 * exclusively, validates their sizes, moves the disk state to D_ATTACHING,
 * reads meta data and activity log, installs the new backing device as
 * mdev->ldev and finally sets the resulting disk state.
 *
 * Returns the value of drbd_adm_prepare() when no reply skb is available,
 * 0 otherwise; the outcome is handed to drbd_adm_finish() as retcode.
 */
3b98c0c2 1179int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
b411b363 1180{
3b98c0c2
LE
1181 struct drbd_conf *mdev;
1182 int err;
116676ca 1183 enum drbd_ret_code retcode;
b411b363
PR
1184 enum determine_dev_size dd;
1185 sector_t max_possible_sectors;
1186 sector_t min_md_device_sectors;
1187 struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */
e525fd89 1188 struct block_device *bdev;
b411b363
PR
1189 struct lru_cache *resync_lru = NULL;
1190 union drbd_state ns, os;
f2024e7c 1191 enum drbd_state_rv rv;
b411b363 1192 int cp_discovered = 0;
b411b363 1193
3b98c0c2
LE
1194 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
1195 if (!adm_ctx.reply_skb)
1196 return retcode;
1197 if (retcode != NO_ERROR)
40cbf085 1198 goto finish;
3b98c0c2
LE
1199
1200 mdev = adm_ctx.mdev;
0e29d163 1201 conn_reconfig_start(mdev->tconn);
b411b363
PR
1202
1203 /* if you want to reconfigure, please tear down first */
1204 if (mdev->state.disk > D_DISKLESS) {
1205 retcode = ERR_DISK_CONFIGURED;
1206 goto fail;
1207 }
82f59cc6
LE
1208 /* It may just now have detached because of IO error. Make sure
1209 * drbd_ldev_destroy is done already, we may end up here very fast,
1210 * e.g. if someone calls attach from the on-io-error handler,
1211 * to realize a "hot spare" feature (not that I'd recommend that) */
1212 wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt));
b411b363 1213
3b98c0c2 1214 /* allocation not in the IO path, drbdsetup context */
b411b363
PR
1215 nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL);
1216 if (!nbc) {
1217 retcode = ERR_NOMEM;
1218 goto fail;
1219 }
1220
f399002e
LE
/* Defaults for all disk options. This is a positional initializer, so the
 * values have to stay in the member order of struct disk_conf. */
1221 nbc->dc = (struct disk_conf) {
1222 {}, 0, /* backing_dev */
1223 {}, 0, /* meta_dev */
1224 0, /* meta_dev_idx */
1225 DRBD_DISK_SIZE_SECT_DEF, /* disk_size */
1226 DRBD_MAX_BIO_BVECS_DEF, /* max_bio_bvecs */
1227 DRBD_ON_IO_ERROR_DEF, /* on_io_error */
1228 DRBD_FENCING_DEF, /* fencing */
1229 DRBD_RATE_DEF, /* resync_rate */
1230 DRBD_AFTER_DEF, /* resync_after */
1231 DRBD_AL_EXTENTS_DEF, /* al_extents */
1232 DRBD_C_PLAN_AHEAD_DEF, /* c_plan_ahead */
1233 DRBD_C_DELAY_TARGET_DEF, /* c_delay_target */
1234 DRBD_C_FILL_TARGET_DEF, /* c_fill_target */
1235 DRBD_C_MAX_RATE_DEF, /* c_max_rate */
1236 DRBD_C_MIN_RATE_DEF, /* c_min_rate */
1237 0, /* no_disk_barrier */
1238 0, /* no_disk_flush */
1239 0, /* no_disk_drain */
1240 0, /* no_md_flush */
1241 };
1242
1243 err = disk_conf_from_attrs(&nbc->dc, info);
3b98c0c2 1244 if (err) {
b411b363 1245 retcode = ERR_MANDATORY_TAG;
3b98c0c2 1246 drbd_msg_put_info(from_attrs_err_to_txt(err));
b411b363
PR
1247 goto fail;
1248 }
1249
3b98c0c2 1250 if ((int)nbc->dc.meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) {
b411b363
PR
1251 retcode = ERR_MD_IDX_INVALID;
1252 goto fail;
1253 }
1254
/* With a net configuration present, refuse stonith fencing together with
 * protocol A. */
b2fb6dbe 1255 if (get_net_conf(mdev->tconn)) {
89e58e75 1256 int prot = mdev->tconn->net_conf->wire_protocol;
b2fb6dbe 1257 put_net_conf(mdev->tconn);
47ff2d0a
PR
1258 if (nbc->dc.fencing == FP_STONITH && prot == DRBD_PROT_A) {
1259 retcode = ERR_STONITH_AND_PROT_A;
1260 goto fail;
1261 }
1262 }
1263
d4d77629
TH
1264 bdev = blkdev_get_by_path(nbc->dc.backing_dev,
1265 FMODE_READ | FMODE_WRITE | FMODE_EXCL, mdev);
e525fd89 1266 if (IS_ERR(bdev)) {
b411b363 1267 dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.backing_dev,
e525fd89 1268 PTR_ERR(bdev));
b411b363
PR
1269 retcode = ERR_OPEN_DISK;
1270 goto fail;
1271 }
e525fd89
TH
1272 nbc->backing_bdev = bdev;
1273
1274 /*
1275 * meta_dev_idx >= 0: external fixed size, possibly multiple
1276 * drbd sharing one meta device. TODO in that case, paranoia
1277 * check that [md_bdev, meta_dev_idx] is not yet used by some
1278 * other drbd minor! (if you use drbd.conf + drbdadm, that
1279 * should check it for you already; but if you don't, or
1280 * someone fooled it, we need to double check here)
1281 */
d4d77629
TH
1282 bdev = blkdev_get_by_path(nbc->dc.meta_dev,
1283 FMODE_READ | FMODE_WRITE | FMODE_EXCL,
3b98c0c2 1284 ((int)nbc->dc.meta_dev_idx < 0) ?
d4d77629 1285 (void *)mdev : (void *)drbd_m_holder);
e525fd89 1286 if (IS_ERR(bdev)) {
b411b363 1287 dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.meta_dev,
e525fd89 1288 PTR_ERR(bdev));
b411b363
PR
1289 retcode = ERR_OPEN_MD_DISK;
1290 goto fail;
1291 }
e525fd89 1292 nbc->md_bdev = bdev;
b411b363 1293
e525fd89
TH
/* Backing and meta-data device must be the same block device exactly when
 * the index is DRBD_MD_INDEX_INTERNAL or DRBD_MD_INDEX_FLEX_INT; any other
 * combination is rejected. */
1294 if ((nbc->backing_bdev == nbc->md_bdev) !=
1295 (nbc->dc.meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
1296 nbc->dc.meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) {
1297 retcode = ERR_MD_IDX_INVALID;
b411b363
PR
1298 goto fail;
1299 }
1300
1301 resync_lru = lc_create("resync", drbd_bm_ext_cache,
46a15bc3 1302 1, 61, sizeof(struct bm_extent),
b411b363
PR
1303 offsetof(struct bm_extent, lce));
1304 if (!resync_lru) {
1305 retcode = ERR_NOMEM;
e525fd89 1306 goto fail;
b411b363
PR
1307 }
1308
1309 /* RT - for drbd_get_max_capacity() DRBD_MD_INDEX_FLEX_INT */
1310 drbd_md_set_sector_offsets(mdev, nbc);
1311
1312 if (drbd_get_max_capacity(nbc) < nbc->dc.disk_size) {
1313 dev_err(DEV, "max capacity %llu smaller than disk size %llu\n",
1314 (unsigned long long) drbd_get_max_capacity(nbc),
1315 (unsigned long long) nbc->dc.disk_size);
1316 retcode = ERR_DISK_TO_SMALL;
e525fd89 1317 goto fail;
b411b363
PR
1318 }
1319
3b98c0c2 1320 if ((int)nbc->dc.meta_dev_idx < 0) {
b411b363
PR
1321 max_possible_sectors = DRBD_MAX_SECTORS_FLEX;
1322 /* at least one MB, otherwise it does not make sense */
1323 min_md_device_sectors = (2<<10);
1324 } else {
1325 max_possible_sectors = DRBD_MAX_SECTORS;
1326 min_md_device_sectors = MD_RESERVED_SECT * (nbc->dc.meta_dev_idx + 1);
1327 }
1328
b411b363
PR
1329 if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) {
1330 retcode = ERR_MD_DISK_TO_SMALL;
1331 dev_warn(DEV, "refusing attach: md-device too small, "
1332 "at least %llu sectors needed for this meta-disk type\n",
1333 (unsigned long long) min_md_device_sectors);
e525fd89 1334 goto fail;
b411b363
PR
1335 }
1336
1337 /* Make sure the new disk is big enough
1338 * (we may currently be R_PRIMARY with no local disk...) */
1339 if (drbd_get_max_capacity(nbc) <
1340 drbd_get_capacity(mdev->this_bdev)) {
1341 retcode = ERR_DISK_TO_SMALL;
e525fd89 1342 goto fail;
b411b363
PR
1343 }
1344
1345 nbc->known_size = drbd_get_capacity(nbc->backing_bdev);
1346
1352994b
LE
/* Only a warning is issued here; known_size itself is not modified. */
1347 if (nbc->known_size > max_possible_sectors) {
1348 dev_warn(DEV, "==> truncating very big lower level device "
1349 "to currently maximum possible %llu sectors <==\n",
1350 (unsigned long long) max_possible_sectors);
3b98c0c2 1351 if ((int)nbc->dc.meta_dev_idx >= 0)
1352994b
LE
1352 dev_warn(DEV, "==>> using internal or flexible "
1353 "meta data may help <<==\n");
1354 }
1355
b411b363
PR
1356 drbd_suspend_io(mdev);
1357 /* also wait for the last barrier ack. */
fb22c402 1358 wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_pending_cnt) || is_susp(mdev->state));
b411b363 1359 /* and for any other previously queued work */
a21e9298 1360 drbd_flush_workqueue(mdev);
b411b363 1361
f2024e7c
AG
/* Request D_ATTACHING while IO is suspended; IO is resumed again whether
 * or not the state change succeeded. */
1362 rv = _drbd_request_state(mdev, NS(disk, D_ATTACHING), CS_VERBOSE);
1363 retcode = rv; /* FIXME: Type mismatch. */
b411b363 1364 drbd_resume_io(mdev);
f2024e7c 1365 if (rv < SS_SUCCESS)
e525fd89 1366 goto fail;
b411b363
PR
1367
1368 if (!get_ldev_if_state(mdev, D_ATTACHING))
1369 goto force_diskless;
1370
1371 drbd_md_set_sector_offsets(mdev, nbc);
1372
1373 if (!mdev->bitmap) {
1374 if (drbd_bm_init(mdev)) {
1375 retcode = ERR_NOMEM;
1376 goto force_diskless_dec;
1377 }
1378 }
1379
1380 retcode = drbd_md_read(mdev, nbc);
1381 if (retcode != NO_ERROR)
1382 goto force_diskless_dec;
1383
1384 if (mdev->state.conn < C_CONNECTED &&
1385 mdev->state.role == R_PRIMARY &&
1386 (mdev->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) {
1387 dev_err(DEV, "Can only attach to data with current UUID=%016llX\n",
1388 (unsigned long long)mdev->ed_uuid);
1389 retcode = ERR_DATA_NOT_CURRENT;
1390 goto force_diskless_dec;
1391 }
1392
1393 /* Since we are diskless, fix the activity log first... */
f399002e 1394 if (drbd_check_al_size(mdev, &nbc->dc)) {
b411b363
PR
1395 retcode = ERR_NOMEM;
1396 goto force_diskless_dec;
1397 }
1398
1399 /* Prevent shrinking of consistent devices ! */
1400 if (drbd_md_test_flag(nbc, MDF_CONSISTENT) &&
a393db6f 1401 drbd_new_dev_size(mdev, nbc, 0) < nbc->md.la_size_sect) {
b411b363
PR
1402 dev_warn(DEV, "refusing to truncate a consistent device\n");
1403 retcode = ERR_DISK_TO_SMALL;
1404 goto force_diskless_dec;
1405 }
1406
1407 if (!drbd_al_read_log(mdev, nbc)) {
1408 retcode = ERR_IO_MD_DISK;
1409 goto force_diskless_dec;
1410 }
1411
b411b363
PR
1412 /* Reset the "barriers don't work" bits here, then force meta data to
1413 * be written, to ensure we determine if barriers are supported. */
1414 if (nbc->dc.no_md_flush)
a8a4e51e 1415 set_bit(MD_NO_FUA, &mdev->flags);
b411b363 1416 else
a8a4e51e 1417 clear_bit(MD_NO_FUA, &mdev->flags);
b411b363
PR
1418
1419 /* Point of no return reached.
1420 * Devices and memory are no longer released by error cleanup below.
1421 * now mdev takes over responsibility, and the state engine should
1422 * clean it up somewhere. */
1423 D_ASSERT(mdev->ldev == NULL);
1424 mdev->ldev = nbc;
1425 mdev->resync = resync_lru;
1426 nbc = NULL;
1427 resync_lru = NULL;
1428
2451fc3b
PR
1429 mdev->write_ordering = WO_bdev_flush;
1430 drbd_bump_write_ordering(mdev, WO_bdev_flush);
b411b363
PR
1431
1432 if (drbd_md_test_flag(mdev->ldev, MDF_CRASHED_PRIMARY))
1433 set_bit(CRASHED_PRIMARY, &mdev->flags);
1434 else
1435 clear_bit(CRASHED_PRIMARY, &mdev->flags);
1436
894c6a94 1437 if (drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND) &&
fb22c402 1438 !(mdev->state.role == R_PRIMARY && mdev->state.susp_nod)) {
b411b363
PR
1439 set_bit(CRASHED_PRIMARY, &mdev->flags);
1440 cp_discovered = 1;
1441 }
1442
1443 mdev->send_cnt = 0;
1444 mdev->recv_cnt = 0;
1445 mdev->read_cnt = 0;
1446 mdev->writ_cnt = 0;
1447
99432fcc 1448 drbd_reconsider_max_bio_size(mdev);
b411b363
PR
1449
1450 /* If I am currently not R_PRIMARY,
1451 * but meta data primary indicator is set,
1452 * I just now recover from a hard crash,
1453 * and have been R_PRIMARY before that crash.
1454 *
1455 * Now, if I had no connection before that crash
1456 * (have been degraded R_PRIMARY), chances are that
1457 * I won't find my peer now either.
1458 *
1459 * In that case, and _only_ in that case,
1460 * we use the degr-wfc-timeout instead of the default,
1461 * so we can automatically recover from a crash of a
1462 * degraded but active "cluster" after a certain timeout.
1463 */
1464 clear_bit(USE_DEGR_WFC_T, &mdev->flags);
1465 if (mdev->state.role != R_PRIMARY &&
1466 drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND) &&
1467 !drbd_md_test_flag(mdev->ldev, MDF_CONNECTED_IND))
1468 set_bit(USE_DEGR_WFC_T, &mdev->flags);
1469
24c4830c 1470 dd = drbd_determine_dev_size(mdev, 0);
b411b363
PR
1471 if (dd == dev_size_error) {
1472 retcode = ERR_NOMEM_BITMAP;
1473 goto force_diskless_dec;
1474 } else if (dd == grew)
1475 set_bit(RESYNC_AFTER_NEG, &mdev->flags);
1476
1477 if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) {
1478 dev_info(DEV, "Assuming that all blocks are out of sync "
1479 "(aka FullSync)\n");
20ceb2b2
LE
1480 if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write,
1481 "set_n_write from attaching", BM_LOCKED_MASK)) {
b411b363
PR
1482 retcode = ERR_IO_MD_DISK;
1483 goto force_diskless_dec;
1484 }
1485 } else {
20ceb2b2 1486 if (drbd_bitmap_io(mdev, &drbd_bm_read,
22ab6a30 1487 "read from attaching", BM_LOCKED_MASK)) {
b411b363
PR
1488 retcode = ERR_IO_MD_DISK;
1489 goto force_diskless_dec;
1490 }
1491 }
1492
1493 if (cp_discovered) {
1494 drbd_al_apply_to_bm(mdev);
20ceb2b2
LE
1495 if (drbd_bitmap_io(mdev, &drbd_bm_write,
1496 "crashed primary apply AL", BM_LOCKED_MASK)) {
19f843aa
LE
1497 retcode = ERR_IO_MD_DISK;
1498 goto force_diskless_dec;
1499 }
b411b363
PR
1500 }
1501
0778286a
PR
1502 if (_drbd_bm_total_weight(mdev) == drbd_bm_bits(mdev))
1503 drbd_suspend_al(mdev); /* IO is still suspended here... */
1504
87eeee41 1505 spin_lock_irq(&mdev->tconn->req_lock);
b411b363
PR
1506 os = mdev->state;
1507 ns.i = os.i;
1508 /* If MDF_CONSISTENT is not set go into inconsistent state,
1509 otherwise investigate MDF_WasUpToDate...
1510 If MDF_WAS_UP_TO_DATE is not set go into D_OUTDATED disk state,
1511 otherwise into D_CONSISTENT state.
1512 */
1513 if (drbd_md_test_flag(mdev->ldev, MDF_CONSISTENT)) {
1514 if (drbd_md_test_flag(mdev->ldev, MDF_WAS_UP_TO_DATE))
1515 ns.disk = D_CONSISTENT;
1516 else
1517 ns.disk = D_OUTDATED;
1518 } else {
1519 ns.disk = D_INCONSISTENT;
1520 }
1521
1522 if (drbd_md_test_flag(mdev->ldev, MDF_PEER_OUT_DATED))
1523 ns.pdsk = D_OUTDATED;
1524
1525 if ( ns.disk == D_CONSISTENT &&
1526 (ns.pdsk == D_OUTDATED || mdev->ldev->dc.fencing == FP_DONT_CARE))
1527 ns.disk = D_UP_TO_DATE;
1528
1529 /* All tests on MDF_PRIMARY_IND, MDF_CONNECTED_IND,
1530 MDF_CONSISTENT and MDF_WAS_UP_TO_DATE must happen before
1531 this point, because drbd_request_state() modifies these
1532 flags. */
1533
1534 /* In case we are C_CONNECTED postpone any decision on the new disk
1535 state after the negotiation phase. */
1536 if (mdev->state.conn == C_CONNECTED) {
1537 mdev->new_state_tmp.i = ns.i;
1538 ns.i = os.i;
1539 ns.disk = D_NEGOTIATING;
dc66c74d
PR
1540
1541 /* We expect to receive up-to-date UUIDs soon.
1542 To avoid a race in receive_state, free p_uuid while
1543 holding req_lock. I.e. atomic with the state change */
1544 kfree(mdev->p_uuid);
1545 mdev->p_uuid = NULL;
b411b363
PR
1546 }
1547
1548 rv = _drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
1549 ns = mdev->state;
87eeee41 1550 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363
PR
1551
/* NOTE(review): retcode is not assigned on this failure path, so the reply
 * still carries the value left by the last successful step - confirm that
 * this is intended. */
1552 if (rv < SS_SUCCESS)
1553 goto force_diskless_dec;
1554
1555 if (mdev->state.role == R_PRIMARY)
1556 mdev->ldev->md.uuid[UI_CURRENT] |= (u64)1;
1557 else
1558 mdev->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
1559
1560 drbd_md_mark_dirty(mdev);
1561 drbd_md_sync(mdev);
1562
1563 kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
1564 put_ldev(mdev);
0e29d163 1565 conn_reconfig_done(mdev->tconn);
3b98c0c2 1566 drbd_adm_finish(info, retcode);
b411b363
PR
1567 return 0;
1568
/* Error unwinding: force_diskless_dec drops the ldev reference taken above,
 * force_diskless forces the disk state to D_FAILED, and fail releases the
 * block devices, nbc and resync_lru as far as this function still owns
 * them (both pointers are NULL after the point of no return). */
1569 force_diskless_dec:
1570 put_ldev(mdev);
1571 force_diskless:
82f59cc6 1572 drbd_force_state(mdev, NS(disk, D_FAILED));
b411b363 1573 drbd_md_sync(mdev);
b411b363 1574 fail:
40cbf085 1575 conn_reconfig_done(mdev->tconn);
b411b363 1576 if (nbc) {
e525fd89
TH
1577 if (nbc->backing_bdev)
1578 blkdev_put(nbc->backing_bdev,
1579 FMODE_READ | FMODE_WRITE | FMODE_EXCL);
1580 if (nbc->md_bdev)
1581 blkdev_put(nbc->md_bdev,
1582 FMODE_READ | FMODE_WRITE | FMODE_EXCL);
b411b363
PR
1583 kfree(nbc);
1584 }
1585 lc_destroy(resync_lru);
1586
40cbf085 1587 finish:
3b98c0c2 1588 drbd_adm_finish(info, retcode);
b411b363
PR
1589 return 0;
1590}
1591
85f75dd7
LE
1592static int adm_detach(struct drbd_conf *mdev)
1593{
1594 enum drbd_ret_code retcode;
1595 drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */
1596 retcode = drbd_request_state(mdev, NS(disk, D_DISKLESS));
1597 wait_event(mdev->misc_wait,
1598 mdev->state.disk != D_DISKLESS ||
1599 !atomic_read(&mdev->local_cnt));
1600 drbd_resume_io(mdev);
1601 return retcode;
1602}
1603
82f59cc6
LE
1604/* Detaching the disk is a process in multiple stages. First we need to lock
1605 * out application IO, in-flight IO, IO stuck in drbd_al_begin_io.
1606 * Then we transition to D_DISKLESS, and wait for put_ldev() to return all
1607 * internal references as well.
1608 * Only then we have finally detached. */
3b98c0c2 1609int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info)
b411b363 1610{
9a0d9d03 1611 enum drbd_ret_code retcode;
3b98c0c2
LE
1612
1613 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
1614 if (!adm_ctx.reply_skb)
1615 return retcode;
1616 if (retcode != NO_ERROR)
1617 goto out;
1618
85f75dd7 1619 retcode = adm_detach(adm_ctx.mdev);
3b98c0c2
LE
1620out:
1621 drbd_adm_finish(info, retcode);
b411b363
PR
1622 return 0;
1623}
1624
f399002e
LE
1625static bool conn_resync_running(struct drbd_tconn *tconn)
1626{
1627 struct drbd_conf *mdev;
1628 int vnr;
1629
1630 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
1631 if (mdev->state.conn == C_SYNC_SOURCE ||
1632 mdev->state.conn == C_SYNC_TARGET ||
1633 mdev->state.conn == C_PAUSED_SYNC_S ||
1634 mdev->state.conn == C_PAUSED_SYNC_T)
1635 return true;
1636 }
1637 return false;
1638}
1639
1640static bool conn_ov_running(struct drbd_tconn *tconn)
1641{
1642 struct drbd_conf *mdev;
1643 int vnr;
1644
1645 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
1646 if (mdev->state.conn == C_VERIFY_S ||
1647 mdev->state.conn == C_VERIFY_T)
1648 return true;
1649 }
1650 return false;
1651}
1652
1653int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
1654{
1655 enum drbd_ret_code retcode;
1656 struct drbd_tconn *tconn;
1657 struct net_conf *new_conf = NULL;
1658 int err;
1659 int ovr; /* online verify running */
1660 int rsr; /* re-sync running */
1661 struct crypto_hash *verify_tfm = NULL;
1662 struct crypto_hash *csums_tfm = NULL;
1663
1664
1665 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN);
1666 if (!adm_ctx.reply_skb)
1667 return retcode;
1668 if (retcode != NO_ERROR)
1669 goto out;
1670
1671 tconn = adm_ctx.tconn;
1672
1673 new_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
1674 if (!new_conf) {
1675 retcode = ERR_NOMEM;
1676 goto out;
1677 }
1678
1679 /* we also need a net config
1680 * to change the options on */
1681 if (!get_net_conf(tconn)) {
1682 drbd_msg_put_info("net conf missing, try connect");
1683 retcode = ERR_INVALID_REQUEST;
1684 goto out;
1685 }
1686
1687 conn_reconfig_start(tconn);
1688
1689 memcpy(new_conf, tconn->net_conf, sizeof(*new_conf));
1690 err = net_conf_from_attrs_for_change(new_conf, info);
1691 if (err) {
1692 retcode = ERR_MANDATORY_TAG;
1693 drbd_msg_put_info(from_attrs_err_to_txt(err));
1694 goto fail;
1695 }
1696
1697 /* re-sync running */
1698 rsr = conn_resync_running(tconn);
1699 if (rsr && strcmp(new_conf->csums_alg, tconn->net_conf->csums_alg)) {
1700 retcode = ERR_CSUMS_RESYNC_RUNNING;
1701 goto fail;
1702 }
1703
1704 if (!rsr && new_conf->csums_alg[0]) {
1705 csums_tfm = crypto_alloc_hash(new_conf->csums_alg, 0, CRYPTO_ALG_ASYNC);
1706 if (IS_ERR(csums_tfm)) {
1707 csums_tfm = NULL;
1708 retcode = ERR_CSUMS_ALG;
1709 goto fail;
1710 }
1711
1712 if (!drbd_crypto_is_hash(crypto_hash_tfm(csums_tfm))) {
1713 retcode = ERR_CSUMS_ALG_ND;
1714 goto fail;
1715 }
1716 }
1717
1718 /* online verify running */
1719 ovr = conn_ov_running(tconn);
1720 if (ovr) {
1721 if (strcmp(new_conf->verify_alg, tconn->net_conf->verify_alg)) {
1722 retcode = ERR_VERIFY_RUNNING;
1723 goto fail;
1724 }
1725 }
1726
1727 if (!ovr && new_conf->verify_alg[0]) {
1728 verify_tfm = crypto_alloc_hash(new_conf->verify_alg, 0, CRYPTO_ALG_ASYNC);
1729 if (IS_ERR(verify_tfm)) {
1730 verify_tfm = NULL;
1731 retcode = ERR_VERIFY_ALG;
1732 goto fail;
1733 }
1734
1735 if (!drbd_crypto_is_hash(crypto_hash_tfm(verify_tfm))) {
1736 retcode = ERR_VERIFY_ALG_ND;
1737 goto fail;
1738 }
1739 }
1740
1741
1742 /* For now, use struct assignment, not pointer assignment.
1743 * We don't have any means to determine who might still
1744 * keep a local alias into the struct,
1745 * so we cannot just free it and hope for the best :(
1746 * FIXME
1747 * To avoid someone looking at a half-updated struct, we probably
1748 * should have a rw-semaphor on net_conf and disk_conf.
1749 */
1750 *tconn->net_conf = *new_conf;
1751
1752 if (!rsr) {
1753 crypto_free_hash(tconn->csums_tfm);
1754 tconn->csums_tfm = csums_tfm;
1755 csums_tfm = NULL;
1756 }
1757 if (!ovr) {
1758 crypto_free_hash(tconn->verify_tfm);
1759 tconn->verify_tfm = verify_tfm;
1760 verify_tfm = NULL;
1761 }
1762
1763 if (tconn->cstate >= C_WF_REPORT_PARAMS)
1764 drbd_send_sync_param(minor_to_mdev(conn_lowest_minor(tconn)));
1765
1766 fail:
1767 crypto_free_hash(csums_tfm);
1768 crypto_free_hash(verify_tfm);
1769 kfree(new_conf);
1770 put_net_conf(tconn);
1771 conn_reconfig_done(tconn);
1772 out:
1773 drbd_adm_finish(info, retcode);
1774 return 0;
1775}
1776
3b98c0c2 1777int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
b411b363 1778{
3b98c0c2
LE
1779 char hmac_name[CRYPTO_MAX_ALG_NAME];
1780 struct drbd_conf *mdev;
b411b363
PR
1781 struct net_conf *new_conf = NULL;
1782 struct crypto_hash *tfm = NULL;
1783 struct crypto_hash *integrity_w_tfm = NULL;
1784 struct crypto_hash *integrity_r_tfm = NULL;
b411b363
PR
1785 void *int_dig_out = NULL;
1786 void *int_dig_in = NULL;
1787 void *int_dig_vv = NULL;
80883197 1788 struct drbd_tconn *oconn;
3b98c0c2 1789 struct drbd_tconn *tconn;
b411b363 1790 struct sockaddr *new_my_addr, *new_peer_addr, *taken_addr;
3b98c0c2
LE
1791 enum drbd_ret_code retcode;
1792 int i;
1793 int err;
b411b363 1794
3b98c0c2
LE
1795 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN);
1796 if (!adm_ctx.reply_skb)
1797 return retcode;
1798 if (retcode != NO_ERROR)
1799 goto out;
1800
1801 tconn = adm_ctx.tconn;
80883197 1802 conn_reconfig_start(tconn);
b411b363 1803
80883197 1804 if (tconn->cstate > C_STANDALONE) {
b411b363
PR
1805 retcode = ERR_NET_CONFIGURED;
1806 goto fail;
1807 }
1808
1809 /* allocation not in the IO path, cqueue thread context */
f399002e 1810 new_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
b411b363
PR
1811 if (!new_conf) {
1812 retcode = ERR_NOMEM;
1813 goto fail;
1814 }
1815
f399002e
LE
1816 *new_conf = (struct net_conf) {
1817 {}, 0, /* my_addr */
1818 {}, 0, /* peer_addr */
1819 {}, 0, /* shared_secret */
1820 {}, 0, /* cram_hmac_alg */
1821 {}, 0, /* integrity_alg */
1822 {}, 0, /* verify_alg */
1823 {}, 0, /* csums_alg */
1824 DRBD_PROTOCOL_DEF, /* wire_protocol */
1825 DRBD_CONNECT_INT_DEF, /* try_connect_int */
1826 DRBD_TIMEOUT_DEF, /* timeout */
1827 DRBD_PING_INT_DEF, /* ping_int */
1828 DRBD_PING_TIMEO_DEF, /* ping_timeo */
1829 DRBD_SNDBUF_SIZE_DEF, /* sndbuf_size */
1830 DRBD_RCVBUF_SIZE_DEF, /* rcvbuf_size */
1831 DRBD_KO_COUNT_DEF, /* ko_count */
1832 DRBD_MAX_BUFFERS_DEF, /* max_buffers */
1833 DRBD_MAX_EPOCH_SIZE_DEF, /* max_epoch_size */
1834 DRBD_UNPLUG_WATERMARK_DEF, /* unplug_watermark */
1835 DRBD_AFTER_SB_0P_DEF, /* after_sb_0p */
1836 DRBD_AFTER_SB_1P_DEF, /* after_sb_1p */
1837 DRBD_AFTER_SB_2P_DEF, /* after_sb_2p */
1838 DRBD_RR_CONFLICT_DEF, /* rr_conflict */
1839 DRBD_ON_CONGESTION_DEF, /* on_congestion */
1840 DRBD_CONG_FILL_DEF, /* cong_fill */
1841 DRBD_CONG_EXTENTS_DEF, /* cong_extents */
1842 0, /* two_primaries */
1843 0, /* want_lose */
1844 0, /* no_cork */
1845 0, /* always_asbp */
1846 0, /* dry_run */
1847 0, /* use_rle */
1848 };
1849
1850 err = net_conf_from_attrs(new_conf, info);
3b98c0c2 1851 if (err) {
b411b363 1852 retcode = ERR_MANDATORY_TAG;
3b98c0c2 1853 drbd_msg_put_info(from_attrs_err_to_txt(err));
b411b363
PR
1854 goto fail;
1855 }
1856
1857 if (new_conf->two_primaries
1858 && (new_conf->wire_protocol != DRBD_PROT_C)) {
1859 retcode = ERR_NOT_PROTO_C;
1860 goto fail;
47ff2d0a
PR
1861 }
1862
80883197
PR
1863 idr_for_each_entry(&tconn->volumes, mdev, i) {
1864 if (get_ldev(mdev)) {
1865 enum drbd_fencing_p fp = mdev->ldev->dc.fencing;
1866 put_ldev(mdev);
1867 if (new_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH) {
1868 retcode = ERR_STONITH_AND_PROT_A;
1869 goto fail;
1870 }
1871 }
1872 if (mdev->state.role == R_PRIMARY && new_conf->want_lose) {
1873 retcode = ERR_DISCARD;
47ff2d0a
PR
1874 goto fail;
1875 }
80883197
PR
1876 if (!mdev->bitmap) {
1877 if(drbd_bm_init(mdev)) {
1878 retcode = ERR_NOMEM;
1879 goto fail;
1880 }
1881 }
47ff2d0a 1882 }
b411b363 1883
422028b1
PR
1884 if (new_conf->on_congestion != OC_BLOCK && new_conf->wire_protocol != DRBD_PROT_A) {
1885 retcode = ERR_CONG_NOT_PROTO_A;
1886 goto fail;
1887 }
1888
b411b363
PR
1889 retcode = NO_ERROR;
1890
1891 new_my_addr = (struct sockaddr *)&new_conf->my_addr;
1892 new_peer_addr = (struct sockaddr *)&new_conf->peer_addr;
543cc10b
LE
1893
1894 /* No need to take drbd_cfg_mutex here. All reconfiguration is
1895 * strictly serialized on genl_lock(). We are protected against
1896 * concurrent reconfiguration/addition/deletion */
80883197
PR
1897 list_for_each_entry(oconn, &drbd_tconns, all_tconn) {
1898 if (oconn == tconn)
b411b363 1899 continue;
80883197
PR
1900 if (get_net_conf(oconn)) {
1901 taken_addr = (struct sockaddr *)&oconn->net_conf->my_addr;
1902 if (new_conf->my_addr_len == oconn->net_conf->my_addr_len &&
b411b363
PR
1903 !memcmp(new_my_addr, taken_addr, new_conf->my_addr_len))
1904 retcode = ERR_LOCAL_ADDR;
1905
80883197
PR
1906 taken_addr = (struct sockaddr *)&oconn->net_conf->peer_addr;
1907 if (new_conf->peer_addr_len == oconn->net_conf->peer_addr_len &&
b411b363
PR
1908 !memcmp(new_peer_addr, taken_addr, new_conf->peer_addr_len))
1909 retcode = ERR_PEER_ADDR;
1910
80883197 1911 put_net_conf(oconn);
b411b363
PR
1912 if (retcode != NO_ERROR)
1913 goto fail;
1914 }
1915 }
1916
1917 if (new_conf->cram_hmac_alg[0] != 0) {
1918 snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)",
1919 new_conf->cram_hmac_alg);
1920 tfm = crypto_alloc_hash(hmac_name, 0, CRYPTO_ALG_ASYNC);
1921 if (IS_ERR(tfm)) {
1922 tfm = NULL;
1923 retcode = ERR_AUTH_ALG;
1924 goto fail;
1925 }
1926
0798219f 1927 if (!drbd_crypto_is_hash(crypto_hash_tfm(tfm))) {
b411b363
PR
1928 retcode = ERR_AUTH_ALG_ND;
1929 goto fail;
1930 }
1931 }
1932
1933 if (new_conf->integrity_alg[0]) {
1934 integrity_w_tfm = crypto_alloc_hash(new_conf->integrity_alg, 0, CRYPTO_ALG_ASYNC);
1935 if (IS_ERR(integrity_w_tfm)) {
1936 integrity_w_tfm = NULL;
1937 retcode=ERR_INTEGRITY_ALG;
1938 goto fail;
1939 }
1940
1941 if (!drbd_crypto_is_hash(crypto_hash_tfm(integrity_w_tfm))) {
1942 retcode=ERR_INTEGRITY_ALG_ND;
1943 goto fail;
1944 }
1945
1946 integrity_r_tfm = crypto_alloc_hash(new_conf->integrity_alg, 0, CRYPTO_ALG_ASYNC);
1947 if (IS_ERR(integrity_r_tfm)) {
1948 integrity_r_tfm = NULL;
1949 retcode=ERR_INTEGRITY_ALG;
1950 goto fail;
1951 }
1952 }
1953
b411b363
PR
1954 ((char *)new_conf->shared_secret)[SHARED_SECRET_MAX-1] = 0;
1955
80883197 1956 /* allocation not in the IO path, cqueue thread context */
b411b363
PR
1957 if (integrity_w_tfm) {
1958 i = crypto_hash_digestsize(integrity_w_tfm);
1959 int_dig_out = kmalloc(i, GFP_KERNEL);
1960 if (!int_dig_out) {
1961 retcode = ERR_NOMEM;
1962 goto fail;
1963 }
1964 int_dig_in = kmalloc(i, GFP_KERNEL);
1965 if (!int_dig_in) {
1966 retcode = ERR_NOMEM;
1967 goto fail;
1968 }
1969 int_dig_vv = kmalloc(i, GFP_KERNEL);
1970 if (!int_dig_vv) {
1971 retcode = ERR_NOMEM;
1972 goto fail;
1973 }
1974 }
1975
80883197
PR
1976 conn_flush_workqueue(tconn);
1977 spin_lock_irq(&tconn->req_lock);
1978 if (tconn->net_conf != NULL) {
b411b363 1979 retcode = ERR_NET_CONFIGURED;
80883197 1980 spin_unlock_irq(&tconn->req_lock);
b411b363
PR
1981 goto fail;
1982 }
80883197 1983 tconn->net_conf = new_conf;
b411b363 1984
80883197
PR
1985 crypto_free_hash(tconn->cram_hmac_tfm);
1986 tconn->cram_hmac_tfm = tfm;
b411b363 1987
80883197
PR
1988 crypto_free_hash(tconn->integrity_w_tfm);
1989 tconn->integrity_w_tfm = integrity_w_tfm;
b411b363 1990
80883197
PR
1991 crypto_free_hash(tconn->integrity_r_tfm);
1992 tconn->integrity_r_tfm = integrity_r_tfm;
b411b363 1993
80883197
PR
1994 kfree(tconn->int_dig_out);
1995 kfree(tconn->int_dig_in);
1996 kfree(tconn->int_dig_vv);
1997 tconn->int_dig_out=int_dig_out;
1998 tconn->int_dig_in=int_dig_in;
1999 tconn->int_dig_vv=int_dig_vv;
2000 retcode = _conn_request_state(tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE);
2001 spin_unlock_irq(&tconn->req_lock);
b411b363 2002
80883197
PR
2003 idr_for_each_entry(&tconn->volumes, mdev, i) {
2004 mdev->send_cnt = 0;
2005 mdev->recv_cnt = 0;
2006 kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
2007 }
80883197 2008 conn_reconfig_done(tconn);
3b98c0c2 2009 drbd_adm_finish(info, retcode);
b411b363
PR
2010 return 0;
2011
2012fail:
2013 kfree(int_dig_out);
2014 kfree(int_dig_in);
2015 kfree(int_dig_vv);
2016 crypto_free_hash(tfm);
2017 crypto_free_hash(integrity_w_tfm);
2018 crypto_free_hash(integrity_r_tfm);
b411b363
PR
2019 kfree(new_conf);
2020
80883197 2021 conn_reconfig_done(tconn);
3b98c0c2
LE
2022out:
2023 drbd_adm_finish(info, retcode);
b411b363
PR
2024 return 0;
2025}
2026
85f75dd7
LE
2027static enum drbd_state_rv conn_try_disconnect(struct drbd_tconn *tconn, bool force)
2028{
2029 enum drbd_state_rv rv;
2030 if (force) {
2031 spin_lock_irq(&tconn->req_lock);
2032 if (tconn->cstate >= C_WF_CONNECTION)
2033 _conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
2034 spin_unlock_irq(&tconn->req_lock);
2035 return SS_SUCCESS;
2036 }
2037
2038 rv = conn_request_state(tconn, NS(conn, C_DISCONNECTING), 0);
2039
2040 switch (rv) {
2041 case SS_NOTHING_TO_DO:
2042 case SS_ALREADY_STANDALONE:
2043 return SS_SUCCESS;
2044 case SS_PRIMARY_NOP:
2045 /* Our state checking code wants to see the peer outdated. */
2046 rv = conn_request_state(tconn, NS2(conn, C_DISCONNECTING,
2047 pdsk, D_OUTDATED), CS_VERBOSE);
2048 break;
2049 case SS_CW_FAILED_BY_PEER:
2050 /* The peer probably wants to see us outdated. */
2051 rv = conn_request_state(tconn, NS2(conn, C_DISCONNECTING,
2052 disk, D_OUTDATED), 0);
2053 if (rv == SS_IS_DISKLESS || rv == SS_LOWER_THAN_OUTDATED) {
2054 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
2055 rv = SS_SUCCESS;
2056 }
2057 break;
2058 default:;
2059 /* no special handling necessary */
2060 }
2061
2062 return rv;
2063}
2064
3b98c0c2 2065int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
b411b363 2066{
3b98c0c2
LE
2067 struct disconnect_parms parms;
2068 struct drbd_tconn *tconn;
85f75dd7 2069 enum drbd_state_rv rv;
3b98c0c2
LE
2070 enum drbd_ret_code retcode;
2071 int err;
2561b9c1 2072
3b98c0c2
LE
2073 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN);
2074 if (!adm_ctx.reply_skb)
2075 return retcode;
2076 if (retcode != NO_ERROR)
2561b9c1 2077 goto fail;
3b98c0c2
LE
2078
2079 tconn = adm_ctx.tconn;
2080 memset(&parms, 0, sizeof(parms));
2081 if (info->attrs[DRBD_NLA_DISCONNECT_PARMS]) {
f399002e 2082 err = disconnect_parms_from_attrs(&parms, info);
3b98c0c2
LE
2083 if (err) {
2084 retcode = ERR_MANDATORY_TAG;
2085 drbd_msg_put_info(from_attrs_err_to_txt(err));
2086 goto fail;
2087 }
2561b9c1
PR
2088 }
2089
85f75dd7
LE
2090 rv = conn_try_disconnect(tconn, parms.force_disconnect);
2091 if (rv < SS_SUCCESS)
b411b363
PR
2092 goto fail;
2093
df24aa45
PR
2094 if (wait_event_interruptible(tconn->ping_wait,
2095 tconn->cstate != C_DISCONNECTING)) {
b411b363
PR
2096 /* Do not test for mdev->state.conn == C_STANDALONE, since
2097 someone else might connect us in the mean time! */
2098 retcode = ERR_INTR;
2099 goto fail;
2100 }
2101
b411b363
PR
2102 retcode = NO_ERROR;
2103 fail:
3b98c0c2 2104 drbd_adm_finish(info, retcode);
b411b363
PR
2105 return 0;
2106}
2107
2108void resync_after_online_grow(struct drbd_conf *mdev)
2109{
2110 int iass; /* I am sync source */
2111
2112 dev_info(DEV, "Resync of new storage after online grow\n");
2113 if (mdev->state.role != mdev->state.peer)
2114 iass = (mdev->state.role == R_PRIMARY);
2115 else
25703f83 2116 iass = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags);
b411b363
PR
2117
2118 if (iass)
2119 drbd_start_resync(mdev, C_SYNC_SOURCE);
2120 else
2121 _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE + CS_SERIALIZE);
2122}
2123
3b98c0c2 2124int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
b411b363 2125{
3b98c0c2
LE
2126 struct resize_parms rs;
2127 struct drbd_conf *mdev;
2128 enum drbd_ret_code retcode;
b411b363 2129 enum determine_dev_size dd;
6495d2c6 2130 enum dds_flags ddsf;
3b98c0c2 2131 int err;
b411b363 2132
3b98c0c2
LE
2133 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2134 if (!adm_ctx.reply_skb)
2135 return retcode;
2136 if (retcode != NO_ERROR)
b411b363 2137 goto fail;
3b98c0c2
LE
2138
2139 memset(&rs, 0, sizeof(struct resize_parms));
2140 if (info->attrs[DRBD_NLA_RESIZE_PARMS]) {
f399002e 2141 err = resize_parms_from_attrs(&rs, info);
3b98c0c2
LE
2142 if (err) {
2143 retcode = ERR_MANDATORY_TAG;
2144 drbd_msg_put_info(from_attrs_err_to_txt(err));
2145 goto fail;
2146 }
b411b363
PR
2147 }
2148
3b98c0c2 2149 mdev = adm_ctx.mdev;
b411b363
PR
2150 if (mdev->state.conn > C_CONNECTED) {
2151 retcode = ERR_RESIZE_RESYNC;
2152 goto fail;
2153 }
2154
2155 if (mdev->state.role == R_SECONDARY &&
2156 mdev->state.peer == R_SECONDARY) {
2157 retcode = ERR_NO_PRIMARY;
2158 goto fail;
2159 }
2160
2161 if (!get_ldev(mdev)) {
2162 retcode = ERR_NO_DISK;
2163 goto fail;
2164 }
2165
31890f4a 2166 if (rs.no_resync && mdev->tconn->agreed_pro_version < 93) {
6495d2c6
PR
2167 retcode = ERR_NEED_APV_93;
2168 goto fail;
2169 }
2170
087c2492 2171 if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev))
b411b363 2172 mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
b411b363
PR
2173
2174 mdev->ldev->dc.disk_size = (sector_t)rs.resize_size;
6495d2c6 2175 ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0);
24c4830c 2176 dd = drbd_determine_dev_size(mdev, ddsf);
b411b363
PR
2177 drbd_md_sync(mdev);
2178 put_ldev(mdev);
2179 if (dd == dev_size_error) {
2180 retcode = ERR_NOMEM_BITMAP;
2181 goto fail;
2182 }
2183
087c2492 2184 if (mdev->state.conn == C_CONNECTED) {
b411b363
PR
2185 if (dd == grew)
2186 set_bit(RESIZE_PENDING, &mdev->flags);
2187
2188 drbd_send_uuids(mdev);
6495d2c6 2189 drbd_send_sizes(mdev, 1, ddsf);
b411b363
PR
2190 }
2191
2192 fail:
3b98c0c2 2193 drbd_adm_finish(info, retcode);
b411b363
PR
2194 return 0;
2195}
2196
f399002e 2197int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info)
b411b363 2198{
3b98c0c2 2199 enum drbd_ret_code retcode;
b411b363 2200 cpumask_var_t new_cpu_mask;
f399002e 2201 struct drbd_tconn *tconn;
778f271d 2202 int *rs_plan_s = NULL;
f399002e
LE
2203 struct res_opts sc;
2204 int err;
b411b363 2205
f399002e 2206 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN);
3b98c0c2
LE
2207 if (!adm_ctx.reply_skb)
2208 return retcode;
2209 if (retcode != NO_ERROR)
2210 goto fail;
f399002e 2211 tconn = adm_ctx.tconn;
3b98c0c2 2212
b411b363
PR
2213 if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL)) {
2214 retcode = ERR_NOMEM;
3b98c0c2 2215 drbd_msg_put_info("unable to allocate cpumask");
b411b363
PR
2216 goto fail;
2217 }
2218
3b98c0c2
LE
2219 if (((struct drbd_genlmsghdr*)info->userhdr)->flags
2220 & DRBD_GENL_F_SET_DEFAULTS) {
f399002e 2221 memset(&sc, 0, sizeof(struct res_opts));
265be2d0 2222 sc.on_no_data = DRBD_ON_NO_DATA_DEF;
b411b363 2223 } else
f399002e 2224 sc = tconn->res_opts;
b411b363 2225
f399002e 2226 err = res_opts_from_attrs(&sc, info);
3b98c0c2 2227 if (err) {
b411b363 2228 retcode = ERR_MANDATORY_TAG;
3b98c0c2 2229 drbd_msg_put_info(from_attrs_err_to_txt(err));
b411b363
PR
2230 goto fail;
2231 }
2232
b411b363
PR
2233 /* silently ignore cpu mask on UP kernel */
2234 if (nr_cpu_ids > 1 && sc.cpu_mask[0] != 0) {
2235 err = __bitmap_parse(sc.cpu_mask, 32, 0,
2236 cpumask_bits(new_cpu_mask), nr_cpu_ids);
2237 if (err) {
f399002e 2238 conn_warn(tconn, "__bitmap_parse() failed with %d\n", err);
b411b363
PR
2239 retcode = ERR_CPU_MASK_PARSE;
2240 goto fail;
2241 }
2242 }
2243
b411b363 2244
f399002e 2245 tconn->res_opts = sc;
b411b363 2246
f399002e
LE
2247 if (!cpumask_equal(tconn->cpu_mask, new_cpu_mask)) {
2248 cpumask_copy(tconn->cpu_mask, new_cpu_mask);
2249 drbd_calc_cpu_mask(tconn);
2250 tconn->receiver.reset_cpu_mask = 1;
2251 tconn->asender.reset_cpu_mask = 1;
2252 tconn->worker.reset_cpu_mask = 1;
b411b363
PR
2253 }
2254
b411b363 2255fail:
778f271d 2256 kfree(rs_plan_s);
b411b363 2257 free_cpumask_var(new_cpu_mask);
3b98c0c2
LE
2258
2259 drbd_adm_finish(info, retcode);
b411b363
PR
2260 return 0;
2261}
2262
3b98c0c2 2263int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
b411b363 2264{
3b98c0c2
LE
2265 struct drbd_conf *mdev;
2266 int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
2267
2268 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2269 if (!adm_ctx.reply_skb)
2270 return retcode;
2271 if (retcode != NO_ERROR)
2272 goto out;
2273
2274 mdev = adm_ctx.mdev;
b411b363 2275
194bfb32
LE
2276 /* If there is still bitmap IO pending, probably because of a previous
2277 * resync just being finished, wait for it before requesting a new resync. */
2278 wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
2279
b411b363
PR
2280 retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED);
2281
2282 if (retcode < SS_SUCCESS && retcode != SS_NEED_CONNECTION)
2283 retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T));
2284
2285 while (retcode == SS_NEED_CONNECTION) {
87eeee41 2286 spin_lock_irq(&mdev->tconn->req_lock);
b411b363
PR
2287 if (mdev->state.conn < C_CONNECTED)
2288 retcode = _drbd_set_state(_NS(mdev, disk, D_INCONSISTENT), CS_VERBOSE, NULL);
87eeee41 2289 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363
PR
2290
2291 if (retcode != SS_NEED_CONNECTION)
2292 break;
2293
2294 retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T));
2295 }
2296
3b98c0c2
LE
2297out:
2298 drbd_adm_finish(info, retcode);
b411b363
PR
2299 return 0;
2300}
2301
0778286a
PR
/*
 * drbd_bmio_set_susp_al() - run drbd_bmio_set_n_write(), then suspend
 * the activity log.
 * @mdev: device to operate on
 *
 * Returns the result of drbd_bmio_set_n_write(); the activity log is
 * suspended regardless of that result.
 */
static int drbd_bmio_set_susp_al(struct drbd_conf *mdev)
{
	int bm_result = drbd_bmio_set_n_write(mdev);

	drbd_suspend_al(mdev);

	return bm_result;
}
2310
3b98c0c2
LE
2311static int drbd_adm_simple_request_state(struct sk_buff *skb, struct genl_info *info,
2312 union drbd_state mask, union drbd_state val)
b411b363 2313{
3b98c0c2 2314 enum drbd_ret_code retcode;
194bfb32 2315
3b98c0c2
LE
2316 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2317 if (!adm_ctx.reply_skb)
2318 return retcode;
2319 if (retcode != NO_ERROR)
2320 goto out;
b411b363 2321
3b98c0c2
LE
2322 retcode = drbd_request_state(adm_ctx.mdev, mask, val);
2323out:
2324 drbd_adm_finish(info, retcode);
b411b363
PR
2325 return 0;
2326}
2327
3b98c0c2 2328int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
b411b363 2329{
3b98c0c2
LE
2330 return drbd_adm_simple_request_state(skb, info, NS(conn, C_STARTING_SYNC_S));
2331}
b411b363 2332
3b98c0c2
LE
2333int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info)
2334{
2335 enum drbd_ret_code retcode;
2336
2337 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2338 if (!adm_ctx.reply_skb)
2339 return retcode;
2340 if (retcode != NO_ERROR)
2341 goto out;
b411b363 2342
3b98c0c2
LE
2343 if (drbd_request_state(adm_ctx.mdev, NS(user_isp, 1)) == SS_NOTHING_TO_DO)
2344 retcode = ERR_PAUSE_IS_SET;
2345out:
2346 drbd_adm_finish(info, retcode);
b411b363
PR
2347 return 0;
2348}
2349
3b98c0c2 2350int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info)
b411b363 2351{
cd88d030 2352 union drbd_state s;
3b98c0c2
LE
2353 enum drbd_ret_code retcode;
2354
2355 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2356 if (!adm_ctx.reply_skb)
2357 return retcode;
2358 if (retcode != NO_ERROR)
2359 goto out;
b411b363 2360
3b98c0c2
LE
2361 if (drbd_request_state(adm_ctx.mdev, NS(user_isp, 0)) == SS_NOTHING_TO_DO) {
2362 s = adm_ctx.mdev->state;
cd88d030
PR
2363 if (s.conn == C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) {
2364 retcode = s.aftr_isp ? ERR_PIC_AFTER_DEP :
2365 s.peer_isp ? ERR_PIC_PEER_DEP : ERR_PAUSE_IS_CLEAR;
2366 } else {
2367 retcode = ERR_PAUSE_IS_CLEAR;
2368 }
2369 }
b411b363 2370
3b98c0c2
LE
2371out:
2372 drbd_adm_finish(info, retcode);
b411b363
PR
2373 return 0;
2374}
2375
3b98c0c2 2376int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info)
b411b363 2377{
3b98c0c2 2378 return drbd_adm_simple_request_state(skb, info, NS(susp, 1));
b411b363
PR
2379}
2380
3b98c0c2 2381int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info)
b411b363 2382{
3b98c0c2
LE
2383 struct drbd_conf *mdev;
2384 int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
2385
2386 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2387 if (!adm_ctx.reply_skb)
2388 return retcode;
2389 if (retcode != NO_ERROR)
2390 goto out;
2391
2392 mdev = adm_ctx.mdev;
43a5182c
PR
2393 if (test_bit(NEW_CUR_UUID, &mdev->flags)) {
2394 drbd_uuid_new_current(mdev);
2395 clear_bit(NEW_CUR_UUID, &mdev->flags);
43a5182c 2396 }
265be2d0 2397 drbd_suspend_io(mdev);
3b98c0c2
LE
2398 retcode = drbd_request_state(mdev, NS3(susp, 0, susp_nod, 0, susp_fen, 0));
2399 if (retcode == SS_SUCCESS) {
265be2d0 2400 if (mdev->state.conn < C_CONNECTED)
2f5cdd0b 2401 tl_clear(mdev->tconn);
265be2d0 2402 if (mdev->state.disk == D_DISKLESS || mdev->state.disk == D_FAILED)
2f5cdd0b 2403 tl_restart(mdev->tconn, FAIL_FROZEN_DISK_IO);
265be2d0
PR
2404 }
2405 drbd_resume_io(mdev);
2406
3b98c0c2
LE
2407out:
2408 drbd_adm_finish(info, retcode);
b411b363
PR
2409 return 0;
2410}
2411
3b98c0c2 2412int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info)
b411b363 2413{
3b98c0c2 2414 return drbd_adm_simple_request_state(skb, info, NS(disk, D_OUTDATED));
b411b363
PR
2415}
2416
543cc10b
LE
2417int nla_put_drbd_cfg_context(struct sk_buff *skb, const char *conn_name, unsigned vnr)
2418{
2419 struct nlattr *nla;
2420 nla = nla_nest_start(skb, DRBD_NLA_CFG_CONTEXT);
2421 if (!nla)
2422 goto nla_put_failure;
2423 if (vnr != VOLUME_UNSPECIFIED)
2424 NLA_PUT_U32(skb, T_ctx_volume, vnr);
2425 NLA_PUT_STRING(skb, T_ctx_conn_name, conn_name);
2426 nla_nest_end(skb, nla);
2427 return 0;
2428
2429nla_put_failure:
2430 if (nla)
2431 nla_nest_cancel(skb, nla);
2432 return -EMSGSIZE;
2433}
2434
3b98c0c2
LE
2435int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev,
2436 const struct sib_info *sib)
b411b363 2437{
3b98c0c2
LE
2438 struct state_info *si = NULL; /* for sizeof(si->member); */
2439 struct nlattr *nla;
2440 int got_ldev;
2441 int got_net;
2442 int err = 0;
2443 int exclude_sensitive;
2444
2445 /* If sib != NULL, this is drbd_bcast_event, which anyone can listen
2446 * to. So we better exclude_sensitive information.
2447 *
2448 * If sib == NULL, this is drbd_adm_get_status, executed synchronously
2449 * in the context of the requesting user process. Exclude sensitive
2450 * information, unless current has superuser.
2451 *
2452 * NOTE: for drbd_adm_get_status_all(), this is a netlink dump, and
2453 * relies on the current implementation of netlink_dump(), which
2454 * executes the dump callback successively from netlink_recvmsg(),
2455 * always in the context of the receiving process */
2456 exclude_sensitive = sib || !capable(CAP_SYS_ADMIN);
2457
2458 got_ldev = get_ldev(mdev);
2459 got_net = get_net_conf(mdev->tconn);
2460
2461 /* We need to add connection name and volume number information still.
2462 * Minor number is in drbd_genlmsghdr. */
543cc10b 2463 if (nla_put_drbd_cfg_context(skb, mdev->tconn->name, mdev->vnr))
3b98c0c2 2464 goto nla_put_failure;
3b98c0c2 2465
f399002e
LE
2466 if (res_opts_to_skb(skb, &mdev->tconn->res_opts, exclude_sensitive))
2467 goto nla_put_failure;
2468
3b98c0c2
LE
2469 if (got_ldev)
2470 if (disk_conf_to_skb(skb, &mdev->ldev->dc, exclude_sensitive))
2471 goto nla_put_failure;
2472 if (got_net)
2473 if (net_conf_to_skb(skb, mdev->tconn->net_conf, exclude_sensitive))
2474 goto nla_put_failure;
2475
3b98c0c2
LE
2476 nla = nla_nest_start(skb, DRBD_NLA_STATE_INFO);
2477 if (!nla)
2478 goto nla_put_failure;
2479 NLA_PUT_U32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY);
2480 NLA_PUT_U32(skb, T_current_state, mdev->state.i);
2481 NLA_PUT_U64(skb, T_ed_uuid, mdev->ed_uuid);
2482 NLA_PUT_U64(skb, T_capacity, drbd_get_capacity(mdev->this_bdev));
2483
2484 if (got_ldev) {
2485 NLA_PUT_U32(skb, T_disk_flags, mdev->ldev->md.flags);
2486 NLA_PUT(skb, T_uuids, sizeof(si->uuids), mdev->ldev->md.uuid);
2487 NLA_PUT_U64(skb, T_bits_total, drbd_bm_bits(mdev));
2488 NLA_PUT_U64(skb, T_bits_oos, drbd_bm_total_weight(mdev));
2489 if (C_SYNC_SOURCE <= mdev->state.conn &&
2490 C_PAUSED_SYNC_T >= mdev->state.conn) {
2491 NLA_PUT_U64(skb, T_bits_rs_total, mdev->rs_total);
2492 NLA_PUT_U64(skb, T_bits_rs_failed, mdev->rs_failed);
2493 }
b411b363
PR
2494 }
2495
3b98c0c2
LE
2496 if (sib) {
2497 switch(sib->sib_reason) {
2498 case SIB_SYNC_PROGRESS:
2499 case SIB_GET_STATUS_REPLY:
2500 break;
2501 case SIB_STATE_CHANGE:
2502 NLA_PUT_U32(skb, T_prev_state, sib->os.i);
2503 NLA_PUT_U32(skb, T_new_state, sib->ns.i);
2504 break;
2505 case SIB_HELPER_POST:
2506 NLA_PUT_U32(skb,
2507 T_helper_exit_code, sib->helper_exit_code);
2508 /* fall through */
2509 case SIB_HELPER_PRE:
2510 NLA_PUT_STRING(skb, T_helper, sib->helper_name);
2511 break;
2512 }
b411b363 2513 }
3b98c0c2 2514 nla_nest_end(skb, nla);
b411b363 2515
3b98c0c2
LE
2516 if (0)
2517nla_put_failure:
2518 err = -EMSGSIZE;
2519 if (got_ldev)
2520 put_ldev(mdev);
2521 if (got_net)
2522 put_net_conf(mdev->tconn);
2523 return err;
b411b363
PR
2524}
2525
3b98c0c2 2526int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info)
b411b363 2527{
3b98c0c2
LE
2528 enum drbd_ret_code retcode;
2529 int err;
b411b363 2530
3b98c0c2
LE
2531 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2532 if (!adm_ctx.reply_skb)
2533 return retcode;
2534 if (retcode != NO_ERROR)
2535 goto out;
b411b363 2536
3b98c0c2
LE
2537 err = nla_put_status_info(adm_ctx.reply_skb, adm_ctx.mdev, NULL);
2538 if (err) {
2539 nlmsg_free(adm_ctx.reply_skb);
2540 return err;
b411b363 2541 }
3b98c0c2
LE
2542out:
2543 drbd_adm_finish(info, retcode);
2544 return 0;
b411b363
PR
2545}
2546
3b98c0c2 2547int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb)
b411b363 2548{
3b98c0c2
LE
2549 struct drbd_conf *mdev;
2550 struct drbd_genlmsghdr *dh;
543cc10b
LE
2551 struct drbd_tconn *pos = (struct drbd_tconn*)cb->args[0];
2552 struct drbd_tconn *tconn = NULL;
2553 struct drbd_tconn *tmp;
2554 unsigned volume = cb->args[1];
2555
2556 /* Open coded, deferred, iteration:
2557 * list_for_each_entry_safe(tconn, tmp, &drbd_tconns, all_tconn) {
2558 * idr_for_each_entry(&tconn->volumes, mdev, i) {
2559 * ...
2560 * }
2561 * }
2562 * where tconn is cb->args[0];
2563 * and i is cb->args[1];
2564 *
3b98c0c2
LE
2565 * This may miss entries inserted after this dump started,
2566 * or entries deleted before they are reached.
543cc10b
LE
2567 *
2568 * We need to make sure the mdev won't disappear while
2569 * we are looking at it, and revalidate our iterators
2570 * on each iteration.
2571 */
3b98c0c2 2572
543cc10b
LE
2573 /* synchronize with drbd_new_tconn/drbd_free_tconn */
2574 mutex_lock(&drbd_cfg_mutex);
2575 /* synchronize with drbd_delete_device */
3b98c0c2 2576 rcu_read_lock();
543cc10b
LE
2577next_tconn:
2578 /* revalidate iterator position */
2579 list_for_each_entry(tmp, &drbd_tconns, all_tconn) {
2580 if (pos == NULL) {
2581 /* first iteration */
2582 pos = tmp;
2583 tconn = pos;
2584 break;
2585 }
2586 if (tmp == pos) {
2587 tconn = pos;
2588 break;
2589 }
2590 }
2591 if (tconn) {
2592 mdev = idr_get_next(&tconn->volumes, &volume);
2593 if (!mdev) {
2594 /* No more volumes to dump on this tconn.
2595 * Advance tconn iterator. */
2596 pos = list_entry(tconn->all_tconn.next,
2597 struct drbd_tconn, all_tconn);
2598 /* But, did we dump any volume on this tconn yet? */
2599 if (volume != 0) {
2600 tconn = NULL;
2601 volume = 0;
2602 goto next_tconn;
2603 }
2604 }
2605
3b98c0c2
LE
2606 dh = genlmsg_put(skb, NETLINK_CB(cb->skb).pid,
2607 cb->nlh->nlmsg_seq, &drbd_genl_family,
2608 NLM_F_MULTI, DRBD_ADM_GET_STATUS);
2609 if (!dh)
543cc10b
LE
2610 goto out;
2611
2612 if (!mdev) {
2613 /* this is a tconn without a single volume */
2614 dh->minor = -1U;
2615 dh->ret_code = NO_ERROR;
2616 if (nla_put_drbd_cfg_context(skb, tconn->name, VOLUME_UNSPECIFIED))
2617 genlmsg_cancel(skb, dh);
2618 else
2619 genlmsg_end(skb, dh);
2620 goto out;
2621 }
3b98c0c2 2622
543cc10b
LE
2623 D_ASSERT(mdev->vnr == volume);
2624 D_ASSERT(mdev->tconn == tconn);
3b98c0c2 2625
543cc10b 2626 dh->minor = mdev_to_minor(mdev);
3b98c0c2
LE
2627 dh->ret_code = NO_ERROR;
2628
2629 if (nla_put_status_info(skb, mdev, NULL)) {
2630 genlmsg_cancel(skb, dh);
543cc10b 2631 goto out;
3b98c0c2
LE
2632 }
2633 genlmsg_end(skb, dh);
2634 }
b411b363 2635
543cc10b 2636out:
3b98c0c2 2637 rcu_read_unlock();
543cc10b
LE
2638 mutex_unlock(&drbd_cfg_mutex);
2639 /* where to start the next iteration */
2640 cb->args[0] = (long)pos;
2641 cb->args[1] = (pos == tconn) ? volume + 1 : 0;
b411b363 2642
543cc10b
LE
2643 /* No more tconns/volumes/minors found results in an empty skb.
2644 * Which will terminate the dump. */
3b98c0c2 2645 return skb->len;
b411b363
PR
2646}
2647
3b98c0c2 2648int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info)
b411b363 2649{
3b98c0c2
LE
2650 enum drbd_ret_code retcode;
2651 struct timeout_parms tp;
2652 int err;
b411b363 2653
3b98c0c2
LE
2654 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2655 if (!adm_ctx.reply_skb)
2656 return retcode;
2657 if (retcode != NO_ERROR)
2658 goto out;
b411b363 2659
3b98c0c2
LE
2660 tp.timeout_type =
2661 adm_ctx.mdev->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED :
2662 test_bit(USE_DEGR_WFC_T, &adm_ctx.mdev->flags) ? UT_DEGRADED :
2663 UT_DEFAULT;
b411b363 2664
3b98c0c2
LE
2665 err = timeout_parms_to_priv_skb(adm_ctx.reply_skb, &tp);
2666 if (err) {
2667 nlmsg_free(adm_ctx.reply_skb);
2668 return err;
2669 }
2670out:
2671 drbd_adm_finish(info, retcode);
2672 return 0;
b411b363
PR
2673}
2674
3b98c0c2 2675int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info)
b411b363 2676{
3b98c0c2
LE
2677 struct drbd_conf *mdev;
2678 enum drbd_ret_code retcode;
b411b363 2679
3b98c0c2
LE
2680 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2681 if (!adm_ctx.reply_skb)
2682 return retcode;
2683 if (retcode != NO_ERROR)
2684 goto out;
873b0d5f 2685
3b98c0c2
LE
2686 mdev = adm_ctx.mdev;
2687 if (info->attrs[DRBD_NLA_START_OV_PARMS]) {
2688 /* resume from last known position, if possible */
2689 struct start_ov_parms parms =
2690 { .ov_start_sector = mdev->ov_start_sector };
f399002e 2691 int err = start_ov_parms_from_attrs(&parms, info);
3b98c0c2
LE
2692 if (err) {
2693 retcode = ERR_MANDATORY_TAG;
2694 drbd_msg_put_info(from_attrs_err_to_txt(err));
2695 goto out;
2696 }
2697 /* w_make_ov_request expects position to be aligned */
2698 mdev->ov_start_sector = parms.ov_start_sector & ~BM_SECT_PER_BIT;
2699 }
873b0d5f
LE
2700 /* If there is still bitmap IO pending, e.g. previous resync or verify
2701 * just being finished, wait for it before requesting a new resync. */
2702 wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
3b98c0c2
LE
2703 retcode = drbd_request_state(mdev,NS(conn,C_VERIFY_S));
2704out:
2705 drbd_adm_finish(info, retcode);
b411b363
PR
2706 return 0;
2707}
2708
2709
3b98c0c2 2710int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info)
b411b363 2711{
3b98c0c2
LE
2712 struct drbd_conf *mdev;
2713 enum drbd_ret_code retcode;
b411b363
PR
2714 int skip_initial_sync = 0;
2715 int err;
3b98c0c2 2716 struct new_c_uuid_parms args;
b411b363 2717
3b98c0c2
LE
2718 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2719 if (!adm_ctx.reply_skb)
2720 return retcode;
2721 if (retcode != NO_ERROR)
2722 goto out_nolock;
b411b363 2723
3b98c0c2
LE
2724 mdev = adm_ctx.mdev;
2725 memset(&args, 0, sizeof(args));
2726 if (info->attrs[DRBD_NLA_NEW_C_UUID_PARMS]) {
f399002e 2727 err = new_c_uuid_parms_from_attrs(&args, info);
3b98c0c2
LE
2728 if (err) {
2729 retcode = ERR_MANDATORY_TAG;
2730 drbd_msg_put_info(from_attrs_err_to_txt(err));
2731 goto out_nolock;
2732 }
b411b363
PR
2733 }
2734
8410da8f 2735 mutex_lock(mdev->state_mutex); /* Protects us against serialized state changes. */
b411b363
PR
2736
2737 if (!get_ldev(mdev)) {
2738 retcode = ERR_NO_DISK;
2739 goto out;
2740 }
2741
2742 /* this is "skip initial sync", assume to be clean */
31890f4a 2743 if (mdev->state.conn == C_CONNECTED && mdev->tconn->agreed_pro_version >= 90 &&
b411b363
PR
2744 mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && args.clear_bm) {
2745 dev_info(DEV, "Preparing to skip initial sync\n");
2746 skip_initial_sync = 1;
2747 } else if (mdev->state.conn != C_STANDALONE) {
2748 retcode = ERR_CONNECTED;
2749 goto out_dec;
2750 }
2751
2752 drbd_uuid_set(mdev, UI_BITMAP, 0); /* Rotate UI_BITMAP to History 1, etc... */
2753 drbd_uuid_new_current(mdev); /* New current, previous to UI_BITMAP */
2754
2755 if (args.clear_bm) {
20ceb2b2
LE
2756 err = drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
2757 "clear_n_write from new_c_uuid", BM_LOCKED_MASK);
b411b363
PR
2758 if (err) {
2759 dev_err(DEV, "Writing bitmap failed with %d\n",err);
2760 retcode = ERR_IO_MD_DISK;
2761 }
2762 if (skip_initial_sync) {
2763 drbd_send_uuids_skip_initial_sync(mdev);
2764 _drbd_uuid_set(mdev, UI_BITMAP, 0);
62b0da3a 2765 drbd_print_uuids(mdev, "cleared bitmap UUID");
87eeee41 2766 spin_lock_irq(&mdev->tconn->req_lock);
b411b363
PR
2767 _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
2768 CS_VERBOSE, NULL);
87eeee41 2769 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363
PR
2770 }
2771 }
2772
2773 drbd_md_sync(mdev);
2774out_dec:
2775 put_ldev(mdev);
2776out:
8410da8f 2777 mutex_unlock(mdev->state_mutex);
3b98c0c2
LE
2778out_nolock:
2779 drbd_adm_finish(info, retcode);
774b3055
PR
2780 return 0;
2781}
2782
3b98c0c2
LE
2783static enum drbd_ret_code
2784drbd_check_conn_name(const char *name)
774b3055 2785{
3b98c0c2
LE
2786 if (!name || !name[0]) {
2787 drbd_msg_put_info("connection name missing");
2788 return ERR_MANDATORY_TAG;
774b3055 2789 }
3b98c0c2
LE
2790 /* if we want to use these in sysfs/configfs/debugfs some day,
2791 * we must not allow slashes */
2792 if (strchr(name, '/')) {
2793 drbd_msg_put_info("invalid connection name");
2794 return ERR_INVALID_REQUEST;
774b3055 2795 }
3b98c0c2 2796 return NO_ERROR;
774b3055
PR
2797}
2798
3b98c0c2 2799int drbd_adm_create_connection(struct sk_buff *skb, struct genl_info *info)
b411b363 2800{
3b98c0c2 2801 enum drbd_ret_code retcode;
9f5180e5 2802
3b98c0c2
LE
2803 retcode = drbd_adm_prepare(skb, info, 0);
2804 if (!adm_ctx.reply_skb)
2805 return retcode;
2806 if (retcode != NO_ERROR)
2807 goto out;
b411b363 2808
3b98c0c2
LE
2809 retcode = drbd_check_conn_name(adm_ctx.conn_name);
2810 if (retcode != NO_ERROR)
2811 goto out;
b411b363 2812
3b98c0c2 2813 if (adm_ctx.tconn) {
38f19616
LE
2814 if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) {
2815 retcode = ERR_INVALID_REQUEST;
2816 drbd_msg_put_info("connection exists");
2817 }
2818 /* else: still NO_ERROR */
3b98c0c2 2819 goto out;
b411b363
PR
2820 }
2821
3b98c0c2 2822 if (!drbd_new_tconn(adm_ctx.conn_name))
b411b363 2823 retcode = ERR_NOMEM;
3b98c0c2
LE
2824out:
2825 drbd_adm_finish(info, retcode);
2826 return 0;
b411b363
PR
2827}
2828
3b98c0c2 2829int drbd_adm_add_minor(struct sk_buff *skb, struct genl_info *info)
b411b363 2830{
3b98c0c2
LE
2831 struct drbd_genlmsghdr *dh = info->userhdr;
2832 enum drbd_ret_code retcode;
b411b363 2833
3b98c0c2
LE
2834 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN);
2835 if (!adm_ctx.reply_skb)
2836 return retcode;
2837 if (retcode != NO_ERROR)
2838 goto out;
b411b363 2839
3b98c0c2
LE
2840 /* FIXME drop minor_count parameter, limit to MINORMASK */
2841 if (dh->minor >= minor_count) {
2842 drbd_msg_put_info("requested minor out of range");
2843 retcode = ERR_INVALID_REQUEST;
2844 goto out;
b411b363 2845 }
3b98c0c2
LE
2846 /* FIXME we need a define here */
2847 if (adm_ctx.volume >= 256) {
2848 drbd_msg_put_info("requested volume id out of range");
2849 retcode = ERR_INVALID_REQUEST;
2850 goto out;
b411b363 2851 }
b411b363 2852
38f19616
LE
2853 /* drbd_adm_prepare made sure already
2854 * that mdev->tconn and mdev->vnr match the request. */
2855 if (adm_ctx.mdev) {
2856 if (info->nlhdr->nlmsg_flags & NLM_F_EXCL)
2857 retcode = ERR_MINOR_EXISTS;
2858 /* else: still NO_ERROR */
2859 goto out;
2860 }
2861
3b98c0c2
LE
2862 retcode = conn_new_minor(adm_ctx.tconn, dh->minor, adm_ctx.volume);
2863out:
2864 drbd_adm_finish(info, retcode);
2865 return 0;
b411b363
PR
2866}
2867
85f75dd7
LE
2868static enum drbd_ret_code adm_delete_minor(struct drbd_conf *mdev)
2869{
2870 if (mdev->state.disk == D_DISKLESS &&
2871 /* no need to be mdev->state.conn == C_STANDALONE &&
2872 * we may want to delete a minor from a live replication group.
2873 */
2874 mdev->state.role == R_SECONDARY) {
2875 drbd_delete_device(mdev_to_minor(mdev));
2876 return NO_ERROR;
2877 } else
2878 return ERR_MINOR_CONFIGURED;
2879}
2880
3b98c0c2 2881int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info)
b411b363 2882{
3b98c0c2 2883 enum drbd_ret_code retcode;
b411b363 2884
3b98c0c2
LE
2885 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2886 if (!adm_ctx.reply_skb)
2887 return retcode;
2888 if (retcode != NO_ERROR)
2889 goto out;
b411b363 2890
85f75dd7
LE
2891 mutex_lock(&drbd_cfg_mutex);
2892 retcode = adm_delete_minor(adm_ctx.mdev);
2893 mutex_unlock(&drbd_cfg_mutex);
2894 /* if this was the last volume of this connection,
2895 * this will terminate all threads */
2896 if (retcode == NO_ERROR)
cffec5b2 2897 conn_reconfig_done(adm_ctx.tconn);
85f75dd7
LE
2898out:
2899 drbd_adm_finish(info, retcode);
2900 return 0;
2901}
2902
2903int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
2904{
2905 enum drbd_ret_code retcode;
2906 enum drbd_state_rv rv;
2907 struct drbd_conf *mdev;
2908 unsigned i;
2909
2910 retcode = drbd_adm_prepare(skb, info, 0);
2911 if (!adm_ctx.reply_skb)
2912 return retcode;
2913 if (retcode != NO_ERROR)
2914 goto out;
2915
2916 if (!adm_ctx.tconn) {
2917 retcode = ERR_CONN_NOT_KNOWN;
2918 goto out;
2919 }
2920
2921 mutex_lock(&drbd_cfg_mutex);
2922 /* demote */
2923 idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) {
2924 retcode = drbd_set_role(mdev, R_SECONDARY, 0);
2925 if (retcode < SS_SUCCESS) {
2926 drbd_msg_put_info("failed to demote");
2927 goto out_unlock;
2928 }
2929 }
2930
2931 /* disconnect */
2932 rv = conn_try_disconnect(adm_ctx.tconn, 0);
2933 if (rv < SS_SUCCESS) {
2934 retcode = rv; /* enum type mismatch! */
2935 drbd_msg_put_info("failed to disconnect");
2936 goto out_unlock;
2937 }
2938
2939 /* detach */
2940 idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) {
2941 rv = adm_detach(mdev);
2942 if (rv < SS_SUCCESS) {
2943 retcode = rv; /* enum type mismatch! */
2944 drbd_msg_put_info("failed to detach");
2945 goto out_unlock;
2946 }
2947 }
2948
2949 /* delete volumes */
2950 idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) {
2951 retcode = adm_delete_minor(mdev);
2952 if (retcode != NO_ERROR) {
2953 /* "can not happen" */
2954 drbd_msg_put_info("failed to delete volume");
2955 goto out_unlock;
2956 }
2957 }
2958
2959 /* stop all threads */
2960 conn_reconfig_done(adm_ctx.tconn);
2961
2962 /* delete connection */
2963 if (conn_lowest_minor(adm_ctx.tconn) < 0) {
2964 drbd_free_tconn(adm_ctx.tconn);
2965 retcode = NO_ERROR;
2966 } else {
2967 /* "can not happen" */
2968 retcode = ERR_CONN_IN_USE;
2969 drbd_msg_put_info("failed to delete connection");
2970 goto out_unlock;
2971 }
2972out_unlock:
2973 mutex_unlock(&drbd_cfg_mutex);
3b98c0c2
LE
2974out:
2975 drbd_adm_finish(info, retcode);
2976 return 0;
b411b363
PR
2977}
2978
3b98c0c2 2979int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info)
b411b363 2980{
3b98c0c2 2981 enum drbd_ret_code retcode;
b411b363 2982
3b98c0c2
LE
2983 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN);
2984 if (!adm_ctx.reply_skb)
2985 return retcode;
2986 if (retcode != NO_ERROR)
2987 goto out;
2988
85f75dd7 2989 mutex_lock(&drbd_cfg_mutex);
3b98c0c2
LE
2990 if (conn_lowest_minor(adm_ctx.tconn) < 0) {
2991 drbd_free_tconn(adm_ctx.tconn);
2992 retcode = NO_ERROR;
2993 } else {
2994 retcode = ERR_CONN_IN_USE;
b411b363 2995 }
85f75dd7 2996 mutex_unlock(&drbd_cfg_mutex);
b411b363 2997
3b98c0c2
LE
2998out:
2999 drbd_adm_finish(info, retcode);
b411b363
PR
3000 return 0;
3001}
3002
3b98c0c2 3003void drbd_bcast_event(struct drbd_conf *mdev, const struct sib_info *sib)
b411b363 3004{
3b98c0c2
LE
3005 static atomic_t drbd_genl_seq = ATOMIC_INIT(2); /* two. */
3006 struct sk_buff *msg;
3007 struct drbd_genlmsghdr *d_out;
3008 unsigned seq;
3009 int err = -ENOMEM;
3010
3011 seq = atomic_inc_return(&drbd_genl_seq);
3012 msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
3013 if (!msg)
3014 goto failed;
3015
3016 err = -EMSGSIZE;
3017 d_out = genlmsg_put(msg, 0, seq, &drbd_genl_family, 0, DRBD_EVENT);
3018 if (!d_out) /* cannot happen, but anyways. */
3019 goto nla_put_failure;
3020 d_out->minor = mdev_to_minor(mdev);
3021 d_out->ret_code = 0;
3022
3023 if (nla_put_status_info(msg, mdev, sib))
3024 goto nla_put_failure;
3025 genlmsg_end(msg, d_out);
3026 err = drbd_genl_multicast_events(msg, 0);
3027 /* msg has been consumed or freed in netlink_broadcast() */
3028 if (err && err != -ESRCH)
3029 goto failed;
b411b363 3030
3b98c0c2 3031 return;
b411b363 3032
3b98c0c2
LE
3033nla_put_failure:
3034 nlmsg_free(msg);
3035failed:
3036 dev_err(DEV, "Error %d while broadcasting event. "
3037 "Event seq:%u sib_reason:%u\n",
3038 err, seq, sib->sib_reason);
b411b363 3039}
This page took 0.272625 seconds and 5 git commands to generate.