drivers/block/drbd/drbd_nl.c

   1 /*
   2    drbd_nl.c
   3
   4    This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
   5
   6    Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   7    Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   8    Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
   9
  10    drbd is free software; you can redistribute it and/or modify
  11    it under the terms of the GNU General Public License as published by
  12    the Free Software Foundation; either version 2, or (at your option)
  13    any later version.
  14
  15    drbd is distributed in the hope that it will be useful,
  16    but WITHOUT ANY WARRANTY; without even the implied warranty of
  17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18    GNU General Public License for more details.
  19
  20    You should have received a copy of the GNU General Public License
  21    along with drbd; see the file COPYING.  If not, write to
  22    the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
  23
  24  */
  25
  26 #include <linux/module.h>
  27 #include <linux/drbd.h>
  28 #include <linux/in.h>
  29 #include <linux/fs.h>
  30 #include <linux/file.h>
  31 #include <linux/slab.h>
  32 #include <linux/blkpg.h>
  33 #include <linux/cpumask.h>
  34 #include "drbd_int.h"
  35 #include "drbd_protocol.h"
  36 #include "drbd_req.h"
  37 #include "drbd_wrappers.h"
  38 #include <asm/unaligned.h>
  39 #include <linux/drbd_limits.h>
  40 #include <linux/kthread.h>
  41
  42 #include <net/genetlink.h>
  43
  44 /* .doit handlers: each takes the request skb and its genl_info */
  45 // int drbd_adm_create_resource(struct sk_buff *skb, struct genl_info *info);
  46 // int drbd_adm_delete_resource(struct sk_buff *skb, struct genl_info *info);
  47
  48 int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info);
  49 int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info);
  50
  51 int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info);
  52 int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info);
  53 int drbd_adm_down(struct sk_buff *skb, struct genl_info *info);
  54
  55 int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info);
  56 int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info);
  57 int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info);
  58 int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info);
  59 int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info);
  60 int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info);
  61 int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info);
  62 int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info);
  63 int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info);
  64 int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info);
  65 int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info);
  66 int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info);
  67 int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info);
  68 int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info);
  69 int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info);
  70 int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info);
  71 int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info);
  72 int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info);
  73 int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info);
  74 int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info);
  75 /* .dumpit handler: takes a netlink_callback instead of a genl_info */
  76 int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb);
  77
  78 #include <linux/drbd_genl_api.h>
  79 #include "drbd_nla.h"
  80 #include <linux/genl_magic_func.h>
  81
  82 /* Used with blkdev_get_by_path() to claim our meta data device(s). */
  83 static char *drbd_m_holder = "Hands off! this is DRBD's meta data device.";
  84
  85 /* Configuration is strictly serialized, because generic netlink message
  86  * processing is strictly serialized by the genl_lock().
  87  * Which means we can use one static global drbd_config_context struct.
  88  */
  89 static struct drbd_config_context {
  90         /* assigned from drbd_genlmsghdr */
  91         unsigned int minor;
  92         /* assigned from request attributes, if present */
  93         unsigned int volume;
  94 #define VOLUME_UNSPECIFIED              (-1U)
  95         /* pointer into the request skb,
  96          * limited lifetime! */
  97         char *resource_name;
  98         struct nlattr *my_addr;
  99         struct nlattr *peer_addr;
 100
 101         /* reply buffer */
 102         struct sk_buff *reply_skb;
 103         /* pointer into reply buffer */
 104         struct drbd_genlmsghdr *reply_dh;
 105         /* resolved from attributes, if possible */
 106         struct drbd_device *device;
 107         struct drbd_resource *resource;
 108         struct drbd_connection *connection;
 109 } adm_ctx;
 110
 111 static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info)
 112 {
 113         genlmsg_end(skb, genlmsg_data(nlmsg_data(nlmsg_hdr(skb))));
 114         if (genlmsg_reply(skb, info))
 115                 printk(KERN_ERR "drbd: error sending genl reply\n");
 116 }
 117
 118 /* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: The only
 119  * reason it could fail was no space in skb, and there are 4k available. */
 120 int drbd_msg_put_info(const char *info)
 121 {
 122         struct sk_buff *skb = adm_ctx.reply_skb;
 123         struct nlattr *nla;
 124         int err = -EMSGSIZE;
 125
 126         if (!info || !info[0])
 127                 return 0;
 128
 129         nla = nla_nest_start(skb, DRBD_NLA_CFG_REPLY);
 130         if (!nla)
 131                 return err;
 132
 133         err = nla_put_string(skb, T_info_text, info);
 134         if (err) {
 135                 nla_nest_cancel(skb, nla);
 136                 return err;
 137         } else
 138                 nla_nest_end(skb, nla);
 139         return 0;
 140 }
 141
 142 /* This would be a good candidate for a "pre_doit" hook,
 143  * and per-family private info->pointers.
 144  * But we need to stay compatible with older kernels.
 145  * If it returns successfully, adm_ctx members are valid.
 146  */
 147 #define DRBD_ADM_NEED_MINOR     1
 148 #define DRBD_ADM_NEED_RESOURCE  2
 149 #define DRBD_ADM_NEED_CONNECTION 4
 150 static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info,
 151                 unsigned flags)
 152 {
 153         struct drbd_genlmsghdr *d_in = info->userhdr;
 154         const u8 cmd = info->genlhdr->cmd;
 155         int err;
 156
 157         memset(&adm_ctx, 0, sizeof(adm_ctx));
 158
 159         /* genl_rcv_msg only checks for CAP_NET_ADMIN on "GENL_ADMIN_PERM" :( */
 160         if (cmd != DRBD_ADM_GET_STATUS && !capable(CAP_NET_ADMIN))
 161                return -EPERM;
 162
 163         adm_ctx.reply_skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
 164         if (!adm_ctx.reply_skb) {
 165                 err = -ENOMEM;
 166                 goto fail;
 167         }
 168
 169         adm_ctx.reply_dh = genlmsg_put_reply(adm_ctx.reply_skb,
 170                                         info, &drbd_genl_family, 0, cmd);
 171         /* put of a few bytes into a fresh skb of >= 4k will always succeed.
 172          * but anyways */
 173         if (!adm_ctx.reply_dh) {
 174                 err = -ENOMEM;
 175                 goto fail;
 176         }
 177
 178         adm_ctx.reply_dh->minor = d_in->minor;
 179         adm_ctx.reply_dh->ret_code = NO_ERROR;
 180
 181         adm_ctx.volume = VOLUME_UNSPECIFIED;
 182         if (info->attrs[DRBD_NLA_CFG_CONTEXT]) {
 183                 struct nlattr *nla;
 184                 /* parse and validate only */
 185                 err = drbd_cfg_context_from_attrs(NULL, info);
 186                 if (err)
 187                         goto fail;
 188
 189                 /* It was present, and valid,
 190                  * copy it over to the reply skb. */
 191                 err = nla_put_nohdr(adm_ctx.reply_skb,
 192                                 info->attrs[DRBD_NLA_CFG_CONTEXT]->nla_len,
 193                                 info->attrs[DRBD_NLA_CFG_CONTEXT]);
 194                 if (err)
 195                         goto fail;
 196
 197                 /* and assign stuff to the global adm_ctx */
 198                 nla = nested_attr_tb[__nla_type(T_ctx_volume)];
 199                 if (nla)
 200                         adm_ctx.volume = nla_get_u32(nla);
 201                 nla = nested_attr_tb[__nla_type(T_ctx_resource_name)];
 202                 if (nla)
 203                         adm_ctx.resource_name = nla_data(nla);
 204                 adm_ctx.my_addr = nested_attr_tb[__nla_type(T_ctx_my_addr)];
 205                 adm_ctx.peer_addr = nested_attr_tb[__nla_type(T_ctx_peer_addr)];
 206                 if ((adm_ctx.my_addr &&
 207                      nla_len(adm_ctx.my_addr) > sizeof(adm_ctx.connection->my_addr)) ||
 208                     (adm_ctx.peer_addr &&
 209                      nla_len(adm_ctx.peer_addr) > sizeof(adm_ctx.connection->peer_addr))) {
 210                         err = -EINVAL;
 211                         goto fail;
 212                 }
 213         }
 214
 215         adm_ctx.minor = d_in->minor;
 216         adm_ctx.device = minor_to_device(d_in->minor);
 217         if (adm_ctx.resource_name) {
 218                 adm_ctx.resource = drbd_find_resource(adm_ctx.resource_name);
 219                 if (adm_ctx.resource) {
 220                         adm_ctx.connection = first_connection(adm_ctx.resource);
 221                         kref_get(&adm_ctx.connection->kref);
 222                 }
 223         }
 224
 225         if (!adm_ctx.device && (flags & DRBD_ADM_NEED_MINOR)) {
 226                 drbd_msg_put_info("unknown minor");
 227                 return ERR_MINOR_INVALID;
 228         }
 229         if (!adm_ctx.resource && (flags & DRBD_ADM_NEED_RESOURCE)) {
 230                 drbd_msg_put_info("unknown resource");
 231                 if (adm_ctx.resource_name)
 232                         return ERR_RES_NOT_KNOWN;
 233                 return ERR_INVALID_REQUEST;
 234         }
 235
 236         if (flags & DRBD_ADM_NEED_CONNECTION) {
 237                 if (adm_ctx.connection && !(flags & DRBD_ADM_NEED_RESOURCE)) {
 238                         drbd_msg_put_info("no resource name expected");
 239                         return ERR_INVALID_REQUEST;
 240                 }
 241                 if (adm_ctx.device) {
 242                         drbd_msg_put_info("no minor number expected");
 243                         return ERR_INVALID_REQUEST;
 244                 }
 245                 if (adm_ctx.my_addr && adm_ctx.peer_addr)
 246                         adm_ctx.connection = conn_get_by_addrs(nla_data(adm_ctx.my_addr),
 247                                                           nla_len(adm_ctx.my_addr),
 248                                                           nla_data(adm_ctx.peer_addr),
 249                                                           nla_len(adm_ctx.peer_addr));
 250                 if (!adm_ctx.connection) {
 251                         drbd_msg_put_info("unknown connection");
 252                         return ERR_INVALID_REQUEST;
 253                 }
 254         }
 255
 256         /* some more paranoia, if the request was over-determined */
 257         if (adm_ctx.device && adm_ctx.resource &&
 258             adm_ctx.device->resource != adm_ctx.resource) {
 259                 pr_warning("request: minor=%u, resource=%s; but that minor belongs to resource %s\n",
 260                                 adm_ctx.minor, adm_ctx.resource->name,
 261                                 adm_ctx.device->resource->name);
 262                 drbd_msg_put_info("minor exists in different resource");
 263                 return ERR_INVALID_REQUEST;
 264         }
 265         if (adm_ctx.device &&
 266             adm_ctx.volume != VOLUME_UNSPECIFIED &&
 267             adm_ctx.volume != adm_ctx.device->vnr) {
 268                 pr_warning("request: minor=%u, volume=%u; but that minor is volume %u in %s\n",
 269                                 adm_ctx.minor, adm_ctx.volume,
 270                                 adm_ctx.device->vnr,
 271                                 adm_ctx.device->resource->name);
 272                 drbd_msg_put_info("minor exists as different volume");
 273                 return ERR_INVALID_REQUEST;
 274         }
 275
 276         return NO_ERROR;
 277
 278 fail:
 279         nlmsg_free(adm_ctx.reply_skb);
 280         adm_ctx.reply_skb = NULL;
 281         return err;
 282 }
 283
 284 static int drbd_adm_finish(struct genl_info *info, int retcode)
 285 {
 286         if (adm_ctx.connection) {
 287                 kref_put(&adm_ctx.connection->kref, drbd_destroy_connection);
 288                 adm_ctx.connection = NULL;
 289         }
 290         if (adm_ctx.resource) {
 291                 kref_put(&adm_ctx.resource->kref, drbd_destroy_resource);
 292                 adm_ctx.resource = NULL;
 293         }
 294
 295         if (!adm_ctx.reply_skb)
 296                 return -ENOMEM;
 297
 298         adm_ctx.reply_dh->ret_code = retcode;
 299         drbd_adm_send_reply(adm_ctx.reply_skb, info);
 300         return 0;
 301 }
 302
 303 static void setup_khelper_env(struct drbd_connection *connection, char **envp)
 304 {
 305         char *afs;
 306
 307         /* FIXME: A future version will not allow this case. */
 308         if (connection->my_addr_len == 0 || connection->peer_addr_len == 0)
 309                 return;
 310
 311         switch (((struct sockaddr *)&connection->peer_addr)->sa_family) {
 312         case AF_INET6:
 313                 afs = "ipv6";
 314                 snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI6",
 315                          &((struct sockaddr_in6 *)&connection->peer_addr)->sin6_addr);
 316                 break;
 317         case AF_INET:
 318                 afs = "ipv4";
 319                 snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
 320                          &((struct sockaddr_in *)&connection->peer_addr)->sin_addr);
 321                 break;
 322         default:
 323                 afs = "ssocks";
 324                 snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
 325                          &((struct sockaddr_in *)&connection->peer_addr)->sin_addr);
 326         }
 327         snprintf(envp[3], 20, "DRBD_PEER_AF=%s", afs);
 328 }
 329
 330 int drbd_khelper(struct drbd_device *device, char *cmd)
 331 {
 332         char *envp[] = { "HOME=/",
 333                         "TERM=linux",
 334                         "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
 335                          (char[20]) { }, /* address family */
 336                          (char[60]) { }, /* address */
 337                         NULL };
 338         char mb[12];
 339         char *argv[] = {usermode_helper, cmd, mb, NULL };
 340         struct drbd_connection *connection = first_peer_device(device)->connection;
 341         struct sib_info sib;
 342         int ret;
 343
 344         if (current == connection->worker.task)
 345                 set_bit(CALLBACK_PENDING, &connection->flags);
 346
 347         snprintf(mb, 12, "minor-%d", device_to_minor(device));
 348         setup_khelper_env(connection, envp);
 349
 350         /* The helper may take some time.
 351          * write out any unsynced meta data changes now */
 352         drbd_md_sync(device);
 353
 354         drbd_info(device, "helper command: %s %s %s\n", usermode_helper, cmd, mb);
 355         sib.sib_reason = SIB_HELPER_PRE;
 356         sib.helper_name = cmd;
 357         drbd_bcast_event(device, &sib);
 358         ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
 359         if (ret)
 360                 drbd_warn(device, "helper command: %s %s %s exit code %u (0x%x)\n",
 361                                 usermode_helper, cmd, mb,
 362                                 (ret >> 8) & 0xff, ret);
 363         else
 364                 drbd_info(device, "helper command: %s %s %s exit code %u (0x%x)\n",
 365                                 usermode_helper, cmd, mb,
 366                                 (ret >> 8) & 0xff, ret);
 367         sib.sib_reason = SIB_HELPER_POST;
 368         sib.helper_exit_code = ret;
 369         drbd_bcast_event(device, &sib);
 370
 371         if (current == connection->worker.task)
 372                 clear_bit(CALLBACK_PENDING, &connection->flags);
 373
 374         if (ret < 0) /* Ignore any ERRNOs we got. */
 375                 ret = 0;
 376
 377         return ret;
 378 }
 379
 380 static int conn_khelper(struct drbd_connection *connection, char *cmd)
 381 {
 382         char *envp[] = { "HOME=/",
 383                         "TERM=linux",
 384                         "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
 385                          (char[20]) { }, /* address family */
 386                          (char[60]) { }, /* address */
 387                         NULL };
 388         char *resource_name = connection->resource->name;
 389         char *argv[] = {usermode_helper, cmd, resource_name, NULL };
 390         int ret;
 391
 392         setup_khelper_env(connection, envp);
 393         conn_md_sync(connection);
 394
 395         drbd_info(connection, "helper command: %s %s %s\n", usermode_helper, cmd, resource_name);
 396         /* TODO: conn_bcast_event() ?? */
 397
 398         ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
 399         if (ret)
 400                 drbd_warn(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
 401                           usermode_helper, cmd, resource_name,
 402                           (ret >> 8) & 0xff, ret);
 403         else
 404                 drbd_info(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
 405                           usermode_helper, cmd, resource_name,
 406                           (ret >> 8) & 0xff, ret);
 407         /* TODO: conn_bcast_event() ?? */
 408
 409         if (ret < 0) /* Ignore any ERRNOs we got. */
 410                 ret = 0;
 411
 412         return ret;
 413 }
 414
 415 static enum drbd_fencing_p highest_fencing_policy(struct drbd_connection *connection)
 416 {
 417         enum drbd_fencing_p fp = FP_NOT_AVAIL;
 418         struct drbd_peer_device *peer_device;
 419         int vnr;
 420
 421         rcu_read_lock();
 422         idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
 423                 struct drbd_device *device = peer_device->device;
 424                 if (get_ldev_if_state(device, D_CONSISTENT)) {
 425                         struct disk_conf *disk_conf =
 426                                 rcu_dereference(peer_device->device->ldev->disk_conf);
 427                         fp = max_t(enum drbd_fencing_p, fp, disk_conf->fencing);
 428                         put_ldev(device);
 429                 }
 430         }
 431         rcu_read_unlock();
 432
 433         return fp;
 434 }
 435
 436 bool conn_try_outdate_peer(struct drbd_connection *connection)
 437 {
 438         unsigned int connect_cnt;
 439         union drbd_state mask = { };
 440         union drbd_state val = { };
 441         enum drbd_fencing_p fp;
 442         char *ex_to_string;
 443         int r;
 444
 445         if (connection->cstate >= C_WF_REPORT_PARAMS) {
 446                 drbd_err(connection, "Expected cstate < C_WF_REPORT_PARAMS\n");
 447                 return false;
 448         }
 449
 450         spin_lock_irq(&connection->req_lock);
 451         connect_cnt = connection->connect_cnt;
 452         spin_unlock_irq(&connection->req_lock);
 453
 454         fp = highest_fencing_policy(connection);
 455         switch (fp) {
 456         case FP_NOT_AVAIL:
 457                 drbd_warn(connection, "Not fencing peer, I'm not even Consistent myself.\n");
 458                 goto out;
 459         case FP_DONT_CARE:
 460                 return true;
 461         default: ;
 462         }
 463
 464         r = conn_khelper(connection, "fence-peer");
 465
 466         switch ((r>>8) & 0xff) {
 467         case 3: /* peer is inconsistent */
 468                 ex_to_string = "peer is inconsistent or worse";
 469                 mask.pdsk = D_MASK;
 470                 val.pdsk = D_INCONSISTENT;
 471                 break;
 472         case 4: /* peer got outdated, or was already outdated */
 473                 ex_to_string = "peer was fenced";
 474                 mask.pdsk = D_MASK;
 475                 val.pdsk = D_OUTDATED;
 476                 break;
 477         case 5: /* peer was down */
 478                 if (conn_highest_disk(connection) == D_UP_TO_DATE) {
 479                         /* we will(have) create(d) a new UUID anyways... */
 480                         ex_to_string = "peer is unreachable, assumed to be dead";
 481                         mask.pdsk = D_MASK;
 482                         val.pdsk = D_OUTDATED;
 483                 } else {
 484                         ex_to_string = "peer unreachable, doing nothing since disk != UpToDate";
 485                 }
 486                 break;
 487         case 6: /* Peer is primary, voluntarily outdate myself.
 488                  * This is useful when an unconnected R_SECONDARY is asked to
 489                  * become R_PRIMARY, but finds the other peer being active. */
 490                 ex_to_string = "peer is active";
 491                 drbd_warn(connection, "Peer is primary, outdating myself.\n");
 492                 mask.disk = D_MASK;
 493                 val.disk = D_OUTDATED;
 494                 break;
 495         case 7:
 496                 if (fp != FP_STONITH)
 497                         drbd_err(connection, "fence-peer() = 7 && fencing != Stonith !!!\n");
 498                 ex_to_string = "peer was stonithed";
 499                 mask.pdsk = D_MASK;
 500                 val.pdsk = D_OUTDATED;
 501                 break;
 502         default:
 503                 /* The script is broken ... */
 504                 drbd_err(connection, "fence-peer helper broken, returned %d\n", (r>>8)&0xff);
 505                 return false; /* Eventually leave IO frozen */
 506         }
 507
 508         drbd_info(connection, "fence-peer helper returned %d (%s)\n",
 509                   (r>>8) & 0xff, ex_to_string);
 510
 511  out:
 512
 513         /* Not using
 514            conn_request_state(connection, mask, val, CS_VERBOSE);
 515            here, because we might were able to re-establish the connection in the
 516            meantime. */
 517         spin_lock_irq(&connection->req_lock);
 518         if (connection->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &connection->flags)) {
 519                 if (connection->connect_cnt != connect_cnt)
 520                         /* In case the connection was established and droped
 521                            while the fence-peer handler was running, ignore it */
 522                         drbd_info(connection, "Ignoring fence-peer exit code\n");
 523                 else
 524                         _conn_request_state(connection, mask, val, CS_VERBOSE);
 525         }
 526         spin_unlock_irq(&connection->req_lock);
 527
 528         return conn_highest_pdsk(connection) <= D_OUTDATED;
 529 }
 530
 531 static int _try_outdate_peer_async(void *data)
 532 {
 533         struct drbd_connection *connection = (struct drbd_connection *)data;
 534
 535         conn_try_outdate_peer(connection);
 536
 537         kref_put(&connection->kref, drbd_destroy_connection);
 538         return 0;
 539 }
 540
 541 void conn_try_outdate_peer_async(struct drbd_connection *connection)
 542 {
 543         struct task_struct *opa;
 544
 545         kref_get(&connection->kref);
 546         opa = kthread_run(_try_outdate_peer_async, connection, "drbd_async_h");
 547         if (IS_ERR(opa)) {
 548                 drbd_err(connection, "out of mem, failed to invoke fence-peer helper\n");
 549                 kref_put(&connection->kref, drbd_destroy_connection);
 550         }
 551 }
 552
 553 enum drbd_state_rv
 554 drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force)
 555 {
 556         const int max_tries = 4;
 557         enum drbd_state_rv rv = SS_UNKNOWN_ERROR;
 558         struct net_conf *nc;
 559         int try = 0;
 560         int forced = 0;
 561         union drbd_state mask, val;
 562
 563         if (new_role == R_PRIMARY) {
 564                 struct drbd_connection *connection;
 565
 566                 /* Detect dead peers as soon as possible.  */
 567
 568                 rcu_read_lock();
 569                 for_each_connection(connection, device->resource)
 570                         request_ping(connection);
 571                 rcu_read_unlock();
 572         }
 573
 574         mutex_lock(device->state_mutex);
 575
 576         mask.i = 0; mask.role = R_MASK;
 577         val.i  = 0; val.role  = new_role;
 578
 579         while (try++ < max_tries) {
 580                 rv = _drbd_request_state(device, mask, val, CS_WAIT_COMPLETE);
 581
 582                 /* in case we first succeeded to outdate,
 583                  * but now suddenly could establish a connection */
 584                 if (rv == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) {
 585                         val.pdsk = 0;
 586                         mask.pdsk = 0;
 587                         continue;
 588                 }
 589
 590                 if (rv == SS_NO_UP_TO_DATE_DISK && force &&
 591                     (device->state.disk < D_UP_TO_DATE &&
 592                      device->state.disk >= D_INCONSISTENT)) {
 593                         mask.disk = D_MASK;
 594                         val.disk  = D_UP_TO_DATE;
 595                         forced = 1;
 596                         continue;
 597                 }
 598
 599                 if (rv == SS_NO_UP_TO_DATE_DISK &&
 600                     device->state.disk == D_CONSISTENT && mask.pdsk == 0) {
 601                         D_ASSERT(device, device->state.pdsk == D_UNKNOWN);
 602
 603                         if (conn_try_outdate_peer(first_peer_device(device)->connection)) {
 604                                 val.disk = D_UP_TO_DATE;
 605                                 mask.disk = D_MASK;
 606                         }
 607                         continue;
 608                 }
 609
 610                 if (rv == SS_NOTHING_TO_DO)
 611                         goto out;
 612                 if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) {
 613                         if (!conn_try_outdate_peer(first_peer_device(device)->connection) && force) {
 614                                 drbd_warn(device, "Forced into split brain situation!\n");
 615                                 mask.pdsk = D_MASK;
 616                                 val.pdsk  = D_OUTDATED;
 617
 618                         }
 619                         continue;
 620                 }
 621                 if (rv == SS_TWO_PRIMARIES) {
 622                         /* Maybe the peer is detected as dead very soon...
 623                            retry at most once more in this case. */
 624                         int timeo;
 625                         rcu_read_lock();
 626                         nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
 627                         timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1;
 628                         rcu_read_unlock();
 629                         schedule_timeout_interruptible(timeo);
 630                         if (try < max_tries)
 631                                 try = max_tries - 1;
 632                         continue;
 633                 }
 634                 if (rv < SS_SUCCESS) {
 635                         rv = _drbd_request_state(device, mask, val,
 636                                                 CS_VERBOSE + CS_WAIT_COMPLETE);
 637                         if (rv < SS_SUCCESS)
 638                                 goto out;
 639                 }
 640                 break;
 641         }
 642
 643         if (rv < SS_SUCCESS)
 644                 goto out;
 645
 646         if (forced)
 647                 drbd_warn(device, "Forced to consider local data as UpToDate!\n");
 648
 649         /* Wait until nothing is on the fly :) */
 650         wait_event(device->misc_wait, atomic_read(&device->ap_pending_cnt) == 0);
 651
 652         /* FIXME also wait for all pending P_BARRIER_ACK? */
 653
 654         if (new_role == R_SECONDARY) {
 655                 set_disk_ro(device->vdisk, true);
 656                 if (get_ldev(device)) {
 657                         device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
 658                         put_ldev(device);
 659                 }
 660         } else {
 661                 mutex_lock(&first_peer_device(device)->connection->conf_update);
 662                 nc = first_peer_device(device)->connection->net_conf;
 663                 if (nc)
 664                         nc->discard_my_data = 0; /* without copy; single bit op is atomic */
 665                 mutex_unlock(&first_peer_device(device)->connection->conf_update);
 666
 667                 set_disk_ro(device->vdisk, false);
 668                 if (get_ldev(device)) {
 669                         if (((device->state.conn < C_CONNECTED ||
 670                                device->state.pdsk <= D_FAILED)
 671                               && device->ldev->md.uuid[UI_BITMAP] == 0) || forced)
 672                                 drbd_uuid_new_current(device);
 673
 674                         device->ldev->md.uuid[UI_CURRENT] |=  (u64)1;
 675                         put_ldev(device);
 676                 }
 677         }
 678
 679         /* writeout of activity log covered areas of the bitmap
 680          * to stable storage done in after state change already */
 681
 682         if (device->state.conn >= C_WF_REPORT_PARAMS) {
 683                 /* if this was forced, we should consider sync */
 684                 if (forced)
 685                         drbd_send_uuids(device);
 686                 drbd_send_current_state(device);
 687         }
 688
 689         drbd_md_sync(device);
 690
 691         kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
 692 out:
 693         mutex_unlock(device->state_mutex);
 694         return rv;
 695 }
 696
 697 static const char *from_attrs_err_to_txt(int err)
 698 {
 699         return  err == -ENOMSG ? "required attribute missing" :
 700                 err == -EOPNOTSUPP ? "unknown mandatory attribute" :
 701                 err == -EEXIST ? "can not change invariant setting" :
 702                 "invalid attribute value";
 703 }
 704
 705 int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
 706 {
 707         struct set_role_parms parms;
 708         int err;
 709         enum drbd_ret_code retcode;
 710
 711         retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
 712         if (!adm_ctx.reply_skb)
 713                 return retcode;
 714         if (retcode != NO_ERROR)
 715                 goto out;
 716
 717         memset(&parms, 0, sizeof(parms));
 718         if (info->attrs[DRBD_NLA_SET_ROLE_PARMS]) {
 719                 err = set_role_parms_from_attrs(&parms, info);
 720                 if (err) {
 721                         retcode = ERR_MANDATORY_TAG;
 722                         drbd_msg_put_info(from_attrs_err_to_txt(err));
 723                         goto out;
 724                 }
 725         }
 726
 727         if (info->genlhdr->cmd == DRBD_ADM_PRIMARY)
 728                 retcode = drbd_set_role(adm_ctx.device, R_PRIMARY, parms.assume_uptodate);
 729         else
 730                 retcode = drbd_set_role(adm_ctx.device, R_SECONDARY, 0);
 731 out:
 732         drbd_adm_finish(info, retcode);
 733         return 0;
 734 }
 735
 736 /* Initializes the md.*_offset members, so we are able to find
 737  * the on disk meta data.
 738  *
 739  * We currently have two possible layouts:
 740  * external:
 741  *   |----------- md_size_sect ------------------|
 742  *   [ 4k superblock ][ activity log ][  Bitmap  ]
 743  *   | al_offset == 8 |
 744  *   | bm_offset = al_offset + X      |
 745  *  ==> bitmap sectors = md_size_sect - bm_offset
 746  *
 747  * internal:
 748  *            |----------- md_size_sect ------------------|
 749  * [data.....][  Bitmap  ][ activity log ][ 4k superblock ]
 750  *                        | al_offset < 0 |
 751  *            | bm_offset = al_offset - Y |
 752  *  ==> bitmap sectors = Y = al_offset - bm_offset
 753  *
 754  *  Activity log size used to be fixed 32kB,
 755  *  but is about to become configurable.
 756  */
 757 static void drbd_md_set_sector_offsets(struct drbd_device *device,
 758                                        struct drbd_backing_dev *bdev)
 759 {
 760         sector_t md_size_sect = 0;
 761         unsigned int al_size_sect = bdev->md.al_size_4k * 8;
 762
 763         bdev->md.md_offset = drbd_md_ss(bdev);
 764
 765         switch (bdev->md.meta_dev_idx) {
 766         default:
 767                 /* v07 style fixed size indexed meta data */
 768                 bdev->md.md_size_sect = MD_128MB_SECT;
 769                 bdev->md.al_offset = MD_4kB_SECT;
 770                 bdev->md.bm_offset = MD_4kB_SECT + al_size_sect;
 771                 break;
 772         case DRBD_MD_INDEX_FLEX_EXT:
 773                 /* just occupy the full device; unit: sectors */
 774                 bdev->md.md_size_sect = drbd_get_capacity(bdev->md_bdev);
 775                 bdev->md.al_offset = MD_4kB_SECT;
 776                 bdev->md.bm_offset = MD_4kB_SECT + al_size_sect;
 777                 break;
 778         case DRBD_MD_INDEX_INTERNAL:
 779         case DRBD_MD_INDEX_FLEX_INT:
 780                 /* al size is still fixed */
 781                 bdev->md.al_offset = -al_size_sect;
 782                 /* we need (slightly less than) ~ this much bitmap sectors: */
 783                 md_size_sect = drbd_get_capacity(bdev->backing_bdev);
 784                 md_size_sect = ALIGN(md_size_sect, BM_SECT_PER_EXT);
 785                 md_size_sect = BM_SECT_TO_EXT(md_size_sect);
 786                 md_size_sect = ALIGN(md_size_sect, 8);
 787
 788                 /* plus the "drbd meta data super block",
 789                  * and the activity log; */
 790                 md_size_sect += MD_4kB_SECT + al_size_sect;
 791
 792                 bdev->md.md_size_sect = md_size_sect;
 793                 /* bitmap offset is adjusted by 'super' block size */
 794                 bdev->md.bm_offset   = -md_size_sect + MD_4kB_SECT;
 795                 break;
 796         }
 797 }
 798
 799 /* input size is expected to be in KB */
 800 char *ppsize(char *buf, unsigned long long size)
 801 {
 802         /* Needs 9 bytes at max including trailing NUL:
 803          * -1ULL ==> "16384 EB" */
 804         static char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' };
 805         int base = 0;
 806         while (size >= 10000 && base < sizeof(units)-1) {
 807                 /* shift + round */
 808                 size = (size >> 10) + !!(size & (1<<9));
 809                 base++;
 810         }
 811         sprintf(buf, "%u %cB", (unsigned)size, units[base]);
 812
 813         return buf;
 814 }
 815
 816 /* there is still a theoretical deadlock when called from receiver
 817  * on a D_INCONSISTENT R_PRIMARY:
 818  *  remote READ does inc_ap_bio, receiver would need to receive answer
 819  *  packet from remote to dec_ap_bio again.
 820  *  receiver receive_sizes(), comes here,
 821  *  waits for ap_bio_cnt == 0. -> deadlock.
 822  * but this cannot happen, actually, because:
 823  *  R_PRIMARY D_INCONSISTENT, and peer's disk is unreachable
 824  *  (not connected, or bad/no disk on peer):
 825  *  see drbd_fail_request_early, ap_bio_cnt is zero.
 826  *  R_PRIMARY D_INCONSISTENT, and C_SYNC_TARGET:
 827  *  peer may not initiate a resize.
 828  */
 829 /* Note these are not to be confused with
 830  * drbd_adm_suspend_io/drbd_adm_resume_io,
 831  * which are (sub) state changes triggered by admin (drbdsetup),
 832  * and can be long lived.
 833  * This changes a bit in device->flags, is triggered by drbd internals,
 834  * and should be short-lived. */
 835 void drbd_suspend_io(struct drbd_device *device)
 836 {
 837         set_bit(SUSPEND_IO, &device->flags);
 838         if (drbd_suspended(device))
 839                 return;
 840         wait_event(device->misc_wait, !atomic_read(&device->ap_bio_cnt));
 841 }
 842
/* Counterpart of drbd_suspend_io(): clears the SUSPEND_IO flag bit again
 * and wakes up everybody sleeping on device->misc_wait. */
void drbd_resume_io(struct drbd_device *device)
{
        clear_bit(SUSPEND_IO, &device->flags);
        wake_up(&device->misc_wait);
}
 848
 849 /**
 850  * drbd_determine_dev_size() -  Sets the right device size obeying all constraints
 851  * @device:     DRBD device.
 852  *
 853  * Returns 0 on success, negative return values indicate errors.
 854  * You should call drbd_md_sync() after calling this function.
 855  */
 856 enum determine_dev_size
 857 drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct resize_parms *rs) __must_hold(local)
 858 {
 859         sector_t prev_first_sect, prev_size; /* previous meta location */
 860         sector_t la_size_sect, u_size;
 861         struct drbd_md *md = &device->ldev->md;
 862         u32 prev_al_stripe_size_4k;
 863         u32 prev_al_stripes;
 864         sector_t size;
 865         char ppb[10];
 866         void *buffer;
 867
 868         int md_moved, la_size_changed;
 869         enum determine_dev_size rv = DS_UNCHANGED;
 870
 871         /* race:
 872          * application request passes inc_ap_bio,
 873          * but then cannot get an AL-reference.
 874          * this function later may wait on ap_bio_cnt == 0. -> deadlock.
 875          *
 876          * to avoid that:
 877          * Suspend IO right here.
 878          * still lock the act_log to not trigger ASSERTs there.
 879          */
 880         drbd_suspend_io(device);
 881         buffer = drbd_md_get_buffer(device); /* Lock meta-data IO */
 882         if (!buffer) {
 883                 drbd_resume_io(device);
 884                 return DS_ERROR;
 885         }
 886
 887         /* no wait necessary anymore, actually we could assert that */
 888         wait_event(device->al_wait, lc_try_lock(device->act_log));
 889
 890         prev_first_sect = drbd_md_first_sector(device->ldev);
 891         prev_size = device->ldev->md.md_size_sect;
 892         la_size_sect = device->ldev->md.la_size_sect;
 893
 894         if (rs) {
 895                 /* rs is non NULL if we should change the AL layout only */
 896
 897                 prev_al_stripes = md->al_stripes;
 898                 prev_al_stripe_size_4k = md->al_stripe_size_4k;
 899
 900                 md->al_stripes = rs->al_stripes;
 901                 md->al_stripe_size_4k = rs->al_stripe_size / 4;
 902                 md->al_size_4k = (u64)rs->al_stripes * rs->al_stripe_size / 4;
 903         }
 904
 905         drbd_md_set_sector_offsets(device, device->ldev);
 906
 907         rcu_read_lock();
 908         u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;
 909         rcu_read_unlock();
 910         size = drbd_new_dev_size(device, device->ldev, u_size, flags & DDSF_FORCED);
 911
 912         if (size < la_size_sect) {
 913                 if (rs && u_size == 0) {
 914                         /* Remove "rs &&" later. This check should always be active, but
 915                            right now the receiver expects the permissive behavior */
 916                         drbd_warn(device, "Implicit shrink not allowed. "
 917                                  "Use --size=%llus for explicit shrink.\n",
 918                                  (unsigned long long)size);
 919                         rv = DS_ERROR_SHRINK;
 920                 }
 921                 if (u_size > size)
 922                         rv = DS_ERROR_SPACE_MD;
 923                 if (rv != DS_UNCHANGED)
 924                         goto err_out;
 925         }
 926
 927         if (drbd_get_capacity(device->this_bdev) != size ||
 928             drbd_bm_capacity(device) != size) {
 929                 int err;
 930                 err = drbd_bm_resize(device, size, !(flags & DDSF_NO_RESYNC));
 931                 if (unlikely(err)) {
 932                         /* currently there is only one error: ENOMEM! */
 933                         size = drbd_bm_capacity(device)>>1;
 934                         if (size == 0) {
 935                                 drbd_err(device, "OUT OF MEMORY! "
 936                                     "Could not allocate bitmap!\n");
 937                         } else {
 938                                 drbd_err(device, "BM resizing failed. "
 939                                     "Leaving size unchanged at size = %lu KB\n",
 940                                     (unsigned long)size);
 941                         }
 942                         rv = DS_ERROR;
 943                 }
 944                 /* racy, see comments above. */
 945                 drbd_set_my_capacity(device, size);
 946                 device->ldev->md.la_size_sect = size;
 947                 drbd_info(device, "size = %s (%llu KB)\n", ppsize(ppb, size>>1),
 948                      (unsigned long long)size>>1);
 949         }
 950         if (rv <= DS_ERROR)
 951                 goto err_out;
 952
 953         la_size_changed = (la_size_sect != device->ldev->md.la_size_sect);
 954
 955         md_moved = prev_first_sect != drbd_md_first_sector(device->ldev)
 956                 || prev_size       != device->ldev->md.md_size_sect;
 957
 958         if (la_size_changed || md_moved || rs) {
 959                 u32 prev_flags;
 960
 961                 drbd_al_shrink(device); /* All extents inactive. */
 962
 963                 prev_flags = md->flags;
 964                 md->flags &= ~MDF_PRIMARY_IND;
 965                 drbd_md_write(device, buffer);
 966
 967                 drbd_info(device, "Writing the whole bitmap, %s\n",
 968                          la_size_changed && md_moved ? "size changed and md moved" :
 969                          la_size_changed ? "size changed" : "md moved");
 970                 /* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
 971                 drbd_bitmap_io(device, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
 972                                "size changed", BM_LOCKED_MASK);
 973                 drbd_initialize_al(device, buffer);
 974
 975                 md->flags = prev_flags;
 976                 drbd_md_write(device, buffer);
 977
 978                 if (rs)
 979                         drbd_info(device, "Changed AL layout to al-stripes = %d, al-stripe-size-kB = %d\n",
 980                                   md->al_stripes, md->al_stripe_size_4k * 4);
 981         }
 982
 983         if (size > la_size_sect)
 984                 rv = la_size_sect ? DS_GREW : DS_GREW_FROM_ZERO;
 985         if (size < la_size_sect)
 986                 rv = DS_SHRUNK;
 987
 988         if (0) {
 989         err_out:
 990                 if (rs) {
 991                         md->al_stripes = prev_al_stripes;
 992                         md->al_stripe_size_4k = prev_al_stripe_size_4k;
 993                         md->al_size_4k = (u64)prev_al_stripes * prev_al_stripe_size_4k;
 994
 995                         drbd_md_set_sector_offsets(device, device->ldev);
 996                 }
 997         }
 998         lc_unlock(device->act_log);
 999         wake_up(&device->al_wait);
1000         drbd_md_put_buffer(device);
1001         drbd_resume_io(device);
1002
1003         return rv;
1004 }
1005
1006 sector_t
1007 drbd_new_dev_size(struct drbd_device *device, struct drbd_backing_dev *bdev,
1008                   sector_t u_size, int assume_peer_has_space)
1009 {
1010         sector_t p_size = device->p_size;   /* partner's disk size. */
1011         sector_t la_size_sect = bdev->md.la_size_sect; /* last agreed size. */
1012         sector_t m_size; /* my size */
1013         sector_t size = 0;
1014
1015         m_size = drbd_get_max_capacity(bdev);
1016
1017         if (device->state.conn < C_CONNECTED && assume_peer_has_space) {
1018                 drbd_warn(device, "Resize while not connected was forced by the user!\n");
1019                 p_size = m_size;
1020         }
1021
1022         if (p_size && m_size) {
1023                 size = min_t(sector_t, p_size, m_size);
1024         } else {
1025                 if (la_size_sect) {
1026                         size = la_size_sect;
1027                         if (m_size && m_size < size)
1028                                 size = m_size;
1029                         if (p_size && p_size < size)
1030                                 size = p_size;
1031                 } else {
1032                         if (m_size)
1033                                 size = m_size;
1034                         if (p_size)
1035                                 size = p_size;
1036                 }
1037         }
1038
1039         if (size == 0)
1040                 drbd_err(device, "Both nodes diskless!\n");
1041
1042         if (u_size) {
1043                 if (u_size > size)
1044                         drbd_err(device, "Requested disk size is too big (%lu > %lu)\n",
1045                             (unsigned long)u_size>>1, (unsigned long)size>>1);
1046                 else
1047                         size = u_size;
1048         }
1049
1050         return size;
1051 }
1052
1053 /**
1054  * drbd_check_al_size() - Ensures that the AL is of the right size
1055  * @device:     DRBD device.
1056  *
1057  * Returns -EBUSY if current al lru is still used, -ENOMEM when allocation
1058  * failed, and 0 on success. You should call drbd_md_sync() after you called
1059  * this function.
1060  */
1061 static int drbd_check_al_size(struct drbd_device *device, struct disk_conf *dc)
1062 {
1063         struct lru_cache *n, *t;
1064         struct lc_element *e;
1065         unsigned int in_use;
1066         int i;
1067
1068         if (device->act_log &&
1069             device->act_log->nr_elements == dc->al_extents)
1070                 return 0;
1071
1072         in_use = 0;
1073         t = device->act_log;
1074         n = lc_create("act_log", drbd_al_ext_cache, AL_UPDATES_PER_TRANSACTION,
1075                 dc->al_extents, sizeof(struct lc_element), 0);
1076
1077         if (n == NULL) {
1078                 drbd_err(device, "Cannot allocate act_log lru!\n");
1079                 return -ENOMEM;
1080         }
1081         spin_lock_irq(&device->al_lock);
1082         if (t) {
1083                 for (i = 0; i < t->nr_elements; i++) {
1084                         e = lc_element_by_index(t, i);
1085                         if (e->refcnt)
1086                                 drbd_err(device, "refcnt(%d)==%d\n",
1087                                     e->lc_number, e->refcnt);
1088                         in_use += e->refcnt;
1089                 }
1090         }
1091         if (!in_use)
1092                 device->act_log = n;
1093         spin_unlock_irq(&device->al_lock);
1094         if (in_use) {
1095                 drbd_err(device, "Activity log still in use!\n");
1096                 lc_destroy(n);
1097                 return -EBUSY;
1098         } else {
1099                 if (t)
1100                         lc_destroy(t);
1101         }
1102         drbd_md_mark_dirty(device); /* we changed device->act_log->nr_elemens */
1103         return 0;
1104 }
1105
1106 static void drbd_setup_queue_param(struct drbd_device *device, unsigned int max_bio_size)
1107 {
1108         struct request_queue * const q = device->rq_queue;
1109         unsigned int max_hw_sectors = max_bio_size >> 9;
1110         unsigned int max_segments = 0;
1111
1112         if (get_ldev_if_state(device, D_ATTACHING)) {
1113                 struct request_queue * const b = device->ldev->backing_bdev->bd_disk->queue;
1114
1115                 max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
1116                 rcu_read_lock();
1117                 max_segments = rcu_dereference(device->ldev->disk_conf)->max_bio_bvecs;
1118                 rcu_read_unlock();
1119                 put_ldev(device);
1120         }
1121
1122         blk_queue_logical_block_size(q, 512);
1123         blk_queue_max_hw_sectors(q, max_hw_sectors);
1124         /* This is the workaround for "bio would need to, but cannot, be split" */
1125         blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
1126         blk_queue_segment_boundary(q, PAGE_CACHE_SIZE-1);
1127
1128         if (get_ldev_if_state(device, D_ATTACHING)) {
1129                 struct request_queue * const b = device->ldev->backing_bdev->bd_disk->queue;
1130
1131                 blk_queue_stack_limits(q, b);
1132
1133                 if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) {
1134                         drbd_info(device, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n",
1135                                  q->backing_dev_info.ra_pages,
1136                                  b->backing_dev_info.ra_pages);
1137                         q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages;
1138                 }
1139                 put_ldev(device);
1140         }
1141 }
1142
1143 void drbd_reconsider_max_bio_size(struct drbd_device *device)
1144 {
1145         unsigned int now, new, local, peer;
1146
1147         now = queue_max_hw_sectors(device->rq_queue) << 9;
1148         local = device->local_max_bio_size; /* Eventually last known value, from volatile memory */
1149         peer = device->peer_max_bio_size; /* Eventually last known value, from meta data */
1150
1151         if (get_ldev_if_state(device, D_ATTACHING)) {
1152                 local = queue_max_hw_sectors(device->ldev->backing_bdev->bd_disk->queue) << 9;
1153                 device->local_max_bio_size = local;
1154                 put_ldev(device);
1155         }
1156         local = min(local, DRBD_MAX_BIO_SIZE);
1157
1158         /* We may ignore peer limits if the peer is modern enough.
1159            Because new from 8.3.8 onwards the peer can use multiple
1160            BIOs for a single peer_request */
1161         if (device->state.conn >= C_WF_REPORT_PARAMS) {
1162                 if (first_peer_device(device)->connection->agreed_pro_version < 94)
1163                         peer = min(device->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
1164                         /* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */
1165                 else if (first_peer_device(device)->connection->agreed_pro_version == 94)
1166                         peer = DRBD_MAX_SIZE_H80_PACKET;
1167                 else if (first_peer_device(device)->connection->agreed_pro_version < 100)
1168                         peer = DRBD_MAX_BIO_SIZE_P95;  /* drbd 8.3.8 onwards, before 8.4.0 */
1169                 else
1170                         peer = DRBD_MAX_BIO_SIZE;
1171         }
1172
1173         new = min(local, peer);
1174
1175         if (device->state.role == R_PRIMARY && new < now)
1176                 drbd_err(device, "ASSERT FAILED new < now; (%u < %u)\n", new, now);
1177
1178         if (new != now)
1179                 drbd_info(device, "max BIO size = %u\n", new);
1180
1181         drbd_setup_queue_param(device, new);
1182 }
1183
/* Starts the worker thread of @connection, then calls
 * conn_flush_workqueue() on the connection before returning. */
static void conn_reconfig_start(struct drbd_connection *connection)
{
        drbd_thread_start(&connection->worker);
        conn_flush_workqueue(connection);
}
1190
1191 /* if still unconfigured, stops worker again. */
1192 static void conn_reconfig_done(struct drbd_connection *connection)
1193 {
1194         bool stop_threads;
1195         spin_lock_irq(&connection->req_lock);
1196         stop_threads = conn_all_vols_unconf(connection) &&
1197                 connection->cstate == C_STANDALONE;
1198         spin_unlock_irq(&connection->req_lock);
1199         if (stop_threads) {
1200                 /* asender is implicitly stopped by receiver
1201                  * in conn_disconnect() */
1202                 drbd_thread_stop(&connection->receiver);
1203                 drbd_thread_stop(&connection->worker);
1204         }
1205 }
1206
1207 /* Make sure IO is suspended before calling this function(). */
1208 static void drbd_suspend_al(struct drbd_device *device)
1209 {
1210         int s = 0;
1211
1212         if (!lc_try_lock(device->act_log)) {
1213                 drbd_warn(device, "Failed to lock al in drbd_suspend_al()\n");
1214                 return;
1215         }
1216
1217         drbd_al_shrink(device);
1218         spin_lock_irq(&first_peer_device(device)->connection->req_lock);
1219         if (device->state.conn < C_CONNECTED)
1220                 s = !test_and_set_bit(AL_SUSPENDED, &device->flags);
1221         spin_unlock_irq(&first_peer_device(device)->connection->req_lock);
1222         lc_unlock(device->act_log);
1223
1224         if (s)
1225                 drbd_info(device, "Suspended AL updates\n");
1226 }
1227
1228
1229 static bool should_set_defaults(struct genl_info *info)
1230 {
1231         unsigned flags = ((struct drbd_genlmsghdr*)info->userhdr)->flags;
1232         return 0 != (flags & DRBD_GENL_F_SET_DEFAULTS);
1233 }
1234
1235 static unsigned int drbd_al_extents_max(struct drbd_backing_dev *bdev)
1236 {
1237         /* This is limited by 16 bit "slot" numbers,
1238          * and by available on-disk context storage.
1239          *
1240          * Also (u16)~0 is special (denotes a "free" extent).
1241          *
1242          * One transaction occupies one 4kB on-disk block,
1243          * we have n such blocks in the on disk ring buffer,
1244          * the "current" transaction may fail (n-1),
1245          * and there is 919 slot numbers context information per transaction.
1246          *
1247          * 72 transaction blocks amounts to more than 2**16 context slots,
1248          * so cap there first.
1249          */
1250         const unsigned int max_al_nr = DRBD_AL_EXTENTS_MAX;
1251         const unsigned int sufficient_on_disk =
1252                 (max_al_nr + AL_CONTEXT_PER_TRANSACTION -1)
1253                 /AL_CONTEXT_PER_TRANSACTION;
1254
1255         unsigned int al_size_4k = bdev->md.al_size_4k;
1256
1257         if (al_size_4k > sufficient_on_disk)
1258                 return max_al_nr;
1259
1260         return (al_size_4k - 1) * AL_CONTEXT_PER_TRANSACTION;
1261 }
1262
/* Netlink .doit handler: change the disk options of a device that has a
 * local disk.
 *
 * A copy of the current disk_conf is modified from the netlink attributes
 * (optionally reset to defaults first), sanitized, and then published with
 * rcu_assign_pointer().  The old disk_conf (and the old resync plan, if it
 * had to be replaced) are freed after synchronize_rcu().
 *
 * If drbd_adm_prepare() could not set up a reply skb, its return value is
 * handed back directly.  Otherwise the outcome is reported through
 * drbd_adm_finish() and the handler itself returns 0. */
int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
{
        enum drbd_ret_code retcode;
        struct drbd_device *device;
        struct disk_conf *new_disk_conf, *old_disk_conf;
        struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
        int err, fifo_size;

        retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
        if (!adm_ctx.reply_skb)
                return retcode;
        if (retcode != NO_ERROR)
                goto out;

        device = adm_ctx.device;

        /* we also need a disk
         * to change the options on */
        if (!get_ldev(device)) {
                retcode = ERR_NO_DISK;
                goto out;
        }

        new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL);
        if (!new_disk_conf) {
                retcode = ERR_NOMEM;
                goto fail;
        }

        /* conf_update is held from here until the new configuration is
         * published (or the attempt is abandoned via fail_unlock) */
        mutex_lock(&first_peer_device(device)->connection->conf_update);
        old_disk_conf = device->ldev->disk_conf;
        /* start from the current settings ... */
        *new_disk_conf = *old_disk_conf;
        /* ... or from the defaults, if the request asks for that */
        if (should_set_defaults(info))
                set_disk_conf_defaults(new_disk_conf);

        /* -ENOMSG is tolerated here; any other error aborts the request */
        err = disk_conf_from_attrs_for_change(new_disk_conf, info);
        if (err && err != -ENOMSG) {
                retcode = ERR_MANDATORY_TAG;
                drbd_msg_put_info(from_attrs_err_to_txt(err));
                goto fail_unlock;
        }

        /* clamp values into their valid ranges */
        if (!expect(new_disk_conf->resync_rate >= 1))
                new_disk_conf->resync_rate = 1;

        if (new_disk_conf->al_extents < DRBD_AL_EXTENTS_MIN)
                new_disk_conf->al_extents = DRBD_AL_EXTENTS_MIN;
        if (new_disk_conf->al_extents > drbd_al_extents_max(device->ldev))
                new_disk_conf->al_extents = drbd_al_extents_max(device->ldev);

        if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
                new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;

        /* a new resync plan buffer is only needed if its size changes */
        fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
        if (fifo_size != device->rs_plan_s->size) {
                new_plan = fifo_alloc(fifo_size);
                if (!new_plan) {
                        /* NOTE(review): message lacks a trailing "\n",
                         * unlike the other messages in this file */
                        drbd_err(device, "kmalloc of fifo_buffer failed");
                        retcode = ERR_NOMEM;
                        goto fail_unlock;
                }
        }

        /* resize the activity log with IO suspended and the AL locked */
        drbd_suspend_io(device);
        wait_event(device->al_wait, lc_try_lock(device->act_log));
        drbd_al_shrink(device);
        err = drbd_check_al_size(device, new_disk_conf);
        lc_unlock(device->act_log);
        wake_up(&device->al_wait);
        drbd_resume_io(device);

        /* NOTE(review): drbd_check_al_size() may also return -EBUSY;
         * both failures are reported as ERR_NOMEM here */
        if (err) {
                retcode = ERR_NOMEM;
                goto fail_unlock;
        }

        /* validate resync_after and publish the new configuration
         * under global_state_lock */
        write_lock_irq(&global_state_lock);
        retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
        if (retcode == NO_ERROR) {
                rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
                drbd_resync_after_changed(device);
        }
        write_unlock_irq(&global_state_lock);

        if (retcode != NO_ERROR)
                goto fail_unlock;

        if (new_plan) {
                old_plan = device->rs_plan_s;
                rcu_assign_pointer(device->rs_plan_s, new_plan);
        }

        mutex_unlock(&first_peer_device(device)->connection->conf_update);

        /* mirror al_updates / md_flushes into the meta data and device flags */
        if (new_disk_conf->al_updates)
                device->ldev->md.flags &= ~MDF_AL_DISABLED;
        else
                device->ldev->md.flags |= MDF_AL_DISABLED;

        if (new_disk_conf->md_flushes)
                clear_bit(MD_NO_FUA, &device->flags);
        else
                set_bit(MD_NO_FUA, &device->flags);

        drbd_bump_write_ordering(first_peer_device(device)->connection, WO_bdev_flush);

        drbd_md_sync(device);

        if (device->state.conn >= C_CONNECTED)
                drbd_send_sync_param(device);

        /* wait for RCU readers before freeing the replaced objects;
         * old_plan is NULL (kfree is a no-op) if the plan was kept */
        synchronize_rcu();
        kfree(old_disk_conf);
        kfree(old_plan);
        mod_timer(&device->request_timer, jiffies + HZ);
        goto success;

        /* error exits: drop the mutex if held, free what was never published */
fail_unlock:
        mutex_unlock(&first_peer_device(device)->connection->conf_update);
 fail:
        kfree(new_disk_conf);
        kfree(new_plan);
success:
        put_ldev(device);
 out:
        drbd_adm_finish(info, retcode);
        return 0;
}
1391
1392 int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
1393 {
1394         struct drbd_device *device;
1395         int err;
1396         enum drbd_ret_code retcode;
1397         enum determine_dev_size dd;
1398         sector_t max_possible_sectors;
1399         sector_t min_md_device_sectors;
1400         struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */
1401         struct disk_conf *new_disk_conf = NULL;
1402         struct block_device *bdev;
1403         struct lru_cache *resync_lru = NULL;
1404         struct fifo_buffer *new_plan = NULL;
1405         union drbd_state ns, os;
1406         enum drbd_state_rv rv;
1407         struct net_conf *nc;
1408
1409         retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
1410         if (!adm_ctx.reply_skb)
1411                 return retcode;
1412         if (retcode != NO_ERROR)
1413                 goto finish;
1414
1415         device = adm_ctx.device;
1416         conn_reconfig_start(first_peer_device(device)->connection);
1417
1418         /* if you want to reconfigure, please tear down first */
1419         if (device->state.disk > D_DISKLESS) {
1420                 retcode = ERR_DISK_CONFIGURED;
1421                 goto fail;
1422         }
1423         /* It may just now have detached because of IO error.  Make sure
1424          * drbd_ldev_destroy is done already, we may end up here very fast,
1425          * e.g. if someone calls attach from the on-io-error handler,
1426          * to realize a "hot spare" feature (not that I'd recommend that) */
1427         wait_event(device->misc_wait, !atomic_read(&device->local_cnt));
1428
1429         /* make sure there is no leftover from previous force-detach attempts */
1430         clear_bit(FORCE_DETACH, &device->flags);
1431         clear_bit(WAS_IO_ERROR, &device->flags);
1432         clear_bit(WAS_READ_ERROR, &device->flags);
1433
1434         /* and no leftover from previously aborted resync or verify, either */
1435         device->rs_total = 0;
1436         device->rs_failed = 0;
1437         atomic_set(&device->rs_pending_cnt, 0);
1438
1439         /* allocation not in the IO path, drbdsetup context */
1440         nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL);
1441         if (!nbc) {
1442                 retcode = ERR_NOMEM;
1443                 goto fail;
1444         }
1445         spin_lock_init(&nbc->md.uuid_lock);
1446
1447         new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
1448         if (!new_disk_conf) {
1449                 retcode = ERR_NOMEM;
1450                 goto fail;
1451         }
1452         nbc->disk_conf = new_disk_conf;
1453
1454         set_disk_conf_defaults(new_disk_conf);
1455         err = disk_conf_from_attrs(new_disk_conf, info);
1456         if (err) {
1457                 retcode = ERR_MANDATORY_TAG;
1458                 drbd_msg_put_info(from_attrs_err_to_txt(err));
1459                 goto fail;
1460         }
1461
1462         if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
1463                 new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;
1464
1465         new_plan = fifo_alloc((new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ);
1466         if (!new_plan) {
1467                 retcode = ERR_NOMEM;
1468                 goto fail;
1469         }
1470
1471         if (new_disk_conf->meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) {
1472                 retcode = ERR_MD_IDX_INVALID;
1473                 goto fail;
1474         }
1475
1476         write_lock_irq(&global_state_lock);
1477         retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
1478         write_unlock_irq(&global_state_lock);
1479         if (retcode != NO_ERROR)
1480                 goto fail;
1481
1482         rcu_read_lock();
1483         nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
1484         if (nc) {
1485                 if (new_disk_conf->fencing == FP_STONITH && nc->wire_protocol == DRBD_PROT_A) {
1486                         rcu_read_unlock();
1487                         retcode = ERR_STONITH_AND_PROT_A;
1488                         goto fail;
1489                 }
1490         }
1491         rcu_read_unlock();
1492
1493         bdev = blkdev_get_by_path(new_disk_conf->backing_dev,
1494                                   FMODE_READ | FMODE_WRITE | FMODE_EXCL, device);
1495         if (IS_ERR(bdev)) {
1496                 drbd_err(device, "open(\"%s\") failed with %ld\n", new_disk_conf->backing_dev,
1497                         PTR_ERR(bdev));
1498                 retcode = ERR_OPEN_DISK;
1499                 goto fail;
1500         }
1501         nbc->backing_bdev = bdev;
1502
1503         /*
1504          * meta_dev_idx >= 0: external fixed size, possibly multiple
1505          * drbd sharing one meta device.  TODO in that case, paranoia
1506          * check that [md_bdev, meta_dev_idx] is not yet used by some
1507          * other drbd minor!  (if you use drbd.conf + drbdadm, that
1508          * should check it for you already; but if you don't, or
1509          * someone fooled it, we need to double check here)
1510          */
1511         bdev = blkdev_get_by_path(new_disk_conf->meta_dev,
1512                                   FMODE_READ | FMODE_WRITE | FMODE_EXCL,
1513                                   (new_disk_conf->meta_dev_idx < 0) ?
1514                                   (void *)device : (void *)drbd_m_holder);
1515         if (IS_ERR(bdev)) {
1516                 drbd_err(device, "open(\"%s\") failed with %ld\n", new_disk_conf->meta_dev,
1517                         PTR_ERR(bdev));
1518                 retcode = ERR_OPEN_MD_DISK;
1519                 goto fail;
1520         }
1521         nbc->md_bdev = bdev;
1522
1523         if ((nbc->backing_bdev == nbc->md_bdev) !=
1524             (new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
1525              new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) {
1526                 retcode = ERR_MD_IDX_INVALID;
1527                 goto fail;
1528         }
1529
1530         resync_lru = lc_create("resync", drbd_bm_ext_cache,
1531                         1, 61, sizeof(struct bm_extent),
1532                         offsetof(struct bm_extent, lce));
1533         if (!resync_lru) {
1534                 retcode = ERR_NOMEM;
1535                 goto fail;
1536         }
1537
1538         /* Read our meta data super block early.
1539          * This also sets other on-disk offsets. */
1540         retcode = drbd_md_read(device, nbc);
1541         if (retcode != NO_ERROR)
1542                 goto fail;
1543
1544         if (new_disk_conf->al_extents < DRBD_AL_EXTENTS_MIN)
1545                 new_disk_conf->al_extents = DRBD_AL_EXTENTS_MIN;
1546         if (new_disk_conf->al_extents > drbd_al_extents_max(nbc))
1547                 new_disk_conf->al_extents = drbd_al_extents_max(nbc);
1548
1549         if (drbd_get_max_capacity(nbc) < new_disk_conf->disk_size) {
1550                 drbd_err(device, "max capacity %llu smaller than disk size %llu\n",
1551                         (unsigned long long) drbd_get_max_capacity(nbc),
1552                         (unsigned long long) new_disk_conf->disk_size);
1553                 retcode = ERR_DISK_TOO_SMALL;
1554                 goto fail;
1555         }
1556
1557         if (new_disk_conf->meta_dev_idx < 0) {
1558                 max_possible_sectors = DRBD_MAX_SECTORS_FLEX;
1559                 /* at least one MB, otherwise it does not make sense */
1560                 min_md_device_sectors = (2<<10);
1561         } else {
1562                 max_possible_sectors = DRBD_MAX_SECTORS;
1563                 min_md_device_sectors = MD_128MB_SECT * (new_disk_conf->meta_dev_idx + 1);
1564         }
1565
1566         if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) {
1567                 retcode = ERR_MD_DISK_TOO_SMALL;
1568                 drbd_warn(device, "refusing attach: md-device too small, "
1569                      "at least %llu sectors needed for this meta-disk type\n",
1570                      (unsigned long long) min_md_device_sectors);
1571                 goto fail;
1572         }
1573
1574         /* Make sure the new disk is big enough
1575          * (we may currently be R_PRIMARY with no local disk...) */
1576         if (drbd_get_max_capacity(nbc) <
1577             drbd_get_capacity(device->this_bdev)) {
1578                 retcode = ERR_DISK_TOO_SMALL;
1579                 goto fail;
1580         }
1581
1582         nbc->known_size = drbd_get_capacity(nbc->backing_bdev);
1583
1584         if (nbc->known_size > max_possible_sectors) {
1585                 drbd_warn(device, "==> truncating very big lower level device "
1586                         "to currently maximum possible %llu sectors <==\n",
1587                         (unsigned long long) max_possible_sectors);
1588                 if (new_disk_conf->meta_dev_idx >= 0)
1589                         drbd_warn(device, "==>> using internal or flexible "
1590                                       "meta data may help <<==\n");
1591         }
1592
1593         drbd_suspend_io(device);
1594         /* also wait for the last barrier ack. */
1595         /* FIXME see also https://daiquiri.linbit/cgi-bin/bugzilla/show_bug.cgi?id=171
1596          * We need a way to either ignore barrier acks for barriers sent before a device
1597          * was attached, or a way to wait for all pending barrier acks to come in.
1598          * As barriers are counted per resource,
1599          * we'd need to suspend io on all devices of a resource.
1600          */
1601         wait_event(device->misc_wait, !atomic_read(&device->ap_pending_cnt) || drbd_suspended(device));
1602         /* and for any other previously queued work */
1603         drbd_flush_workqueue(device);
1604
1605         rv = _drbd_request_state(device, NS(disk, D_ATTACHING), CS_VERBOSE);
1606         retcode = rv;  /* FIXME: Type mismatch. */
1607         drbd_resume_io(device);
1608         if (rv < SS_SUCCESS)
1609                 goto fail;
1610
1611         if (!get_ldev_if_state(device, D_ATTACHING))
1612                 goto force_diskless;
1613
1614         if (!device->bitmap) {
1615                 if (drbd_bm_init(device)) {
1616                         retcode = ERR_NOMEM;
1617                         goto force_diskless_dec;
1618                 }
1619         }
1620
1621         if (device->state.conn < C_CONNECTED &&
1622             device->state.role == R_PRIMARY &&
1623             (device->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) {
1624                 drbd_err(device, "Can only attach to data with current UUID=%016llX\n",
1625                     (unsigned long long)device->ed_uuid);
1626                 retcode = ERR_DATA_NOT_CURRENT;
1627                 goto force_diskless_dec;
1628         }
1629
1630         /* Since we are diskless, fix the activity log first... */
1631         if (drbd_check_al_size(device, new_disk_conf)) {
1632                 retcode = ERR_NOMEM;
1633                 goto force_diskless_dec;
1634         }
1635
1636         /* Prevent shrinking of consistent devices ! */
1637         if (drbd_md_test_flag(nbc, MDF_CONSISTENT) &&
1638             drbd_new_dev_size(device, nbc, nbc->disk_conf->disk_size, 0) < nbc->md.la_size_sect) {
1639                 drbd_warn(device, "refusing to truncate a consistent device\n");
1640                 retcode = ERR_DISK_TOO_SMALL;
1641                 goto force_diskless_dec;
1642         }
1643
1644         /* Reset the "barriers don't work" bits here, then force meta data to
1645          * be written, to ensure we determine if barriers are supported. */
1646         if (new_disk_conf->md_flushes)
1647                 clear_bit(MD_NO_FUA, &device->flags);
1648         else
1649                 set_bit(MD_NO_FUA, &device->flags);
1650
1651         /* Point of no return reached.
1652          * Devices and memory are no longer released by error cleanup below.
1653          * now device takes over responsibility, and the state engine should
1654          * clean it up somewhere.  */
1655         D_ASSERT(device, device->ldev == NULL);
1656         device->ldev = nbc;
1657         device->resync = resync_lru;
1658         device->rs_plan_s = new_plan;
1659         nbc = NULL;
1660         resync_lru = NULL;
1661         new_disk_conf = NULL;
1662         new_plan = NULL;
1663
1664         drbd_bump_write_ordering(first_peer_device(device)->connection, WO_bdev_flush);
1665
1666         if (drbd_md_test_flag(device->ldev, MDF_CRASHED_PRIMARY))
1667                 set_bit(CRASHED_PRIMARY, &device->flags);
1668         else
1669                 clear_bit(CRASHED_PRIMARY, &device->flags);
1670
1671         if (drbd_md_test_flag(device->ldev, MDF_PRIMARY_IND) &&
1672             !(device->state.role == R_PRIMARY &&
1673               first_peer_device(device)->connection->susp_nod))
1674                 set_bit(CRASHED_PRIMARY, &device->flags);
1675
1676         device->send_cnt = 0;
1677         device->recv_cnt = 0;
1678         device->read_cnt = 0;
1679         device->writ_cnt = 0;
1680
1681         drbd_reconsider_max_bio_size(device);
1682
1683         /* If I am currently not R_PRIMARY,
1684          * but meta data primary indicator is set,
1685          * I just now recover from a hard crash,
1686          * and have been R_PRIMARY before that crash.
1687          *
1688          * Now, if I had no connection before that crash
1689          * (have been degraded R_PRIMARY), chances are that
1690          * I won't find my peer now either.
1691          *
1692          * In that case, and _only_ in that case,
1693          * we use the degr-wfc-timeout instead of the default,
1694          * so we can automatically recover from a crash of a
1695          * degraded but active "cluster" after a certain timeout.
1696          */
1697         clear_bit(USE_DEGR_WFC_T, &device->flags);
1698         if (device->state.role != R_PRIMARY &&
1699              drbd_md_test_flag(device->ldev, MDF_PRIMARY_IND) &&
1700             !drbd_md_test_flag(device->ldev, MDF_CONNECTED_IND))
1701                 set_bit(USE_DEGR_WFC_T, &device->flags);
1702
1703         dd = drbd_determine_dev_size(device, 0, NULL);
1704         if (dd <= DS_ERROR) {
1705                 retcode = ERR_NOMEM_BITMAP;
1706                 goto force_diskless_dec;
1707         } else if (dd == DS_GREW)
1708                 set_bit(RESYNC_AFTER_NEG, &device->flags);
1709
1710         if (drbd_md_test_flag(device->ldev, MDF_FULL_SYNC) ||
1711             (test_bit(CRASHED_PRIMARY, &device->flags) &&
1712              drbd_md_test_flag(device->ldev, MDF_AL_DISABLED))) {
1713                 drbd_info(device, "Assuming that all blocks are out of sync "
1714                      "(aka FullSync)\n");
1715                 if (drbd_bitmap_io(device, &drbd_bmio_set_n_write,
1716                         "set_n_write from attaching", BM_LOCKED_MASK)) {
1717                         retcode = ERR_IO_MD_DISK;
1718                         goto force_diskless_dec;
1719                 }
1720         } else {
1721                 if (drbd_bitmap_io(device, &drbd_bm_read,
1722                         "read from attaching", BM_LOCKED_MASK)) {
1723                         retcode = ERR_IO_MD_DISK;
1724                         goto force_diskless_dec;
1725                 }
1726         }
1727
1728         if (_drbd_bm_total_weight(device) == drbd_bm_bits(device))
1729                 drbd_suspend_al(device); /* IO is still suspended here... */
1730
1731         spin_lock_irq(&first_peer_device(device)->connection->req_lock);
1732         os = drbd_read_state(device);
1733         ns = os;
1734         /* If MDF_CONSISTENT is not set go into inconsistent state,
1735            otherwise investigate MDF_WasUpToDate...
1736            If MDF_WAS_UP_TO_DATE is not set go into D_OUTDATED disk state,
1737            otherwise into D_CONSISTENT state.
1738         */
1739         if (drbd_md_test_flag(device->ldev, MDF_CONSISTENT)) {
1740                 if (drbd_md_test_flag(device->ldev, MDF_WAS_UP_TO_DATE))
1741                         ns.disk = D_CONSISTENT;
1742                 else
1743                         ns.disk = D_OUTDATED;
1744         } else {
1745                 ns.disk = D_INCONSISTENT;
1746         }
1747
1748         if (drbd_md_test_flag(device->ldev, MDF_PEER_OUT_DATED))
1749                 ns.pdsk = D_OUTDATED;
1750
1751         rcu_read_lock();
1752         if (ns.disk == D_CONSISTENT &&
1753             (ns.pdsk == D_OUTDATED || rcu_dereference(device->ldev->disk_conf)->fencing == FP_DONT_CARE))
1754                 ns.disk = D_UP_TO_DATE;
1755
1756         /* All tests on MDF_PRIMARY_IND, MDF_CONNECTED_IND,
1757            MDF_CONSISTENT and MDF_WAS_UP_TO_DATE must happen before
1758            this point, because drbd_request_state() modifies these
1759            flags. */
1760
1761         if (rcu_dereference(device->ldev->disk_conf)->al_updates)
1762                 device->ldev->md.flags &= ~MDF_AL_DISABLED;
1763         else
1764                 device->ldev->md.flags |= MDF_AL_DISABLED;
1765
1766         rcu_read_unlock();
1767
1768         /* In case we are C_CONNECTED postpone any decision on the new disk
1769            state after the negotiation phase. */
1770         if (device->state.conn == C_CONNECTED) {
1771                 device->new_state_tmp.i = ns.i;
1772                 ns.i = os.i;
1773                 ns.disk = D_NEGOTIATING;
1774
1775                 /* We expect to receive up-to-date UUIDs soon.
1776                    To avoid a race in receive_state, free p_uuid while
1777                    holding req_lock. I.e. atomic with the state change */
1778                 kfree(device->p_uuid);
1779                 device->p_uuid = NULL;
1780         }
1781
1782         rv = _drbd_set_state(device, ns, CS_VERBOSE, NULL);
1783         spin_unlock_irq(&first_peer_device(device)->connection->req_lock);
1784
1785         if (rv < SS_SUCCESS)
1786                 goto force_diskless_dec;
1787
1788         mod_timer(&device->request_timer, jiffies + HZ);
1789
1790         if (device->state.role == R_PRIMARY)
1791                 device->ldev->md.uuid[UI_CURRENT] |=  (u64)1;
1792         else
1793                 device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
1794
1795         drbd_md_mark_dirty(device);
1796         drbd_md_sync(device);
1797
1798         kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
1799         put_ldev(device);
1800         conn_reconfig_done(first_peer_device(device)->connection);
1801         drbd_adm_finish(info, retcode);
1802         return 0;
1803
1804  force_diskless_dec:
1805         put_ldev(device);
1806  force_diskless:
1807         drbd_force_state(device, NS(disk, D_DISKLESS));
1808         drbd_md_sync(device);
1809  fail:
1810         conn_reconfig_done(first_peer_device(device)->connection);
1811         if (nbc) {
1812                 if (nbc->backing_bdev)
1813                         blkdev_put(nbc->backing_bdev,
1814                                    FMODE_READ | FMODE_WRITE | FMODE_EXCL);
1815                 if (nbc->md_bdev)
1816                         blkdev_put(nbc->md_bdev,
1817                                    FMODE_READ | FMODE_WRITE | FMODE_EXCL);
1818                 kfree(nbc);
1819         }
1820         kfree(new_disk_conf);
1821         lc_destroy(resync_lru);
1822         kfree(new_plan);
1823
1824  finish:
1825         drbd_adm_finish(info, retcode);
1826         return 0;
1827 }
1828
1829 static int adm_detach(struct drbd_device *device, int force)
1830 {
1831         enum drbd_state_rv retcode;
1832         int ret;
1833
1834         if (force) {
1835                 set_bit(FORCE_DETACH, &device->flags);
1836                 drbd_force_state(device, NS(disk, D_FAILED));
1837                 retcode = SS_SUCCESS;
1838                 goto out;
1839         }
1840
1841         drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */
1842         drbd_md_get_buffer(device); /* make sure there is no in-flight meta-data IO */
1843         retcode = drbd_request_state(device, NS(disk, D_FAILED));
1844         drbd_md_put_buffer(device);
1845         /* D_FAILED will transition to DISKLESS. */
1846         ret = wait_event_interruptible(device->misc_wait,
1847                         device->state.disk != D_FAILED);
1848         drbd_resume_io(device);
1849         if ((int)retcode == (int)SS_IS_DISKLESS)
1850                 retcode = SS_NOTHING_TO_DO;
1851         if (ret)
1852                 retcode = ERR_INTR;
1853 out:
1854         return retcode;
1855 }
1856
1857 /* Detaching the disk is a process in multiple stages.  First we need to lock
1858  * out application IO, in-flight IO, IO stuck in drbd_al_begin_io.
1859  * Then we transition to D_DISKLESS, and wait for put_ldev() to return all
1860  * internal references as well.
1861  * Only then we have finally detached. */
1862 int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info)
1863 {
1864         enum drbd_ret_code retcode;
1865         struct detach_parms parms = { };
1866         int err;
1867
1868         retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
1869         if (!adm_ctx.reply_skb)
1870                 return retcode;
1871         if (retcode != NO_ERROR)
1872                 goto out;
1873
1874         if (info->attrs[DRBD_NLA_DETACH_PARMS]) {
1875                 err = detach_parms_from_attrs(&parms, info);
1876                 if (err) {
1877                         retcode = ERR_MANDATORY_TAG;
1878                         drbd_msg_put_info(from_attrs_err_to_txt(err));
1879                         goto out;
1880                 }
1881         }
1882
1883         retcode = adm_detach(adm_ctx.device, parms.force_detach);
1884 out:
1885         drbd_adm_finish(info, retcode);
1886         return 0;
1887 }
1888
1889 static bool conn_resync_running(struct drbd_connection *connection)
1890 {
1891         struct drbd_peer_device *peer_device;
1892         bool rv = false;
1893         int vnr;
1894
1895         rcu_read_lock();
1896         idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1897                 struct drbd_device *device = peer_device->device;
1898                 if (device->state.conn == C_SYNC_SOURCE ||
1899                     device->state.conn == C_SYNC_TARGET ||
1900                     device->state.conn == C_PAUSED_SYNC_S ||
1901                     device->state.conn == C_PAUSED_SYNC_T) {
1902                         rv = true;
1903                         break;
1904                 }
1905         }
1906         rcu_read_unlock();
1907
1908         return rv;
1909 }
1910
1911 static bool conn_ov_running(struct drbd_connection *connection)
1912 {
1913         struct drbd_peer_device *peer_device;
1914         bool rv = false;
1915         int vnr;
1916
1917         rcu_read_lock();
1918         idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1919                 struct drbd_device *device = peer_device->device;
1920                 if (device->state.conn == C_VERIFY_S ||
1921                     device->state.conn == C_VERIFY_T) {
1922                         rv = true;
1923                         break;
1924                 }
1925         }
1926         rcu_read_unlock();
1927
1928         return rv;
1929 }
1930
1931 static enum drbd_ret_code
1932 _check_net_options(struct drbd_connection *connection, struct net_conf *old_net_conf, struct net_conf *new_net_conf)
1933 {
1934         struct drbd_peer_device *peer_device;
1935         int i;
1936
1937         if (old_net_conf && connection->cstate == C_WF_REPORT_PARAMS && connection->agreed_pro_version < 100) {
1938                 if (new_net_conf->wire_protocol != old_net_conf->wire_protocol)
1939                         return ERR_NEED_APV_100;
1940
1941                 if (new_net_conf->two_primaries != old_net_conf->two_primaries)
1942                         return ERR_NEED_APV_100;
1943
1944                 if (strcmp(new_net_conf->integrity_alg, old_net_conf->integrity_alg))
1945                         return ERR_NEED_APV_100;
1946         }
1947
1948         if (!new_net_conf->two_primaries &&
1949             conn_highest_role(connection) == R_PRIMARY &&
1950             conn_highest_peer(connection) == R_PRIMARY)
1951                 return ERR_NEED_ALLOW_TWO_PRI;
1952
1953         if (new_net_conf->two_primaries &&
1954             (new_net_conf->wire_protocol != DRBD_PROT_C))
1955                 return ERR_NOT_PROTO_C;
1956
1957         idr_for_each_entry(&connection->peer_devices, peer_device, i) {
1958                 struct drbd_device *device = peer_device->device;
1959                 if (get_ldev(device)) {
1960                         enum drbd_fencing_p fp = rcu_dereference(device->ldev->disk_conf)->fencing;
1961                         put_ldev(device);
1962                         if (new_net_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH)
1963                                 return ERR_STONITH_AND_PROT_A;
1964                 }
1965                 if (device->state.role == R_PRIMARY && new_net_conf->discard_my_data)
1966                         return ERR_DISCARD_IMPOSSIBLE;
1967         }
1968
1969         if (new_net_conf->on_congestion != OC_BLOCK && new_net_conf->wire_protocol != DRBD_PROT_A)
1970                 return ERR_CONG_NOT_PROTO_A;
1971
1972         return NO_ERROR;
1973 }
1974
1975 static enum drbd_ret_code
1976 check_net_options(struct drbd_connection *connection, struct net_conf *new_net_conf)
1977 {
1978         static enum drbd_ret_code rv;
1979         struct drbd_peer_device *peer_device;
1980         int i;
1981
1982         rcu_read_lock();
1983         rv = _check_net_options(connection, rcu_dereference(connection->net_conf), new_net_conf);
1984         rcu_read_unlock();
1985
1986         /* connection->volumes protected by genl_lock() here */
1987         idr_for_each_entry(&connection->peer_devices, peer_device, i) {
1988                 struct drbd_device *device = peer_device->device;
1989                 if (!device->bitmap) {
1990                         if (drbd_bm_init(device))
1991                                 return ERR_NOMEM;
1992                 }
1993         }
1994
1995         return rv;
1996 }
1997
/*
 * Set of hash transforms allocated by alloc_crypto() for a new net_conf and
 * released by free_crypto().  A field stays NULL when the corresponding
 * algorithm name is empty or the allocation failed.
 */
struct crypto {
        struct crypto_hash *verify_tfm;    /* from net_conf->verify_alg */
        struct crypto_hash *csums_tfm;     /* from net_conf->csums_alg */
        struct crypto_hash *cram_hmac_tfm; /* "hmac(<net_conf->cram_hmac_alg>)" */
        struct crypto_hash *integrity_tfm; /* from net_conf->integrity_alg */
};
2004
2005 static int
2006 alloc_hash(struct crypto_hash **tfm, char *tfm_name, int err_alg)
2007 {
2008         if (!tfm_name[0])
2009                 return NO_ERROR;
2010
2011         *tfm = crypto_alloc_hash(tfm_name, 0, CRYPTO_ALG_ASYNC);
2012         if (IS_ERR(*tfm)) {
2013                 *tfm = NULL;
2014                 return err_alg;
2015         }
2016
2017         return NO_ERROR;
2018 }
2019
2020 static enum drbd_ret_code
2021 alloc_crypto(struct crypto *crypto, struct net_conf *new_net_conf)
2022 {
2023         char hmac_name[CRYPTO_MAX_ALG_NAME];
2024         enum drbd_ret_code rv;
2025
2026         rv = alloc_hash(&crypto->csums_tfm, new_net_conf->csums_alg,
2027                        ERR_CSUMS_ALG);
2028         if (rv != NO_ERROR)
2029                 return rv;
2030         rv = alloc_hash(&crypto->verify_tfm, new_net_conf->verify_alg,
2031                        ERR_VERIFY_ALG);
2032         if (rv != NO_ERROR)
2033                 return rv;
2034         rv = alloc_hash(&crypto->integrity_tfm, new_net_conf->integrity_alg,
2035                        ERR_INTEGRITY_ALG);
2036         if (rv != NO_ERROR)
2037                 return rv;
2038         if (new_net_conf->cram_hmac_alg[0] != 0) {
2039                 snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)",
2040                          new_net_conf->cram_hmac_alg);
2041
2042                 rv = alloc_hash(&crypto->cram_hmac_tfm, hmac_name,
2043                                ERR_AUTH_ALG);
2044         }
2045
2046         return rv;
2047 }
2048
2049 static void free_crypto(struct crypto *crypto)
2050 {
2051         crypto_free_hash(crypto->cram_hmac_tfm);
2052         crypto_free_hash(crypto->integrity_tfm);
2053         crypto_free_hash(crypto->csums_tfm);
2054         crypto_free_hash(crypto->verify_tfm);
2055 }
2056
2057 int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
2058 {
2059         enum drbd_ret_code retcode;
2060         struct drbd_connection *connection;
2061         struct net_conf *old_net_conf, *new_net_conf = NULL;
2062         int err;
2063         int ovr; /* online verify running */
2064         int rsr; /* re-sync running */
2065         struct crypto crypto = { };
2066
2067         retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONNECTION);
2068         if (!adm_ctx.reply_skb)
2069                 return retcode;
2070         if (retcode != NO_ERROR)
2071                 goto out;
2072
2073         connection = adm_ctx.connection;
2074
2075         new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
2076         if (!new_net_conf) {
2077                 retcode = ERR_NOMEM;
2078                 goto out;
2079         }
2080
2081         conn_reconfig_start(connection);
2082
2083         mutex_lock(&connection->data.mutex);
2084         mutex_lock(&connection->conf_update);
2085         old_net_conf = connection->net_conf;
2086
2087         if (!old_net_conf) {
2088                 drbd_msg_put_info("net conf missing, try connect");
2089                 retcode = ERR_INVALID_REQUEST;
2090                 goto fail;
2091         }
2092
2093         *new_net_conf = *old_net_conf;
2094         if (should_set_defaults(info))
2095                 set_net_conf_defaults(new_net_conf);
2096
2097         err = net_conf_from_attrs_for_change(new_net_conf, info);
2098         if (err && err != -ENOMSG) {
2099                 retcode = ERR_MANDATORY_TAG;
2100                 drbd_msg_put_info(from_attrs_err_to_txt(err));
2101                 goto fail;
2102         }
2103
2104         retcode = check_net_options(connection, new_net_conf);
2105         if (retcode != NO_ERROR)
2106                 goto fail;
2107
2108         /* re-sync running */
2109         rsr = conn_resync_running(connection);
2110         if (rsr && strcmp(new_net_conf->csums_alg, old_net_conf->csums_alg)) {
2111                 retcode = ERR_CSUMS_RESYNC_RUNNING;
2112                 goto fail;
2113         }
2114
2115         /* online verify running */
2116         ovr = conn_ov_running(connection);
2117         if (ovr && strcmp(new_net_conf->verify_alg, old_net_conf->verify_alg)) {
2118                 retcode = ERR_VERIFY_RUNNING;
2119                 goto fail;
2120         }
2121
2122         retcode = alloc_crypto(&crypto, new_net_conf);
2123         if (retcode != NO_ERROR)
2124                 goto fail;
2125
2126         rcu_assign_pointer(connection->net_conf, new_net_conf);
2127
2128         if (!rsr) {
2129                 crypto_free_hash(connection->csums_tfm);
2130                 connection->csums_tfm = crypto.csums_tfm;
2131                 crypto.csums_tfm = NULL;
2132         }
2133         if (!ovr) {
2134                 crypto_free_hash(connection->verify_tfm);
2135                 connection->verify_tfm = crypto.verify_tfm;
2136                 crypto.verify_tfm = NULL;
2137         }
2138
2139         crypto_free_hash(connection->integrity_tfm);
2140         connection->integrity_tfm = crypto.integrity_tfm;
2141         if (connection->cstate >= C_WF_REPORT_PARAMS && connection->agreed_pro_version >= 100)
2142                 /* Do this without trying to take connection->data.mutex again.  */
2143                 __drbd_send_protocol(connection, P_PROTOCOL_UPDATE);
2144
2145         crypto_free_hash(connection->cram_hmac_tfm);
2146         connection->cram_hmac_tfm = crypto.cram_hmac_tfm;
2147
2148         mutex_unlock(&connection->conf_update);
2149         mutex_unlock(&connection->data.mutex);
2150         synchronize_rcu();
2151         kfree(old_net_conf);
2152
2153         if (connection->cstate >= C_WF_REPORT_PARAMS)
2154                 drbd_send_sync_param(minor_to_device(conn_lowest_minor(connection)));
2155
2156         goto done;
2157
2158  fail:
2159         mutex_unlock(&connection->conf_update);
2160         mutex_unlock(&connection->data.mutex);
2161         free_crypto(&crypto);
2162         kfree(new_net_conf);
2163  done:
2164         conn_reconfig_done(connection);
2165  out:
2166         drbd_adm_finish(info, retcode);
2167         return 0;
2168 }
2169
2170 int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
2171 {
2172         struct drbd_peer_device *peer_device;
2173         struct net_conf *old_net_conf, *new_net_conf = NULL;
2174         struct crypto crypto = { };
2175         struct drbd_resource *resource;
2176         struct drbd_connection *connection;
2177         enum drbd_ret_code retcode;
2178         int i;
2179         int err;
2180
2181         retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE);
2182
2183         if (!adm_ctx.reply_skb)
2184                 return retcode;
2185         if (retcode != NO_ERROR)
2186                 goto out;
2187         if (!(adm_ctx.my_addr && adm_ctx.peer_addr)) {
2188                 drbd_msg_put_info("connection endpoint(s) missing");
2189                 retcode = ERR_INVALID_REQUEST;
2190                 goto out;
2191         }
2192
2193         /* No need for _rcu here. All reconfiguration is
2194          * strictly serialized on genl_lock(). We are protected against
2195          * concurrent reconfiguration/addition/deletion */
2196         for_each_resource(resource, &drbd_resources) {
2197                 for_each_connection(connection, resource) {
2198                         if (nla_len(adm_ctx.my_addr) == connection->my_addr_len &&
2199                             !memcmp(nla_data(adm_ctx.my_addr), &connection->my_addr,
2200                                     connection->my_addr_len)) {
2201                                 retcode = ERR_LOCAL_ADDR;
2202                                 goto out;
2203                         }
2204
2205                         if (nla_len(adm_ctx.peer_addr) == connection->peer_addr_len &&
2206                             !memcmp(nla_data(adm_ctx.peer_addr), &connection->peer_addr,
2207                                     connection->peer_addr_len)) {
2208                                 retcode = ERR_PEER_ADDR;
2209                                 goto out;
2210                         }
2211                 }
2212         }
2213
2214         connection = adm_ctx.connection;
2215         conn_reconfig_start(connection);
2216
2217         if (connection->cstate > C_STANDALONE) {
2218                 retcode = ERR_NET_CONFIGURED;
2219                 goto fail;
2220         }
2221
2222         /* allocation not in the IO path, drbdsetup / netlink process context */
2223         new_net_conf = kzalloc(sizeof(*new_net_conf), GFP_KERNEL);
2224         if (!new_net_conf) {
2225                 retcode = ERR_NOMEM;
2226                 goto fail;
2227         }
2228
2229         set_net_conf_defaults(new_net_conf);
2230
2231         err = net_conf_from_attrs(new_net_conf, info);
2232         if (err && err != -ENOMSG) {
2233                 retcode = ERR_MANDATORY_TAG;
2234                 drbd_msg_put_info(from_attrs_err_to_txt(err));
2235                 goto fail;
2236         }
2237
2238         retcode = check_net_options(connection, new_net_conf);
2239         if (retcode != NO_ERROR)
2240                 goto fail;
2241
2242         retcode = alloc_crypto(&crypto, new_net_conf);
2243         if (retcode != NO_ERROR)
2244                 goto fail;
2245
2246         ((char *)new_net_conf->shared_secret)[SHARED_SECRET_MAX-1] = 0;
2247
2248         conn_flush_workqueue(connection);
2249
2250         mutex_lock(&connection->conf_update);
2251         old_net_conf = connection->net_conf;
2252         if (old_net_conf) {
2253                 retcode = ERR_NET_CONFIGURED;
2254                 mutex_unlock(&connection->conf_update);
2255                 goto fail;
2256         }
2257         rcu_assign_pointer(connection->net_conf, new_net_conf);
2258
2259         conn_free_crypto(connection);
2260         connection->cram_hmac_tfm = crypto.cram_hmac_tfm;
2261         connection->integrity_tfm = crypto.integrity_tfm;
2262         connection->csums_tfm = crypto.csums_tfm;
2263         connection->verify_tfm = crypto.verify_tfm;
2264
2265         connection->my_addr_len = nla_len(adm_ctx.my_addr);
2266         memcpy(&connection->my_addr, nla_data(adm_ctx.my_addr), connection->my_addr_len);
2267         connection->peer_addr_len = nla_len(adm_ctx.peer_addr);
2268         memcpy(&connection->peer_addr, nla_data(adm_ctx.peer_addr), connection->peer_addr_len);
2269
2270         mutex_unlock(&connection->conf_update);
2271
2272         rcu_read_lock();
2273         idr_for_each_entry(&connection->peer_devices, peer_device, i) {
2274                 struct drbd_device *device = peer_device->device;
2275                 device->send_cnt = 0;
2276                 device->recv_cnt = 0;
2277         }
2278         rcu_read_unlock();
2279
2280         retcode = conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);
2281
2282         conn_reconfig_done(connection);
2283         drbd_adm_finish(info, retcode);
2284         return 0;
2285
2286 fail:
2287         free_crypto(&crypto);
2288         kfree(new_net_conf);
2289
2290         conn_reconfig_done(connection);
2291 out:
2292         drbd_adm_finish(info, retcode);
2293         return 0;
2294 }
2295
2296 static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection, bool force)
2297 {
2298         enum drbd_state_rv rv;
2299
2300         rv = conn_request_state(connection, NS(conn, C_DISCONNECTING),
2301                         force ? CS_HARD : 0);
2302
2303         switch (rv) {
2304         case SS_NOTHING_TO_DO:
2305                 break;
2306         case SS_ALREADY_STANDALONE:
2307                 return SS_SUCCESS;
2308         case SS_PRIMARY_NOP:
2309                 /* Our state checking code wants to see the peer outdated. */
2310                 rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING, pdsk, D_OUTDATED), 0);
2311
2312                 if (rv == SS_OUTDATE_WO_CONN) /* lost connection before graceful disconnect succeeded */
2313                         rv = conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_VERBOSE);
2314
2315                 break;
2316         case SS_CW_FAILED_BY_PEER:
2317                 /* The peer probably wants to see us outdated. */
2318                 rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING,
2319                                                         disk, D_OUTDATED), 0);
2320                 if (rv == SS_IS_DISKLESS || rv == SS_LOWER_THAN_OUTDATED) {
2321                         rv = conn_request_state(connection, NS(conn, C_DISCONNECTING),
2322                                         CS_HARD);
2323                 }
2324                 break;
2325         default:;
2326                 /* no special handling necessary */
2327         }
2328
2329         if (rv >= SS_SUCCESS) {
2330                 enum drbd_state_rv rv2;
2331                 /* No one else can reconfigure the network while I am here.
2332                  * The state handling only uses drbd_thread_stop_nowait(),
2333                  * we want to really wait here until the receiver is no more.
2334                  */
2335                 drbd_thread_stop(&connection->receiver);
2336
2337                 /* Race breaker.  This additional state change request may be
2338                  * necessary, if this was a forced disconnect during a receiver
2339                  * restart.  We may have "killed" the receiver thread just
2340                  * after drbdd_init() returned.  Typically, we should be
2341                  * C_STANDALONE already, now, and this becomes a no-op.
2342                  */
2343                 rv2 = conn_request_state(connection, NS(conn, C_STANDALONE),
2344                                 CS_VERBOSE | CS_HARD);
2345                 if (rv2 < SS_SUCCESS)
2346                         drbd_err(connection,
2347                                 "unexpected rv2=%d in conn_try_disconnect()\n",
2348                                 rv2);
2349         }
2350         return rv;
2351 }
2352
2353 int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
2354 {
2355         struct disconnect_parms parms;
2356         struct drbd_connection *connection;
2357         enum drbd_state_rv rv;
2358         enum drbd_ret_code retcode;
2359         int err;
2360
2361         retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONNECTION);
2362         if (!adm_ctx.reply_skb)
2363                 return retcode;
2364         if (retcode != NO_ERROR)
2365                 goto fail;
2366
2367         connection = adm_ctx.connection;
2368         memset(&parms, 0, sizeof(parms));
2369         if (info->attrs[DRBD_NLA_DISCONNECT_PARMS]) {
2370                 err = disconnect_parms_from_attrs(&parms, info);
2371                 if (err) {
2372                         retcode = ERR_MANDATORY_TAG;
2373                         drbd_msg_put_info(from_attrs_err_to_txt(err));
2374                         goto fail;
2375                 }
2376         }
2377
2378         rv = conn_try_disconnect(connection, parms.force_disconnect);
2379         if (rv < SS_SUCCESS)
2380                 retcode = rv;  /* FIXME: Type mismatch. */
2381         else
2382                 retcode = NO_ERROR;
2383  fail:
2384         drbd_adm_finish(info, retcode);
2385         return 0;
2386 }
2387
2388 void resync_after_online_grow(struct drbd_device *device)
2389 {
2390         int iass; /* I am sync source */
2391
2392         drbd_info(device, "Resync of new storage after online grow\n");
2393         if (device->state.role != device->state.peer)
2394                 iass = (device->state.role == R_PRIMARY);
2395         else
2396                 iass = test_bit(RESOLVE_CONFLICTS, &first_peer_device(device)->connection->flags);
2397
2398         if (iass)
2399                 drbd_start_resync(device, C_SYNC_SOURCE);
2400         else
2401                 _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE + CS_SERIALIZE);
2402 }
2403
2404 int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
2405 {
2406         struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
2407         struct resize_parms rs;
2408         struct drbd_device *device;
2409         enum drbd_ret_code retcode;
2410         enum determine_dev_size dd;
2411         bool change_al_layout = false;
2412         enum dds_flags ddsf;
2413         sector_t u_size;
2414         int err;
2415
2416         retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2417         if (!adm_ctx.reply_skb)
2418                 return retcode;
2419         if (retcode != NO_ERROR)
2420                 goto fail;
2421
2422         device = adm_ctx.device;
2423         if (!get_ldev(device)) {
2424                 retcode = ERR_NO_DISK;
2425                 goto fail;
2426         }
2427
2428         memset(&rs, 0, sizeof(struct resize_parms));
2429         rs.al_stripes = device->ldev->md.al_stripes;
2430         rs.al_stripe_size = device->ldev->md.al_stripe_size_4k * 4;
2431         if (info->attrs[DRBD_NLA_RESIZE_PARMS]) {
2432                 err = resize_parms_from_attrs(&rs, info);
2433                 if (err) {
2434                         retcode = ERR_MANDATORY_TAG;
2435                         drbd_msg_put_info(from_attrs_err_to_txt(err));
2436                         goto fail_ldev;
2437                 }
2438         }
2439
2440         if (device->state.conn > C_CONNECTED) {
2441                 retcode = ERR_RESIZE_RESYNC;
2442                 goto fail_ldev;
2443         }
2444
2445         if (device->state.role == R_SECONDARY &&
2446             device->state.peer == R_SECONDARY) {
2447                 retcode = ERR_NO_PRIMARY;
2448                 goto fail_ldev;
2449         }
2450
2451         if (rs.no_resync && first_peer_device(device)->connection->agreed_pro_version < 93) {
2452                 retcode = ERR_NEED_APV_93;
2453                 goto fail_ldev;
2454         }
2455
2456         rcu_read_lock();
2457         u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;
2458         rcu_read_unlock();
2459         if (u_size != (sector_t)rs.resize_size) {
2460                 new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL);
2461                 if (!new_disk_conf) {
2462                         retcode = ERR_NOMEM;
2463                         goto fail_ldev;
2464                 }
2465         }
2466
2467         if (device->ldev->md.al_stripes != rs.al_stripes ||
2468             device->ldev->md.al_stripe_size_4k != rs.al_stripe_size / 4) {
2469                 u32 al_size_k = rs.al_stripes * rs.al_stripe_size;
2470
2471                 if (al_size_k > (16 * 1024 * 1024)) {
2472                         retcode = ERR_MD_LAYOUT_TOO_BIG;
2473                         goto fail_ldev;
2474                 }
2475
2476                 if (al_size_k < MD_32kB_SECT/2) {
2477                         retcode = ERR_MD_LAYOUT_TOO_SMALL;
2478                         goto fail_ldev;
2479                 }
2480
2481                 if (device->state.conn != C_CONNECTED) {
2482                         retcode = ERR_MD_LAYOUT_CONNECTED;
2483                         goto fail_ldev;
2484                 }
2485
2486                 change_al_layout = true;
2487         }
2488
2489         if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev))
2490                 device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
2491
2492         if (new_disk_conf) {
2493                 mutex_lock(&first_peer_device(device)->connection->conf_update);
2494                 old_disk_conf = device->ldev->disk_conf;
2495                 *new_disk_conf = *old_disk_conf;
2496                 new_disk_conf->disk_size = (sector_t)rs.resize_size;
2497                 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
2498                 mutex_unlock(&first_peer_device(device)->connection->conf_update);
2499                 synchronize_rcu();
2500                 kfree(old_disk_conf);
2501         }
2502
2503         ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0);
2504         dd = drbd_determine_dev_size(device, ddsf, change_al_layout ? &rs : NULL);
2505         drbd_md_sync(device);
2506         put_ldev(device);
2507         if (dd == DS_ERROR) {
2508                 retcode = ERR_NOMEM_BITMAP;
2509                 goto fail;
2510         } else if (dd == DS_ERROR_SPACE_MD) {
2511                 retcode = ERR_MD_LAYOUT_NO_FIT;
2512                 goto fail;
2513         } else if (dd == DS_ERROR_SHRINK) {
2514                 retcode = ERR_IMPLICIT_SHRINK;
2515                 goto fail;
2516         }
2517
2518         if (device->state.conn == C_CONNECTED) {
2519                 if (dd == DS_GREW)
2520                         set_bit(RESIZE_PENDING, &device->flags);
2521
2522                 drbd_send_uuids(device);
2523                 drbd_send_sizes(device, 1, ddsf);
2524         }
2525
2526  fail:
2527         drbd_adm_finish(info, retcode);
2528         return 0;
2529
2530  fail_ldev:
2531         put_ldev(device);
2532         goto fail;
2533 }
2534
2535 int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info)
2536 {
2537         enum drbd_ret_code retcode;
2538         struct res_opts res_opts;
2539         int err;
2540
2541         retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE);
2542         if (!adm_ctx.reply_skb)
2543                 return retcode;
2544         if (retcode != NO_ERROR)
2545                 goto fail;
2546
2547         res_opts = adm_ctx.resource->res_opts;
2548         if (should_set_defaults(info))
2549                 set_res_opts_defaults(&res_opts);
2550
2551         err = res_opts_from_attrs(&res_opts, info);
2552         if (err && err != -ENOMSG) {
2553                 retcode = ERR_MANDATORY_TAG;
2554                 drbd_msg_put_info(from_attrs_err_to_txt(err));
2555                 goto fail;
2556         }
2557
2558         err = set_resource_options(adm_ctx.resource, &res_opts);
2559         if (err) {
2560                 retcode = ERR_INVALID_REQUEST;
2561                 if (err == -ENOMEM)
2562                         retcode = ERR_NOMEM;
2563         }
2564
2565 fail:
2566         drbd_adm_finish(info, retcode);
2567         return 0;
2568 }
2569
2570 int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
2571 {
2572         struct drbd_device *device;
2573         int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
2574
2575         retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2576         if (!adm_ctx.reply_skb)
2577                 return retcode;
2578         if (retcode != NO_ERROR)
2579                 goto out;
2580
2581         device = adm_ctx.device;
2582
2583         /* If there is still bitmap IO pending, probably because of a previous
2584          * resync just being finished, wait for it before requesting a new resync.
2585          * Also wait for it's after_state_ch(). */
2586         drbd_suspend_io(device);
2587         wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
2588         drbd_flush_workqueue(device);
2589
2590         /* If we happen to be C_STANDALONE R_SECONDARY, just change to
2591          * D_INCONSISTENT, and set all bits in the bitmap.  Otherwise,
2592          * try to start a resync handshake as sync target for full sync.
2593          */
2594         if (device->state.conn == C_STANDALONE && device->state.role == R_SECONDARY) {
2595                 retcode = drbd_request_state(device, NS(disk, D_INCONSISTENT));
2596                 if (retcode >= SS_SUCCESS) {
2597                         if (drbd_bitmap_io(device, &drbd_bmio_set_n_write,
2598                                 "set_n_write from invalidate", BM_LOCKED_MASK))
2599                                 retcode = ERR_IO_MD_DISK;
2600                 }
2601         } else
2602                 retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_T));
2603         drbd_resume_io(device);
2604
2605 out:
2606         drbd_adm_finish(info, retcode);
2607         return 0;
2608 }
2609
2610 static int drbd_adm_simple_request_state(struct sk_buff *skb, struct genl_info *info,
2611                 union drbd_state mask, union drbd_state val)
2612 {
2613         enum drbd_ret_code retcode;
2614
2615         retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2616         if (!adm_ctx.reply_skb)
2617                 return retcode;
2618         if (retcode != NO_ERROR)
2619                 goto out;
2620
2621         retcode = drbd_request_state(adm_ctx.device, mask, val);
2622 out:
2623         drbd_adm_finish(info, retcode);
2624         return 0;
2625 }
2626
/*
 * Bitmap IO callback: run drbd_bmio_set_n_write() and then suspend the
 * activity log, regardless of the result.  Returns what
 * drbd_bmio_set_n_write() returned.
 */
static int drbd_bmio_set_susp_al(struct drbd_device *device)
{
	const int write_result = drbd_bmio_set_n_write(device);

	drbd_suspend_al(device);

	return write_result;
}
2635
2636 int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
2637 {
2638         int retcode; /* drbd_ret_code, drbd_state_rv */
2639         struct drbd_device *device;
2640
2641         retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2642         if (!adm_ctx.reply_skb)
2643                 return retcode;
2644         if (retcode != NO_ERROR)
2645                 goto out;
2646
2647         device = adm_ctx.device;
2648
2649         /* If there is still bitmap IO pending, probably because of a previous
2650          * resync just being finished, wait for it before requesting a new resync.
2651          * Also wait for it's after_state_ch(). */
2652         drbd_suspend_io(device);
2653         wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
2654         drbd_flush_workqueue(device);
2655
2656         /* If we happen to be C_STANDALONE R_PRIMARY, just set all bits
2657          * in the bitmap.  Otherwise, try to start a resync handshake
2658          * as sync source for full sync.
2659          */
2660         if (device->state.conn == C_STANDALONE && device->state.role == R_PRIMARY) {
2661                 /* The peer will get a resync upon connect anyways. Just make that
2662                    into a full resync. */
2663                 retcode = drbd_request_state(device, NS(pdsk, D_INCONSISTENT));
2664                 if (retcode >= SS_SUCCESS) {
2665                         if (drbd_bitmap_io(device, &drbd_bmio_set_susp_al,
2666                                 "set_n_write from invalidate_peer",
2667                                 BM_LOCKED_SET_ALLOWED))
2668                                 retcode = ERR_IO_MD_DISK;
2669                 }
2670         } else
2671                 retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_S));
2672         drbd_resume_io(device);
2673
2674 out:
2675         drbd_adm_finish(info, retcode);
2676         return 0;
2677 }
2678
2679 int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info)
2680 {
2681         enum drbd_ret_code retcode;
2682
2683         retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2684         if (!adm_ctx.reply_skb)
2685                 return retcode;
2686         if (retcode != NO_ERROR)
2687                 goto out;
2688
2689         if (drbd_request_state(adm_ctx.device, NS(user_isp, 1)) == SS_NOTHING_TO_DO)
2690                 retcode = ERR_PAUSE_IS_SET;
2691 out:
2692         drbd_adm_finish(info, retcode);
2693         return 0;
2694 }
2695
2696 int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info)
2697 {
2698         union drbd_dev_state s;
2699         enum drbd_ret_code retcode;
2700
2701         retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2702         if (!adm_ctx.reply_skb)
2703                 return retcode;
2704         if (retcode != NO_ERROR)
2705                 goto out;
2706
2707         if (drbd_request_state(adm_ctx.device, NS(user_isp, 0)) == SS_NOTHING_TO_DO) {
2708                 s = adm_ctx.device->state;
2709                 if (s.conn == C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) {
2710                         retcode = s.aftr_isp ? ERR_PIC_AFTER_DEP :
2711                                   s.peer_isp ? ERR_PIC_PEER_DEP : ERR_PAUSE_IS_CLEAR;
2712                 } else {
2713                         retcode = ERR_PAUSE_IS_CLEAR;
2714                 }
2715         }
2716
2717 out:
2718         drbd_adm_finish(info, retcode);
2719         return 0;
2720 }
2721
2722 int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info)
2723 {
2724         return drbd_adm_simple_request_state(skb, info, NS(susp, 1));
2725 }
2726
2727 int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info)
2728 {
2729         struct drbd_device *device;
2730         int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
2731
2732         retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2733         if (!adm_ctx.reply_skb)
2734                 return retcode;
2735         if (retcode != NO_ERROR)
2736                 goto out;
2737
2738         device = adm_ctx.device;
2739         if (test_bit(NEW_CUR_UUID, &device->flags)) {
2740                 drbd_uuid_new_current(device);
2741                 clear_bit(NEW_CUR_UUID, &device->flags);
2742         }
2743         drbd_suspend_io(device);
2744         retcode = drbd_request_state(device, NS3(susp, 0, susp_nod, 0, susp_fen, 0));
2745         if (retcode == SS_SUCCESS) {
2746                 if (device->state.conn < C_CONNECTED)
2747                         tl_clear(first_peer_device(device)->connection);
2748                 if (device->state.disk == D_DISKLESS || device->state.disk == D_FAILED)
2749                         tl_restart(first_peer_device(device)->connection, FAIL_FROZEN_DISK_IO);
2750         }
2751         drbd_resume_io(device);
2752
2753 out:
2754         drbd_adm_finish(info, retcode);
2755         return 0;
2756 }
2757
2758 int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info)
2759 {
2760         return drbd_adm_simple_request_state(skb, info, NS(disk, D_OUTDATED));
2761 }
2762
2763 static int nla_put_drbd_cfg_context(struct sk_buff *skb,
2764                                     struct drbd_resource *resource,
2765                                     struct drbd_connection *connection,
2766                                     struct drbd_device *device)
2767 {
2768         struct nlattr *nla;
2769         nla = nla_nest_start(skb, DRBD_NLA_CFG_CONTEXT);
2770         if (!nla)
2771                 goto nla_put_failure;
2772         if (device &&
2773             nla_put_u32(skb, T_ctx_volume, device->vnr))
2774                 goto nla_put_failure;
2775         if (nla_put_string(skb, T_ctx_resource_name, connection->resource->name))
2776                 goto nla_put_failure;
2777         if (connection) {
2778                 if (connection->my_addr_len &&
2779                     nla_put(skb, T_ctx_my_addr, connection->my_addr_len, &connection->my_addr))
2780                         goto nla_put_failure;
2781                 if (connection->peer_addr_len &&
2782                     nla_put(skb, T_ctx_peer_addr, connection->peer_addr_len, &connection->peer_addr))
2783                         goto nla_put_failure;
2784         }
2785         nla_nest_end(skb, nla);
2786         return 0;
2787
2788 nla_put_failure:
2789         if (nla)
2790                 nla_nest_cancel(skb, nla);
2791         return -EMSGSIZE;
2792 }
2793
2794 /*
2795  * Return the connection of @resource if @resource has exactly one connection.
2796  */
2797 static struct drbd_connection *the_only_connection(struct drbd_resource *resource)
2798 {
2799         struct list_head *connections = &resource->connections;
2800
2801         if (list_empty(connections) || connections->next->next != connections)
2802                 return NULL;
2803         return list_first_entry(&resource->connections, struct drbd_connection, connections);
2804 }
2805
2806 int nla_put_status_info(struct sk_buff *skb, struct drbd_device *device,
2807                 const struct sib_info *sib)
2808 {
2809         struct drbd_resource *resource = device->resource;
2810         struct state_info *si = NULL; /* for sizeof(si->member); */
2811         struct nlattr *nla;
2812         int got_ldev;
2813         int err = 0;
2814         int exclude_sensitive;
2815
2816         /* If sib != NULL, this is drbd_bcast_event, which anyone can listen
2817          * to.  So we better exclude_sensitive information.
2818          *
2819          * If sib == NULL, this is drbd_adm_get_status, executed synchronously
2820          * in the context of the requesting user process. Exclude sensitive
2821          * information, unless current has superuser.
2822          *
2823          * NOTE: for drbd_adm_get_status_all(), this is a netlink dump, and
2824          * relies on the current implementation of netlink_dump(), which
2825          * executes the dump callback successively from netlink_recvmsg(),
2826          * always in the context of the receiving process */
2827         exclude_sensitive = sib || !capable(CAP_SYS_ADMIN);
2828
2829         got_ldev = get_ldev(device);
2830
2831         /* We need to add connection name and volume number information still.
2832          * Minor number is in drbd_genlmsghdr. */
2833         if (nla_put_drbd_cfg_context(skb, resource, the_only_connection(resource), device))
2834                 goto nla_put_failure;
2835
2836         if (res_opts_to_skb(skb, &device->resource->res_opts, exclude_sensitive))
2837                 goto nla_put_failure;
2838
2839         rcu_read_lock();
2840         if (got_ldev) {
2841                 struct disk_conf *disk_conf;
2842
2843                 disk_conf = rcu_dereference(device->ldev->disk_conf);
2844                 err = disk_conf_to_skb(skb, disk_conf, exclude_sensitive);
2845         }
2846         if (!err) {
2847                 struct net_conf *nc;
2848
2849                 nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
2850                 if (nc)
2851                         err = net_conf_to_skb(skb, nc, exclude_sensitive);
2852         }
2853         rcu_read_unlock();
2854         if (err)
2855                 goto nla_put_failure;
2856
2857         nla = nla_nest_start(skb, DRBD_NLA_STATE_INFO);
2858         if (!nla)
2859                 goto nla_put_failure;
2860         if (nla_put_u32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY) ||
2861             nla_put_u32(skb, T_current_state, device->state.i) ||
2862             nla_put_u64(skb, T_ed_uuid, device->ed_uuid) ||
2863             nla_put_u64(skb, T_capacity, drbd_get_capacity(device->this_bdev)) ||
2864             nla_put_u64(skb, T_send_cnt, device->send_cnt) ||
2865             nla_put_u64(skb, T_recv_cnt, device->recv_cnt) ||
2866             nla_put_u64(skb, T_read_cnt, device->read_cnt) ||
2867             nla_put_u64(skb, T_writ_cnt, device->writ_cnt) ||
2868             nla_put_u64(skb, T_al_writ_cnt, device->al_writ_cnt) ||
2869             nla_put_u64(skb, T_bm_writ_cnt, device->bm_writ_cnt) ||
2870             nla_put_u32(skb, T_ap_bio_cnt, atomic_read(&device->ap_bio_cnt)) ||
2871             nla_put_u32(skb, T_ap_pending_cnt, atomic_read(&device->ap_pending_cnt)) ||
2872             nla_put_u32(skb, T_rs_pending_cnt, atomic_read(&device->rs_pending_cnt)))
2873                 goto nla_put_failure;
2874
2875         if (got_ldev) {
2876                 int err;
2877
2878                 spin_lock_irq(&device->ldev->md.uuid_lock);
2879                 err = nla_put(skb, T_uuids, sizeof(si->uuids), device->ldev->md.uuid);
2880                 spin_unlock_irq(&device->ldev->md.uuid_lock);
2881
2882                 if (err)
2883                         goto nla_put_failure;
2884
2885                 if (nla_put_u32(skb, T_disk_flags, device->ldev->md.flags) ||
2886                     nla_put_u64(skb, T_bits_total, drbd_bm_bits(device)) ||
2887                     nla_put_u64(skb, T_bits_oos, drbd_bm_total_weight(device)))
2888                         goto nla_put_failure;
2889                 if (C_SYNC_SOURCE <= device->state.conn &&
2890                     C_PAUSED_SYNC_T >= device->state.conn) {
2891                         if (nla_put_u64(skb, T_bits_rs_total, device->rs_total) ||
2892                             nla_put_u64(skb, T_bits_rs_failed, device->rs_failed))
2893                                 goto nla_put_failure;
2894                 }
2895         }
2896
2897         if (sib) {
2898                 switch(sib->sib_reason) {
2899                 case SIB_SYNC_PROGRESS:
2900                 case SIB_GET_STATUS_REPLY:
2901                         break;
2902                 case SIB_STATE_CHANGE:
2903                         if (nla_put_u32(skb, T_prev_state, sib->os.i) ||
2904                             nla_put_u32(skb, T_new_state, sib->ns.i))
2905                                 goto nla_put_failure;
2906                         break;
2907                 case SIB_HELPER_POST:
2908                         if (nla_put_u32(skb, T_helper_exit_code,
2909                                         sib->helper_exit_code))
2910                                 goto nla_put_failure;
2911                         /* fall through */
2912                 case SIB_HELPER_PRE:
2913                         if (nla_put_string(skb, T_helper, sib->helper_name))
2914                                 goto nla_put_failure;
2915                         break;
2916                 }
2917         }
2918         nla_nest_end(skb, nla);
2919
2920         if (0)
2921 nla_put_failure:
2922                 err = -EMSGSIZE;
2923         if (got_ldev)
2924                 put_ldev(device);
2925         return err;
2926 }
2927
2928 int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info)
2929 {
2930         enum drbd_ret_code retcode;
2931         int err;
2932
2933         retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2934         if (!adm_ctx.reply_skb)
2935                 return retcode;
2936         if (retcode != NO_ERROR)
2937                 goto out;
2938
2939         err = nla_put_status_info(adm_ctx.reply_skb, adm_ctx.device, NULL);
2940         if (err) {
2941                 nlmsg_free(adm_ctx.reply_skb);
2942                 return err;
2943         }
2944 out:
2945         drbd_adm_finish(info, retcode);
2946         return 0;
2947 }
2948
2949 static int get_one_status(struct sk_buff *skb, struct netlink_callback *cb)
2950 {
2951         struct drbd_device *device;
2952         struct drbd_genlmsghdr *dh;
2953         struct drbd_resource *pos = (struct drbd_resource *)cb->args[0];
2954         struct drbd_resource *resource = NULL;
2955         struct drbd_resource *tmp;
2956         unsigned volume = cb->args[1];
2957
2958         /* Open coded, deferred, iteration:
2959          * for_each_resource_safe(resource, tmp, &drbd_resources) {
2960          *      connection = "first connection of resource or undefined";
2961          *      idr_for_each_entry(&resource->devices, device, i) {
2962          *        ...
2963          *      }
2964          * }
2965          * where resource is cb->args[0];
2966          * and i is cb->args[1];
2967          *
2968          * cb->args[2] indicates if we shall loop over all resources,
2969          * or just dump all volumes of a single resource.
2970          *
2971          * This may miss entries inserted after this dump started,
2972          * or entries deleted before they are reached.
2973          *
2974          * We need to make sure the device won't disappear while
2975          * we are looking at it, and revalidate our iterators
2976          * on each iteration.
2977          */
2978
2979         /* synchronize with conn_create()/drbd_destroy_connection() */
2980         rcu_read_lock();
2981         /* revalidate iterator position */
2982         for_each_resource_rcu(tmp, &drbd_resources) {
2983                 if (pos == NULL) {
2984                         /* first iteration */
2985                         pos = tmp;
2986                         resource = pos;
2987                         break;
2988                 }
2989                 if (tmp == pos) {
2990                         resource = pos;
2991                         break;
2992                 }
2993         }
2994         if (resource) {
2995 next_resource:
2996                 device = idr_get_next(&resource->devices, &volume);
2997                 if (!device) {
2998                         /* No more volumes to dump on this resource.
2999                          * Advance resource iterator. */
3000                         pos = list_entry_rcu(resource->resources.next,
3001                                              struct drbd_resource, resources);
3002                         /* Did we dump any volume of this resource yet? */
3003                         if (volume != 0) {
3004                                 /* If we reached the end of the list,
3005                                  * or only a single resource dump was requested,
3006                                  * we are done. */
3007                                 if (&pos->resources == &drbd_resources || cb->args[2])
3008                                         goto out;
3009                                 volume = 0;
3010                                 resource = pos;
3011                                 goto next_resource;
3012                         }
3013                 }
3014
3015                 dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
3016                                 cb->nlh->nlmsg_seq, &drbd_genl_family,
3017                                 NLM_F_MULTI, DRBD_ADM_GET_STATUS);
3018                 if (!dh)
3019                         goto out;
3020
3021                 if (!device) {
3022                         /* This is a connection without a single volume.
3023                          * Suprisingly enough, it may have a network
3024                          * configuration. */
3025                         struct drbd_connection *connection;
3026
3027                         dh->minor = -1U;
3028                         dh->ret_code = NO_ERROR;
3029                         connection = the_only_connection(resource);
3030                         if (nla_put_drbd_cfg_context(skb, resource, connection, NULL))
3031                                 goto cancel;
3032                         if (connection) {
3033                                 struct net_conf *nc;
3034
3035                                 nc = rcu_dereference(connection->net_conf);
3036                                 if (nc && net_conf_to_skb(skb, nc, 1) != 0)
3037                                         goto cancel;
3038                         }
3039                         goto done;
3040                 }
3041
3042                 D_ASSERT(device, device->vnr == volume);
3043                 D_ASSERT(device, device->resource == resource);
3044
3045                 dh->minor = device_to_minor(device);
3046                 dh->ret_code = NO_ERROR;
3047
3048                 if (nla_put_status_info(skb, device, NULL)) {
3049 cancel:
3050                         genlmsg_cancel(skb, dh);
3051                         goto out;
3052                 }
3053 done:
3054                 genlmsg_end(skb, dh);
3055         }
3056
3057 out:
3058         rcu_read_unlock();
3059         /* where to start the next iteration */
3060         cb->args[0] = (long)pos;
3061         cb->args[1] = (pos == resource) ? volume + 1 : 0;
3062
3063         /* No more resources/volumes/minors found results in an empty skb.
3064          * Which will terminate the dump. */
3065         return skb->len;
3066 }
3067
3068 /*
3069  * Request status of all resources, or of all volumes within a single resource.
3070  *
3071  * This is a dump, as the answer may not fit in a single reply skb otherwise.
3072  * Which means we cannot use the family->attrbuf or other such members, because
3073  * dump is NOT protected by the genl_lock().  During dump, we only have access
3074  * to the incoming skb, and need to opencode "parsing" of the nlattr payload.
3075  *
3076  * Once things are setup properly, we call into get_one_status().
3077  */
3078 int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb)
3079 {
3080         const unsigned hdrlen = GENL_HDRLEN + GENL_MAGIC_FAMILY_HDRSZ;
3081         struct nlattr *nla;
3082         const char *resource_name;
3083         struct drbd_resource *resource;
3084         int maxtype;
3085
3086         /* Is this a followup call? */
3087         if (cb->args[0]) {
3088                 /* ... of a single resource dump,
3089                  * and the resource iterator has been advanced already? */
3090                 if (cb->args[2] && cb->args[2] != cb->args[0])
3091                         return 0; /* DONE. */
3092                 goto dump;
3093         }
3094
3095         /* First call (from netlink_dump_start).  We need to figure out
3096          * which resource(s) the user wants us to dump. */
3097         nla = nla_find(nlmsg_attrdata(cb->nlh, hdrlen),
3098                         nlmsg_attrlen(cb->nlh, hdrlen),
3099                         DRBD_NLA_CFG_CONTEXT);
3100
3101         /* No explicit context given.  Dump all. */
3102         if (!nla)
3103                 goto dump;
3104         maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1;
3105         nla = drbd_nla_find_nested(maxtype, nla, __nla_type(T_ctx_resource_name));
3106         if (IS_ERR(nla))
3107                 return PTR_ERR(nla);
3108         /* context given, but no name present? */
3109         if (!nla)
3110                 return -EINVAL;
3111         resource_name = nla_data(nla);
3112         if (!*resource_name)
3113                 return -ENODEV;
3114         resource = drbd_find_resource(resource_name);
3115         if (!resource)
3116                 return -ENODEV;
3117
3118         kref_put(&resource->kref, drbd_destroy_resource); /* get_one_status() revalidates the resource */
3119
3120         /* prime iterators, and set "filter" mode mark:
3121          * only dump this connection. */
3122         cb->args[0] = (long)resource;
3123         /* cb->args[1] = 0; passed in this way. */
3124         cb->args[2] = (long)resource;
3125
3126 dump:
3127         return get_one_status(skb, cb);
3128 }
3129
3130 int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info)
3131 {
3132         enum drbd_ret_code retcode;
3133         struct timeout_parms tp;
3134         int err;
3135
3136         retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
3137         if (!adm_ctx.reply_skb)
3138                 return retcode;
3139         if (retcode != NO_ERROR)
3140                 goto out;
3141
3142         tp.timeout_type =
3143                 adm_ctx.device->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED :
3144                 test_bit(USE_DEGR_WFC_T, &adm_ctx.device->flags) ? UT_DEGRADED :
3145                 UT_DEFAULT;
3146
3147         err = timeout_parms_to_priv_skb(adm_ctx.reply_skb, &tp);
3148         if (err) {
3149                 nlmsg_free(adm_ctx.reply_skb);
3150                 return err;
3151         }
3152 out:
3153         drbd_adm_finish(info, retcode);
3154         return 0;
3155 }
3156
3157 int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info)
3158 {
3159         struct drbd_device *device;
3160         enum drbd_ret_code retcode;
3161         struct start_ov_parms parms;
3162
3163         retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
3164         if (!adm_ctx.reply_skb)
3165                 return retcode;
3166         if (retcode != NO_ERROR)
3167                 goto out;
3168
3169         device = adm_ctx.device;
3170
3171         /* resume from last known position, if possible */
3172         parms.ov_start_sector = device->ov_start_sector;
3173         parms.ov_stop_sector = ULLONG_MAX;
3174         if (info->attrs[DRBD_NLA_START_OV_PARMS]) {
3175                 int err = start_ov_parms_from_attrs(&parms, info);
3176                 if (err) {
3177                         retcode = ERR_MANDATORY_TAG;
3178                         drbd_msg_put_info(from_attrs_err_to_txt(err));
3179                         goto out;
3180                 }
3181         }
3182         /* w_make_ov_request expects position to be aligned */
3183         device->ov_start_sector = parms.ov_start_sector & ~(BM_SECT_PER_BIT-1);
3184         device->ov_stop_sector = parms.ov_stop_sector;
3185
3186         /* If there is still bitmap IO pending, e.g. previous resync or verify
3187          * just being finished, wait for it before requesting a new resync. */
3188         drbd_suspend_io(device);
3189         wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
3190         retcode = drbd_request_state(device, NS(conn, C_VERIFY_S));
3191         drbd_resume_io(device);
3192 out:
3193         drbd_adm_finish(info, retcode);
3194         return 0;
3195 }
3196
3197
3198 int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info)
3199 {
3200         struct drbd_device *device;
3201         enum drbd_ret_code retcode;
3202         int skip_initial_sync = 0;
3203         int err;
3204         struct new_c_uuid_parms args;
3205
3206         retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
3207         if (!adm_ctx.reply_skb)
3208                 return retcode;
3209         if (retcode != NO_ERROR)
3210                 goto out_nolock;
3211
3212         device = adm_ctx.device;
3213         memset(&args, 0, sizeof(args));
3214         if (info->attrs[DRBD_NLA_NEW_C_UUID_PARMS]) {
3215                 err = new_c_uuid_parms_from_attrs(&args, info);
3216                 if (err) {
3217                         retcode = ERR_MANDATORY_TAG;
3218                         drbd_msg_put_info(from_attrs_err_to_txt(err));
3219                         goto out_nolock;
3220                 }
3221         }
3222
3223         mutex_lock(device->state_mutex); /* Protects us against serialized state changes. */
3224
3225         if (!get_ldev(device)) {
3226                 retcode = ERR_NO_DISK;
3227                 goto out;
3228         }
3229
3230         /* this is "skip initial sync", assume to be clean */
3231         if (device->state.conn == C_CONNECTED &&
3232             first_peer_device(device)->connection->agreed_pro_version >= 90 &&
3233             device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && args.clear_bm) {
3234                 drbd_info(device, "Preparing to skip initial sync\n");
3235                 skip_initial_sync = 1;
3236         } else if (device->state.conn != C_STANDALONE) {
3237                 retcode = ERR_CONNECTED;
3238                 goto out_dec;
3239         }
3240
3241         drbd_uuid_set(device, UI_BITMAP, 0); /* Rotate UI_BITMAP to History 1, etc... */
3242         drbd_uuid_new_current(device); /* New current, previous to UI_BITMAP */
3243
3244         if (args.clear_bm) {
3245                 err = drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
3246                         "clear_n_write from new_c_uuid", BM_LOCKED_MASK);
3247                 if (err) {
3248                         drbd_err(device, "Writing bitmap failed with %d\n", err);
3249                         retcode = ERR_IO_MD_DISK;
3250                 }
3251                 if (skip_initial_sync) {
3252                         drbd_send_uuids_skip_initial_sync(device);
3253                         _drbd_uuid_set(device, UI_BITMAP, 0);
3254                         drbd_print_uuids(device, "cleared bitmap UUID");
3255                         spin_lock_irq(&first_peer_device(device)->connection->req_lock);
3256                         _drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
3257                                         CS_VERBOSE, NULL);
3258                         spin_unlock_irq(&first_peer_device(device)->connection->req_lock);
3259                 }
3260         }
3261
3262         drbd_md_sync(device);
3263 out_dec:
3264         put_ldev(device);
3265 out:
3266         mutex_unlock(device->state_mutex);
3267 out_nolock:
3268         drbd_adm_finish(info, retcode);
3269         return 0;
3270 }
3271
3272 static enum drbd_ret_code
3273 drbd_check_resource_name(const char *name)
3274 {
3275         if (!name || !name[0]) {
3276                 drbd_msg_put_info("resource name missing");
3277                 return ERR_MANDATORY_TAG;
3278         }
3279         /* if we want to use these in sysfs/configfs/debugfs some day,
3280          * we must not allow slashes */
3281         if (strchr(name, '/')) {
3282                 drbd_msg_put_info("invalid resource name");
3283                 return ERR_INVALID_REQUEST;
3284         }
3285         return NO_ERROR;
3286 }
3287
3288 int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info)
3289 {
3290         enum drbd_ret_code retcode;
3291         struct res_opts res_opts;
3292         int err;
3293
3294         retcode = drbd_adm_prepare(skb, info, 0);
3295         if (!adm_ctx.reply_skb)
3296                 return retcode;
3297         if (retcode != NO_ERROR)
3298                 goto out;
3299
3300         set_res_opts_defaults(&res_opts);
3301         err = res_opts_from_attrs(&res_opts, info);
3302         if (err && err != -ENOMSG) {
3303                 retcode = ERR_MANDATORY_TAG;
3304                 drbd_msg_put_info(from_attrs_err_to_txt(err));
3305                 goto out;
3306         }
3307
3308         retcode = drbd_check_resource_name(adm_ctx.resource_name);
3309         if (retcode != NO_ERROR)
3310                 goto out;
3311
3312         if (adm_ctx.resource) {
3313                 if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) {
3314                         retcode = ERR_INVALID_REQUEST;
3315                         drbd_msg_put_info("resource exists");
3316                 }
3317                 /* else: still NO_ERROR */
3318                 goto out;
3319         }
3320
3321         if (!conn_create(adm_ctx.resource_name, &res_opts))
3322                 retcode = ERR_NOMEM;
3323 out:
3324         drbd_adm_finish(info, retcode);
3325         return 0;
3326 }
3327
3328 int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info)
3329 {
3330         struct drbd_genlmsghdr *dh = info->userhdr;
3331         enum drbd_ret_code retcode;
3332
3333         retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE);
3334         if (!adm_ctx.reply_skb)
3335                 return retcode;
3336         if (retcode != NO_ERROR)
3337                 goto out;
3338
3339         if (dh->minor > MINORMASK) {
3340                 drbd_msg_put_info("requested minor out of range");
3341                 retcode = ERR_INVALID_REQUEST;
3342                 goto out;
3343         }
3344         if (adm_ctx.volume > DRBD_VOLUME_MAX) {
3345                 drbd_msg_put_info("requested volume id out of range");
3346                 retcode = ERR_INVALID_REQUEST;
3347                 goto out;
3348         }
3349
3350         /* drbd_adm_prepare made sure already
3351          * that first_peer_device(device)->connection and device->vnr match the request. */
3352         if (adm_ctx.device) {
3353                 if (info->nlhdr->nlmsg_flags & NLM_F_EXCL)
3354                         retcode = ERR_MINOR_EXISTS;
3355                 /* else: still NO_ERROR */
3356                 goto out;
3357         }
3358
3359         retcode = drbd_create_device(adm_ctx.resource, dh->minor, adm_ctx.volume);
3360 out:
3361         drbd_adm_finish(info, retcode);
3362         return 0;
3363 }
3364
3365 static enum drbd_ret_code adm_del_minor(struct drbd_device *device)
3366 {
3367         if (device->state.disk == D_DISKLESS &&
3368             /* no need to be device->state.conn == C_STANDALONE &&
3369              * we may want to delete a minor from a live replication group.
3370              */
3371             device->state.role == R_SECONDARY) {
3372                 _drbd_request_state(device, NS(conn, C_WF_REPORT_PARAMS),
3373                                     CS_VERBOSE + CS_WAIT_COMPLETE);
3374                 drbd_delete_device(device);
3375                 return NO_ERROR;
3376         } else
3377                 return ERR_MINOR_CONFIGURED;
3378 }
3379
3380 int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info)
3381 {
3382         enum drbd_ret_code retcode;
3383
3384         retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
3385         if (!adm_ctx.reply_skb)
3386                 return retcode;
3387         if (retcode != NO_ERROR)
3388                 goto out;
3389
3390         retcode = adm_del_minor(adm_ctx.device);
3391 out:
3392         drbd_adm_finish(info, retcode);
3393         return 0;
3394 }
3395
3396 int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
3397 {
3398         struct drbd_resource *resource;
3399         struct drbd_connection *connection;
3400         struct drbd_device *device;
3401         int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
3402         unsigned i;
3403
3404         retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE);
3405         if (!adm_ctx.reply_skb)
3406                 return retcode;
3407         if (retcode != NO_ERROR)
3408                 goto out;
3409
3410         resource = adm_ctx.resource;
3411         /* demote */
3412         for_each_connection(connection, resource) {
3413                 struct drbd_peer_device *peer_device;
3414
3415                 idr_for_each_entry(&connection->peer_devices, peer_device, i) {
3416                         retcode = drbd_set_role(peer_device->device, R_SECONDARY, 0);
3417                         if (retcode < SS_SUCCESS) {
3418                                 drbd_msg_put_info("failed to demote");
3419                                 goto out;
3420                         }
3421                 }
3422
3423                 retcode = conn_try_disconnect(connection, 0);
3424                 if (retcode < SS_SUCCESS) {
3425                         drbd_msg_put_info("failed to disconnect");
3426                         goto out;
3427                 }
3428         }
3429
3430         /* detach */
3431         idr_for_each_entry(&resource->devices, device, i) {
3432                 retcode = adm_detach(device, 0);
3433                 if (retcode < SS_SUCCESS || retcode > NO_ERROR) {
3434                         drbd_msg_put_info("failed to detach");
3435                         goto out;
3436                 }
3437         }
3438
3439         /* If we reach this, all volumes (of this connection) are Secondary,
3440          * Disconnected, Diskless, aka Unconfigured. Make sure all threads have
3441          * actually stopped, state handling only does drbd_thread_stop_nowait(). */
3442         for_each_connection(connection, resource)
3443                 drbd_thread_stop(&connection->worker);
3444
3445         /* Now, nothing can fail anymore */
3446
3447         /* delete volumes */
3448         idr_for_each_entry(&resource->devices, device, i) {
3449                 retcode = adm_del_minor(device);
3450                 if (retcode != NO_ERROR) {
3451                         /* "can not happen" */
3452                         drbd_msg_put_info("failed to delete volume");
3453                         goto out;
3454                 }
3455         }
3456
3457         list_del_rcu(&resource->resources);
3458         synchronize_rcu();
3459         drbd_free_resource(resource);
3460         retcode = NO_ERROR;
3461
3462 out:
3463         drbd_adm_finish(info, retcode);
3464         return 0;
3465 }
3466
3467 int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info)
3468 {
3469         struct drbd_resource *resource;
3470         struct drbd_connection *connection;
3471         enum drbd_ret_code retcode;
3472
3473         retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE);
3474         if (!adm_ctx.reply_skb)
3475                 return retcode;
3476         if (retcode != NO_ERROR)
3477                 goto out;
3478
3479         resource = adm_ctx.resource;
3480         for_each_connection(connection, resource) {
3481                 if (connection->cstate > C_STANDALONE) {
3482                         retcode = ERR_NET_CONFIGURED;
3483                         goto out;
3484                 }
3485         }
3486         if (!idr_is_empty(&resource->devices)) {
3487                 retcode = ERR_RES_IN_USE;
3488                 goto out;
3489         }
3490
3491         list_del_rcu(&resource->resources);
3492         for_each_connection(connection, resource)
3493                 drbd_thread_stop(&connection->worker);
3494         synchronize_rcu();
3495         drbd_free_resource(resource);
3496         retcode = NO_ERROR;
3497 out:
3498         drbd_adm_finish(info, retcode);
3499         return 0;
3500 }
3501
3502 void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib)
3503 {
3504         static atomic_t drbd_genl_seq = ATOMIC_INIT(2); /* two. */
3505         struct sk_buff *msg;
3506         struct drbd_genlmsghdr *d_out;
3507         unsigned seq;
3508         int err = -ENOMEM;
3509
3510         if (sib->sib_reason == SIB_SYNC_PROGRESS) {
3511                 if (time_after(jiffies, device->rs_last_bcast + HZ))
3512                         device->rs_last_bcast = jiffies;
3513                 else
3514                         return;
3515         }
3516
3517         seq = atomic_inc_return(&drbd_genl_seq);
3518         msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
3519         if (!msg)
3520                 goto failed;
3521
3522         err = -EMSGSIZE;
3523         d_out = genlmsg_put(msg, 0, seq, &drbd_genl_family, 0, DRBD_EVENT);
3524         if (!d_out) /* cannot happen, but anyways. */
3525                 goto nla_put_failure;
3526         d_out->minor = device_to_minor(device);
3527         d_out->ret_code = NO_ERROR;
3528
3529         if (nla_put_status_info(msg, device, sib))
3530                 goto nla_put_failure;
3531         genlmsg_end(msg, d_out);
3532         err = drbd_genl_multicast_events(msg, 0);
3533         /* msg has been consumed or freed in netlink_broadcast() */
3534         if (err && err != -ESRCH)
3535                 goto failed;
3536
3537         return;
3538
3539 nla_put_failure:
3540         nlmsg_free(msg);
3541 failed:
3542         drbd_err(device, "Error %d while broadcasting event. "
3543                         "Event seq:%u sib_reason:%u\n",
3544                         err, seq, sib->sib_reason);
3545 }