drivers/block/drbd/drbd_nl.c

   1 /*
   2    drbd_nl.c
   3
   4    This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
   5
   6    Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   7    Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   8    Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
   9
  10    drbd is free software; you can redistribute it and/or modify
  11    it under the terms of the GNU General Public License as published by
  12    the Free Software Foundation; either version 2, or (at your option)
  13    any later version.
  14
  15    drbd is distributed in the hope that it will be useful,
  16    but WITHOUT ANY WARRANTY; without even the implied warranty of
  17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18    GNU General Public License for more details.
  19
  20    You should have received a copy of the GNU General Public License
  21    along with drbd; see the file COPYING.  If not, write to
  22    the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
  23
  24  */
  25
  26 #include <linux/module.h>
  27 #include <linux/drbd.h>
  28 #include <linux/in.h>
  29 #include <linux/fs.h>
  30 #include <linux/file.h>
  31 #include <linux/slab.h>
  32 #include <linux/blkpg.h>
  33 #include <linux/cpumask.h>
  34 #include "drbd_int.h"
  35 #include "drbd_protocol.h"
  36 #include "drbd_req.h"
  37 #include "drbd_wrappers.h"
  38 #include <asm/unaligned.h>
  39 #include <linux/drbd_limits.h>
  40 #include <linux/kthread.h>
  41
  42 #include <net/genetlink.h>
  43
  44 /* .doit */
  45 // int drbd_adm_create_resource(struct sk_buff *skb, struct genl_info *info);
  46 // int drbd_adm_delete_resource(struct sk_buff *skb, struct genl_info *info);
  47
  48 int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info);
  49 int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info);
  50
  51 int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info);
  52 int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info);
  53 int drbd_adm_down(struct sk_buff *skb, struct genl_info *info);
  54
  55 int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info);
  56 int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info);
  57 int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info);
  58 int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info);
  59 int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info);
  60 int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info);
  61 int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info);
  62 int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info);
  63 int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info);
  64 int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info);
  65 int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info);
  66 int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info);
  67 int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info);
  68 int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info);
  69 int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info);
  70 int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info);
  71 int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info);
  72 int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info);
  73 int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info);
  74 int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info);
  75 /* .dumpit */
  76 int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb);
  77
  78 #include <linux/drbd_genl_api.h>
  79 #include "drbd_nla.h"
  80 #include <linux/genl_magic_func.h>
  81
/* passed to blkdev_get_by_path() to claim our meta data device(s) */
  83 static char *drbd_m_holder = "Hands off! this is DRBD's meta data device.";
  84
  85 /* Configuration is strictly serialized, because generic netlink message
  86  * processing is strictly serialized by the genl_lock().
  87  * Which means we can use one static global drbd_config_context struct.
  88  */
  89 static struct drbd_config_context {
  90         /* assigned from drbd_genlmsghdr */
  91         unsigned int minor;
  92         /* assigned from request attributes, if present */
  93         unsigned int volume;
  94 #define VOLUME_UNSPECIFIED              (-1U)
  95         /* pointer into the request skb,
  96          * limited lifetime! */
  97         char *resource_name;
  98         struct nlattr *my_addr;
  99         struct nlattr *peer_addr;
 100
 101         /* reply buffer */
 102         struct sk_buff *reply_skb;
 103         /* pointer into reply buffer */
 104         struct drbd_genlmsghdr *reply_dh;
 105         /* resolved from attributes, if possible */
 106         struct drbd_device *device;
 107         struct drbd_resource *resource;
 108         struct drbd_connection *connection;
 109 } adm_ctx;
 110
 111 static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info)
 112 {
 113         genlmsg_end(skb, genlmsg_data(nlmsg_data(nlmsg_hdr(skb))));
 114         if (genlmsg_reply(skb, info))
 115                 printk(KERN_ERR "drbd: error sending genl reply\n");
 116 }
 117
 118 /* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: The only
 119  * reason it could fail was no space in skb, and there are 4k available. */
 120 int drbd_msg_put_info(const char *info)
 121 {
 122         struct sk_buff *skb = adm_ctx.reply_skb;
 123         struct nlattr *nla;
 124         int err = -EMSGSIZE;
 125
 126         if (!info || !info[0])
 127                 return 0;
 128
 129         nla = nla_nest_start(skb, DRBD_NLA_CFG_REPLY);
 130         if (!nla)
 131                 return err;
 132
 133         err = nla_put_string(skb, T_info_text, info);
 134         if (err) {
 135                 nla_nest_cancel(skb, nla);
 136                 return err;
 137         } else
 138                 nla_nest_end(skb, nla);
 139         return 0;
 140 }
 141
 142 /* This would be a good candidate for a "pre_doit" hook,
 143  * and per-family private info->pointers.
 144  * But we need to stay compatible with older kernels.
 145  * If it returns successfully, adm_ctx members are valid.
 146  */
 147 #define DRBD_ADM_NEED_MINOR     1
 148 #define DRBD_ADM_NEED_RESOURCE  2
 149 #define DRBD_ADM_NEED_CONNECTION 4
 150 static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info,
 151                 unsigned flags)
 152 {
 153         struct drbd_genlmsghdr *d_in = info->userhdr;
 154         const u8 cmd = info->genlhdr->cmd;
 155         int err;
 156
 157         memset(&adm_ctx, 0, sizeof(adm_ctx));
 158
 159         /* genl_rcv_msg only checks for CAP_NET_ADMIN on "GENL_ADMIN_PERM" :( */
 160         if (cmd != DRBD_ADM_GET_STATUS && !capable(CAP_NET_ADMIN))
 161                return -EPERM;
 162
 163         adm_ctx.reply_skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
 164         if (!adm_ctx.reply_skb) {
 165                 err = -ENOMEM;
 166                 goto fail;
 167         }
 168
 169         adm_ctx.reply_dh = genlmsg_put_reply(adm_ctx.reply_skb,
 170                                         info, &drbd_genl_family, 0, cmd);
 171         /* put of a few bytes into a fresh skb of >= 4k will always succeed.
 172          * but anyways */
 173         if (!adm_ctx.reply_dh) {
 174                 err = -ENOMEM;
 175                 goto fail;
 176         }
 177
 178         adm_ctx.reply_dh->minor = d_in->minor;
 179         adm_ctx.reply_dh->ret_code = NO_ERROR;
 180
 181         adm_ctx.volume = VOLUME_UNSPECIFIED;
 182         if (info->attrs[DRBD_NLA_CFG_CONTEXT]) {
 183                 struct nlattr *nla;
 184                 /* parse and validate only */
 185                 err = drbd_cfg_context_from_attrs(NULL, info);
 186                 if (err)
 187                         goto fail;
 188
 189                 /* It was present, and valid,
 190                  * copy it over to the reply skb. */
 191                 err = nla_put_nohdr(adm_ctx.reply_skb,
 192                                 info->attrs[DRBD_NLA_CFG_CONTEXT]->nla_len,
 193                                 info->attrs[DRBD_NLA_CFG_CONTEXT]);
 194                 if (err)
 195                         goto fail;
 196
 197                 /* and assign stuff to the global adm_ctx */
 198                 nla = nested_attr_tb[__nla_type(T_ctx_volume)];
 199                 if (nla)
 200                         adm_ctx.volume = nla_get_u32(nla);
 201                 nla = nested_attr_tb[__nla_type(T_ctx_resource_name)];
 202                 if (nla)
 203                         adm_ctx.resource_name = nla_data(nla);
 204                 adm_ctx.my_addr = nested_attr_tb[__nla_type(T_ctx_my_addr)];
 205                 adm_ctx.peer_addr = nested_attr_tb[__nla_type(T_ctx_peer_addr)];
 206                 if ((adm_ctx.my_addr &&
 207                      nla_len(adm_ctx.my_addr) > sizeof(adm_ctx.connection->my_addr)) ||
 208                     (adm_ctx.peer_addr &&
 209                      nla_len(adm_ctx.peer_addr) > sizeof(adm_ctx.connection->peer_addr))) {
 210                         err = -EINVAL;
 211                         goto fail;
 212                 }
 213         }
 214
 215         adm_ctx.minor = d_in->minor;
 216         adm_ctx.device = minor_to_device(d_in->minor);
 217         if (adm_ctx.resource_name) {
 218                 adm_ctx.resource = drbd_find_resource(adm_ctx.resource_name);
 219                 if (adm_ctx.resource) {
 220                         adm_ctx.connection = first_connection(adm_ctx.resource);
 221                         kref_get(&adm_ctx.connection->kref);
 222                 }
 223         }
 224
 225         if (!adm_ctx.device && (flags & DRBD_ADM_NEED_MINOR)) {
 226                 drbd_msg_put_info("unknown minor");
 227                 return ERR_MINOR_INVALID;
 228         }
 229         if (!adm_ctx.resource && (flags & DRBD_ADM_NEED_RESOURCE)) {
 230                 drbd_msg_put_info("unknown resource");
 231                 if (adm_ctx.resource_name)
 232                         return ERR_RES_NOT_KNOWN;
 233                 return ERR_INVALID_REQUEST;
 234         }
 235
 236         if (flags & DRBD_ADM_NEED_CONNECTION) {
 237                 if (adm_ctx.connection && !(flags & DRBD_ADM_NEED_RESOURCE)) {
 238                         drbd_msg_put_info("no resource name expected");
 239                         return ERR_INVALID_REQUEST;
 240                 }
 241                 if (adm_ctx.device) {
 242                         drbd_msg_put_info("no minor number expected");
 243                         return ERR_INVALID_REQUEST;
 244                 }
 245                 if (adm_ctx.my_addr && adm_ctx.peer_addr)
 246                         adm_ctx.connection = conn_get_by_addrs(nla_data(adm_ctx.my_addr),
 247                                                           nla_len(adm_ctx.my_addr),
 248                                                           nla_data(adm_ctx.peer_addr),
 249                                                           nla_len(adm_ctx.peer_addr));
 250                 if (!adm_ctx.connection) {
 251                         drbd_msg_put_info("unknown connection");
 252                         return ERR_INVALID_REQUEST;
 253                 }
 254         }
 255
 256         /* some more paranoia, if the request was over-determined */
 257         if (adm_ctx.device && adm_ctx.resource &&
 258             adm_ctx.device->resource != adm_ctx.resource) {
 259                 pr_warning("request: minor=%u, resource=%s; but that minor belongs to resource %s\n",
 260                                 adm_ctx.minor, adm_ctx.resource->name,
 261                                 adm_ctx.device->resource->name);
 262                 drbd_msg_put_info("minor exists in different resource");
 263                 return ERR_INVALID_REQUEST;
 264         }
 265         if (adm_ctx.device &&
 266             adm_ctx.volume != VOLUME_UNSPECIFIED &&
 267             adm_ctx.volume != adm_ctx.device->vnr) {
 268                 pr_warning("request: minor=%u, volume=%u; but that minor is volume %u in %s\n",
 269                                 adm_ctx.minor, adm_ctx.volume,
 270                                 adm_ctx.device->vnr,
 271                                 adm_ctx.device->resource->name);
 272                 drbd_msg_put_info("minor exists as different volume");
 273                 return ERR_INVALID_REQUEST;
 274         }
 275
 276         return NO_ERROR;
 277
 278 fail:
 279         nlmsg_free(adm_ctx.reply_skb);
 280         adm_ctx.reply_skb = NULL;
 281         return err;
 282 }
 283
 284 static int drbd_adm_finish(struct genl_info *info, int retcode)
 285 {
 286         if (adm_ctx.connection) {
 287                 kref_put(&adm_ctx.connection->kref, drbd_destroy_connection);
 288                 adm_ctx.connection = NULL;
 289         }
 290         if (adm_ctx.resource) {
 291                 kref_put(&adm_ctx.resource->kref, drbd_destroy_resource);
 292                 adm_ctx.resource = NULL;
 293         }
 294
 295         if (!adm_ctx.reply_skb)
 296                 return -ENOMEM;
 297
 298         adm_ctx.reply_dh->ret_code = retcode;
 299         drbd_adm_send_reply(adm_ctx.reply_skb, info);
 300         return 0;
 301 }
 302
 303 static void setup_khelper_env(struct drbd_connection *connection, char **envp)
 304 {
 305         char *afs;
 306
 307         /* FIXME: A future version will not allow this case. */
 308         if (connection->my_addr_len == 0 || connection->peer_addr_len == 0)
 309                 return;
 310
 311         switch (((struct sockaddr *)&connection->peer_addr)->sa_family) {
 312         case AF_INET6:
 313                 afs = "ipv6";
 314                 snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI6",
 315                          &((struct sockaddr_in6 *)&connection->peer_addr)->sin6_addr);
 316                 break;
 317         case AF_INET:
 318                 afs = "ipv4";
 319                 snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
 320                          &((struct sockaddr_in *)&connection->peer_addr)->sin_addr);
 321                 break;
 322         default:
 323                 afs = "ssocks";
 324                 snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
 325                          &((struct sockaddr_in *)&connection->peer_addr)->sin_addr);
 326         }
 327         snprintf(envp[3], 20, "DRBD_PEER_AF=%s", afs);
 328 }
 329
 330 int drbd_khelper(struct drbd_device *device, char *cmd)
 331 {
 332         char *envp[] = { "HOME=/",
 333                         "TERM=linux",
 334                         "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
 335                          (char[20]) { }, /* address family */
 336                          (char[60]) { }, /* address */
 337                         NULL };
 338         char mb[12];
 339         char *argv[] = {usermode_helper, cmd, mb, NULL };
 340         struct drbd_connection *connection = first_peer_device(device)->connection;
 341         struct sib_info sib;
 342         int ret;
 343
 344         if (current == connection->worker.task)
 345                 set_bit(CALLBACK_PENDING, &connection->flags);
 346
 347         snprintf(mb, 12, "minor-%d", device_to_minor(device));
 348         setup_khelper_env(connection, envp);
 349
 350         /* The helper may take some time.
 351          * write out any unsynced meta data changes now */
 352         drbd_md_sync(device);
 353
 354         drbd_info(device, "helper command: %s %s %s\n", usermode_helper, cmd, mb);
 355         sib.sib_reason = SIB_HELPER_PRE;
 356         sib.helper_name = cmd;
 357         drbd_bcast_event(device, &sib);
 358         ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
 359         if (ret)
 360                 drbd_warn(device, "helper command: %s %s %s exit code %u (0x%x)\n",
 361                                 usermode_helper, cmd, mb,
 362                                 (ret >> 8) & 0xff, ret);
 363         else
 364                 drbd_info(device, "helper command: %s %s %s exit code %u (0x%x)\n",
 365                                 usermode_helper, cmd, mb,
 366                                 (ret >> 8) & 0xff, ret);
 367         sib.sib_reason = SIB_HELPER_POST;
 368         sib.helper_exit_code = ret;
 369         drbd_bcast_event(device, &sib);
 370
 371         if (current == connection->worker.task)
 372                 clear_bit(CALLBACK_PENDING, &connection->flags);
 373
 374         if (ret < 0) /* Ignore any ERRNOs we got. */
 375                 ret = 0;
 376
 377         return ret;
 378 }
 379
 380 static int conn_khelper(struct drbd_connection *connection, char *cmd)
 381 {
 382         char *envp[] = { "HOME=/",
 383                         "TERM=linux",
 384                         "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
 385                          (char[20]) { }, /* address family */
 386                          (char[60]) { }, /* address */
 387                         NULL };
 388         char *resource_name = connection->resource->name;
 389         char *argv[] = {usermode_helper, cmd, resource_name, NULL };
 390         int ret;
 391
 392         setup_khelper_env(connection, envp);
 393         conn_md_sync(connection);
 394
 395         drbd_info(connection, "helper command: %s %s %s\n", usermode_helper, cmd, resource_name);
 396         /* TODO: conn_bcast_event() ?? */
 397
 398         ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
 399         if (ret)
 400                 drbd_warn(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
 401                           usermode_helper, cmd, resource_name,
 402                           (ret >> 8) & 0xff, ret);
 403         else
 404                 drbd_info(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
 405                           usermode_helper, cmd, resource_name,
 406                           (ret >> 8) & 0xff, ret);
 407         /* TODO: conn_bcast_event() ?? */
 408
 409         if (ret < 0) /* Ignore any ERRNOs we got. */
 410                 ret = 0;
 411
 412         return ret;
 413 }
 414
 415 static enum drbd_fencing_p highest_fencing_policy(struct drbd_connection *connection)
 416 {
 417         enum drbd_fencing_p fp = FP_NOT_AVAIL;
 418         struct drbd_peer_device *peer_device;
 419         int vnr;
 420
 421         rcu_read_lock();
 422         idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
 423                 struct drbd_device *device = peer_device->device;
 424                 if (get_ldev_if_state(device, D_CONSISTENT)) {
 425                         struct disk_conf *disk_conf =
 426                                 rcu_dereference(peer_device->device->ldev->disk_conf);
 427                         fp = max_t(enum drbd_fencing_p, fp, disk_conf->fencing);
 428                         put_ldev(device);
 429                 }
 430         }
 431         rcu_read_unlock();
 432
 433         return fp;
 434 }
 435
 436 bool conn_try_outdate_peer(struct drbd_connection *connection)
 437 {
 438         unsigned int connect_cnt;
 439         union drbd_state mask = { };
 440         union drbd_state val = { };
 441         enum drbd_fencing_p fp;
 442         char *ex_to_string;
 443         int r;
 444
 445         if (connection->cstate >= C_WF_REPORT_PARAMS) {
 446                 drbd_err(connection, "Expected cstate < C_WF_REPORT_PARAMS\n");
 447                 return false;
 448         }
 449
 450         spin_lock_irq(&connection->req_lock);
 451         connect_cnt = connection->connect_cnt;
 452         spin_unlock_irq(&connection->req_lock);
 453
 454         fp = highest_fencing_policy(connection);
 455         switch (fp) {
 456         case FP_NOT_AVAIL:
 457                 drbd_warn(connection, "Not fencing peer, I'm not even Consistent myself.\n");
 458                 goto out;
 459         case FP_DONT_CARE:
 460                 return true;
 461         default: ;
 462         }
 463
 464         r = conn_khelper(connection, "fence-peer");
 465
 466         switch ((r>>8) & 0xff) {
 467         case 3: /* peer is inconsistent */
 468                 ex_to_string = "peer is inconsistent or worse";
 469                 mask.pdsk = D_MASK;
 470                 val.pdsk = D_INCONSISTENT;
 471                 break;
 472         case 4: /* peer got outdated, or was already outdated */
 473                 ex_to_string = "peer was fenced";
 474                 mask.pdsk = D_MASK;
 475                 val.pdsk = D_OUTDATED;
 476                 break;
 477         case 5: /* peer was down */
 478                 if (conn_highest_disk(connection) == D_UP_TO_DATE) {
 479                         /* we will(have) create(d) a new UUID anyways... */
 480                         ex_to_string = "peer is unreachable, assumed to be dead";
 481                         mask.pdsk = D_MASK;
 482                         val.pdsk = D_OUTDATED;
 483                 } else {
 484                         ex_to_string = "peer unreachable, doing nothing since disk != UpToDate";
 485                 }
 486                 break;
 487         case 6: /* Peer is primary, voluntarily outdate myself.
 488                  * This is useful when an unconnected R_SECONDARY is asked to
 489                  * become R_PRIMARY, but finds the other peer being active. */
 490                 ex_to_string = "peer is active";
 491                 drbd_warn(connection, "Peer is primary, outdating myself.\n");
 492                 mask.disk = D_MASK;
 493                 val.disk = D_OUTDATED;
 494                 break;
 495         case 7:
 496                 if (fp != FP_STONITH)
 497                         drbd_err(connection, "fence-peer() = 7 && fencing != Stonith !!!\n");
 498                 ex_to_string = "peer was stonithed";
 499                 mask.pdsk = D_MASK;
 500                 val.pdsk = D_OUTDATED;
 501                 break;
 502         default:
 503                 /* The script is broken ... */
 504                 drbd_err(connection, "fence-peer helper broken, returned %d\n", (r>>8)&0xff);
 505                 return false; /* Eventually leave IO frozen */
 506         }
 507
 508         drbd_info(connection, "fence-peer helper returned %d (%s)\n",
 509                   (r>>8) & 0xff, ex_to_string);
 510
 511  out:
 512
 513         /* Not using
 514            conn_request_state(connection, mask, val, CS_VERBOSE);
 515            here, because we might were able to re-establish the connection in the
 516            meantime. */
 517         spin_lock_irq(&connection->req_lock);
 518         if (connection->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &connection->flags)) {
 519                 if (connection->connect_cnt != connect_cnt)
 520                         /* In case the connection was established and droped
 521                            while the fence-peer handler was running, ignore it */
 522                         drbd_info(connection, "Ignoring fence-peer exit code\n");
 523                 else
 524                         _conn_request_state(connection, mask, val, CS_VERBOSE);
 525         }
 526         spin_unlock_irq(&connection->req_lock);
 527
 528         return conn_highest_pdsk(connection) <= D_OUTDATED;
 529 }
 530
 531 static int _try_outdate_peer_async(void *data)
 532 {
 533         struct drbd_connection *connection = (struct drbd_connection *)data;
 534
 535         conn_try_outdate_peer(connection);
 536
 537         kref_put(&connection->kref, drbd_destroy_connection);
 538         return 0;
 539 }
 540
 541 void conn_try_outdate_peer_async(struct drbd_connection *connection)
 542 {
 543         struct task_struct *opa;
 544
 545         kref_get(&connection->kref);
 546         opa = kthread_run(_try_outdate_peer_async, connection, "drbd_async_h");
 547         if (IS_ERR(opa)) {
 548                 drbd_err(connection, "out of mem, failed to invoke fence-peer helper\n");
 549                 kref_put(&connection->kref, drbd_destroy_connection);
 550         }
 551 }
 552
 553 enum drbd_state_rv
 554 drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force)
 555 {
 556         const int max_tries = 4;
 557         enum drbd_state_rv rv = SS_UNKNOWN_ERROR;
 558         struct net_conf *nc;
 559         int try = 0;
 560         int forced = 0;
 561         union drbd_state mask, val;
 562
 563         if (new_role == R_PRIMARY)
 564                 request_ping(first_peer_device(device)->connection); /* Detect a dead peer ASAP */
 565
 566         mutex_lock(device->state_mutex);
 567
 568         mask.i = 0; mask.role = R_MASK;
 569         val.i  = 0; val.role  = new_role;
 570
 571         while (try++ < max_tries) {
 572                 rv = _drbd_request_state(device, mask, val, CS_WAIT_COMPLETE);
 573
 574                 /* in case we first succeeded to outdate,
 575                  * but now suddenly could establish a connection */
 576                 if (rv == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) {
 577                         val.pdsk = 0;
 578                         mask.pdsk = 0;
 579                         continue;
 580                 }
 581
 582                 if (rv == SS_NO_UP_TO_DATE_DISK && force &&
 583                     (device->state.disk < D_UP_TO_DATE &&
 584                      device->state.disk >= D_INCONSISTENT)) {
 585                         mask.disk = D_MASK;
 586                         val.disk  = D_UP_TO_DATE;
 587                         forced = 1;
 588                         continue;
 589                 }
 590
 591                 if (rv == SS_NO_UP_TO_DATE_DISK &&
 592                     device->state.disk == D_CONSISTENT && mask.pdsk == 0) {
 593                         D_ASSERT(device, device->state.pdsk == D_UNKNOWN);
 594
 595                         if (conn_try_outdate_peer(first_peer_device(device)->connection)) {
 596                                 val.disk = D_UP_TO_DATE;
 597                                 mask.disk = D_MASK;
 598                         }
 599                         continue;
 600                 }
 601
 602                 if (rv == SS_NOTHING_TO_DO)
 603                         goto out;
 604                 if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) {
 605                         if (!conn_try_outdate_peer(first_peer_device(device)->connection) && force) {
 606                                 drbd_warn(device, "Forced into split brain situation!\n");
 607                                 mask.pdsk = D_MASK;
 608                                 val.pdsk  = D_OUTDATED;
 609
 610                         }
 611                         continue;
 612                 }
 613                 if (rv == SS_TWO_PRIMARIES) {
 614                         /* Maybe the peer is detected as dead very soon...
 615                            retry at most once more in this case. */
 616                         int timeo;
 617                         rcu_read_lock();
 618                         nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
 619                         timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1;
 620                         rcu_read_unlock();
 621                         schedule_timeout_interruptible(timeo);
 622                         if (try < max_tries)
 623                                 try = max_tries - 1;
 624                         continue;
 625                 }
 626                 if (rv < SS_SUCCESS) {
 627                         rv = _drbd_request_state(device, mask, val,
 628                                                 CS_VERBOSE + CS_WAIT_COMPLETE);
 629                         if (rv < SS_SUCCESS)
 630                                 goto out;
 631                 }
 632                 break;
 633         }
 634
 635         if (rv < SS_SUCCESS)
 636                 goto out;
 637
 638         if (forced)
 639                 drbd_warn(device, "Forced to consider local data as UpToDate!\n");
 640
 641         /* Wait until nothing is on the fly :) */
 642         wait_event(device->misc_wait, atomic_read(&device->ap_pending_cnt) == 0);
 643
 644         /* FIXME also wait for all pending P_BARRIER_ACK? */
 645
 646         if (new_role == R_SECONDARY) {
 647                 set_disk_ro(device->vdisk, true);
 648                 if (get_ldev(device)) {
 649                         device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
 650                         put_ldev(device);
 651                 }
 652         } else {
 653                 mutex_lock(&first_peer_device(device)->connection->conf_update);
 654                 nc = first_peer_device(device)->connection->net_conf;
 655                 if (nc)
 656                         nc->discard_my_data = 0; /* without copy; single bit op is atomic */
 657                 mutex_unlock(&first_peer_device(device)->connection->conf_update);
 658
 659                 set_disk_ro(device->vdisk, false);
 660                 if (get_ldev(device)) {
 661                         if (((device->state.conn < C_CONNECTED ||
 662                                device->state.pdsk <= D_FAILED)
 663                               && device->ldev->md.uuid[UI_BITMAP] == 0) || forced)
 664                                 drbd_uuid_new_current(device);
 665
 666                         device->ldev->md.uuid[UI_CURRENT] |=  (u64)1;
 667                         put_ldev(device);
 668                 }
 669         }
 670
 671         /* writeout of activity log covered areas of the bitmap
 672          * to stable storage done in after state change already */
 673
 674         if (device->state.conn >= C_WF_REPORT_PARAMS) {
 675                 /* if this was forced, we should consider sync */
 676                 if (forced)
 677                         drbd_send_uuids(device);
 678                 drbd_send_current_state(device);
 679         }
 680
 681         drbd_md_sync(device);
 682
 683         kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
 684 out:
 685         mutex_unlock(device->state_mutex);
 686         return rv;
 687 }
 688
 689 static const char *from_attrs_err_to_txt(int err)
 690 {
 691         return  err == -ENOMSG ? "required attribute missing" :
 692                 err == -EOPNOTSUPP ? "unknown mandatory attribute" :
 693                 err == -EEXIST ? "can not change invariant setting" :
 694                 "invalid attribute value";
 695 }
 696
 697 int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
 698 {
 699         struct set_role_parms parms;
 700         int err;
 701         enum drbd_ret_code retcode;
 702
 703         retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
 704         if (!adm_ctx.reply_skb)
 705                 return retcode;
 706         if (retcode != NO_ERROR)
 707                 goto out;
 708
 709         memset(&parms, 0, sizeof(parms));
 710         if (info->attrs[DRBD_NLA_SET_ROLE_PARMS]) {
 711                 err = set_role_parms_from_attrs(&parms, info);
 712                 if (err) {
 713                         retcode = ERR_MANDATORY_TAG;
 714                         drbd_msg_put_info(from_attrs_err_to_txt(err));
 715                         goto out;
 716                 }
 717         }
 718
 719         if (info->genlhdr->cmd == DRBD_ADM_PRIMARY)
 720                 retcode = drbd_set_role(adm_ctx.device, R_PRIMARY, parms.assume_uptodate);
 721         else
 722                 retcode = drbd_set_role(adm_ctx.device, R_SECONDARY, 0);
 723 out:
 724         drbd_adm_finish(info, retcode);
 725         return 0;
 726 }
 727
 728 /* Initializes the md.*_offset members, so we are able to find
 729  * the on disk meta data.
 730  *
 731  * We currently have two possible layouts:
 732  * external:
 733  *   |----------- md_size_sect ------------------|
 734  *   [ 4k superblock ][ activity log ][  Bitmap  ]
 735  *   | al_offset == 8 |
 736  *   | bm_offset = al_offset + X      |
 737  *  ==> bitmap sectors = md_size_sect - bm_offset
 738  *
 739  * internal:
 740  *            |----------- md_size_sect ------------------|
 741  * [data.....][  Bitmap  ][ activity log ][ 4k superblock ]
 742  *                        | al_offset < 0 |
 743  *            | bm_offset = al_offset - Y |
 744  *  ==> bitmap sectors = Y = al_offset - bm_offset
 745  *
 746  *  Activity log size used to be fixed 32kB,
 747  *  but is about to become configurable.
 748  */
 749 static void drbd_md_set_sector_offsets(struct drbd_device *device,
 750                                        struct drbd_backing_dev *bdev)
 751 {
 752         sector_t md_size_sect = 0;
 753         unsigned int al_size_sect = bdev->md.al_size_4k * 8;
 754
 755         bdev->md.md_offset = drbd_md_ss(bdev);
 756
 757         switch (bdev->md.meta_dev_idx) {
 758         default:
 759                 /* v07 style fixed size indexed meta data */
 760                 bdev->md.md_size_sect = MD_128MB_SECT;
 761                 bdev->md.al_offset = MD_4kB_SECT;
 762                 bdev->md.bm_offset = MD_4kB_SECT + al_size_sect;
 763                 break;
 764         case DRBD_MD_INDEX_FLEX_EXT:
 765                 /* just occupy the full device; unit: sectors */
 766                 bdev->md.md_size_sect = drbd_get_capacity(bdev->md_bdev);
 767                 bdev->md.al_offset = MD_4kB_SECT;
 768                 bdev->md.bm_offset = MD_4kB_SECT + al_size_sect;
 769                 break;
 770         case DRBD_MD_INDEX_INTERNAL:
 771         case DRBD_MD_INDEX_FLEX_INT:
 772                 /* al size is still fixed */
 773                 bdev->md.al_offset = -al_size_sect;
 774                 /* we need (slightly less than) ~ this much bitmap sectors: */
 775                 md_size_sect = drbd_get_capacity(bdev->backing_bdev);
 776                 md_size_sect = ALIGN(md_size_sect, BM_SECT_PER_EXT);
 777                 md_size_sect = BM_SECT_TO_EXT(md_size_sect);
 778                 md_size_sect = ALIGN(md_size_sect, 8);
 779
 780                 /* plus the "drbd meta data super block",
 781                  * and the activity log; */
 782                 md_size_sect += MD_4kB_SECT + al_size_sect;
 783
 784                 bdev->md.md_size_sect = md_size_sect;
 785                 /* bitmap offset is adjusted by 'super' block size */
 786                 bdev->md.bm_offset   = -md_size_sect + MD_4kB_SECT;
 787                 break;
 788         }
 789 }
 790
 791 /* input size is expected to be in KB */
 792 char *ppsize(char *buf, unsigned long long size)
 793 {
 794         /* Needs 9 bytes at max including trailing NUL:
 795          * -1ULL ==> "16384 EB" */
 796         static char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' };
 797         int base = 0;
 798         while (size >= 10000 && base < sizeof(units)-1) {
 799                 /* shift + round */
 800                 size = (size >> 10) + !!(size & (1<<9));
 801                 base++;
 802         }
 803         sprintf(buf, "%u %cB", (unsigned)size, units[base]);
 804
 805         return buf;
 806 }
 807
 808 /* there is still a theoretical deadlock when called from receiver
 809  * on an D_INCONSISTENT R_PRIMARY:
 810  *  remote READ does inc_ap_bio, receiver would need to receive answer
 811  *  packet from remote to dec_ap_bio again.
 812  *  receiver receive_sizes(), comes here,
 813  *  waits for ap_bio_cnt == 0. -> deadlock.
 814  * but this cannot happen, actually, because:
 815  *  R_PRIMARY D_INCONSISTENT, and peer's disk is unreachable
 816  *  (not connected, or bad/no disk on peer):
 817  *  see drbd_fail_request_early, ap_bio_cnt is zero.
 818  *  R_PRIMARY D_INCONSISTENT, and C_SYNC_TARGET:
 819  *  peer may not initiate a resize.
 820  */
/* Note these are not to be confused with
 * drbd_adm_suspend_io/drbd_adm_resume_io,
 * which are (sub) state changes triggered by admin (drbdsetup),
 * and can be long lived.
 * This changes a bit in device->flags, is triggered by drbd internals,
 * and should be short-lived. */
 827 void drbd_suspend_io(struct drbd_device *device)
 828 {
 829         set_bit(SUSPEND_IO, &device->flags);
 830         if (drbd_suspended(device))
 831                 return;
 832         wait_event(device->misc_wait, !atomic_read(&device->ap_bio_cnt));
 833 }
 834
/* Counterpart of drbd_suspend_io(): clears the SUSPEND_IO flag bit
 * and wakes up everybody sleeping on device->misc_wait. */
void drbd_resume_io(struct drbd_device *device)
{
        clear_bit(SUSPEND_IO, &device->flags);
        wake_up(&device->misc_wait);
}
 840
 841 /**
 842  * drbd_determine_dev_size() -  Sets the right device size obeying all constraints
 843  * @device:     DRBD device.
 844  *
 845  * Returns 0 on success, negative return values indicate errors.
 846  * You should call drbd_md_sync() after calling this function.
 847  */
 848 enum determine_dev_size
 849 drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct resize_parms *rs) __must_hold(local)
 850 {
 851         sector_t prev_first_sect, prev_size; /* previous meta location */
 852         sector_t la_size_sect, u_size;
 853         struct drbd_md *md = &device->ldev->md;
 854         u32 prev_al_stripe_size_4k;
 855         u32 prev_al_stripes;
 856         sector_t size;
 857         char ppb[10];
 858         void *buffer;
 859
 860         int md_moved, la_size_changed;
 861         enum determine_dev_size rv = DS_UNCHANGED;
 862
 863         /* race:
 864          * application request passes inc_ap_bio,
 865          * but then cannot get an AL-reference.
 866          * this function later may wait on ap_bio_cnt == 0. -> deadlock.
 867          *
 868          * to avoid that:
 869          * Suspend IO right here.
 870          * still lock the act_log to not trigger ASSERTs there.
 871          */
 872         drbd_suspend_io(device);
 873         buffer = drbd_md_get_buffer(device); /* Lock meta-data IO */
 874         if (!buffer) {
 875                 drbd_resume_io(device);
 876                 return DS_ERROR;
 877         }
 878
 879         /* no wait necessary anymore, actually we could assert that */
 880         wait_event(device->al_wait, lc_try_lock(device->act_log));
 881
 882         prev_first_sect = drbd_md_first_sector(device->ldev);
 883         prev_size = device->ldev->md.md_size_sect;
 884         la_size_sect = device->ldev->md.la_size_sect;
 885
 886         if (rs) {
 887                 /* rs is non NULL if we should change the AL layout only */
 888
 889                 prev_al_stripes = md->al_stripes;
 890                 prev_al_stripe_size_4k = md->al_stripe_size_4k;
 891
 892                 md->al_stripes = rs->al_stripes;
 893                 md->al_stripe_size_4k = rs->al_stripe_size / 4;
 894                 md->al_size_4k = (u64)rs->al_stripes * rs->al_stripe_size / 4;
 895         }
 896
 897         drbd_md_set_sector_offsets(device, device->ldev);
 898
 899         rcu_read_lock();
 900         u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;
 901         rcu_read_unlock();
 902         size = drbd_new_dev_size(device, device->ldev, u_size, flags & DDSF_FORCED);
 903
 904         if (size < la_size_sect) {
 905                 if (rs && u_size == 0) {
 906                         /* Remove "rs &&" later. This check should always be active, but
 907                            right now the receiver expects the permissive behavior */
 908                         drbd_warn(device, "Implicit shrink not allowed. "
 909                                  "Use --size=%llus for explicit shrink.\n",
 910                                  (unsigned long long)size);
 911                         rv = DS_ERROR_SHRINK;
 912                 }
 913                 if (u_size > size)
 914                         rv = DS_ERROR_SPACE_MD;
 915                 if (rv != DS_UNCHANGED)
 916                         goto err_out;
 917         }
 918
 919         if (drbd_get_capacity(device->this_bdev) != size ||
 920             drbd_bm_capacity(device) != size) {
 921                 int err;
 922                 err = drbd_bm_resize(device, size, !(flags & DDSF_NO_RESYNC));
 923                 if (unlikely(err)) {
 924                         /* currently there is only one error: ENOMEM! */
 925                         size = drbd_bm_capacity(device)>>1;
 926                         if (size == 0) {
 927                                 drbd_err(device, "OUT OF MEMORY! "
 928                                     "Could not allocate bitmap!\n");
 929                         } else {
 930                                 drbd_err(device, "BM resizing failed. "
 931                                     "Leaving size unchanged at size = %lu KB\n",
 932                                     (unsigned long)size);
 933                         }
 934                         rv = DS_ERROR;
 935                 }
 936                 /* racy, see comments above. */
 937                 drbd_set_my_capacity(device, size);
 938                 device->ldev->md.la_size_sect = size;
 939                 drbd_info(device, "size = %s (%llu KB)\n", ppsize(ppb, size>>1),
 940                      (unsigned long long)size>>1);
 941         }
 942         if (rv <= DS_ERROR)
 943                 goto err_out;
 944
 945         la_size_changed = (la_size_sect != device->ldev->md.la_size_sect);
 946
 947         md_moved = prev_first_sect != drbd_md_first_sector(device->ldev)
 948                 || prev_size       != device->ldev->md.md_size_sect;
 949
 950         if (la_size_changed || md_moved || rs) {
 951                 u32 prev_flags;
 952
 953                 drbd_al_shrink(device); /* All extents inactive. */
 954
 955                 prev_flags = md->flags;
 956                 md->flags &= ~MDF_PRIMARY_IND;
 957                 drbd_md_write(device, buffer);
 958
 959                 drbd_info(device, "Writing the whole bitmap, %s\n",
 960                          la_size_changed && md_moved ? "size changed and md moved" :
 961                          la_size_changed ? "size changed" : "md moved");
 962                 /* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
 963                 drbd_bitmap_io(device, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
 964                                "size changed", BM_LOCKED_MASK);
 965                 drbd_initialize_al(device, buffer);
 966
 967                 md->flags = prev_flags;
 968                 drbd_md_write(device, buffer);
 969
 970                 if (rs)
 971                         drbd_info(device, "Changed AL layout to al-stripes = %d, al-stripe-size-kB = %d\n",
 972                                   md->al_stripes, md->al_stripe_size_4k * 4);
 973         }
 974
 975         if (size > la_size_sect)
 976                 rv = la_size_sect ? DS_GREW : DS_GREW_FROM_ZERO;
 977         if (size < la_size_sect)
 978                 rv = DS_SHRUNK;
 979
 980         if (0) {
 981         err_out:
 982                 if (rs) {
 983                         md->al_stripes = prev_al_stripes;
 984                         md->al_stripe_size_4k = prev_al_stripe_size_4k;
 985                         md->al_size_4k = (u64)prev_al_stripes * prev_al_stripe_size_4k;
 986
 987                         drbd_md_set_sector_offsets(device, device->ldev);
 988                 }
 989         }
 990         lc_unlock(device->act_log);
 991         wake_up(&device->al_wait);
 992         drbd_md_put_buffer(device);
 993         drbd_resume_io(device);
 994
 995         return rv;
 996 }
 997
 998 sector_t
 999 drbd_new_dev_size(struct drbd_device *device, struct drbd_backing_dev *bdev,
1000                   sector_t u_size, int assume_peer_has_space)
1001 {
1002         sector_t p_size = device->p_size;   /* partner's disk size. */
1003         sector_t la_size_sect = bdev->md.la_size_sect; /* last agreed size. */
1004         sector_t m_size; /* my size */
1005         sector_t size = 0;
1006
1007         m_size = drbd_get_max_capacity(bdev);
1008
1009         if (device->state.conn < C_CONNECTED && assume_peer_has_space) {
1010                 drbd_warn(device, "Resize while not connected was forced by the user!\n");
1011                 p_size = m_size;
1012         }
1013
1014         if (p_size && m_size) {
1015                 size = min_t(sector_t, p_size, m_size);
1016         } else {
1017                 if (la_size_sect) {
1018                         size = la_size_sect;
1019                         if (m_size && m_size < size)
1020                                 size = m_size;
1021                         if (p_size && p_size < size)
1022                                 size = p_size;
1023                 } else {
1024                         if (m_size)
1025                                 size = m_size;
1026                         if (p_size)
1027                                 size = p_size;
1028                 }
1029         }
1030
1031         if (size == 0)
1032                 drbd_err(device, "Both nodes diskless!\n");
1033
1034         if (u_size) {
1035                 if (u_size > size)
1036                         drbd_err(device, "Requested disk size is too big (%lu > %lu)\n",
1037                             (unsigned long)u_size>>1, (unsigned long)size>>1);
1038                 else
1039                         size = u_size;
1040         }
1041
1042         return size;
1043 }
1044
1045 /**
1046  * drbd_check_al_size() - Ensures that the AL is of the right size
1047  * @device:     DRBD device.
1048  *
1049  * Returns -EBUSY if current al lru is still used, -ENOMEM when allocation
1050  * failed, and 0 on success. You should call drbd_md_sync() after you called
1051  * this function.
1052  */
1053 static int drbd_check_al_size(struct drbd_device *device, struct disk_conf *dc)
1054 {
1055         struct lru_cache *n, *t;
1056         struct lc_element *e;
1057         unsigned int in_use;
1058         int i;
1059
1060         if (device->act_log &&
1061             device->act_log->nr_elements == dc->al_extents)
1062                 return 0;
1063
1064         in_use = 0;
1065         t = device->act_log;
1066         n = lc_create("act_log", drbd_al_ext_cache, AL_UPDATES_PER_TRANSACTION,
1067                 dc->al_extents, sizeof(struct lc_element), 0);
1068
1069         if (n == NULL) {
1070                 drbd_err(device, "Cannot allocate act_log lru!\n");
1071                 return -ENOMEM;
1072         }
1073         spin_lock_irq(&device->al_lock);
1074         if (t) {
1075                 for (i = 0; i < t->nr_elements; i++) {
1076                         e = lc_element_by_index(t, i);
1077                         if (e->refcnt)
1078                                 drbd_err(device, "refcnt(%d)==%d\n",
1079                                     e->lc_number, e->refcnt);
1080                         in_use += e->refcnt;
1081                 }
1082         }
1083         if (!in_use)
1084                 device->act_log = n;
1085         spin_unlock_irq(&device->al_lock);
1086         if (in_use) {
1087                 drbd_err(device, "Activity log still in use!\n");
1088                 lc_destroy(n);
1089                 return -EBUSY;
1090         } else {
1091                 if (t)
1092                         lc_destroy(t);
1093         }
1094         drbd_md_mark_dirty(device); /* we changed device->act_log->nr_elemens */
1095         return 0;
1096 }
1097
1098 static void drbd_setup_queue_param(struct drbd_device *device, unsigned int max_bio_size)
1099 {
1100         struct request_queue * const q = device->rq_queue;
1101         unsigned int max_hw_sectors = max_bio_size >> 9;
1102         unsigned int max_segments = 0;
1103
1104         if (get_ldev_if_state(device, D_ATTACHING)) {
1105                 struct request_queue * const b = device->ldev->backing_bdev->bd_disk->queue;
1106
1107                 max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
1108                 rcu_read_lock();
1109                 max_segments = rcu_dereference(device->ldev->disk_conf)->max_bio_bvecs;
1110                 rcu_read_unlock();
1111                 put_ldev(device);
1112         }
1113
1114         blk_queue_logical_block_size(q, 512);
1115         blk_queue_max_hw_sectors(q, max_hw_sectors);
1116         /* This is the workaround for "bio would need to, but cannot, be split" */
1117         blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
1118         blk_queue_segment_boundary(q, PAGE_CACHE_SIZE-1);
1119
1120         if (get_ldev_if_state(device, D_ATTACHING)) {
1121                 struct request_queue * const b = device->ldev->backing_bdev->bd_disk->queue;
1122
1123                 blk_queue_stack_limits(q, b);
1124
1125                 if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) {
1126                         drbd_info(device, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n",
1127                                  q->backing_dev_info.ra_pages,
1128                                  b->backing_dev_info.ra_pages);
1129                         q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages;
1130                 }
1131                 put_ldev(device);
1132         }
1133 }
1134
1135 void drbd_reconsider_max_bio_size(struct drbd_device *device)
1136 {
1137         unsigned int now, new, local, peer;
1138
1139         now = queue_max_hw_sectors(device->rq_queue) << 9;
1140         local = device->local_max_bio_size; /* Eventually last known value, from volatile memory */
1141         peer = device->peer_max_bio_size; /* Eventually last known value, from meta data */
1142
1143         if (get_ldev_if_state(device, D_ATTACHING)) {
1144                 local = queue_max_hw_sectors(device->ldev->backing_bdev->bd_disk->queue) << 9;
1145                 device->local_max_bio_size = local;
1146                 put_ldev(device);
1147         }
1148         local = min(local, DRBD_MAX_BIO_SIZE);
1149
1150         /* We may ignore peer limits if the peer is modern enough.
1151            Because new from 8.3.8 onwards the peer can use multiple
1152            BIOs for a single peer_request */
1153         if (device->state.conn >= C_WF_REPORT_PARAMS) {
1154                 if (first_peer_device(device)->connection->agreed_pro_version < 94)
1155                         peer = min(device->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
1156                         /* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */
1157                 else if (first_peer_device(device)->connection->agreed_pro_version == 94)
1158                         peer = DRBD_MAX_SIZE_H80_PACKET;
1159                 else if (first_peer_device(device)->connection->agreed_pro_version < 100)
1160                         peer = DRBD_MAX_BIO_SIZE_P95;  /* drbd 8.3.8 onwards, before 8.4.0 */
1161                 else
1162                         peer = DRBD_MAX_BIO_SIZE;
1163         }
1164
1165         new = min(local, peer);
1166
1167         if (device->state.role == R_PRIMARY && new < now)
1168                 drbd_err(device, "ASSERT FAILED new < now; (%u < %u)\n", new, now);
1169
1170         if (new != now)
1171                 drbd_info(device, "max BIO size = %u\n", new);
1172
1173         drbd_setup_queue_param(device, new);
1174 }
1175
/* Starts the worker thread of @connection,
 * then calls conn_flush_workqueue() on the connection. */
static void conn_reconfig_start(struct drbd_connection *connection)
{
        drbd_thread_start(&connection->worker);
        conn_flush_workqueue(connection);
}
1182
1183 /* if still unconfigured, stops worker again. */
1184 static void conn_reconfig_done(struct drbd_connection *connection)
1185 {
1186         bool stop_threads;
1187         spin_lock_irq(&connection->req_lock);
1188         stop_threads = conn_all_vols_unconf(connection) &&
1189                 connection->cstate == C_STANDALONE;
1190         spin_unlock_irq(&connection->req_lock);
1191         if (stop_threads) {
1192                 /* asender is implicitly stopped by receiver
1193                  * in conn_disconnect() */
1194                 drbd_thread_stop(&connection->receiver);
1195                 drbd_thread_stop(&connection->worker);
1196         }
1197 }
1198
1199 /* Make sure IO is suspended before calling this function(). */
1200 static void drbd_suspend_al(struct drbd_device *device)
1201 {
1202         int s = 0;
1203
1204         if (!lc_try_lock(device->act_log)) {
1205                 drbd_warn(device, "Failed to lock al in drbd_suspend_al()\n");
1206                 return;
1207         }
1208
1209         drbd_al_shrink(device);
1210         spin_lock_irq(&first_peer_device(device)->connection->req_lock);
1211         if (device->state.conn < C_CONNECTED)
1212                 s = !test_and_set_bit(AL_SUSPENDED, &device->flags);
1213         spin_unlock_irq(&first_peer_device(device)->connection->req_lock);
1214         lc_unlock(device->act_log);
1215
1216         if (s)
1217                 drbd_info(device, "Suspended AL updates\n");
1218 }
1219
1220
1221 static bool should_set_defaults(struct genl_info *info)
1222 {
1223         unsigned flags = ((struct drbd_genlmsghdr*)info->userhdr)->flags;
1224         return 0 != (flags & DRBD_GENL_F_SET_DEFAULTS);
1225 }
1226
1227 static unsigned int drbd_al_extents_max(struct drbd_backing_dev *bdev)
1228 {
1229         /* This is limited by 16 bit "slot" numbers,
1230          * and by available on-disk context storage.
1231          *
1232          * Also (u16)~0 is special (denotes a "free" extent).
1233          *
1234          * One transaction occupies one 4kB on-disk block,
1235          * we have n such blocks in the on disk ring buffer,
1236          * the "current" transaction may fail (n-1),
1237          * and there is 919 slot numbers context information per transaction.
1238          *
1239          * 72 transaction blocks amounts to more than 2**16 context slots,
1240          * so cap there first.
1241          */
1242         const unsigned int max_al_nr = DRBD_AL_EXTENTS_MAX;
1243         const unsigned int sufficient_on_disk =
1244                 (max_al_nr + AL_CONTEXT_PER_TRANSACTION -1)
1245                 /AL_CONTEXT_PER_TRANSACTION;
1246
1247         unsigned int al_size_4k = bdev->md.al_size_4k;
1248
1249         if (al_size_4k > sufficient_on_disk)
1250                 return max_al_nr;
1251
1252         return (al_size_4k - 1) * AL_CONTEXT_PER_TRANSACTION;
1253 }
1254
1255 int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
1256 {
1257         enum drbd_ret_code retcode;
1258         struct drbd_device *device;
1259         struct disk_conf *new_disk_conf, *old_disk_conf;
1260         struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
1261         int err, fifo_size;
1262
1263         retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
1264         if (!adm_ctx.reply_skb)
1265                 return retcode;
1266         if (retcode != NO_ERROR)
1267                 goto out;
1268
1269         device = adm_ctx.device;
1270
1271         /* we also need a disk
1272          * to change the options on */
1273         if (!get_ldev(device)) {
1274                 retcode = ERR_NO_DISK;
1275                 goto out;
1276         }
1277
1278         new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL);
1279         if (!new_disk_conf) {
1280                 retcode = ERR_NOMEM;
1281                 goto fail;
1282         }
1283
1284         mutex_lock(&first_peer_device(device)->connection->conf_update);
1285         old_disk_conf = device->ldev->disk_conf;
1286         *new_disk_conf = *old_disk_conf;
1287         if (should_set_defaults(info))
1288                 set_disk_conf_defaults(new_disk_conf);
1289
1290         err = disk_conf_from_attrs_for_change(new_disk_conf, info);
1291         if (err && err != -ENOMSG) {
1292                 retcode = ERR_MANDATORY_TAG;
1293                 drbd_msg_put_info(from_attrs_err_to_txt(err));
1294                 goto fail_unlock;
1295         }
1296
1297         if (!expect(new_disk_conf->resync_rate >= 1))
1298                 new_disk_conf->resync_rate = 1;
1299
1300         if (new_disk_conf->al_extents < DRBD_AL_EXTENTS_MIN)
1301                 new_disk_conf->al_extents = DRBD_AL_EXTENTS_MIN;
1302         if (new_disk_conf->al_extents > drbd_al_extents_max(device->ldev))
1303                 new_disk_conf->al_extents = drbd_al_extents_max(device->ldev);
1304
1305         if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
1306                 new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;
1307
1308         fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
1309         if (fifo_size != device->rs_plan_s->size) {
1310                 new_plan = fifo_alloc(fifo_size);
1311                 if (!new_plan) {
1312                         drbd_err(device, "kmalloc of fifo_buffer failed");
1313                         retcode = ERR_NOMEM;
1314                         goto fail_unlock;
1315                 }
1316         }
1317
1318         drbd_suspend_io(device);
1319         wait_event(device->al_wait, lc_try_lock(device->act_log));
1320         drbd_al_shrink(device);
1321         err = drbd_check_al_size(device, new_disk_conf);
1322         lc_unlock(device->act_log);
1323         wake_up(&device->al_wait);
1324         drbd_resume_io(device);
1325
1326         if (err) {
1327                 retcode = ERR_NOMEM;
1328                 goto fail_unlock;
1329         }
1330
1331         write_lock_irq(&global_state_lock);
1332         retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
1333         if (retcode == NO_ERROR) {
1334                 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
1335                 drbd_resync_after_changed(device);
1336         }
1337         write_unlock_irq(&global_state_lock);
1338
1339         if (retcode != NO_ERROR)
1340                 goto fail_unlock;
1341
1342         if (new_plan) {
1343                 old_plan = device->rs_plan_s;
1344                 rcu_assign_pointer(device->rs_plan_s, new_plan);
1345         }
1346
1347         mutex_unlock(&first_peer_device(device)->connection->conf_update);
1348
1349         if (new_disk_conf->al_updates)
1350                 device->ldev->md.flags &= ~MDF_AL_DISABLED;
1351         else
1352                 device->ldev->md.flags |= MDF_AL_DISABLED;
1353
1354         if (new_disk_conf->md_flushes)
1355                 clear_bit(MD_NO_FUA, &device->flags);
1356         else
1357                 set_bit(MD_NO_FUA, &device->flags);
1358
1359         drbd_bump_write_ordering(first_peer_device(device)->connection, WO_bdev_flush);
1360
1361         drbd_md_sync(device);
1362
1363         if (device->state.conn >= C_CONNECTED)
1364                 drbd_send_sync_param(device);
1365
1366         synchronize_rcu();
1367         kfree(old_disk_conf);
1368         kfree(old_plan);
1369         mod_timer(&device->request_timer, jiffies + HZ);
1370         goto success;
1371
1372 fail_unlock:
1373         mutex_unlock(&first_peer_device(device)->connection->conf_update);
1374  fail:
1375         kfree(new_disk_conf);
1376         kfree(new_plan);
1377 success:
1378         put_ldev(device);
1379  out:
1380         drbd_adm_finish(info, retcode);
1381         return 0;
1382 }
1383
1384 int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
1385 {
1386         struct drbd_device *device;
1387         int err;
1388         enum drbd_ret_code retcode;
1389         enum determine_dev_size dd;
1390         sector_t max_possible_sectors;
1391         sector_t min_md_device_sectors;
1392         struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */
1393         struct disk_conf *new_disk_conf = NULL;
1394         struct block_device *bdev;
1395         struct lru_cache *resync_lru = NULL;
1396         struct fifo_buffer *new_plan = NULL;
1397         union drbd_state ns, os;
1398         enum drbd_state_rv rv;
1399         struct net_conf *nc;
1400
1401         retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
1402         if (!adm_ctx.reply_skb)
1403                 return retcode;
1404         if (retcode != NO_ERROR)
1405                 goto finish;
1406
1407         device = adm_ctx.device;
1408         conn_reconfig_start(first_peer_device(device)->connection);
1409
1410         /* if you want to reconfigure, please tear down first */
1411         if (device->state.disk > D_DISKLESS) {
1412                 retcode = ERR_DISK_CONFIGURED;
1413                 goto fail;
1414         }
1415         /* It may just now have detached because of IO error.  Make sure
1416          * drbd_ldev_destroy is done already, we may end up here very fast,
1417          * e.g. if someone calls attach from the on-io-error handler,
1418          * to realize a "hot spare" feature (not that I'd recommend that) */
1419         wait_event(device->misc_wait, !atomic_read(&device->local_cnt));
1420
1421         /* make sure there is no leftover from previous force-detach attempts */
1422         clear_bit(FORCE_DETACH, &device->flags);
1423         clear_bit(WAS_IO_ERROR, &device->flags);
1424         clear_bit(WAS_READ_ERROR, &device->flags);
1425
1426         /* and no leftover from previously aborted resync or verify, either */
1427         device->rs_total = 0;
1428         device->rs_failed = 0;
1429         atomic_set(&device->rs_pending_cnt, 0);
1430
1431         /* allocation not in the IO path, drbdsetup context */
1432         nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL);
1433         if (!nbc) {
1434                 retcode = ERR_NOMEM;
1435                 goto fail;
1436         }
1437         spin_lock_init(&nbc->md.uuid_lock);
1438
1439         new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
1440         if (!new_disk_conf) {
1441                 retcode = ERR_NOMEM;
1442                 goto fail;
1443         }
1444         nbc->disk_conf = new_disk_conf;
1445
1446         set_disk_conf_defaults(new_disk_conf);
1447         err = disk_conf_from_attrs(new_disk_conf, info);
1448         if (err) {
1449                 retcode = ERR_MANDATORY_TAG;
1450                 drbd_msg_put_info(from_attrs_err_to_txt(err));
1451                 goto fail;
1452         }
1453
1454         if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
1455                 new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;
1456
1457         new_plan = fifo_alloc((new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ);
1458         if (!new_plan) {
1459                 retcode = ERR_NOMEM;
1460                 goto fail;
1461         }
1462
1463         if (new_disk_conf->meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) {
1464                 retcode = ERR_MD_IDX_INVALID;
1465                 goto fail;
1466         }
1467
1468         write_lock_irq(&global_state_lock);
1469         retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
1470         write_unlock_irq(&global_state_lock);
1471         if (retcode != NO_ERROR)
1472                 goto fail;
1473
1474         rcu_read_lock();
1475         nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
1476         if (nc) {
1477                 if (new_disk_conf->fencing == FP_STONITH && nc->wire_protocol == DRBD_PROT_A) {
1478                         rcu_read_unlock();
1479                         retcode = ERR_STONITH_AND_PROT_A;
1480                         goto fail;
1481                 }
1482         }
1483         rcu_read_unlock();
1484
1485         bdev = blkdev_get_by_path(new_disk_conf->backing_dev,
1486                                   FMODE_READ | FMODE_WRITE | FMODE_EXCL, device);
1487         if (IS_ERR(bdev)) {
1488                 drbd_err(device, "open(\"%s\") failed with %ld\n", new_disk_conf->backing_dev,
1489                         PTR_ERR(bdev));
1490                 retcode = ERR_OPEN_DISK;
1491                 goto fail;
1492         }
1493         nbc->backing_bdev = bdev;
1494
1495         /*
1496          * meta_dev_idx >= 0: external fixed size, possibly multiple
1497          * drbd sharing one meta device.  TODO in that case, paranoia
1498          * check that [md_bdev, meta_dev_idx] is not yet used by some
1499          * other drbd minor!  (if you use drbd.conf + drbdadm, that
1500          * should check it for you already; but if you don't, or
1501          * someone fooled it, we need to double check here)
1502          */
1503         bdev = blkdev_get_by_path(new_disk_conf->meta_dev,
1504                                   FMODE_READ | FMODE_WRITE | FMODE_EXCL,
1505                                   (new_disk_conf->meta_dev_idx < 0) ?
1506                                   (void *)device : (void *)drbd_m_holder);
1507         if (IS_ERR(bdev)) {
1508                 drbd_err(device, "open(\"%s\") failed with %ld\n", new_disk_conf->meta_dev,
1509                         PTR_ERR(bdev));
1510                 retcode = ERR_OPEN_MD_DISK;
1511                 goto fail;
1512         }
1513         nbc->md_bdev = bdev;
1514
1515         if ((nbc->backing_bdev == nbc->md_bdev) !=
1516             (new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
1517              new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) {
1518                 retcode = ERR_MD_IDX_INVALID;
1519                 goto fail;
1520         }
1521
1522         resync_lru = lc_create("resync", drbd_bm_ext_cache,
1523                         1, 61, sizeof(struct bm_extent),
1524                         offsetof(struct bm_extent, lce));
1525         if (!resync_lru) {
1526                 retcode = ERR_NOMEM;
1527                 goto fail;
1528         }
1529
1530         /* Read our meta data super block early.
1531          * This also sets other on-disk offsets. */
1532         retcode = drbd_md_read(device, nbc);
1533         if (retcode != NO_ERROR)
1534                 goto fail;
1535
1536         if (new_disk_conf->al_extents < DRBD_AL_EXTENTS_MIN)
1537                 new_disk_conf->al_extents = DRBD_AL_EXTENTS_MIN;
1538         if (new_disk_conf->al_extents > drbd_al_extents_max(nbc))
1539                 new_disk_conf->al_extents = drbd_al_extents_max(nbc);
1540
1541         if (drbd_get_max_capacity(nbc) < new_disk_conf->disk_size) {
1542                 drbd_err(device, "max capacity %llu smaller than disk size %llu\n",
1543                         (unsigned long long) drbd_get_max_capacity(nbc),
1544                         (unsigned long long) new_disk_conf->disk_size);
1545                 retcode = ERR_DISK_TOO_SMALL;
1546                 goto fail;
1547         }
1548
1549         if (new_disk_conf->meta_dev_idx < 0) {
1550                 max_possible_sectors = DRBD_MAX_SECTORS_FLEX;
1551                 /* at least one MB, otherwise it does not make sense */
1552                 min_md_device_sectors = (2<<10);
1553         } else {
1554                 max_possible_sectors = DRBD_MAX_SECTORS;
1555                 min_md_device_sectors = MD_128MB_SECT * (new_disk_conf->meta_dev_idx + 1);
1556         }
1557
1558         if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) {
1559                 retcode = ERR_MD_DISK_TOO_SMALL;
1560                 drbd_warn(device, "refusing attach: md-device too small, "
1561                      "at least %llu sectors needed for this meta-disk type\n",
1562                      (unsigned long long) min_md_device_sectors);
1563                 goto fail;
1564         }
1565
1566         /* Make sure the new disk is big enough
1567          * (we may currently be R_PRIMARY with no local disk...) */
1568         if (drbd_get_max_capacity(nbc) <
1569             drbd_get_capacity(device->this_bdev)) {
1570                 retcode = ERR_DISK_TOO_SMALL;
1571                 goto fail;
1572         }
1573
1574         nbc->known_size = drbd_get_capacity(nbc->backing_bdev);
1575
1576         if (nbc->known_size > max_possible_sectors) {
1577                 drbd_warn(device, "==> truncating very big lower level device "
1578                         "to currently maximum possible %llu sectors <==\n",
1579                         (unsigned long long) max_possible_sectors);
1580                 if (new_disk_conf->meta_dev_idx >= 0)
1581                         drbd_warn(device, "==>> using internal or flexible "
1582                                       "meta data may help <<==\n");
1583         }
1584
1585         drbd_suspend_io(device);
1586         /* also wait for the last barrier ack. */
1587         /* FIXME see also https://daiquiri.linbit/cgi-bin/bugzilla/show_bug.cgi?id=171
1588          * We need a way to either ignore barrier acks for barriers sent before a device
1589          * was attached, or a way to wait for all pending barrier acks to come in.
1590          * As barriers are counted per resource,
1591          * we'd need to suspend io on all devices of a resource.
1592          */
1593         wait_event(device->misc_wait, !atomic_read(&device->ap_pending_cnt) || drbd_suspended(device));
1594         /* and for any other previously queued work */
1595         drbd_flush_workqueue(device);
1596
1597         rv = _drbd_request_state(device, NS(disk, D_ATTACHING), CS_VERBOSE);
1598         retcode = rv;  /* FIXME: Type mismatch. */
1599         drbd_resume_io(device);
1600         if (rv < SS_SUCCESS)
1601                 goto fail;
1602
1603         if (!get_ldev_if_state(device, D_ATTACHING))
1604                 goto force_diskless;
1605
1606         if (!device->bitmap) {
1607                 if (drbd_bm_init(device)) {
1608                         retcode = ERR_NOMEM;
1609                         goto force_diskless_dec;
1610                 }
1611         }
1612
1613         if (device->state.conn < C_CONNECTED &&
1614             device->state.role == R_PRIMARY &&
1615             (device->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) {
1616                 drbd_err(device, "Can only attach to data with current UUID=%016llX\n",
1617                     (unsigned long long)device->ed_uuid);
1618                 retcode = ERR_DATA_NOT_CURRENT;
1619                 goto force_diskless_dec;
1620         }
1621
1622         /* Since we are diskless, fix the activity log first... */
1623         if (drbd_check_al_size(device, new_disk_conf)) {
1624                 retcode = ERR_NOMEM;
1625                 goto force_diskless_dec;
1626         }
1627
1628         /* Prevent shrinking of consistent devices ! */
1629         if (drbd_md_test_flag(nbc, MDF_CONSISTENT) &&
1630             drbd_new_dev_size(device, nbc, nbc->disk_conf->disk_size, 0) < nbc->md.la_size_sect) {
1631                 drbd_warn(device, "refusing to truncate a consistent device\n");
1632                 retcode = ERR_DISK_TOO_SMALL;
1633                 goto force_diskless_dec;
1634         }
1635
1636         /* Reset the "barriers don't work" bits here, then force meta data to
1637          * be written, to ensure we determine if barriers are supported. */
1638         if (new_disk_conf->md_flushes)
1639                 clear_bit(MD_NO_FUA, &device->flags);
1640         else
1641                 set_bit(MD_NO_FUA, &device->flags);
1642
1643         /* Point of no return reached.
1644          * Devices and memory are no longer released by error cleanup below.
1645          * now device takes over responsibility, and the state engine should
1646          * clean it up somewhere.  */
1647         D_ASSERT(device, device->ldev == NULL);
1648         device->ldev = nbc;
1649         device->resync = resync_lru;
1650         device->rs_plan_s = new_plan;
1651         nbc = NULL;
1652         resync_lru = NULL;
1653         new_disk_conf = NULL;
1654         new_plan = NULL;
1655
1656         drbd_bump_write_ordering(first_peer_device(device)->connection, WO_bdev_flush);
1657
1658         if (drbd_md_test_flag(device->ldev, MDF_CRASHED_PRIMARY))
1659                 set_bit(CRASHED_PRIMARY, &device->flags);
1660         else
1661                 clear_bit(CRASHED_PRIMARY, &device->flags);
1662
1663         if (drbd_md_test_flag(device->ldev, MDF_PRIMARY_IND) &&
1664             !(device->state.role == R_PRIMARY &&
1665               first_peer_device(device)->connection->susp_nod))
1666                 set_bit(CRASHED_PRIMARY, &device->flags);
1667
1668         device->send_cnt = 0;
1669         device->recv_cnt = 0;
1670         device->read_cnt = 0;
1671         device->writ_cnt = 0;
1672
1673         drbd_reconsider_max_bio_size(device);
1674
1675         /* If I am currently not R_PRIMARY,
1676          * but meta data primary indicator is set,
1677          * I just now recover from a hard crash,
1678          * and have been R_PRIMARY before that crash.
1679          *
1680          * Now, if I had no connection before that crash
1681          * (have been degraded R_PRIMARY), chances are that
1682          * I won't find my peer now either.
1683          *
1684          * In that case, and _only_ in that case,
1685          * we use the degr-wfc-timeout instead of the default,
1686          * so we can automatically recover from a crash of a
1687          * degraded but active "cluster" after a certain timeout.
1688          */
1689         clear_bit(USE_DEGR_WFC_T, &device->flags);
1690         if (device->state.role != R_PRIMARY &&
1691              drbd_md_test_flag(device->ldev, MDF_PRIMARY_IND) &&
1692             !drbd_md_test_flag(device->ldev, MDF_CONNECTED_IND))
1693                 set_bit(USE_DEGR_WFC_T, &device->flags);
1694
1695         dd = drbd_determine_dev_size(device, 0, NULL);
1696         if (dd <= DS_ERROR) {
1697                 retcode = ERR_NOMEM_BITMAP;
1698                 goto force_diskless_dec;
1699         } else if (dd == DS_GREW)
1700                 set_bit(RESYNC_AFTER_NEG, &device->flags);
1701
1702         if (drbd_md_test_flag(device->ldev, MDF_FULL_SYNC) ||
1703             (test_bit(CRASHED_PRIMARY, &device->flags) &&
1704              drbd_md_test_flag(device->ldev, MDF_AL_DISABLED))) {
1705                 drbd_info(device, "Assuming that all blocks are out of sync "
1706                      "(aka FullSync)\n");
1707                 if (drbd_bitmap_io(device, &drbd_bmio_set_n_write,
1708                         "set_n_write from attaching", BM_LOCKED_MASK)) {
1709                         retcode = ERR_IO_MD_DISK;
1710                         goto force_diskless_dec;
1711                 }
1712         } else {
1713                 if (drbd_bitmap_io(device, &drbd_bm_read,
1714                         "read from attaching", BM_LOCKED_MASK)) {
1715                         retcode = ERR_IO_MD_DISK;
1716                         goto force_diskless_dec;
1717                 }
1718         }
1719
1720         if (_drbd_bm_total_weight(device) == drbd_bm_bits(device))
1721                 drbd_suspend_al(device); /* IO is still suspended here... */
1722
1723         spin_lock_irq(&first_peer_device(device)->connection->req_lock);
1724         os = drbd_read_state(device);
1725         ns = os;
1726         /* If MDF_CONSISTENT is not set go into inconsistent state,
1727            otherwise investigate MDF_WasUpToDate...
1728            If MDF_WAS_UP_TO_DATE is not set go into D_OUTDATED disk state,
1729            otherwise into D_CONSISTENT state.
1730         */
1731         if (drbd_md_test_flag(device->ldev, MDF_CONSISTENT)) {
1732                 if (drbd_md_test_flag(device->ldev, MDF_WAS_UP_TO_DATE))
1733                         ns.disk = D_CONSISTENT;
1734                 else
1735                         ns.disk = D_OUTDATED;
1736         } else {
1737                 ns.disk = D_INCONSISTENT;
1738         }
1739
1740         if (drbd_md_test_flag(device->ldev, MDF_PEER_OUT_DATED))
1741                 ns.pdsk = D_OUTDATED;
1742
1743         rcu_read_lock();
1744         if (ns.disk == D_CONSISTENT &&
1745             (ns.pdsk == D_OUTDATED || rcu_dereference(device->ldev->disk_conf)->fencing == FP_DONT_CARE))
1746                 ns.disk = D_UP_TO_DATE;
1747
1748         /* All tests on MDF_PRIMARY_IND, MDF_CONNECTED_IND,
1749            MDF_CONSISTENT and MDF_WAS_UP_TO_DATE must happen before
1750            this point, because drbd_request_state() modifies these
1751            flags. */
1752
1753         if (rcu_dereference(device->ldev->disk_conf)->al_updates)
1754                 device->ldev->md.flags &= ~MDF_AL_DISABLED;
1755         else
1756                 device->ldev->md.flags |= MDF_AL_DISABLED;
1757
1758         rcu_read_unlock();
1759
1760         /* In case we are C_CONNECTED postpone any decision on the new disk
1761            state after the negotiation phase. */
1762         if (device->state.conn == C_CONNECTED) {
1763                 device->new_state_tmp.i = ns.i;
1764                 ns.i = os.i;
1765                 ns.disk = D_NEGOTIATING;
1766
1767                 /* We expect to receive up-to-date UUIDs soon.
1768                    To avoid a race in receive_state, free p_uuid while
1769                    holding req_lock. I.e. atomic with the state change */
1770                 kfree(device->p_uuid);
1771                 device->p_uuid = NULL;
1772         }
1773
1774         rv = _drbd_set_state(device, ns, CS_VERBOSE, NULL);
1775         spin_unlock_irq(&first_peer_device(device)->connection->req_lock);
1776
1777         if (rv < SS_SUCCESS)
1778                 goto force_diskless_dec;
1779
1780         mod_timer(&device->request_timer, jiffies + HZ);
1781
1782         if (device->state.role == R_PRIMARY)
1783                 device->ldev->md.uuid[UI_CURRENT] |=  (u64)1;
1784         else
1785                 device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
1786
1787         drbd_md_mark_dirty(device);
1788         drbd_md_sync(device);
1789
1790         kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
1791         put_ldev(device);
1792         conn_reconfig_done(first_peer_device(device)->connection);
1793         drbd_adm_finish(info, retcode);
1794         return 0;
1795
1796  force_diskless_dec:
1797         put_ldev(device);
1798  force_diskless:
1799         drbd_force_state(device, NS(disk, D_DISKLESS));
1800         drbd_md_sync(device);
1801  fail:
1802         conn_reconfig_done(first_peer_device(device)->connection);
1803         if (nbc) {
1804                 if (nbc->backing_bdev)
1805                         blkdev_put(nbc->backing_bdev,
1806                                    FMODE_READ | FMODE_WRITE | FMODE_EXCL);
1807                 if (nbc->md_bdev)
1808                         blkdev_put(nbc->md_bdev,
1809                                    FMODE_READ | FMODE_WRITE | FMODE_EXCL);
1810                 kfree(nbc);
1811         }
1812         kfree(new_disk_conf);
1813         lc_destroy(resync_lru);
1814         kfree(new_plan);
1815
1816  finish:
1817         drbd_adm_finish(info, retcode);
1818         return 0;
1819 }
1820
1821 static int adm_detach(struct drbd_device *device, int force)
1822 {
1823         enum drbd_state_rv retcode;
1824         int ret;
1825
1826         if (force) {
1827                 set_bit(FORCE_DETACH, &device->flags);
1828                 drbd_force_state(device, NS(disk, D_FAILED));
1829                 retcode = SS_SUCCESS;
1830                 goto out;
1831         }
1832
1833         drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */
1834         drbd_md_get_buffer(device); /* make sure there is no in-flight meta-data IO */
1835         retcode = drbd_request_state(device, NS(disk, D_FAILED));
1836         drbd_md_put_buffer(device);
1837         /* D_FAILED will transition to DISKLESS. */
1838         ret = wait_event_interruptible(device->misc_wait,
1839                         device->state.disk != D_FAILED);
1840         drbd_resume_io(device);
1841         if ((int)retcode == (int)SS_IS_DISKLESS)
1842                 retcode = SS_NOTHING_TO_DO;
1843         if (ret)
1844                 retcode = ERR_INTR;
1845 out:
1846         return retcode;
1847 }
1848
1849 /* Detaching the disk is a process in multiple stages.  First we need to lock
1850  * out application IO, in-flight IO, IO stuck in drbd_al_begin_io.
1851  * Then we transition to D_DISKLESS, and wait for put_ldev() to return all
1852  * internal references as well.
1853  * Only then we have finally detached. */
1854 int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info)
1855 {
1856         enum drbd_ret_code retcode;
1857         struct detach_parms parms = { };
1858         int err;
1859
1860         retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
1861         if (!adm_ctx.reply_skb)
1862                 return retcode;
1863         if (retcode != NO_ERROR)
1864                 goto out;
1865
1866         if (info->attrs[DRBD_NLA_DETACH_PARMS]) {
1867                 err = detach_parms_from_attrs(&parms, info);
1868                 if (err) {
1869                         retcode = ERR_MANDATORY_TAG;
1870                         drbd_msg_put_info(from_attrs_err_to_txt(err));
1871                         goto out;
1872                 }
1873         }
1874
1875         retcode = adm_detach(adm_ctx.device, parms.force_detach);
1876 out:
1877         drbd_adm_finish(info, retcode);
1878         return 0;
1879 }
1880
1881 static bool conn_resync_running(struct drbd_connection *connection)
1882 {
1883         struct drbd_peer_device *peer_device;
1884         bool rv = false;
1885         int vnr;
1886
1887         rcu_read_lock();
1888         idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1889                 struct drbd_device *device = peer_device->device;
1890                 if (device->state.conn == C_SYNC_SOURCE ||
1891                     device->state.conn == C_SYNC_TARGET ||
1892                     device->state.conn == C_PAUSED_SYNC_S ||
1893                     device->state.conn == C_PAUSED_SYNC_T) {
1894                         rv = true;
1895                         break;
1896                 }
1897         }
1898         rcu_read_unlock();
1899
1900         return rv;
1901 }
1902
1903 static bool conn_ov_running(struct drbd_connection *connection)
1904 {
1905         struct drbd_peer_device *peer_device;
1906         bool rv = false;
1907         int vnr;
1908
1909         rcu_read_lock();
1910         idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1911                 struct drbd_device *device = peer_device->device;
1912                 if (device->state.conn == C_VERIFY_S ||
1913                     device->state.conn == C_VERIFY_T) {
1914                         rv = true;
1915                         break;
1916                 }
1917         }
1918         rcu_read_unlock();
1919
1920         return rv;
1921 }
1922
1923 static enum drbd_ret_code
1924 _check_net_options(struct drbd_connection *connection, struct net_conf *old_net_conf, struct net_conf *new_net_conf)
1925 {
1926         struct drbd_peer_device *peer_device;
1927         int i;
1928
1929         if (old_net_conf && connection->cstate == C_WF_REPORT_PARAMS && connection->agreed_pro_version < 100) {
1930                 if (new_net_conf->wire_protocol != old_net_conf->wire_protocol)
1931                         return ERR_NEED_APV_100;
1932
1933                 if (new_net_conf->two_primaries != old_net_conf->two_primaries)
1934                         return ERR_NEED_APV_100;
1935
1936                 if (strcmp(new_net_conf->integrity_alg, old_net_conf->integrity_alg))
1937                         return ERR_NEED_APV_100;
1938         }
1939
1940         if (!new_net_conf->two_primaries &&
1941             conn_highest_role(connection) == R_PRIMARY &&
1942             conn_highest_peer(connection) == R_PRIMARY)
1943                 return ERR_NEED_ALLOW_TWO_PRI;
1944
1945         if (new_net_conf->two_primaries &&
1946             (new_net_conf->wire_protocol != DRBD_PROT_C))
1947                 return ERR_NOT_PROTO_C;
1948
1949         idr_for_each_entry(&connection->peer_devices, peer_device, i) {
1950                 struct drbd_device *device = peer_device->device;
1951                 if (get_ldev(device)) {
1952                         enum drbd_fencing_p fp = rcu_dereference(device->ldev->disk_conf)->fencing;
1953                         put_ldev(device);
1954                         if (new_net_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH)
1955                                 return ERR_STONITH_AND_PROT_A;
1956                 }
1957                 if (device->state.role == R_PRIMARY && new_net_conf->discard_my_data)
1958                         return ERR_DISCARD_IMPOSSIBLE;
1959         }
1960
1961         if (new_net_conf->on_congestion != OC_BLOCK && new_net_conf->wire_protocol != DRBD_PROT_A)
1962                 return ERR_CONG_NOT_PROTO_A;
1963
1964         return NO_ERROR;
1965 }
1966
1967 static enum drbd_ret_code
1968 check_net_options(struct drbd_connection *connection, struct net_conf *new_net_conf)
1969 {
1970         static enum drbd_ret_code rv;
1971         struct drbd_peer_device *peer_device;
1972         int i;
1973
1974         rcu_read_lock();
1975         rv = _check_net_options(connection, rcu_dereference(connection->net_conf), new_net_conf);
1976         rcu_read_unlock();
1977
1978         /* connection->volumes protected by genl_lock() here */
1979         idr_for_each_entry(&connection->peer_devices, peer_device, i) {
1980                 struct drbd_device *device = peer_device->device;
1981                 if (!device->bitmap) {
1982                         if (drbd_bm_init(device))
1983                                 return ERR_NOMEM;
1984                 }
1985         }
1986
1987         return rv;
1988 }
1989
/*
 * Scratch set of hash transforms allocated for a new net_conf by
 * alloc_crypto() before they are handed over to the connection.
 * Members stay NULL when the corresponding algorithm name is empty or the
 * allocation failed; free_crypto() releases whatever is still referenced.
 */
struct crypto {
        struct crypto_hash *verify_tfm;     /* from net_conf->verify_alg */
        struct crypto_hash *csums_tfm;      /* from net_conf->csums_alg */
        struct crypto_hash *cram_hmac_tfm;  /* "hmac(<net_conf->cram_hmac_alg>)" */
        struct crypto_hash *integrity_tfm;  /* from net_conf->integrity_alg */
};
1996
1997 static int
1998 alloc_hash(struct crypto_hash **tfm, char *tfm_name, int err_alg)
1999 {
2000         if (!tfm_name[0])
2001                 return NO_ERROR;
2002
2003         *tfm = crypto_alloc_hash(tfm_name, 0, CRYPTO_ALG_ASYNC);
2004         if (IS_ERR(*tfm)) {
2005                 *tfm = NULL;
2006                 return err_alg;
2007         }
2008
2009         return NO_ERROR;
2010 }
2011
2012 static enum drbd_ret_code
2013 alloc_crypto(struct crypto *crypto, struct net_conf *new_net_conf)
2014 {
2015         char hmac_name[CRYPTO_MAX_ALG_NAME];
2016         enum drbd_ret_code rv;
2017
2018         rv = alloc_hash(&crypto->csums_tfm, new_net_conf->csums_alg,
2019                        ERR_CSUMS_ALG);
2020         if (rv != NO_ERROR)
2021                 return rv;
2022         rv = alloc_hash(&crypto->verify_tfm, new_net_conf->verify_alg,
2023                        ERR_VERIFY_ALG);
2024         if (rv != NO_ERROR)
2025                 return rv;
2026         rv = alloc_hash(&crypto->integrity_tfm, new_net_conf->integrity_alg,
2027                        ERR_INTEGRITY_ALG);
2028         if (rv != NO_ERROR)
2029                 return rv;
2030         if (new_net_conf->cram_hmac_alg[0] != 0) {
2031                 snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)",
2032                          new_net_conf->cram_hmac_alg);
2033
2034                 rv = alloc_hash(&crypto->cram_hmac_tfm, hmac_name,
2035                                ERR_AUTH_ALG);
2036         }
2037
2038         return rv;
2039 }
2040
2041 static void free_crypto(struct crypto *crypto)
2042 {
2043         crypto_free_hash(crypto->cram_hmac_tfm);
2044         crypto_free_hash(crypto->integrity_tfm);
2045         crypto_free_hash(crypto->csums_tfm);
2046         crypto_free_hash(crypto->verify_tfm);
2047 }
2048
2049 int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
2050 {
2051         enum drbd_ret_code retcode;
2052         struct drbd_connection *connection;
2053         struct net_conf *old_net_conf, *new_net_conf = NULL;
2054         int err;
2055         int ovr; /* online verify running */
2056         int rsr; /* re-sync running */
2057         struct crypto crypto = { };
2058
2059         retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONNECTION);
2060         if (!adm_ctx.reply_skb)
2061                 return retcode;
2062         if (retcode != NO_ERROR)
2063                 goto out;
2064
2065         connection = adm_ctx.connection;
2066
2067         new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
2068         if (!new_net_conf) {
2069                 retcode = ERR_NOMEM;
2070                 goto out;
2071         }
2072
2073         conn_reconfig_start(connection);
2074
2075         mutex_lock(&connection->data.mutex);
2076         mutex_lock(&connection->conf_update);
2077         old_net_conf = connection->net_conf;
2078
2079         if (!old_net_conf) {
2080                 drbd_msg_put_info("net conf missing, try connect");
2081                 retcode = ERR_INVALID_REQUEST;
2082                 goto fail;
2083         }
2084
2085         *new_net_conf = *old_net_conf;
2086         if (should_set_defaults(info))
2087                 set_net_conf_defaults(new_net_conf);
2088
2089         err = net_conf_from_attrs_for_change(new_net_conf, info);
2090         if (err && err != -ENOMSG) {
2091                 retcode = ERR_MANDATORY_TAG;
2092                 drbd_msg_put_info(from_attrs_err_to_txt(err));
2093                 goto fail;
2094         }
2095
2096         retcode = check_net_options(connection, new_net_conf);
2097         if (retcode != NO_ERROR)
2098                 goto fail;
2099
2100         /* re-sync running */
2101         rsr = conn_resync_running(connection);
2102         if (rsr && strcmp(new_net_conf->csums_alg, old_net_conf->csums_alg)) {
2103                 retcode = ERR_CSUMS_RESYNC_RUNNING;
2104                 goto fail;
2105         }
2106
2107         /* online verify running */
2108         ovr = conn_ov_running(connection);
2109         if (ovr && strcmp(new_net_conf->verify_alg, old_net_conf->verify_alg)) {
2110                 retcode = ERR_VERIFY_RUNNING;
2111                 goto fail;
2112         }
2113
2114         retcode = alloc_crypto(&crypto, new_net_conf);
2115         if (retcode != NO_ERROR)
2116                 goto fail;
2117
2118         rcu_assign_pointer(connection->net_conf, new_net_conf);
2119
2120         if (!rsr) {
2121                 crypto_free_hash(connection->csums_tfm);
2122                 connection->csums_tfm = crypto.csums_tfm;
2123                 crypto.csums_tfm = NULL;
2124         }
2125         if (!ovr) {
2126                 crypto_free_hash(connection->verify_tfm);
2127                 connection->verify_tfm = crypto.verify_tfm;
2128                 crypto.verify_tfm = NULL;
2129         }
2130
2131         crypto_free_hash(connection->integrity_tfm);
2132         connection->integrity_tfm = crypto.integrity_tfm;
2133         if (connection->cstate >= C_WF_REPORT_PARAMS && connection->agreed_pro_version >= 100)
2134                 /* Do this without trying to take connection->data.mutex again.  */
2135                 __drbd_send_protocol(connection, P_PROTOCOL_UPDATE);
2136
2137         crypto_free_hash(connection->cram_hmac_tfm);
2138         connection->cram_hmac_tfm = crypto.cram_hmac_tfm;
2139
2140         mutex_unlock(&connection->conf_update);
2141         mutex_unlock(&connection->data.mutex);
2142         synchronize_rcu();
2143         kfree(old_net_conf);
2144
2145         if (connection->cstate >= C_WF_REPORT_PARAMS)
2146                 drbd_send_sync_param(minor_to_device(conn_lowest_minor(connection)));
2147
2148         goto done;
2149
2150  fail:
2151         mutex_unlock(&connection->conf_update);
2152         mutex_unlock(&connection->data.mutex);
2153         free_crypto(&crypto);
2154         kfree(new_net_conf);
2155  done:
2156         conn_reconfig_done(connection);
2157  out:
2158         drbd_adm_finish(info, retcode);
2159         return 0;
2160 }
2161
/*
 * Netlink handler: configure the network side of a connection.
 *
 * Checks that both endpoint addresses were supplied and are not already in
 * use by another connection, builds a net_conf from the defaults plus the
 * netlink attributes, validates it, allocates the hash transforms, installs
 * everything on the connection and finally requests the C_UNCONNECTED
 * connection state.
 *
 * When no reply skb could be set up, the result of drbd_adm_prepare() is
 * returned directly; otherwise the outcome is passed to drbd_adm_finish()
 * and 0 is returned.
 */
int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
{
        struct drbd_peer_device *peer_device;
        struct net_conf *old_net_conf, *new_net_conf = NULL;
        struct crypto crypto = { };
        struct drbd_resource *resource;
        struct drbd_connection *connection;
        enum drbd_ret_code retcode;
        int i;
        int err;

        retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE);

        if (!adm_ctx.reply_skb)
                return retcode;
        if (retcode != NO_ERROR)
                goto out;
        if (!(adm_ctx.my_addr && adm_ctx.peer_addr)) {
                drbd_msg_put_info("connection endpoint(s) missing");
                retcode = ERR_INVALID_REQUEST;
                goto out;
        }

        /* No need for _rcu here. All reconfiguration is
         * strictly serialized on genl_lock(). We are protected against
         * concurrent reconfiguration/addition/deletion */
        /* Refuse the request if either address is already used by any
         * connection of any resource. */
        for_each_resource(resource, &drbd_resources) {
                for_each_connection(connection, resource) {
                        if (nla_len(adm_ctx.my_addr) == connection->my_addr_len &&
                            !memcmp(nla_data(adm_ctx.my_addr), &connection->my_addr,
                                    connection->my_addr_len)) {
                                retcode = ERR_LOCAL_ADDR;
                                goto out;
                        }

                        if (nla_len(adm_ctx.peer_addr) == connection->peer_addr_len &&
                            !memcmp(nla_data(adm_ctx.peer_addr), &connection->peer_addr,
                                    connection->peer_addr_len)) {
                                retcode = ERR_PEER_ADDR;
                                goto out;
                        }
                }
        }

        /* 'connection' was reused as the loop cursor above; from here on it
         * is the connection this request operates on. */
        connection = adm_ctx.connection;
        conn_reconfig_start(connection);

        if (connection->cstate > C_STANDALONE) {
                retcode = ERR_NET_CONFIGURED;
                goto fail;
        }

        /* allocation not in the IO path, drbdsetup / netlink process context */
        new_net_conf = kzalloc(sizeof(*new_net_conf), GFP_KERNEL);
        if (!new_net_conf) {
                retcode = ERR_NOMEM;
                goto fail;
        }

        set_net_conf_defaults(new_net_conf);

        /* -ENOMSG is tolerated here; any other error aborts the request. */
        err = net_conf_from_attrs(new_net_conf, info);
        if (err && err != -ENOMSG) {
                retcode = ERR_MANDATORY_TAG;
                drbd_msg_put_info(from_attrs_err_to_txt(err));
                goto fail;
        }

        retcode = check_net_options(connection, new_net_conf);
        if (retcode != NO_ERROR)
                goto fail;

        retcode = alloc_crypto(&crypto, new_net_conf);
        if (retcode != NO_ERROR)
                goto fail;

        /* Force NUL termination of the shared secret. */
        ((char *)new_net_conf->shared_secret)[SHARED_SECRET_MAX-1] = 0;

        conn_flush_workqueue(connection);

        mutex_lock(&connection->conf_update);
        old_net_conf = connection->net_conf;
        if (old_net_conf) {
                retcode = ERR_NET_CONFIGURED;
                mutex_unlock(&connection->conf_update);
                goto fail;
        }
        rcu_assign_pointer(connection->net_conf, new_net_conf);

        /* Ownership of the new transforms moves to the connection. */
        conn_free_crypto(connection);
        connection->cram_hmac_tfm = crypto.cram_hmac_tfm;
        connection->integrity_tfm = crypto.integrity_tfm;
        connection->csums_tfm = crypto.csums_tfm;
        connection->verify_tfm = crypto.verify_tfm;

        /* NOTE(review): the copy length comes straight from the netlink
         * attribute; presumably bounded to the size of the address fields
         * by attribute validation elsewhere -- confirm. */
        connection->my_addr_len = nla_len(adm_ctx.my_addr);
        memcpy(&connection->my_addr, nla_data(adm_ctx.my_addr), connection->my_addr_len);
        connection->peer_addr_len = nla_len(adm_ctx.peer_addr);
        memcpy(&connection->peer_addr, nla_data(adm_ctx.peer_addr), connection->peer_addr_len);

        mutex_unlock(&connection->conf_update);

        /* Reset the per-device send/receive counters. */
        rcu_read_lock();
        idr_for_each_entry(&connection->peer_devices, peer_device, i) {
                struct drbd_device *device = peer_device->device;
                device->send_cnt = 0;
                device->recv_cnt = 0;
        }
        rcu_read_unlock();

        retcode = conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);

        conn_reconfig_done(connection);
        drbd_adm_finish(info, retcode);
        return 0;

fail:
        /* Nothing was installed on the connection: drop what we allocated. */
        free_crypto(&crypto);
        kfree(new_net_conf);

        conn_reconfig_done(connection);
out:
        drbd_adm_finish(info, retcode);
        return 0;
}
2287
2288 static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection, bool force)
2289 {
2290         enum drbd_state_rv rv;
2291
2292         rv = conn_request_state(connection, NS(conn, C_DISCONNECTING),
2293                         force ? CS_HARD : 0);
2294
2295         switch (rv) {
2296         case SS_NOTHING_TO_DO:
2297                 break;
2298         case SS_ALREADY_STANDALONE:
2299                 return SS_SUCCESS;
2300         case SS_PRIMARY_NOP:
2301                 /* Our state checking code wants to see the peer outdated. */
2302                 rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING, pdsk, D_OUTDATED), 0);
2303
2304                 if (rv == SS_OUTDATE_WO_CONN) /* lost connection before graceful disconnect succeeded */
2305                         rv = conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_VERBOSE);
2306
2307                 break;
2308         case SS_CW_FAILED_BY_PEER:
2309                 /* The peer probably wants to see us outdated. */
2310                 rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING,
2311                                                         disk, D_OUTDATED), 0);
2312                 if (rv == SS_IS_DISKLESS || rv == SS_LOWER_THAN_OUTDATED) {
2313                         rv = conn_request_state(connection, NS(conn, C_DISCONNECTING),
2314                                         CS_HARD);
2315                 }
2316                 break;
2317         default:;
2318                 /* no special handling necessary */
2319         }
2320
2321         if (rv >= SS_SUCCESS) {
2322                 enum drbd_state_rv rv2;
2323                 /* No one else can reconfigure the network while I am here.
2324                  * The state handling only uses drbd_thread_stop_nowait(),
2325                  * we want to really wait here until the receiver is no more.
2326                  */
2327                 drbd_thread_stop(&connection->receiver);
2328
2329                 /* Race breaker.  This additional state change request may be
2330                  * necessary, if this was a forced disconnect during a receiver
2331                  * restart.  We may have "killed" the receiver thread just
2332                  * after drbdd_init() returned.  Typically, we should be
2333                  * C_STANDALONE already, now, and this becomes a no-op.
2334                  */
2335                 rv2 = conn_request_state(connection, NS(conn, C_STANDALONE),
2336                                 CS_VERBOSE | CS_HARD);
2337                 if (rv2 < SS_SUCCESS)
2338                         drbd_err(connection,
2339                                 "unexpected rv2=%d in conn_try_disconnect()\n",
2340                                 rv2);
2341         }
2342         return rv;
2343 }
2344
2345 int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
2346 {
2347         struct disconnect_parms parms;
2348         struct drbd_connection *connection;
2349         enum drbd_state_rv rv;
2350         enum drbd_ret_code retcode;
2351         int err;
2352
2353         retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONNECTION);
2354         if (!adm_ctx.reply_skb)
2355                 return retcode;
2356         if (retcode != NO_ERROR)
2357                 goto fail;
2358
2359         connection = adm_ctx.connection;
2360         memset(&parms, 0, sizeof(parms));
2361         if (info->attrs[DRBD_NLA_DISCONNECT_PARMS]) {
2362                 err = disconnect_parms_from_attrs(&parms, info);
2363                 if (err) {
2364                         retcode = ERR_MANDATORY_TAG;
2365                         drbd_msg_put_info(from_attrs_err_to_txt(err));
2366                         goto fail;
2367                 }
2368         }
2369
2370         rv = conn_try_disconnect(connection, parms.force_disconnect);
2371         if (rv < SS_SUCCESS)
2372                 retcode = rv;  /* FIXME: Type mismatch. */
2373         else
2374                 retcode = NO_ERROR;
2375  fail:
2376         drbd_adm_finish(info, retcode);
2377         return 0;
2378 }
2379
2380 void resync_after_online_grow(struct drbd_device *device)
2381 {
2382         int iass; /* I am sync source */
2383
2384         drbd_info(device, "Resync of new storage after online grow\n");
2385         if (device->state.role != device->state.peer)
2386                 iass = (device->state.role == R_PRIMARY);
2387         else
2388                 iass = test_bit(RESOLVE_CONFLICTS, &first_peer_device(device)->connection->flags);
2389
2390         if (iass)
2391                 drbd_start_resync(device, C_SYNC_SOURCE);
2392         else
2393                 _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE + CS_SERIALIZE);
2394 }
2395
2396 int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
2397 {
2398         struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
2399         struct resize_parms rs;
2400         struct drbd_device *device;
2401         enum drbd_ret_code retcode;
2402         enum determine_dev_size dd;
2403         bool change_al_layout = false;
2404         enum dds_flags ddsf;
2405         sector_t u_size;
2406         int err;
2407
2408         retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2409         if (!adm_ctx.reply_skb)
2410                 return retcode;
2411         if (retcode != NO_ERROR)
2412                 goto fail;
2413
2414         device = adm_ctx.device;
2415         if (!get_ldev(device)) {
2416                 retcode = ERR_NO_DISK;
2417                 goto fail;
2418         }
2419
2420         memset(&rs, 0, sizeof(struct resize_parms));
2421         rs.al_stripes = device->ldev->md.al_stripes;
2422         rs.al_stripe_size = device->ldev->md.al_stripe_size_4k * 4;
2423         if (info->attrs[DRBD_NLA_RESIZE_PARMS]) {
2424                 err = resize_parms_from_attrs(&rs, info);
2425                 if (err) {
2426                         retcode = ERR_MANDATORY_TAG;
2427                         drbd_msg_put_info(from_attrs_err_to_txt(err));
2428                         goto fail_ldev;
2429                 }
2430         }
2431
2432         if (device->state.conn > C_CONNECTED) {
2433                 retcode = ERR_RESIZE_RESYNC;
2434                 goto fail_ldev;
2435         }
2436
2437         if (device->state.role == R_SECONDARY &&
2438             device->state.peer == R_SECONDARY) {
2439                 retcode = ERR_NO_PRIMARY;
2440                 goto fail_ldev;
2441         }
2442
2443         if (rs.no_resync && first_peer_device(device)->connection->agreed_pro_version < 93) {
2444                 retcode = ERR_NEED_APV_93;
2445                 goto fail_ldev;
2446         }
2447
2448         rcu_read_lock();
2449         u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;
2450         rcu_read_unlock();
2451         if (u_size != (sector_t)rs.resize_size) {
2452                 new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL);
2453                 if (!new_disk_conf) {
2454                         retcode = ERR_NOMEM;
2455                         goto fail_ldev;
2456                 }
2457         }
2458
2459         if (device->ldev->md.al_stripes != rs.al_stripes ||
2460             device->ldev->md.al_stripe_size_4k != rs.al_stripe_size / 4) {
2461                 u32 al_size_k = rs.al_stripes * rs.al_stripe_size;
2462
2463                 if (al_size_k > (16 * 1024 * 1024)) {
2464                         retcode = ERR_MD_LAYOUT_TOO_BIG;
2465                         goto fail_ldev;
2466                 }
2467
2468                 if (al_size_k < MD_32kB_SECT/2) {
2469                         retcode = ERR_MD_LAYOUT_TOO_SMALL;
2470                         goto fail_ldev;
2471                 }
2472
2473                 if (device->state.conn != C_CONNECTED) {
2474                         retcode = ERR_MD_LAYOUT_CONNECTED;
2475                         goto fail_ldev;
2476                 }
2477
2478                 change_al_layout = true;
2479         }
2480
2481         if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev))
2482                 device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
2483
2484         if (new_disk_conf) {
2485                 mutex_lock(&first_peer_device(device)->connection->conf_update);
2486                 old_disk_conf = device->ldev->disk_conf;
2487                 *new_disk_conf = *old_disk_conf;
2488                 new_disk_conf->disk_size = (sector_t)rs.resize_size;
2489                 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
2490                 mutex_unlock(&first_peer_device(device)->connection->conf_update);
2491                 synchronize_rcu();
2492                 kfree(old_disk_conf);
2493         }
2494
2495         ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0);
2496         dd = drbd_determine_dev_size(device, ddsf, change_al_layout ? &rs : NULL);
2497         drbd_md_sync(device);
2498         put_ldev(device);
2499         if (dd == DS_ERROR) {
2500                 retcode = ERR_NOMEM_BITMAP;
2501                 goto fail;
2502         } else if (dd == DS_ERROR_SPACE_MD) {
2503                 retcode = ERR_MD_LAYOUT_NO_FIT;
2504                 goto fail;
2505         } else if (dd == DS_ERROR_SHRINK) {
2506                 retcode = ERR_IMPLICIT_SHRINK;
2507                 goto fail;
2508         }
2509
2510         if (device->state.conn == C_CONNECTED) {
2511                 if (dd == DS_GREW)
2512                         set_bit(RESIZE_PENDING, &device->flags);
2513
2514                 drbd_send_uuids(device);
2515                 drbd_send_sizes(device, 1, ddsf);
2516         }
2517
2518  fail:
2519         drbd_adm_finish(info, retcode);
2520         return 0;
2521
2522  fail_ldev:
2523         put_ldev(device);
2524         goto fail;
2525 }
2526
2527 int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info)
2528 {
2529         enum drbd_ret_code retcode;
2530         struct res_opts res_opts;
2531         int err;
2532
2533         retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE);
2534         if (!adm_ctx.reply_skb)
2535                 return retcode;
2536         if (retcode != NO_ERROR)
2537                 goto fail;
2538
2539         res_opts = adm_ctx.resource->res_opts;
2540         if (should_set_defaults(info))
2541                 set_res_opts_defaults(&res_opts);
2542
2543         err = res_opts_from_attrs(&res_opts, info);
2544         if (err && err != -ENOMSG) {
2545                 retcode = ERR_MANDATORY_TAG;
2546                 drbd_msg_put_info(from_attrs_err_to_txt(err));
2547                 goto fail;
2548         }
2549
2550         err = set_resource_options(adm_ctx.resource, &res_opts);
2551         if (err) {
2552                 retcode = ERR_INVALID_REQUEST;
2553                 if (err == -ENOMEM)
2554                         retcode = ERR_NOMEM;
2555         }
2556
2557 fail:
2558         drbd_adm_finish(info, retcode);
2559         return 0;
2560 }
2561
2562 int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
2563 {
2564         struct drbd_device *device;
2565         int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
2566
2567         retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2568         if (!adm_ctx.reply_skb)
2569                 return retcode;
2570         if (retcode != NO_ERROR)
2571                 goto out;
2572
2573         device = adm_ctx.device;
2574
2575         /* If there is still bitmap IO pending, probably because of a previous
2576          * resync just being finished, wait for it before requesting a new resync.
2577          * Also wait for it's after_state_ch(). */
2578         drbd_suspend_io(device);
2579         wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
2580         drbd_flush_workqueue(device);
2581
2582         /* If we happen to be C_STANDALONE R_SECONDARY, just change to
2583          * D_INCONSISTENT, and set all bits in the bitmap.  Otherwise,
2584          * try to start a resync handshake as sync target for full sync.
2585          */
2586         if (device->state.conn == C_STANDALONE && device->state.role == R_SECONDARY) {
2587                 retcode = drbd_request_state(device, NS(disk, D_INCONSISTENT));
2588                 if (retcode >= SS_SUCCESS) {
2589                         if (drbd_bitmap_io(device, &drbd_bmio_set_n_write,
2590                                 "set_n_write from invalidate", BM_LOCKED_MASK))
2591                                 retcode = ERR_IO_MD_DISK;
2592                 }
2593         } else
2594                 retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_T));
2595         drbd_resume_io(device);
2596
2597 out:
2598         drbd_adm_finish(info, retcode);
2599         return 0;
2600 }
2601
2602 static int drbd_adm_simple_request_state(struct sk_buff *skb, struct genl_info *info,
2603                 union drbd_state mask, union drbd_state val)
2604 {
2605         enum drbd_ret_code retcode;
2606
2607         retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2608         if (!adm_ctx.reply_skb)
2609                 return retcode;
2610         if (retcode != NO_ERROR)
2611                 goto out;
2612
2613         retcode = drbd_request_state(adm_ctx.device, mask, val);
2614 out:
2615         drbd_adm_finish(info, retcode);
2616         return 0;
2617 }
2618
/*
 * Bitmap IO callback: run drbd_bmio_set_n_write() and then suspend the
 * activity log, regardless of the outcome.
 *
 * Returns the result of drbd_bmio_set_n_write().
 */
static int drbd_bmio_set_susp_al(struct drbd_device *device)
{
        int write_result = drbd_bmio_set_n_write(device);

        drbd_suspend_al(device);
        return write_result;
}
2627
2628 int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
2629 {
2630         int retcode; /* drbd_ret_code, drbd_state_rv */
2631         struct drbd_device *device;
2632
2633         retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2634         if (!adm_ctx.reply_skb)
2635                 return retcode;
2636         if (retcode != NO_ERROR)
2637                 goto out;
2638
2639         device = adm_ctx.device;
2640
2641         /* If there is still bitmap IO pending, probably because of a previous
2642          * resync just being finished, wait for it before requesting a new resync.
2643          * Also wait for it's after_state_ch(). */
2644         drbd_suspend_io(device);
2645         wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
2646         drbd_flush_workqueue(device);
2647
2648         /* If we happen to be C_STANDALONE R_PRIMARY, just set all bits
2649          * in the bitmap.  Otherwise, try to start a resync handshake
2650          * as sync source for full sync.
2651          */
2652         if (device->state.conn == C_STANDALONE && device->state.role == R_PRIMARY) {
2653                 /* The peer will get a resync upon connect anyways. Just make that
2654                    into a full resync. */
2655                 retcode = drbd_request_state(device, NS(pdsk, D_INCONSISTENT));
2656                 if (retcode >= SS_SUCCESS) {
2657                         if (drbd_bitmap_io(device, &drbd_bmio_set_susp_al,
2658                                 "set_n_write from invalidate_peer",
2659                                 BM_LOCKED_SET_ALLOWED))
2660                                 retcode = ERR_IO_MD_DISK;
2661                 }
2662         } else
2663                 retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_S));
2664         drbd_resume_io(device);
2665
2666 out:
2667         drbd_adm_finish(info, retcode);
2668         return 0;
2669 }
2670
2671 int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info)
2672 {
2673         enum drbd_ret_code retcode;
2674
2675         retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2676         if (!adm_ctx.reply_skb)
2677                 return retcode;
2678         if (retcode != NO_ERROR)
2679                 goto out;
2680
2681         if (drbd_request_state(adm_ctx.device, NS(user_isp, 1)) == SS_NOTHING_TO_DO)
2682                 retcode = ERR_PAUSE_IS_SET;
2683 out:
2684         drbd_adm_finish(info, retcode);
2685         return 0;
2686 }
2687
2688 int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info)
2689 {
2690         union drbd_dev_state s;
2691         enum drbd_ret_code retcode;
2692
2693         retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2694         if (!adm_ctx.reply_skb)
2695                 return retcode;
2696         if (retcode != NO_ERROR)
2697                 goto out;
2698
2699         if (drbd_request_state(adm_ctx.device, NS(user_isp, 0)) == SS_NOTHING_TO_DO) {
2700                 s = adm_ctx.device->state;
2701                 if (s.conn == C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) {
2702                         retcode = s.aftr_isp ? ERR_PIC_AFTER_DEP :
2703                                   s.peer_isp ? ERR_PIC_PEER_DEP : ERR_PAUSE_IS_CLEAR;
2704                 } else {
2705                         retcode = ERR_PAUSE_IS_CLEAR;
2706                 }
2707         }
2708
2709 out:
2710         drbd_adm_finish(info, retcode);
2711         return 0;
2712 }
2713
/*
 * Netlink handler: request the "susp" state field to be set to 1 on the
 * addressed minor.  All work is delegated to
 * drbd_adm_simple_request_state(); NS(susp, 1) supplies both its mask and
 * val arguments.
 */
int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info)
{
        return drbd_adm_simple_request_state(skb, info, NS(susp, 1));
}
2718
2719 int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info)
2720 {
2721         struct drbd_device *device;
2722         int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
2723
2724         retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2725         if (!adm_ctx.reply_skb)
2726                 return retcode;
2727         if (retcode != NO_ERROR)
2728                 goto out;
2729
2730         device = adm_ctx.device;
2731         if (test_bit(NEW_CUR_UUID, &device->flags)) {
2732                 drbd_uuid_new_current(device);
2733                 clear_bit(NEW_CUR_UUID, &device->flags);
2734         }
2735         drbd_suspend_io(device);
2736         retcode = drbd_request_state(device, NS3(susp, 0, susp_nod, 0, susp_fen, 0));
2737         if (retcode == SS_SUCCESS) {
2738                 if (device->state.conn < C_CONNECTED)
2739                         tl_clear(first_peer_device(device)->connection);
2740                 if (device->state.disk == D_DISKLESS || device->state.disk == D_FAILED)
2741                         tl_restart(first_peer_device(device)->connection, FAIL_FROZEN_DISK_IO);
2742         }
2743         drbd_resume_io(device);
2744
2745 out:
2746         drbd_adm_finish(info, retcode);
2747         return 0;
2748 }
2749
/*
 * Netlink handler: request the disk state D_OUTDATED on the addressed
 * minor.  All work is delegated to drbd_adm_simple_request_state();
 * NS(disk, D_OUTDATED) supplies both its mask and val arguments.
 */
int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info)
{
        return drbd_adm_simple_request_state(skb, info, NS(disk, D_OUTDATED));
}
2754
2755 static int nla_put_drbd_cfg_context(struct sk_buff *skb,
2756                                     struct drbd_resource *resource,
2757                                     struct drbd_connection *connection,
2758                                     struct drbd_device *device)
2759 {
2760         struct nlattr *nla;
2761         nla = nla_nest_start(skb, DRBD_NLA_CFG_CONTEXT);
2762         if (!nla)
2763                 goto nla_put_failure;
2764         if (device &&
2765             nla_put_u32(skb, T_ctx_volume, device->vnr))
2766                 goto nla_put_failure;
2767         if (nla_put_string(skb, T_ctx_resource_name, connection->resource->name))
2768                 goto nla_put_failure;
2769         if (connection) {
2770                 if (connection->my_addr_len &&
2771                     nla_put(skb, T_ctx_my_addr, connection->my_addr_len, &connection->my_addr))
2772                         goto nla_put_failure;
2773                 if (connection->peer_addr_len &&
2774                     nla_put(skb, T_ctx_peer_addr, connection->peer_addr_len, &connection->peer_addr))
2775                         goto nla_put_failure;
2776         }
2777         nla_nest_end(skb, nla);
2778         return 0;
2779
2780 nla_put_failure:
2781         if (nla)
2782                 nla_nest_cancel(skb, nla);
2783         return -EMSGSIZE;
2784 }
2785
2786 /*
2787  * Return the connection of @resource if @resource has exactly one connection.
2788  */
2789 static struct drbd_connection *the_only_connection(struct drbd_resource *resource)
2790 {
2791         struct list_head *connections = &resource->connections;
2792
2793         if (list_empty(connections) || connections->next->next != connections)
2794                 return NULL;
2795         return list_first_entry(&resource->connections, struct drbd_connection, connections);
2796 }
2797
/*
 * Fill @skb with the status of @device: configuration context, resource
 * options, disk and net configuration (where present) and a nested
 * DRBD_NLA_STATE_INFO attribute with state, counters and, if a local disk
 * reference could be taken, meta data and bitmap information.
 *
 * @sib: NULL for a status reply; otherwise describes the broadcast event,
 *       and event specific attributes are appended to the state info.
 *
 * Returns 0 on success and -EMSGSIZE if any attribute could not be added.
 * A local disk reference taken here is dropped again before returning.
 */
int nla_put_status_info(struct sk_buff *skb, struct drbd_device *device,
                const struct sib_info *sib)
{
        struct drbd_resource *resource = device->resource;
        struct state_info *si = NULL; /* for sizeof(si->member); */
        struct nlattr *nla;
        int got_ldev;
        int err = 0;
        int exclude_sensitive;

        /* If sib != NULL, this is drbd_bcast_event, which anyone can listen
         * to.  So we better exclude_sensitive information.
         *
         * If sib == NULL, this is drbd_adm_get_status, executed synchronously
         * in the context of the requesting user process. Exclude sensitive
         * information, unless current has superuser.
         *
         * NOTE: for drbd_adm_get_status_all(), this is a netlink dump, and
         * relies on the current implementation of netlink_dump(), which
         * executes the dump callback successively from netlink_recvmsg(),
         * always in the context of the receiving process */
        exclude_sensitive = sib || !capable(CAP_SYS_ADMIN);

        /* Remember whether we hold a local disk reference; it gates all
         * device->ldev accesses below and the put_ldev() at the end. */
        got_ldev = get_ldev(device);

        /* We need to add connection name and volume number information still.
         * Minor number is in drbd_genlmsghdr. */
        if (nla_put_drbd_cfg_context(skb, resource, the_only_connection(resource), device))
                goto nla_put_failure;

        if (res_opts_to_skb(skb, &device->resource->res_opts, exclude_sensitive))
                goto nla_put_failure;

        /* disk_conf and net_conf are read under rcu_read_lock(); the error
         * is only evaluated after the read side section has been left. */
        rcu_read_lock();
        if (got_ldev) {
                struct disk_conf *disk_conf;

                disk_conf = rcu_dereference(device->ldev->disk_conf);
                err = disk_conf_to_skb(skb, disk_conf, exclude_sensitive);
        }
        if (!err) {
                struct net_conf *nc;

                nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
                if (nc)
                        err = net_conf_to_skb(skb, nc, exclude_sensitive);
        }
        rcu_read_unlock();
        if (err)
                goto nla_put_failure;

        nla = nla_nest_start(skb, DRBD_NLA_STATE_INFO);
        if (!nla)
                goto nla_put_failure;
        if (nla_put_u32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY) ||
            nla_put_u32(skb, T_current_state, device->state.i) ||
            nla_put_u64(skb, T_ed_uuid, device->ed_uuid) ||
            nla_put_u64(skb, T_capacity, drbd_get_capacity(device->this_bdev)) ||
            nla_put_u64(skb, T_send_cnt, device->send_cnt) ||
            nla_put_u64(skb, T_recv_cnt, device->recv_cnt) ||
            nla_put_u64(skb, T_read_cnt, device->read_cnt) ||
            nla_put_u64(skb, T_writ_cnt, device->writ_cnt) ||
            nla_put_u64(skb, T_al_writ_cnt, device->al_writ_cnt) ||
            nla_put_u64(skb, T_bm_writ_cnt, device->bm_writ_cnt) ||
            nla_put_u32(skb, T_ap_bio_cnt, atomic_read(&device->ap_bio_cnt)) ||
            nla_put_u32(skb, T_ap_pending_cnt, atomic_read(&device->ap_pending_cnt)) ||
            nla_put_u32(skb, T_rs_pending_cnt, atomic_read(&device->rs_pending_cnt)))
                goto nla_put_failure;

        if (got_ldev) {
                /* NOTE(review): this local shadows the function level err;
                 * harmless, since the failure path below overwrites the
                 * outer err with -EMSGSIZE. */
                int err;

                /* Copy the uuid array while holding md.uuid_lock. */
                spin_lock_irq(&device->ldev->md.uuid_lock);
                err = nla_put(skb, T_uuids, sizeof(si->uuids), device->ldev->md.uuid);
                spin_unlock_irq(&device->ldev->md.uuid_lock);

                if (err)
                        goto nla_put_failure;

                if (nla_put_u32(skb, T_disk_flags, device->ldev->md.flags) ||
                    nla_put_u64(skb, T_bits_total, drbd_bm_bits(device)) ||
                    nla_put_u64(skb, T_bits_oos, drbd_bm_total_weight(device)))
                        goto nla_put_failure;
                /* Resync totals are only reported while state.conn lies in
                 * the range C_SYNC_SOURCE .. C_PAUSED_SYNC_T. */
                if (C_SYNC_SOURCE <= device->state.conn &&
                    C_PAUSED_SYNC_T >= device->state.conn) {
                        if (nla_put_u64(skb, T_bits_rs_total, device->rs_total) ||
                            nla_put_u64(skb, T_bits_rs_failed, device->rs_failed))
                                goto nla_put_failure;
                }
        }

        if (sib) {
                switch(sib->sib_reason) {
                case SIB_SYNC_PROGRESS:
                case SIB_GET_STATUS_REPLY:
                        break;
                case SIB_STATE_CHANGE:
                        if (nla_put_u32(skb, T_prev_state, sib->os.i) ||
                            nla_put_u32(skb, T_new_state, sib->ns.i))
                                goto nla_put_failure;
                        break;
                case SIB_HELPER_POST:
                        if (nla_put_u32(skb, T_helper_exit_code,
                                        sib->helper_exit_code))
                                goto nla_put_failure;
                        /* fall through */
                case SIB_HELPER_PRE:
                        if (nla_put_string(skb, T_helper, sib->helper_name))
                                goto nla_put_failure;
                        break;
                }
        }
        nla_nest_end(skb, nla);

        /* The "if (0)" keeps the success path from executing the assignment;
         * it is only reached by jumping to the nla_put_failure label.  Both
         * paths then share the put_ldev()/return below. */
        if (0)
nla_put_failure:
                err = -EMSGSIZE;
        if (got_ldev)
                put_ldev(device);
        return err;
}
2919
2920 int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info)
2921 {
2922         enum drbd_ret_code retcode;
2923         int err;
2924
2925         retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2926         if (!adm_ctx.reply_skb)
2927                 return retcode;
2928         if (retcode != NO_ERROR)
2929                 goto out;
2930
2931         err = nla_put_status_info(adm_ctx.reply_skb, adm_ctx.device, NULL);
2932         if (err) {
2933                 nlmsg_free(adm_ctx.reply_skb);
2934                 return err;
2935         }
2936 out:
2937         drbd_adm_finish(info, retcode);
2938         return 0;
2939 }
2940
2941 static int get_one_status(struct sk_buff *skb, struct netlink_callback *cb)
2942 {
2943         struct drbd_device *device;
2944         struct drbd_genlmsghdr *dh;
2945         struct drbd_resource *pos = (struct drbd_resource *)cb->args[0];
2946         struct drbd_resource *resource = NULL;
2947         struct drbd_resource *tmp;
2948         unsigned volume = cb->args[1];
2949
2950         /* Open coded, deferred, iteration:
2951          * for_each_resource_safe(resource, tmp, &drbd_resources) {
2952          *      connection = "first connection of resource or undefined";
2953          *      idr_for_each_entry(&resource->devices, device, i) {
2954          *        ...
2955          *      }
2956          * }
2957          * where resource is cb->args[0];
2958          * and i is cb->args[1];
2959          *
2960          * cb->args[2] indicates if we shall loop over all resources,
2961          * or just dump all volumes of a single resource.
2962          *
2963          * This may miss entries inserted after this dump started,
2964          * or entries deleted before they are reached.
2965          *
2966          * We need to make sure the device won't disappear while
2967          * we are looking at it, and revalidate our iterators
2968          * on each iteration.
2969          */
2970
2971         /* synchronize with conn_create()/drbd_destroy_connection() */
2972         rcu_read_lock();
2973         /* revalidate iterator position */
2974         for_each_resource_rcu(tmp, &drbd_resources) {
2975                 if (pos == NULL) {
2976                         /* first iteration */
2977                         pos = tmp;
2978                         resource = pos;
2979                         break;
2980                 }
2981                 if (tmp == pos) {
2982                         resource = pos;
2983                         break;
2984                 }
2985         }
2986         if (resource) {
2987 next_resource:
2988                 device = idr_get_next(&resource->devices, &volume);
2989                 if (!device) {
2990                         /* No more volumes to dump on this resource.
2991                          * Advance resource iterator. */
2992                         pos = list_entry_rcu(resource->resources.next,
2993                                              struct drbd_resource, resources);
2994                         /* Did we dump any volume of this resource yet? */
2995                         if (volume != 0) {
2996                                 /* If we reached the end of the list,
2997                                  * or only a single resource dump was requested,
2998                                  * we are done. */
2999                                 if (&pos->resources == &drbd_resources || cb->args[2])
3000                                         goto out;
3001                                 volume = 0;
3002                                 resource = pos;
3003                                 goto next_resource;
3004                         }
3005                 }
3006
3007                 dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
3008                                 cb->nlh->nlmsg_seq, &drbd_genl_family,
3009                                 NLM_F_MULTI, DRBD_ADM_GET_STATUS);
3010                 if (!dh)
3011                         goto out;
3012
3013                 if (!device) {
3014                         /* This is a connection without a single volume.
3015                          * Surprisingly enough, it may have a network
3016                          * configuration. */
3017                         struct drbd_connection *connection;
3018
3019                         dh->minor = -1U;
3020                         dh->ret_code = NO_ERROR;
3021                         connection = the_only_connection(resource);
3022                         if (nla_put_drbd_cfg_context(skb, resource, connection, NULL))
3023                                 goto cancel;
3024                         if (connection) {
3025                                 struct net_conf *nc;
3026
3027                                 nc = rcu_dereference(connection->net_conf);
3028                                 if (nc && net_conf_to_skb(skb, nc, 1) != 0)
3029                                         goto cancel;
3030                         }
3031                         goto done;
3032                 }
3033
3034                 D_ASSERT(device, device->vnr == volume);
3035                 D_ASSERT(device, device->resource == resource);
3036
3037                 dh->minor = device_to_minor(device);
3038                 dh->ret_code = NO_ERROR;
3039
3040                 if (nla_put_status_info(skb, device, NULL)) {
3041 cancel:
3042                         genlmsg_cancel(skb, dh);
3043                         goto out;
3044                 }
3045 done:
3046                 genlmsg_end(skb, dh);
3047         }
3048
3049 out:
3050         rcu_read_unlock();
3051         /* where to start the next iteration */
3052         cb->args[0] = (long)pos;
3053         cb->args[1] = (pos == resource) ? volume + 1 : 0;
3054
3055         /* No more resources/volumes/minors found results in an empty skb.
3056          * Which will terminate the dump. */
3057         return skb->len;
3058 }
3059
3060 /*
3061  * Request status of all resources, or of all volumes within a single resource.
3062  *
3063  * This is a dump, as the answer may not fit in a single reply skb otherwise.
3064  * Which means we cannot use the family->attrbuf or other such members, because
3065  * dump is NOT protected by the genl_lock().  During dump, we only have access
3066  * to the incoming skb, and need to opencode "parsing" of the nlattr payload.
3067  *
3068  * Once things are setup properly, we call into get_one_status().
3069  */
3070 int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb)
3071 {
3072         const unsigned hdrlen = GENL_HDRLEN + GENL_MAGIC_FAMILY_HDRSZ;
3073         struct nlattr *nla;
3074         const char *resource_name;
3075         struct drbd_resource *resource;
3076         int maxtype;
3077
3078         /* Is this a followup call? */
3079         if (cb->args[0]) {
3080                 /* ... of a single resource dump,
3081                  * and the resource iterator has been advanced already? */
3082                 if (cb->args[2] && cb->args[2] != cb->args[0])
3083                         return 0; /* DONE. */
3084                 goto dump;
3085         }
3086
3087         /* First call (from netlink_dump_start).  We need to figure out
3088          * which resource(s) the user wants us to dump. */
3089         nla = nla_find(nlmsg_attrdata(cb->nlh, hdrlen),
3090                         nlmsg_attrlen(cb->nlh, hdrlen),
3091                         DRBD_NLA_CFG_CONTEXT);
3092
3093         /* No explicit context given.  Dump all. */
3094         if (!nla)
3095                 goto dump;
3096         maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1;
3097         nla = drbd_nla_find_nested(maxtype, nla, __nla_type(T_ctx_resource_name));
3098         if (IS_ERR(nla))
3099                 return PTR_ERR(nla);
3100         /* context given, but no name present? */
3101         if (!nla)
3102                 return -EINVAL;
3103         resource_name = nla_data(nla);
3104         if (!*resource_name)
3105                 return -ENODEV;
3106         resource = drbd_find_resource(resource_name);
3107         if (!resource)
3108                 return -ENODEV;
3109
3110         kref_put(&resource->kref, drbd_destroy_resource); /* get_one_status() revalidates the resource */
3111
3112         /* prime iterators, and set "filter" mode mark:
3113          * only dump this connection. */
3114         cb->args[0] = (long)resource;
3115         /* cb->args[1] = 0; passed in this way. */
3116         cb->args[2] = (long)resource;
3117
3118 dump:
3119         return get_one_status(skb, cb);
3120 }
3121
3122 int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info)
3123 {
3124         enum drbd_ret_code retcode;
3125         struct timeout_parms tp;
3126         int err;
3127
3128         retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
3129         if (!adm_ctx.reply_skb)
3130                 return retcode;
3131         if (retcode != NO_ERROR)
3132                 goto out;
3133
3134         tp.timeout_type =
3135                 adm_ctx.device->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED :
3136                 test_bit(USE_DEGR_WFC_T, &adm_ctx.device->flags) ? UT_DEGRADED :
3137                 UT_DEFAULT;
3138
3139         err = timeout_parms_to_priv_skb(adm_ctx.reply_skb, &tp);
3140         if (err) {
3141                 nlmsg_free(adm_ctx.reply_skb);
3142                 return err;
3143         }
3144 out:
3145         drbd_adm_finish(info, retcode);
3146         return 0;
3147 }
3148
3149 int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info)
3150 {
3151         struct drbd_device *device;
3152         enum drbd_ret_code retcode;
3153         struct start_ov_parms parms;
3154
3155         retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
3156         if (!adm_ctx.reply_skb)
3157                 return retcode;
3158         if (retcode != NO_ERROR)
3159                 goto out;
3160
3161         device = adm_ctx.device;
3162
3163         /* resume from last known position, if possible */
3164         parms.ov_start_sector = device->ov_start_sector;
3165         parms.ov_stop_sector = ULLONG_MAX;
3166         if (info->attrs[DRBD_NLA_START_OV_PARMS]) {
3167                 int err = start_ov_parms_from_attrs(&parms, info);
3168                 if (err) {
3169                         retcode = ERR_MANDATORY_TAG;
3170                         drbd_msg_put_info(from_attrs_err_to_txt(err));
3171                         goto out;
3172                 }
3173         }
3174         /* w_make_ov_request expects position to be aligned */
3175         device->ov_start_sector = parms.ov_start_sector & ~(BM_SECT_PER_BIT-1);
3176         device->ov_stop_sector = parms.ov_stop_sector;
3177
3178         /* If there is still bitmap IO pending, e.g. previous resync or verify
3179          * just being finished, wait for it before requesting a new resync. */
3180         drbd_suspend_io(device);
3181         wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
3182         retcode = drbd_request_state(device, NS(conn, C_VERIFY_S));
3183         drbd_resume_io(device);
3184 out:
3185         drbd_adm_finish(info, retcode);
3186         return 0;
3187 }
3188
3189
3190 int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info)
3191 {
3192         struct drbd_device *device;
3193         enum drbd_ret_code retcode;
3194         int skip_initial_sync = 0;
3195         int err;
3196         struct new_c_uuid_parms args;
3197
3198         retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
3199         if (!adm_ctx.reply_skb)
3200                 return retcode;
3201         if (retcode != NO_ERROR)
3202                 goto out_nolock;
3203
3204         device = adm_ctx.device;
3205         memset(&args, 0, sizeof(args));
3206         if (info->attrs[DRBD_NLA_NEW_C_UUID_PARMS]) {
3207                 err = new_c_uuid_parms_from_attrs(&args, info);
3208                 if (err) {
3209                         retcode = ERR_MANDATORY_TAG;
3210                         drbd_msg_put_info(from_attrs_err_to_txt(err));
3211                         goto out_nolock;
3212                 }
3213         }
3214
3215         mutex_lock(device->state_mutex); /* Protects us against serialized state changes. */
3216
3217         if (!get_ldev(device)) {
3218                 retcode = ERR_NO_DISK;
3219                 goto out;
3220         }
3221
3222         /* this is "skip initial sync", assume to be clean */
3223         if (device->state.conn == C_CONNECTED &&
3224             first_peer_device(device)->connection->agreed_pro_version >= 90 &&
3225             device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && args.clear_bm) {
3226                 drbd_info(device, "Preparing to skip initial sync\n");
3227                 skip_initial_sync = 1;
3228         } else if (device->state.conn != C_STANDALONE) {
3229                 retcode = ERR_CONNECTED;
3230                 goto out_dec;
3231         }
3232
3233         drbd_uuid_set(device, UI_BITMAP, 0); /* Rotate UI_BITMAP to History 1, etc... */
3234         drbd_uuid_new_current(device); /* New current, previous to UI_BITMAP */
3235
3236         if (args.clear_bm) {
3237                 err = drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
3238                         "clear_n_write from new_c_uuid", BM_LOCKED_MASK);
3239                 if (err) {
3240                         drbd_err(device, "Writing bitmap failed with %d\n", err);
3241                         retcode = ERR_IO_MD_DISK;
3242                 }
3243                 if (skip_initial_sync) {
3244                         drbd_send_uuids_skip_initial_sync(device);
3245                         _drbd_uuid_set(device, UI_BITMAP, 0);
3246                         drbd_print_uuids(device, "cleared bitmap UUID");
3247                         spin_lock_irq(&first_peer_device(device)->connection->req_lock);
3248                         _drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
3249                                         CS_VERBOSE, NULL);
3250                         spin_unlock_irq(&first_peer_device(device)->connection->req_lock);
3251                 }
3252         }
3253
3254         drbd_md_sync(device);
3255 out_dec:
3256         put_ldev(device);
3257 out:
3258         mutex_unlock(device->state_mutex);
3259 out_nolock:
3260         drbd_adm_finish(info, retcode);
3261         return 0;
3262 }
3263
3264 static enum drbd_ret_code
3265 drbd_check_resource_name(const char *name)
3266 {
3267         if (!name || !name[0]) {
3268                 drbd_msg_put_info("resource name missing");
3269                 return ERR_MANDATORY_TAG;
3270         }
3271         /* if we want to use these in sysfs/configfs/debugfs some day,
3272          * we must not allow slashes */
3273         if (strchr(name, '/')) {
3274                 drbd_msg_put_info("invalid resource name");
3275                 return ERR_INVALID_REQUEST;
3276         }
3277         return NO_ERROR;
3278 }
3279
3280 int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info)
3281 {
3282         enum drbd_ret_code retcode;
3283         struct res_opts res_opts;
3284         int err;
3285
3286         retcode = drbd_adm_prepare(skb, info, 0);
3287         if (!adm_ctx.reply_skb)
3288                 return retcode;
3289         if (retcode != NO_ERROR)
3290                 goto out;
3291
3292         set_res_opts_defaults(&res_opts);
3293         err = res_opts_from_attrs(&res_opts, info);
3294         if (err && err != -ENOMSG) {
3295                 retcode = ERR_MANDATORY_TAG;
3296                 drbd_msg_put_info(from_attrs_err_to_txt(err));
3297                 goto out;
3298         }
3299
3300         retcode = drbd_check_resource_name(adm_ctx.resource_name);
3301         if (retcode != NO_ERROR)
3302                 goto out;
3303
3304         if (adm_ctx.resource) {
3305                 if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) {
3306                         retcode = ERR_INVALID_REQUEST;
3307                         drbd_msg_put_info("resource exists");
3308                 }
3309                 /* else: still NO_ERROR */
3310                 goto out;
3311         }
3312
3313         if (!conn_create(adm_ctx.resource_name, &res_opts))
3314                 retcode = ERR_NOMEM;
3315 out:
3316         drbd_adm_finish(info, retcode);
3317         return 0;
3318 }
3319
3320 int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info)
3321 {
3322         struct drbd_genlmsghdr *dh = info->userhdr;
3323         enum drbd_ret_code retcode;
3324
3325         retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE);
3326         if (!adm_ctx.reply_skb)
3327                 return retcode;
3328         if (retcode != NO_ERROR)
3329                 goto out;
3330
3331         if (dh->minor > MINORMASK) {
3332                 drbd_msg_put_info("requested minor out of range");
3333                 retcode = ERR_INVALID_REQUEST;
3334                 goto out;
3335         }
3336         if (adm_ctx.volume > DRBD_VOLUME_MAX) {
3337                 drbd_msg_put_info("requested volume id out of range");
3338                 retcode = ERR_INVALID_REQUEST;
3339                 goto out;
3340         }
3341
3342         /* drbd_adm_prepare made sure already
3343          * that first_peer_device(device)->connection and device->vnr match the request. */
3344         if (adm_ctx.device) {
3345                 if (info->nlhdr->nlmsg_flags & NLM_F_EXCL)
3346                         retcode = ERR_MINOR_EXISTS;
3347                 /* else: still NO_ERROR */
3348                 goto out;
3349         }
3350
3351         retcode = drbd_create_device(adm_ctx.resource, dh->minor, adm_ctx.volume);
3352 out:
3353         drbd_adm_finish(info, retcode);
3354         return 0;
3355 }
3356
3357 static enum drbd_ret_code adm_del_minor(struct drbd_device *device)
3358 {
3359         if (device->state.disk == D_DISKLESS &&
3360             /* no need to be device->state.conn == C_STANDALONE &&
3361              * we may want to delete a minor from a live replication group.
3362              */
3363             device->state.role == R_SECONDARY) {
3364                 _drbd_request_state(device, NS(conn, C_WF_REPORT_PARAMS),
3365                                     CS_VERBOSE + CS_WAIT_COMPLETE);
3366                 drbd_delete_device(device);
3367                 return NO_ERROR;
3368         } else
3369                 return ERR_MINOR_CONFIGURED;
3370 }
3371
3372 int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info)
3373 {
3374         enum drbd_ret_code retcode;
3375
3376         retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
3377         if (!adm_ctx.reply_skb)
3378                 return retcode;
3379         if (retcode != NO_ERROR)
3380                 goto out;
3381
3382         retcode = adm_del_minor(adm_ctx.device);
3383 out:
3384         drbd_adm_finish(info, retcode);
3385         return 0;
3386 }
3387
3388 int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
3389 {
3390         int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
3391         struct drbd_peer_device *peer_device;
3392         unsigned i;
3393
3394         retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE);
3395         if (!adm_ctx.reply_skb)
3396                 return retcode;
3397         if (retcode != NO_ERROR)
3398                 goto out;
3399
3400         /* demote */
3401         idr_for_each_entry(&adm_ctx.connection->peer_devices, peer_device, i) {
3402                 retcode = drbd_set_role(peer_device->device, R_SECONDARY, 0);
3403                 if (retcode < SS_SUCCESS) {
3404                         drbd_msg_put_info("failed to demote");
3405                         goto out;
3406                 }
3407         }
3408
3409         retcode = conn_try_disconnect(adm_ctx.connection, 0);
3410         if (retcode < SS_SUCCESS) {
3411                 drbd_msg_put_info("failed to disconnect");
3412                 goto out;
3413         }
3414
3415         /* detach */
3416         idr_for_each_entry(&adm_ctx.connection->peer_devices, peer_device, i) {
3417                 retcode = adm_detach(peer_device->device, 0);
3418                 if (retcode < SS_SUCCESS || retcode > NO_ERROR) {
3419                         drbd_msg_put_info("failed to detach");
3420                         goto out;
3421                 }
3422         }
3423
3424         /* If we reach this, all volumes (of this connection) are Secondary,
3425          * Disconnected, Diskless, aka Unconfigured. Make sure all threads have
3426          * actually stopped, state handling only does drbd_thread_stop_nowait(). */
3427         drbd_thread_stop(&adm_ctx.connection->worker);
3428
3429         /* Now, nothing can fail anymore */
3430
3431         /* delete volumes */
3432         idr_for_each_entry(&adm_ctx.connection->peer_devices, peer_device, i) {
3433                 retcode = adm_del_minor(peer_device->device);
3434                 if (retcode != NO_ERROR) {
3435                         /* "can not happen" */
3436                         drbd_msg_put_info("failed to delete volume");
3437                         goto out;
3438                 }
3439         }
3440
3441         /* delete connection */
3442         if (conn_lowest_minor(adm_ctx.connection) < 0) {
3443                 struct drbd_resource *resource = adm_ctx.connection->resource;
3444
3445                 list_del_rcu(&resource->resources);
3446                 synchronize_rcu();
3447                 drbd_free_resource(resource);
3448
3449                 retcode = NO_ERROR;
3450         } else {
3451                 /* "can not happen" */
3452                 retcode = ERR_RES_IN_USE;
3453                 drbd_msg_put_info("failed to delete connection");
3454         }
3455         goto out;
3456 out:
3457         drbd_adm_finish(info, retcode);
3458         return 0;
3459 }
3460
3461 int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info)
3462 {
3463         struct drbd_resource *resource;
3464         struct drbd_connection *connection;
3465         enum drbd_ret_code retcode;
3466
3467         retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE);
3468         if (!adm_ctx.reply_skb)
3469                 return retcode;
3470         if (retcode != NO_ERROR)
3471                 goto out;
3472
3473         resource = adm_ctx.resource;
3474         for_each_connection(connection, resource) {
3475                 if (connection->cstate > C_STANDALONE) {
3476                         retcode = ERR_NET_CONFIGURED;
3477                         goto out;
3478                 }
3479         }
3480         if (!idr_is_empty(&resource->devices)) {
3481                 retcode = ERR_RES_IN_USE;
3482                 goto out;
3483         }
3484
3485         list_del_rcu(&resource->resources);
3486         for_each_connection(connection, resource)
3487                 drbd_thread_stop(&connection->worker);
3488         synchronize_rcu();
3489         drbd_free_resource(resource);
3490         retcode = NO_ERROR;
3491 out:
3492         drbd_adm_finish(info, retcode);
3493         return 0;
3494 }
3495
3496 void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib)
3497 {
3498         static atomic_t drbd_genl_seq = ATOMIC_INIT(2); /* two. */
3499         struct sk_buff *msg;
3500         struct drbd_genlmsghdr *d_out;
3501         unsigned seq;
3502         int err = -ENOMEM;
3503
3504         if (sib->sib_reason == SIB_SYNC_PROGRESS) {
3505                 if (time_after(jiffies, device->rs_last_bcast + HZ))
3506                         device->rs_last_bcast = jiffies;
3507                 else
3508                         return;
3509         }
3510
3511         seq = atomic_inc_return(&drbd_genl_seq);
3512         msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
3513         if (!msg)
3514                 goto failed;
3515
3516         err = -EMSGSIZE;
3517         d_out = genlmsg_put(msg, 0, seq, &drbd_genl_family, 0, DRBD_EVENT);
3518         if (!d_out) /* cannot happen, but anyways. */
3519                 goto nla_put_failure;
3520         d_out->minor = device_to_minor(device);
3521         d_out->ret_code = NO_ERROR;
3522
3523         if (nla_put_status_info(msg, device, sib))
3524                 goto nla_put_failure;
3525         genlmsg_end(msg, d_out);
3526         err = drbd_genl_multicast_events(msg, 0);
3527         /* msg has been consumed or freed in netlink_broadcast() */
3528         if (err && err != -ESRCH)
3529                 goto failed;
3530
3531         return;
3532
3533 nla_put_failure:
3534         nlmsg_free(msg);
3535 failed:
3536         drbd_err(device, "Error %d while broadcasting event. "
3537                         "Event seq:%u sib_reason:%u\n",
3538                         err, seq, sib->sib_reason);
3539 }