drivers/block/drbd/drbd_worker.c

   1 /*
   2    drbd_worker.c
   3
   4    This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
   5
   6    Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   7    Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   8    Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
   9
  10    drbd is free software; you can redistribute it and/or modify
  11    it under the terms of the GNU General Public License as published by
  12    the Free Software Foundation; either version 2, or (at your option)
  13    any later version.
  14
  15    drbd is distributed in the hope that it will be useful,
  16    but WITHOUT ANY WARRANTY; without even the implied warranty of
  17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18    GNU General Public License for more details.
  19
  20    You should have received a copy of the GNU General Public License
  21    along with drbd; see the file COPYING.  If not, write to
  22    the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
  23
  24  */
  25
  26 #include <linux/module.h>
  27 #include <linux/drbd.h>
  28 #include <linux/sched.h>
  29 #include <linux/wait.h>
  30 #include <linux/mm.h>
  31 #include <linux/memcontrol.h>
  32 #include <linux/mm_inline.h>
  33 #include <linux/slab.h>
  34 #include <linux/random.h>
  35 #include <linux/string.h>
  36 #include <linux/scatterlist.h>
  37
  38 #include "drbd_int.h"
  39 #include "drbd_protocol.h"
  40 #include "drbd_req.h"
  41
/* Forward declaration: w_resync_timer() calls w_make_ov_request() before
 * its definition further down in this file. */
static int w_make_ov_request(struct drbd_work *w, int cancel);
  43
  44
  45 /* endio handlers:
  46  *   drbd_md_io_complete (defined here)
  47  *   drbd_request_endio (defined here)
  48  *   drbd_peer_request_endio (defined here)
  49  *   bm_async_io_complete (defined in drbd_bitmap.c)
  50  *
  51  * For all these callbacks, note the following:
  52  * The callbacks will be called in irq context by the IDE drivers,
  53  * and in Softirqs/Tasklets/BH context by the SCSI drivers.
  54  * Try to get the locking right :)
  55  *
  56  */
  57
  58
/* About the global_state_lock
   Each state transition on a device holds a read lock. In case we have
   to evaluate the resync-after dependencies, we grab a write lock, because
   we need stable states on all devices for that.  */
rwlock_t global_state_lock;
  64
  65 /* used for synchronous meta data and bitmap IO
  66  * submitted by drbd_md_sync_page_io()
  67  */
  68 void drbd_md_io_complete(struct bio *bio, int error)
  69 {
  70         struct drbd_md_io *md_io;
  71         struct drbd_device *device;
  72
  73         md_io = (struct drbd_md_io *)bio->bi_private;
  74         device = container_of(md_io, struct drbd_device, md_io);
  75
  76         md_io->error = error;
  77
  78         /* We grabbed an extra reference in _drbd_md_sync_page_io() to be able
  79          * to timeout on the lower level device, and eventually detach from it.
  80          * If this io completion runs after that timeout expired, this
  81          * drbd_md_put_buffer() may allow us to finally try and re-attach.
  82          * During normal operation, this only puts that extra reference
  83          * down to 1 again.
  84          * Make sure we first drop the reference, and only then signal
  85          * completion, or we may (in drbd_al_read_log()) cycle so fast into the
  86          * next drbd_md_sync_page_io(), that we trigger the
  87          * ASSERT(atomic_read(&device->md_io_in_use) == 1) there.
  88          */
  89         drbd_md_put_buffer(device);
  90         md_io->done = 1;
  91         wake_up(&device->misc_wait);
  92         bio_put(bio);
  93         if (device->ldev) /* special case: drbd_md_read() during drbd_adm_attach() */
  94                 put_ldev(device);
  95 }
  96
  97 /* reads on behalf of the partner,
  98  * "submitted" by the receiver
  99  */
 100 static void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(local)
 101 {
 102         unsigned long flags = 0;
 103         struct drbd_device *device = peer_req->w.device;
 104
 105         spin_lock_irqsave(&device->connection->req_lock, flags);
 106         device->read_cnt += peer_req->i.size >> 9;
 107         list_del(&peer_req->w.list);
 108         if (list_empty(&device->read_ee))
 109                 wake_up(&device->ee_wait);
 110         if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
 111                 __drbd_chk_io_error(device, DRBD_READ_ERROR);
 112         spin_unlock_irqrestore(&device->connection->req_lock, flags);
 113
 114         drbd_queue_work(&device->connection->sender_work, &peer_req->w);
 115         put_ldev(device);
 116 }
 117
 118 /* writes on behalf of the partner, or resync writes,
 119  * "submitted" by the receiver, final stage.  */
 120 static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local)
 121 {
 122         unsigned long flags = 0;
 123         struct drbd_device *device = peer_req->w.device;
 124         struct drbd_interval i;
 125         int do_wake;
 126         u64 block_id;
 127         int do_al_complete_io;
 128
 129         /* after we moved peer_req to done_ee,
 130          * we may no longer access it,
 131          * it may be freed/reused already!
 132          * (as soon as we release the req_lock) */
 133         i = peer_req->i;
 134         do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO;
 135         block_id = peer_req->block_id;
 136
 137         spin_lock_irqsave(&device->connection->req_lock, flags);
 138         device->writ_cnt += peer_req->i.size >> 9;
 139         list_move_tail(&peer_req->w.list, &device->done_ee);
 140
 141         /*
 142          * Do not remove from the write_requests tree here: we did not send the
 143          * Ack yet and did not wake possibly waiting conflicting requests.
 144          * Removed from the tree from "drbd_process_done_ee" within the
 145          * appropriate w.cb (e_end_block/e_end_resync_block) or from
 146          * _drbd_clear_done_ee.
 147          */
 148
 149         do_wake = list_empty(block_id == ID_SYNCER ? &device->sync_ee : &device->active_ee);
 150
 151         if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
 152                 __drbd_chk_io_error(device, DRBD_WRITE_ERROR);
 153         spin_unlock_irqrestore(&device->connection->req_lock, flags);
 154
 155         if (block_id == ID_SYNCER)
 156                 drbd_rs_complete_io(device, i.sector);
 157
 158         if (do_wake)
 159                 wake_up(&device->ee_wait);
 160
 161         if (do_al_complete_io)
 162                 drbd_al_complete_io(device, &i);
 163
 164         wake_asender(device->connection);
 165         put_ldev(device);
 166 }
 167
 168 /* writes on behalf of the partner, or resync writes,
 169  * "submitted" by the receiver.
 170  */
 171 void drbd_peer_request_endio(struct bio *bio, int error)
 172 {
 173         struct drbd_peer_request *peer_req = bio->bi_private;
 174         struct drbd_device *device = peer_req->w.device;
 175         int uptodate = bio_flagged(bio, BIO_UPTODATE);
 176         int is_write = bio_data_dir(bio) == WRITE;
 177
 178         if (error && __ratelimit(&drbd_ratelimit_state))
 179                 dev_warn(DEV, "%s: error=%d s=%llus\n",
 180                                 is_write ? "write" : "read", error,
 181                                 (unsigned long long)peer_req->i.sector);
 182         if (!error && !uptodate) {
 183                 if (__ratelimit(&drbd_ratelimit_state))
 184                         dev_warn(DEV, "%s: setting error to -EIO s=%llus\n",
 185                                         is_write ? "write" : "read",
 186                                         (unsigned long long)peer_req->i.sector);
 187                 /* strange behavior of some lower level drivers...
 188                  * fail the request by clearing the uptodate flag,
 189                  * but do not return any error?! */
 190                 error = -EIO;
 191         }
 192
 193         if (error)
 194                 set_bit(__EE_WAS_ERROR, &peer_req->flags);
 195
 196         bio_put(bio); /* no need for the bio anymore */
 197         if (atomic_dec_and_test(&peer_req->pending_bios)) {
 198                 if (is_write)
 199                         drbd_endio_write_sec_final(peer_req);
 200                 else
 201                         drbd_endio_read_sec_final(peer_req);
 202         }
 203 }
 204
 205 /* read, readA or write requests on R_PRIMARY coming from drbd_make_request
 206  */
 207 void drbd_request_endio(struct bio *bio, int error)
 208 {
 209         unsigned long flags;
 210         struct drbd_request *req = bio->bi_private;
 211         struct drbd_device *device = req->w.device;
 212         struct bio_and_error m;
 213         enum drbd_req_event what;
 214         int uptodate = bio_flagged(bio, BIO_UPTODATE);
 215
 216         if (!error && !uptodate) {
 217                 dev_warn(DEV, "p %s: setting error to -EIO\n",
 218                          bio_data_dir(bio) == WRITE ? "write" : "read");
 219                 /* strange behavior of some lower level drivers...
 220                  * fail the request by clearing the uptodate flag,
 221                  * but do not return any error?! */
 222                 error = -EIO;
 223         }
 224
 225
 226         /* If this request was aborted locally before,
 227          * but now was completed "successfully",
 228          * chances are that this caused arbitrary data corruption.
 229          *
 230          * "aborting" requests, or force-detaching the disk, is intended for
 231          * completely blocked/hung local backing devices which do no longer
 232          * complete requests at all, not even do error completions.  In this
 233          * situation, usually a hard-reset and failover is the only way out.
 234          *
 235          * By "aborting", basically faking a local error-completion,
 236          * we allow for a more graceful swichover by cleanly migrating services.
 237          * Still the affected node has to be rebooted "soon".
 238          *
 239          * By completing these requests, we allow the upper layers to re-use
 240          * the associated data pages.
 241          *
 242          * If later the local backing device "recovers", and now DMAs some data
 243          * from disk into the original request pages, in the best case it will
 244          * just put random data into unused pages; but typically it will corrupt
 245          * meanwhile completely unrelated data, causing all sorts of damage.
 246          *
 247          * Which means delayed successful completion,
 248          * especially for READ requests,
 249          * is a reason to panic().
 250          *
 251          * We assume that a delayed *error* completion is OK,
 252          * though we still will complain noisily about it.
 253          */
 254         if (unlikely(req->rq_state & RQ_LOCAL_ABORTED)) {
 255                 if (__ratelimit(&drbd_ratelimit_state))
 256                         dev_emerg(DEV, "delayed completion of aborted local request; disk-timeout may be too aggressive\n");
 257
 258                 if (!error)
 259                         panic("possible random memory corruption caused by delayed completion of aborted local request\n");
 260         }
 261
 262         /* to avoid recursion in __req_mod */
 263         if (unlikely(error)) {
 264                 what = (bio_data_dir(bio) == WRITE)
 265                         ? WRITE_COMPLETED_WITH_ERROR
 266                         : (bio_rw(bio) == READ)
 267                           ? READ_COMPLETED_WITH_ERROR
 268                           : READ_AHEAD_COMPLETED_WITH_ERROR;
 269         } else
 270                 what = COMPLETED_OK;
 271
 272         bio_put(req->private_bio);
 273         req->private_bio = ERR_PTR(error);
 274
 275         /* not req_mod(), we need irqsave here! */
 276         spin_lock_irqsave(&device->connection->req_lock, flags);
 277         __req_mod(req, what, &m);
 278         spin_unlock_irqrestore(&device->connection->req_lock, flags);
 279         put_ldev(device);
 280
 281         if (m.bio)
 282                 complete_master_bio(device, &m);
 283 }
 284
 285 void drbd_csum_ee(struct drbd_device *device, struct crypto_hash *tfm,
 286                   struct drbd_peer_request *peer_req, void *digest)
 287 {
 288         struct hash_desc desc;
 289         struct scatterlist sg;
 290         struct page *page = peer_req->pages;
 291         struct page *tmp;
 292         unsigned len;
 293
 294         desc.tfm = tfm;
 295         desc.flags = 0;
 296
 297         sg_init_table(&sg, 1);
 298         crypto_hash_init(&desc);
 299
 300         while ((tmp = page_chain_next(page))) {
 301                 /* all but the last page will be fully used */
 302                 sg_set_page(&sg, page, PAGE_SIZE, 0);
 303                 crypto_hash_update(&desc, &sg, sg.length);
 304                 page = tmp;
 305         }
 306         /* and now the last, possibly only partially used page */
 307         len = peer_req->i.size & (PAGE_SIZE - 1);
 308         sg_set_page(&sg, page, len ?: PAGE_SIZE, 0);
 309         crypto_hash_update(&desc, &sg, sg.length);
 310         crypto_hash_final(&desc, digest);
 311 }
 312
 313 void drbd_csum_bio(struct drbd_device *device, struct crypto_hash *tfm, struct bio *bio, void *digest)
 314 {
 315         struct hash_desc desc;
 316         struct scatterlist sg;
 317         struct bio_vec bvec;
 318         struct bvec_iter iter;
 319
 320         desc.tfm = tfm;
 321         desc.flags = 0;
 322
 323         sg_init_table(&sg, 1);
 324         crypto_hash_init(&desc);
 325
 326         bio_for_each_segment(bvec, bio, iter) {
 327                 sg_set_page(&sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset);
 328                 crypto_hash_update(&desc, &sg, sg.length);
 329         }
 330         crypto_hash_final(&desc, digest);
 331 }
 332
 333 /* MAYBE merge common code with w_e_end_ov_req */
 334 static int w_e_send_csum(struct drbd_work *w, int cancel)
 335 {
 336         struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
 337         struct drbd_device *device = w->device;
 338         int digest_size;
 339         void *digest;
 340         int err = 0;
 341
 342         if (unlikely(cancel))
 343                 goto out;
 344
 345         if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0))
 346                 goto out;
 347
 348         digest_size = crypto_hash_digestsize(device->connection->csums_tfm);
 349         digest = kmalloc(digest_size, GFP_NOIO);
 350         if (digest) {
 351                 sector_t sector = peer_req->i.sector;
 352                 unsigned int size = peer_req->i.size;
 353                 drbd_csum_ee(device, device->connection->csums_tfm, peer_req, digest);
 354                 /* Free peer_req and pages before send.
 355                  * In case we block on congestion, we could otherwise run into
 356                  * some distributed deadlock, if the other side blocks on
 357                  * congestion as well, because our receiver blocks in
 358                  * drbd_alloc_pages due to pp_in_use > max_buffers. */
 359                 drbd_free_peer_req(device, peer_req);
 360                 peer_req = NULL;
 361                 inc_rs_pending(device);
 362                 err = drbd_send_drequest_csum(device, sector, size,
 363                                               digest, digest_size,
 364                                               P_CSUM_RS_REQUEST);
 365                 kfree(digest);
 366         } else {
 367                 dev_err(DEV, "kmalloc() of digest failed.\n");
 368                 err = -ENOMEM;
 369         }
 370
 371 out:
 372         if (peer_req)
 373                 drbd_free_peer_req(device, peer_req);
 374
 375         if (unlikely(err))
 376                 dev_err(DEV, "drbd_send_drequest(..., csum) failed\n");
 377         return err;
 378 }
 379
 380 #define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
 381
 382 static int read_for_csum(struct drbd_device *device, sector_t sector, int size)
 383 {
 384         struct drbd_peer_request *peer_req;
 385
 386         if (!get_ldev(device))
 387                 return -EIO;
 388
 389         if (drbd_rs_should_slow_down(device, sector))
 390                 goto defer;
 391
 392         /* GFP_TRY, because if there is no memory available right now, this may
 393          * be rescheduled for later. It is "only" background resync, after all. */
 394         peer_req = drbd_alloc_peer_req(device, ID_SYNCER /* unused */, sector,
 395                                        size, GFP_TRY);
 396         if (!peer_req)
 397                 goto defer;
 398
 399         peer_req->w.cb = w_e_send_csum;
 400         spin_lock_irq(&device->connection->req_lock);
 401         list_add(&peer_req->w.list, &device->read_ee);
 402         spin_unlock_irq(&device->connection->req_lock);
 403
 404         atomic_add(size >> 9, &device->rs_sect_ev);
 405         if (drbd_submit_peer_request(device, peer_req, READ, DRBD_FAULT_RS_RD) == 0)
 406                 return 0;
 407
 408         /* If it failed because of ENOMEM, retry should help.  If it failed
 409          * because bio_add_page failed (probably broken lower level driver),
 410          * retry may or may not help.
 411          * If it does not, you may need to force disconnect. */
 412         spin_lock_irq(&device->connection->req_lock);
 413         list_del(&peer_req->w.list);
 414         spin_unlock_irq(&device->connection->req_lock);
 415
 416         drbd_free_peer_req(device, peer_req);
 417 defer:
 418         put_ldev(device);
 419         return -EAGAIN;
 420 }
 421
 422 int w_resync_timer(struct drbd_work *w, int cancel)
 423 {
 424         struct drbd_device *device = w->device;
 425         switch (device->state.conn) {
 426         case C_VERIFY_S:
 427                 w_make_ov_request(w, cancel);
 428                 break;
 429         case C_SYNC_TARGET:
 430                 w_make_resync_request(w, cancel);
 431                 break;
 432         }
 433
 434         return 0;
 435 }
 436
 437 void resync_timer_fn(unsigned long data)
 438 {
 439         struct drbd_device *device = (struct drbd_device *) data;
 440
 441         if (list_empty(&device->resync_work.list))
 442                 drbd_queue_work(&device->connection->sender_work, &device->resync_work);
 443 }
 444
 445 static void fifo_set(struct fifo_buffer *fb, int value)
 446 {
 447         int i;
 448
 449         for (i = 0; i < fb->size; i++)
 450                 fb->values[i] = value;
 451 }
 452
 453 static int fifo_push(struct fifo_buffer *fb, int value)
 454 {
 455         int ov;
 456
 457         ov = fb->values[fb->head_index];
 458         fb->values[fb->head_index++] = value;
 459
 460         if (fb->head_index >= fb->size)
 461                 fb->head_index = 0;
 462
 463         return ov;
 464 }
 465
 466 static void fifo_add_val(struct fifo_buffer *fb, int value)
 467 {
 468         int i;
 469
 470         for (i = 0; i < fb->size; i++)
 471                 fb->values[i] += value;
 472 }
 473
 474 struct fifo_buffer *fifo_alloc(int fifo_size)
 475 {
 476         struct fifo_buffer *fb;
 477
 478         fb = kzalloc(sizeof(struct fifo_buffer) + sizeof(int) * fifo_size, GFP_NOIO);
 479         if (!fb)
 480                 return NULL;
 481
 482         fb->head_index = 0;
 483         fb->size = fifo_size;
 484         fb->total = 0;
 485
 486         return fb;
 487 }
 488
 489 static int drbd_rs_controller(struct drbd_device *device)
 490 {
 491         struct disk_conf *dc;
 492         unsigned int sect_in;  /* Number of sectors that came in since the last turn */
 493         unsigned int want;     /* The number of sectors we want in the proxy */
 494         int req_sect; /* Number of sectors to request in this turn */
 495         int correction; /* Number of sectors more we need in the proxy*/
 496         int cps; /* correction per invocation of drbd_rs_controller() */
 497         int steps; /* Number of time steps to plan ahead */
 498         int curr_corr;
 499         int max_sect;
 500         struct fifo_buffer *plan;
 501
 502         sect_in = atomic_xchg(&device->rs_sect_in, 0); /* Number of sectors that came in */
 503         device->rs_in_flight -= sect_in;
 504
 505         dc = rcu_dereference(device->ldev->disk_conf);
 506         plan = rcu_dereference(device->rs_plan_s);
 507
 508         steps = plan->size; /* (dc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; */
 509
 510         if (device->rs_in_flight + sect_in == 0) { /* At start of resync */
 511                 want = ((dc->resync_rate * 2 * SLEEP_TIME) / HZ) * steps;
 512         } else { /* normal path */
 513                 want = dc->c_fill_target ? dc->c_fill_target :
 514                         sect_in * dc->c_delay_target * HZ / (SLEEP_TIME * 10);
 515         }
 516
 517         correction = want - device->rs_in_flight - plan->total;
 518
 519         /* Plan ahead */
 520         cps = correction / steps;
 521         fifo_add_val(plan, cps);
 522         plan->total += cps * steps;
 523
 524         /* What we do in this step */
 525         curr_corr = fifo_push(plan, 0);
 526         plan->total -= curr_corr;
 527
 528         req_sect = sect_in + curr_corr;
 529         if (req_sect < 0)
 530                 req_sect = 0;
 531
 532         max_sect = (dc->c_max_rate * 2 * SLEEP_TIME) / HZ;
 533         if (req_sect > max_sect)
 534                 req_sect = max_sect;
 535
 536         /*
 537         dev_warn(DEV, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n",
 538                  sect_in, device->rs_in_flight, want, correction,
 539                  steps, cps, device->rs_planed, curr_corr, req_sect);
 540         */
 541
 542         return req_sect;
 543 }
 544
 545 static int drbd_rs_number_requests(struct drbd_device *device)
 546 {
 547         int number;
 548
 549         rcu_read_lock();
 550         if (rcu_dereference(device->rs_plan_s)->size) {
 551                 number = drbd_rs_controller(device) >> (BM_BLOCK_SHIFT - 9);
 552                 device->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
 553         } else {
 554                 device->c_sync_rate = rcu_dereference(device->ldev->disk_conf)->resync_rate;
 555                 number = SLEEP_TIME * device->c_sync_rate  / ((BM_BLOCK_SIZE / 1024) * HZ);
 556         }
 557         rcu_read_unlock();
 558
 559         /* ignore the amount of pending requests, the resync controller should
 560          * throttle down to incoming reply rate soon enough anyways. */
 561         return number;
 562 }
 563
/* Worker callback: generate the next batch of resync requests.
 *
 * w:      work item; w->device is the device being resynced
 * cancel: if non-zero, do nothing and return 0
 *
 * Asks drbd_rs_number_requests() how many bitmap blocks to request in this
 * turn, then searches the bitmap starting at device->bm_resync_fo.  For each
 * block found it either reads the local data for a checksum based request
 * (read_for_csum(), when csums_tfm is set and the agreed protocol version is
 * at least 89) or sends a P_RS_DATA_REQUEST.
 *
 * Returns 0 in most cases, including when the remaining work was deferred
 * by re-arming resync_timer; returns -EIO if read_for_csum() reported -EIO,
 * or the error returned by drbd_send_drequest().
 * Every exit after the successful get_ldev() drops that reference again.
 */
int w_make_resync_request(struct drbd_work *w, int cancel)
{
        struct drbd_device *device = w->device;
        unsigned long bit;
        sector_t sector;
        const sector_t capacity = drbd_get_capacity(device->this_bdev);
        int max_bio_size;
        int number, rollback_i, size;
        int align, queued, sndbuf;
        int i = 0;

        if (unlikely(cancel))
                return 0;

        if (device->rs_total == 0) {
                /* empty resync? */
                drbd_resync_finished(device);
                return 0;
        }

        if (!get_ldev(device)) {
                /* Since we only need to access device->rsync a
                   get_ldev_if_state(device,D_FAILED) would be sufficient, but
                   to continue resync with a broken disk makes no sense at
                   all */
                dev_err(DEV, "Disk broke down during resync!\n");
                return 0;
        }

        max_bio_size = queue_max_hw_sectors(device->rq_queue) << 9;
        number = drbd_rs_number_requests(device);
        /* nothing to request in this turn: just re-arm the timer */
        if (number == 0)
                goto requeue;

        for (i = 0; i < number; i++) {
                /* Stop generating RS requests, when half of the send buffer is filled */
                mutex_lock(&device->connection->data.mutex);
                if (device->connection->data.socket) {
                        queued = device->connection->data.socket->sk->sk_wmem_queued;
                        sndbuf = device->connection->data.socket->sk->sk_sndbuf;
                } else {
                        /* no socket: these values make the check below
                         * take the requeue path */
                        queued = 1;
                        sndbuf = 0;
                }
                mutex_unlock(&device->connection->data.mutex);
                if (queued > sndbuf / 2)
                        goto requeue;

                /* re-entered via goto when the bit found turned out to be
                 * clear; that retry does not increment i */
next_sector:
                size = BM_BLOCK_SIZE;
                bit  = drbd_bm_find_next(device, device->bm_resync_fo);

                if (bit == DRBD_END_OF_BITMAP) {
                        /* no further bit found: no more requests to
                         * generate, and the timer is not re-armed */
                        device->bm_resync_fo = drbd_bm_bits(device);
                        put_ldev(device);
                        return 0;
                }

                sector = BM_BIT_TO_SECT(bit);

                if (drbd_rs_should_slow_down(device, sector) ||
                    drbd_try_rs_begin_io(device, sector)) {
                        /* retry this very bit on the next timer run */
                        device->bm_resync_fo = bit;
                        goto requeue;
                }
                device->bm_resync_fo = bit + 1;

                if (unlikely(drbd_bm_test_bit(device, bit) == 0)) {
                        /* the bit tests clear after the begin_io above:
                         * undo the begin_io and search on */
                        drbd_rs_complete_io(device, sector);
                        goto next_sector;
                }

#if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE
                /* try to find some adjacent bits.
                 * we stop if we have already the maximum req size.
                 *
                 * Additionally always align bigger requests, in order to
                 * be prepared for all stripe sizes of software RAIDs.
                 */
                align = 1;
                /* NOTE(review): rollback_i is only assigned inside this #if
                 * block, but the -EAGAIN case below reads it
                 * unconditionally. */
                rollback_i = i;
                for (;;) {
                        if (size + BM_BLOCK_SIZE > max_bio_size)
                                break;

                        /* Be always aligned */
                        if (sector & ((1<<(align+3))-1))
                                break;

                        /* do not cross extent boundaries */
                        if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0)
                                break;
                        /* now, is it actually dirty, after all?
                         * caution, drbd_bm_test_bit is tri-state for some
                         * obscure reason; ( b == 0 ) would get the out-of-band
                         * only accidentally right because of the "oddly sized"
                         * adjustment below */
                        if (drbd_bm_test_bit(device, bit+1) != 1)
                                break;
                        bit++;
                        size += BM_BLOCK_SIZE;
                        if ((BM_BLOCK_SIZE << align) <= size)
                                align++;
                        /* every merged block counts against this turn's budget */
                        i++;
                }
                /* if we merged some,
                 * reset the offset to start the next drbd_bm_find_next from */
                if (size > BM_BLOCK_SIZE)
                        device->bm_resync_fo = bit + 1;
#endif

                /* adjust very last sectors, in case we are oddly sized */
                if (sector + (size>>9) > capacity)
                        size = (capacity-sector)<<9;
                if (device->connection->agreed_pro_version >= 89 && device->connection->csums_tfm) {
                        switch (read_for_csum(device, sector, size)) {
                        case -EIO: /* Disk failure */
                                put_ldev(device);
                                return -EIO;
                        case -EAGAIN: /* allocation failed, or ldev busy */
                                /* undo the begin_io, rewind the search
                                 * offset to this request's first bit and
                                 * discount the blocks merged into it */
                                drbd_rs_complete_io(device, sector);
                                device->bm_resync_fo = BM_SECT_TO_BIT(sector);
                                i = rollback_i;
                                goto requeue;
                        case 0:
                                /* everything ok */
                                break;
                        default:
                                BUG();
                        }
                } else {
                        int err;

                        inc_rs_pending(device);
                        err = drbd_send_drequest(device, P_RS_DATA_REQUEST,
                                                 sector, size, ID_SYNCER);
                        if (err) {
                                dev_err(DEV, "drbd_send_drequest() failed, aborting...\n");
                                dec_rs_pending(device);
                                put_ldev(device);
                                return err;
                        }
                }
        }

        if (device->bm_resync_fo >= drbd_bm_bits(device)) {
                /* last syncer _request_ was sent,
                 * but the P_RS_DATA_REPLY not yet received.  sync will end (and
                 * next sync group will resume), as soon as we receive the last
                 * resync data block, and the last bit is cleared.
                 * until then resync "work" is "inactive" ...
                 */
                put_ldev(device);
                return 0;
        }

 requeue:
        /* account the i blocks requested in this turn (shifted by
         * BM_BLOCK_SHIFT - 9) as in flight, then run again after
         * SLEEP_TIME */
        device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
        mod_timer(&device->resync_timer, jiffies + SLEEP_TIME);
        put_ldev(device);
        return 0;
}
 726
 727 static int w_make_ov_request(struct drbd_work *w, int cancel)
 728 {
 729         struct drbd_device *device = w->device;
 730         int number, i, size;
 731         sector_t sector;
 732         const sector_t capacity = drbd_get_capacity(device->this_bdev);
 733         bool stop_sector_reached = false;
 734
 735         if (unlikely(cancel))
 736                 return 1;
 737
 738         number = drbd_rs_number_requests(device);
 739
 740         sector = device->ov_position;
 741         for (i = 0; i < number; i++) {
 742                 if (sector >= capacity)
 743                         return 1;
 744
 745                 /* We check for "finished" only in the reply path:
 746                  * w_e_end_ov_reply().
 747                  * We need to send at least one request out. */
 748                 stop_sector_reached = i > 0
 749                         && verify_can_do_stop_sector(device)
 750                         && sector >= device->ov_stop_sector;
 751                 if (stop_sector_reached)
 752                         break;
 753
 754                 size = BM_BLOCK_SIZE;
 755
 756                 if (drbd_rs_should_slow_down(device, sector) ||
 757                     drbd_try_rs_begin_io(device, sector)) {
 758                         device->ov_position = sector;
 759                         goto requeue;
 760                 }
 761
 762                 if (sector + (size>>9) > capacity)
 763                         size = (capacity-sector)<<9;
 764
 765                 inc_rs_pending(device);
 766                 if (drbd_send_ov_request(device, sector, size)) {
 767                         dec_rs_pending(device);
 768                         return 0;
 769                 }
 770                 sector += BM_SECT_PER_BIT;
 771         }
 772         device->ov_position = sector;
 773
 774  requeue:
 775         device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
 776         if (i == 0 || !stop_sector_reached)
 777                 mod_timer(&device->resync_timer, jiffies + SLEEP_TIME);
 778         return 1;
 779 }
 780
 781 int w_ov_finished(struct drbd_work *w, int cancel)
 782 {
 783         struct drbd_device *device = w->device;
 784         kfree(w);
 785         ov_out_of_sync_print(device);
 786         drbd_resync_finished(device);
 787
 788         return 0;
 789 }
 790
 791 static int w_resync_finished(struct drbd_work *w, int cancel)
 792 {
 793         struct drbd_device *device = w->device;
 794         kfree(w);
 795
 796         drbd_resync_finished(device);
 797
 798         return 0;
 799 }
 800
 801 static void ping_peer(struct drbd_device *device)
 802 {
 803         struct drbd_connection *connection = device->connection;
 804
 805         clear_bit(GOT_PING_ACK, &connection->flags);
 806         request_ping(connection);
 807         wait_event(connection->ping_wait,
 808                    test_bit(GOT_PING_ACK, &connection->flags) || device->state.conn < C_CONNECTED);
 809 }
 810
 811 int drbd_resync_finished(struct drbd_device *device)
 812 {
 813         unsigned long db, dt, dbdt;
 814         unsigned long n_oos;
 815         union drbd_state os, ns;
 816         struct drbd_work *w;
 817         char *khelper_cmd = NULL;
 818         int verify_done = 0;
 819
 820         /* Remove all elements from the resync LRU. Since future actions
 821          * might set bits in the (main) bitmap, then the entries in the
 822          * resync LRU would be wrong. */
 823         if (drbd_rs_del_all(device)) {
 824                 /* In case this is not possible now, most probably because
 825                  * there are P_RS_DATA_REPLY Packets lingering on the worker's
 826                  * queue (or even the read operations for those packets
 827                  * is not finished by now).   Retry in 100ms. */
 828
 829                 schedule_timeout_interruptible(HZ / 10);
 830                 w = kmalloc(sizeof(struct drbd_work), GFP_ATOMIC);
 831                 if (w) {
 832                         w->cb = w_resync_finished;
 833                         w->device = device;
 834                         drbd_queue_work(&device->connection->sender_work, w);
 835                         return 1;
 836                 }
 837                 dev_err(DEV, "Warn failed to drbd_rs_del_all() and to kmalloc(w).\n");
 838         }
 839
 840         dt = (jiffies - device->rs_start - device->rs_paused) / HZ;
 841         if (dt <= 0)
 842                 dt = 1;
 843
 844         db = device->rs_total;
 845         /* adjust for verify start and stop sectors, respective reached position */
 846         if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
 847                 db -= device->ov_left;
 848
 849         dbdt = Bit2KB(db/dt);
 850         device->rs_paused /= HZ;
 851
 852         if (!get_ldev(device))
 853                 goto out;
 854
 855         ping_peer(device);
 856
 857         spin_lock_irq(&device->connection->req_lock);
 858         os = drbd_read_state(device);
 859
 860         verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T);
 861
 862         /* This protects us against multiple calls (that can happen in the presence
 863            of application IO), and against connectivity loss just before we arrive here. */
 864         if (os.conn <= C_CONNECTED)
 865                 goto out_unlock;
 866
 867         ns = os;
 868         ns.conn = C_CONNECTED;
 869
 870         dev_info(DEV, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
 871              verify_done ? "Online verify" : "Resync",
 872              dt + device->rs_paused, device->rs_paused, dbdt);
 873
 874         n_oos = drbd_bm_total_weight(device);
 875
 876         if (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) {
 877                 if (n_oos) {
 878                         dev_alert(DEV, "Online verify found %lu %dk block out of sync!\n",
 879                               n_oos, Bit2KB(1));
 880                         khelper_cmd = "out-of-sync";
 881                 }
 882         } else {
 883                 D_ASSERT((n_oos - device->rs_failed) == 0);
 884
 885                 if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T)
 886                         khelper_cmd = "after-resync-target";
 887
 888                 if (device->connection->csums_tfm && device->rs_total) {
 889                         const unsigned long s = device->rs_same_csum;
 890                         const unsigned long t = device->rs_total;
 891                         const int ratio =
 892                                 (t == 0)     ? 0 :
 893                         (t < 100000) ? ((s*100)/t) : (s/(t/100));
 894                         dev_info(DEV, "%u %% had equal checksums, eliminated: %luK; "
 895                              "transferred %luK total %luK\n",
 896                              ratio,
 897                              Bit2KB(device->rs_same_csum),
 898                              Bit2KB(device->rs_total - device->rs_same_csum),
 899                              Bit2KB(device->rs_total));
 900                 }
 901         }
 902
 903         if (device->rs_failed) {
 904                 dev_info(DEV, "            %lu failed blocks\n", device->rs_failed);
 905
 906                 if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
 907                         ns.disk = D_INCONSISTENT;
 908                         ns.pdsk = D_UP_TO_DATE;
 909                 } else {
 910                         ns.disk = D_UP_TO_DATE;
 911                         ns.pdsk = D_INCONSISTENT;
 912                 }
 913         } else {
 914                 ns.disk = D_UP_TO_DATE;
 915                 ns.pdsk = D_UP_TO_DATE;
 916
 917                 if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
 918                         if (device->p_uuid) {
 919                                 int i;
 920                                 for (i = UI_BITMAP ; i <= UI_HISTORY_END ; i++)
 921                                         _drbd_uuid_set(device, i, device->p_uuid[i]);
 922                                 drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_CURRENT]);
 923                                 _drbd_uuid_set(device, UI_CURRENT, device->p_uuid[UI_CURRENT]);
 924                         } else {
 925                                 dev_err(DEV, "device->p_uuid is NULL! BUG\n");
 926                         }
 927                 }
 928
 929                 if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) {
 930                         /* for verify runs, we don't update uuids here,
 931                          * so there would be nothing to report. */
 932                         drbd_uuid_set_bm(device, 0UL);
 933                         drbd_print_uuids(device, "updated UUIDs");
 934                         if (device->p_uuid) {
 935                                 /* Now the two UUID sets are equal, update what we
 936                                  * know of the peer. */
 937                                 int i;
 938                                 for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
 939                                         device->p_uuid[i] = device->ldev->md.uuid[i];
 940                         }
 941                 }
 942         }
 943
 944         _drbd_set_state(device, ns, CS_VERBOSE, NULL);
 945 out_unlock:
 946         spin_unlock_irq(&device->connection->req_lock);
 947         put_ldev(device);
 948 out:
 949         device->rs_total  = 0;
 950         device->rs_failed = 0;
 951         device->rs_paused = 0;
 952
 953         /* reset start sector, if we reached end of device */
 954         if (verify_done && device->ov_left == 0)
 955                 device->ov_start_sector = 0;
 956
 957         drbd_md_sync(device);
 958
 959         if (khelper_cmd)
 960                 drbd_khelper(device, khelper_cmd);
 961
 962         return 1;
 963 }
 964
 965 /* helper */
 966 static void move_to_net_ee_or_free(struct drbd_device *device, struct drbd_peer_request *peer_req)
 967 {
 968         if (drbd_peer_req_has_active_page(peer_req)) {
 969                 /* This might happen if sendpage() has not finished */
 970                 int i = (peer_req->i.size + PAGE_SIZE -1) >> PAGE_SHIFT;
 971                 atomic_add(i, &device->pp_in_use_by_net);
 972                 atomic_sub(i, &device->pp_in_use);
 973                 spin_lock_irq(&device->connection->req_lock);
 974                 list_add_tail(&peer_req->w.list, &device->net_ee);
 975                 spin_unlock_irq(&device->connection->req_lock);
 976                 wake_up(&drbd_pp_wait);
 977         } else
 978                 drbd_free_peer_req(device, peer_req);
 979 }
 980
 981 /**
 982  * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST
 983  * @device:     DRBD device.
 984  * @w:          work object.
 985  * @cancel:     The connection will be closed anyways
 986  */
 987 int w_e_end_data_req(struct drbd_work *w, int cancel)
 988 {
 989         struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
 990         struct drbd_device *device = w->device;
 991         int err;
 992
 993         if (unlikely(cancel)) {
 994                 drbd_free_peer_req(device, peer_req);
 995                 dec_unacked(device);
 996                 return 0;
 997         }
 998
 999         if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1000                 err = drbd_send_block(device, P_DATA_REPLY, peer_req);
1001         } else {
1002                 if (__ratelimit(&drbd_ratelimit_state))
1003                         dev_err(DEV, "Sending NegDReply. sector=%llus.\n",
1004                             (unsigned long long)peer_req->i.sector);
1005
1006                 err = drbd_send_ack(device, P_NEG_DREPLY, peer_req);
1007         }
1008
1009         dec_unacked(device);
1010
1011         move_to_net_ee_or_free(device, peer_req);
1012
1013         if (unlikely(err))
1014                 dev_err(DEV, "drbd_send_block() failed\n");
1015         return err;
1016 }
1017
1018 /**
1019  * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST
1020  * @device:     DRBD device.
1021  * @w:          work object.
1022  * @cancel:     The connection will be closed anyways
1023  */
1024 int w_e_end_rsdata_req(struct drbd_work *w, int cancel)
1025 {
1026         struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
1027         struct drbd_device *device = w->device;
1028         int err;
1029
1030         if (unlikely(cancel)) {
1031                 drbd_free_peer_req(device, peer_req);
1032                 dec_unacked(device);
1033                 return 0;
1034         }
1035
1036         if (get_ldev_if_state(device, D_FAILED)) {
1037                 drbd_rs_complete_io(device, peer_req->i.sector);
1038                 put_ldev(device);
1039         }
1040
1041         if (device->state.conn == C_AHEAD) {
1042                 err = drbd_send_ack(device, P_RS_CANCEL, peer_req);
1043         } else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1044                 if (likely(device->state.pdsk >= D_INCONSISTENT)) {
1045                         inc_rs_pending(device);
1046                         err = drbd_send_block(device, P_RS_DATA_REPLY, peer_req);
1047                 } else {
1048                         if (__ratelimit(&drbd_ratelimit_state))
1049                                 dev_err(DEV, "Not sending RSDataReply, "
1050                                     "partner DISKLESS!\n");
1051                         err = 0;
1052                 }
1053         } else {
1054                 if (__ratelimit(&drbd_ratelimit_state))
1055                         dev_err(DEV, "Sending NegRSDReply. sector %llus.\n",
1056                             (unsigned long long)peer_req->i.sector);
1057
1058                 err = drbd_send_ack(device, P_NEG_RS_DREPLY, peer_req);
1059
1060                 /* update resync data with failure */
1061                 drbd_rs_failed_io(device, peer_req->i.sector, peer_req->i.size);
1062         }
1063
1064         dec_unacked(device);
1065
1066         move_to_net_ee_or_free(device, peer_req);
1067
1068         if (unlikely(err))
1069                 dev_err(DEV, "drbd_send_block() failed\n");
1070         return err;
1071 }
1072
1073 int w_e_end_csum_rs_req(struct drbd_work *w, int cancel)
1074 {
1075         struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
1076         struct drbd_device *device = w->device;
1077         struct digest_info *di;
1078         int digest_size;
1079         void *digest = NULL;
1080         int err, eq = 0;
1081
1082         if (unlikely(cancel)) {
1083                 drbd_free_peer_req(device, peer_req);
1084                 dec_unacked(device);
1085                 return 0;
1086         }
1087
1088         if (get_ldev(device)) {
1089                 drbd_rs_complete_io(device, peer_req->i.sector);
1090                 put_ldev(device);
1091         }
1092
1093         di = peer_req->digest;
1094
1095         if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1096                 /* quick hack to try to avoid a race against reconfiguration.
1097                  * a real fix would be much more involved,
1098                  * introducing more locking mechanisms */
1099                 if (device->connection->csums_tfm) {
1100                         digest_size = crypto_hash_digestsize(device->connection->csums_tfm);
1101                         D_ASSERT(digest_size == di->digest_size);
1102                         digest = kmalloc(digest_size, GFP_NOIO);
1103                 }
1104                 if (digest) {
1105                         drbd_csum_ee(device, device->connection->csums_tfm, peer_req, digest);
1106                         eq = !memcmp(digest, di->digest, digest_size);
1107                         kfree(digest);
1108                 }
1109
1110                 if (eq) {
1111                         drbd_set_in_sync(device, peer_req->i.sector, peer_req->i.size);
1112                         /* rs_same_csums unit is BM_BLOCK_SIZE */
1113                         device->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT;
1114                         err = drbd_send_ack(device, P_RS_IS_IN_SYNC, peer_req);
1115                 } else {
1116                         inc_rs_pending(device);
1117                         peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */
1118                         peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */
1119                         kfree(di);
1120                         err = drbd_send_block(device, P_RS_DATA_REPLY, peer_req);
1121                 }
1122         } else {
1123                 err = drbd_send_ack(device, P_NEG_RS_DREPLY, peer_req);
1124                 if (__ratelimit(&drbd_ratelimit_state))
1125                         dev_err(DEV, "Sending NegDReply. I guess it gets messy.\n");
1126         }
1127
1128         dec_unacked(device);
1129         move_to_net_ee_or_free(device, peer_req);
1130
1131         if (unlikely(err))
1132                 dev_err(DEV, "drbd_send_block/ack() failed\n");
1133         return err;
1134 }
1135
1136 int w_e_end_ov_req(struct drbd_work *w, int cancel)
1137 {
1138         struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
1139         struct drbd_device *device = w->device;
1140         sector_t sector = peer_req->i.sector;
1141         unsigned int size = peer_req->i.size;
1142         int digest_size;
1143         void *digest;
1144         int err = 0;
1145
1146         if (unlikely(cancel))
1147                 goto out;
1148
1149         digest_size = crypto_hash_digestsize(device->connection->verify_tfm);
1150         digest = kmalloc(digest_size, GFP_NOIO);
1151         if (!digest) {
1152                 err = 1;        /* terminate the connection in case the allocation failed */
1153                 goto out;
1154         }
1155
1156         if (likely(!(peer_req->flags & EE_WAS_ERROR)))
1157                 drbd_csum_ee(device, device->connection->verify_tfm, peer_req, digest);
1158         else
1159                 memset(digest, 0, digest_size);
1160
1161         /* Free e and pages before send.
1162          * In case we block on congestion, we could otherwise run into
1163          * some distributed deadlock, if the other side blocks on
1164          * congestion as well, because our receiver blocks in
1165          * drbd_alloc_pages due to pp_in_use > max_buffers. */
1166         drbd_free_peer_req(device, peer_req);
1167         peer_req = NULL;
1168         inc_rs_pending(device);
1169         err = drbd_send_drequest_csum(device, sector, size, digest, digest_size, P_OV_REPLY);
1170         if (err)
1171                 dec_rs_pending(device);
1172         kfree(digest);
1173
1174 out:
1175         if (peer_req)
1176                 drbd_free_peer_req(device, peer_req);
1177         dec_unacked(device);
1178         return err;
1179 }
1180
1181 void drbd_ov_out_of_sync_found(struct drbd_device *device, sector_t sector, int size)
1182 {
1183         if (device->ov_last_oos_start + device->ov_last_oos_size == sector) {
1184                 device->ov_last_oos_size += size>>9;
1185         } else {
1186                 device->ov_last_oos_start = sector;
1187                 device->ov_last_oos_size = size>>9;
1188         }
1189         drbd_set_out_of_sync(device, sector, size);
1190 }
1191
1192 int w_e_end_ov_reply(struct drbd_work *w, int cancel)
1193 {
1194         struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
1195         struct drbd_device *device = w->device;
1196         struct digest_info *di;
1197         void *digest;
1198         sector_t sector = peer_req->i.sector;
1199         unsigned int size = peer_req->i.size;
1200         int digest_size;
1201         int err, eq = 0;
1202         bool stop_sector_reached = false;
1203
1204         if (unlikely(cancel)) {
1205                 drbd_free_peer_req(device, peer_req);
1206                 dec_unacked(device);
1207                 return 0;
1208         }
1209
1210         /* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all
1211          * the resync lru has been cleaned up already */
1212         if (get_ldev(device)) {
1213                 drbd_rs_complete_io(device, peer_req->i.sector);
1214                 put_ldev(device);
1215         }
1216
1217         di = peer_req->digest;
1218
1219         if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1220                 digest_size = crypto_hash_digestsize(device->connection->verify_tfm);
1221                 digest = kmalloc(digest_size, GFP_NOIO);
1222                 if (digest) {
1223                         drbd_csum_ee(device, device->connection->verify_tfm, peer_req, digest);
1224
1225                         D_ASSERT(digest_size == di->digest_size);
1226                         eq = !memcmp(digest, di->digest, digest_size);
1227                         kfree(digest);
1228                 }
1229         }
1230
1231         /* Free peer_req and pages before send.
1232          * In case we block on congestion, we could otherwise run into
1233          * some distributed deadlock, if the other side blocks on
1234          * congestion as well, because our receiver blocks in
1235          * drbd_alloc_pages due to pp_in_use > max_buffers. */
1236         drbd_free_peer_req(device, peer_req);
1237         if (!eq)
1238                 drbd_ov_out_of_sync_found(device, sector, size);
1239         else
1240                 ov_out_of_sync_print(device);
1241
1242         err = drbd_send_ack_ex(device, P_OV_RESULT, sector, size,
1243                                eq ? ID_IN_SYNC : ID_OUT_OF_SYNC);
1244
1245         dec_unacked(device);
1246
1247         --device->ov_left;
1248
1249         /* let's advance progress step marks only for every other megabyte */
1250         if ((device->ov_left & 0x200) == 0x200)
1251                 drbd_advance_rs_marks(device, device->ov_left);
1252
1253         stop_sector_reached = verify_can_do_stop_sector(device) &&
1254                 (sector + (size>>9)) >= device->ov_stop_sector;
1255
1256         if (device->ov_left == 0 || stop_sector_reached) {
1257                 ov_out_of_sync_print(device);
1258                 drbd_resync_finished(device);
1259         }
1260
1261         return err;
1262 }
1263
1264 int w_prev_work_done(struct drbd_work *w, int cancel)
1265 {
1266         struct drbd_wq_barrier *b = container_of(w, struct drbd_wq_barrier, w);
1267
1268         complete(&b->done);
1269         return 0;
1270 }
1271
1272 /* FIXME
1273  * We need to track the number of pending barrier acks,
1274  * and to be able to wait for them.
1275  * See also comment in drbd_adm_attach before drbd_suspend_io.
1276  */
1277 static int drbd_send_barrier(struct drbd_connection *connection)
1278 {
1279         struct p_barrier *p;
1280         struct drbd_socket *sock;
1281
1282         sock = &connection->data;
1283         p = conn_prepare_command(connection, sock);
1284         if (!p)
1285                 return -EIO;
1286         p->barrier = connection->send.current_epoch_nr;
1287         p->pad = 0;
1288         connection->send.current_epoch_writes = 0;
1289
1290         return conn_send_command(connection, sock, P_BARRIER, sizeof(*p), NULL, 0);
1291 }
1292
1293 int w_send_write_hint(struct drbd_work *w, int cancel)
1294 {
1295         struct drbd_device *device = w->device;
1296         struct drbd_socket *sock;
1297
1298         if (cancel)
1299                 return 0;
1300         sock = &device->connection->data;
1301         if (!drbd_prepare_command(device, sock))
1302                 return -EIO;
1303         return drbd_send_command(device, sock, P_UNPLUG_REMOTE, 0, NULL, 0);
1304 }
1305
1306 static void re_init_if_first_write(struct drbd_connection *connection, unsigned int epoch)
1307 {
1308         if (!connection->send.seen_any_write_yet) {
1309                 connection->send.seen_any_write_yet = true;
1310                 connection->send.current_epoch_nr = epoch;
1311                 connection->send.current_epoch_writes = 0;
1312         }
1313 }
1314
1315 static void maybe_send_barrier(struct drbd_connection *connection, unsigned int epoch)
1316 {
1317         /* re-init if first write on this connection */
1318         if (!connection->send.seen_any_write_yet)
1319                 return;
1320         if (connection->send.current_epoch_nr != epoch) {
1321                 if (connection->send.current_epoch_writes)
1322                         drbd_send_barrier(connection);
1323                 connection->send.current_epoch_nr = epoch;
1324         }
1325 }
1326
1327 int w_send_out_of_sync(struct drbd_work *w, int cancel)
1328 {
1329         struct drbd_request *req = container_of(w, struct drbd_request, w);
1330         struct drbd_device *device = w->device;
1331         struct drbd_connection *connection = device->connection;
1332         int err;
1333
1334         if (unlikely(cancel)) {
1335                 req_mod(req, SEND_CANCELED);
1336                 return 0;
1337         }
1338
1339         /* this time, no connection->send.current_epoch_writes++;
1340          * If it was sent, it was the closing barrier for the last
1341          * replicated epoch, before we went into AHEAD mode.
1342          * No more barriers will be sent, until we leave AHEAD mode again. */
1343         maybe_send_barrier(connection, req->epoch);
1344
1345         err = drbd_send_out_of_sync(device, req);
1346         req_mod(req, OOS_HANDED_TO_NETWORK);
1347
1348         return err;
1349 }
1350
1351 /**
1352  * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request
1353  * @device:     DRBD device.
1354  * @w:          work object.
1355  * @cancel:     The connection will be closed anyways
1356  */
1357 int w_send_dblock(struct drbd_work *w, int cancel)
1358 {
1359         struct drbd_request *req = container_of(w, struct drbd_request, w);
1360         struct drbd_device *device = w->device;
1361         struct drbd_connection *connection = device->connection;
1362         int err;
1363
1364         if (unlikely(cancel)) {
1365                 req_mod(req, SEND_CANCELED);
1366                 return 0;
1367         }
1368
1369         re_init_if_first_write(connection, req->epoch);
1370         maybe_send_barrier(connection, req->epoch);
1371         connection->send.current_epoch_writes++;
1372
1373         err = drbd_send_dblock(device, req);
1374         req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
1375
1376         return err;
1377 }
1378
1379 /**
1380  * w_send_read_req() - Worker callback to send a read request (P_DATA_REQUEST) packet
1381  * @device:     DRBD device.
1382  * @w:          work object.
1383  * @cancel:     The connection will be closed anyways
1384  */
1385 int w_send_read_req(struct drbd_work *w, int cancel)
1386 {
1387         struct drbd_request *req = container_of(w, struct drbd_request, w);
1388         struct drbd_device *device = w->device;
1389         struct drbd_connection *connection = device->connection;
1390         int err;
1391
1392         if (unlikely(cancel)) {
1393                 req_mod(req, SEND_CANCELED);
1394                 return 0;
1395         }
1396
1397         /* Even read requests may close a write epoch,
1398          * if there was any yet. */
1399         maybe_send_barrier(connection, req->epoch);
1400
1401         err = drbd_send_drequest(device, P_DATA_REQUEST, req->i.sector, req->i.size,
1402                                  (unsigned long)req);
1403
1404         req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
1405
1406         return err;
1407 }
1408
1409 int w_restart_disk_io(struct drbd_work *w, int cancel)
1410 {
1411         struct drbd_request *req = container_of(w, struct drbd_request, w);
1412         struct drbd_device *device = w->device;
1413
1414         if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
1415                 drbd_al_begin_io(device, &req->i, false);
1416
1417         drbd_req_make_private_bio(req, req->master_bio);
1418         req->private_bio->bi_bdev = device->ldev->backing_bdev;
1419         generic_make_request(req->private_bio);
1420
1421         return 0;
1422 }
1423
1424 static int _drbd_may_sync_now(struct drbd_device *device)
1425 {
1426         struct drbd_device *odev = device;
1427         int resync_after;
1428
1429         while (1) {
1430                 if (!odev->ldev || odev->state.disk == D_DISKLESS)
1431                         return 1;
1432                 rcu_read_lock();
1433                 resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
1434                 rcu_read_unlock();
1435                 if (resync_after == -1)
1436                         return 1;
1437                 odev = minor_to_device(resync_after);
1438                 if (!odev)
1439                         return 1;
1440                 if ((odev->state.conn >= C_SYNC_SOURCE &&
1441                      odev->state.conn <= C_PAUSED_SYNC_T) ||
1442                     odev->state.aftr_isp || odev->state.peer_isp ||
1443                     odev->state.user_isp)
1444                         return 0;
1445         }
1446 }
1447
1448 /**
1449  * _drbd_pause_after() - Pause resync on all devices that may not resync now
1450  * @device:     DRBD device.
1451  *
1452  * Called from process context only (admin command and after_state_ch).
1453  */
1454 static int _drbd_pause_after(struct drbd_device *device)
1455 {
1456         struct drbd_device *odev;
1457         int i, rv = 0;
1458
1459         rcu_read_lock();
1460         idr_for_each_entry(&minors, odev, i) {
1461                 if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1462                         continue;
1463                 if (!_drbd_may_sync_now(odev))
1464                         rv |= (__drbd_set_state(_NS(odev, aftr_isp, 1), CS_HARD, NULL)
1465                                != SS_NOTHING_TO_DO);
1466         }
1467         rcu_read_unlock();
1468
1469         return rv;
1470 }
1471
1472 /**
1473  * _drbd_resume_next() - Resume resync on all devices that may resync now
1474  * @device:     DRBD device.
1475  *
1476  * Called from process context only (admin command and worker).
1477  */
1478 static int _drbd_resume_next(struct drbd_device *device)
1479 {
1480         struct drbd_device *odev;
1481         int i, rv = 0;
1482
1483         rcu_read_lock();
1484         idr_for_each_entry(&minors, odev, i) {
1485                 if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1486                         continue;
1487                 if (odev->state.aftr_isp) {
1488                         if (_drbd_may_sync_now(odev))
1489                                 rv |= (__drbd_set_state(_NS(odev, aftr_isp, 0),
1490                                                         CS_HARD, NULL)
1491                                        != SS_NOTHING_TO_DO) ;
1492                 }
1493         }
1494         rcu_read_unlock();
1495         return rv;
1496 }
1497
1498 void resume_next_sg(struct drbd_device *device)
1499 {
1500         write_lock_irq(&global_state_lock);
1501         _drbd_resume_next(device);
1502         write_unlock_irq(&global_state_lock);
1503 }
1504
1505 void suspend_other_sg(struct drbd_device *device)
1506 {
1507         write_lock_irq(&global_state_lock);
1508         _drbd_pause_after(device);
1509         write_unlock_irq(&global_state_lock);
1510 }
1511
1512 /* caller must hold global_state_lock */
1513 enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor)
1514 {
1515         struct drbd_device *odev;
1516         int resync_after;
1517
1518         if (o_minor == -1)
1519                 return NO_ERROR;
1520         if (o_minor < -1 || o_minor > MINORMASK)
1521                 return ERR_RESYNC_AFTER;
1522
1523         /* check for loops */
1524         odev = minor_to_device(o_minor);
1525         while (1) {
1526                 if (odev == device)
1527                         return ERR_RESYNC_AFTER_CYCLE;
1528
1529                 /* You are free to depend on diskless, non-existing,
1530                  * or not yet/no longer existing minors.
1531                  * We only reject dependency loops.
1532                  * We cannot follow the dependency chain beyond a detached or
1533                  * missing minor.
1534                  */
1535                 if (!odev || !odev->ldev || odev->state.disk == D_DISKLESS)
1536                         return NO_ERROR;
1537
1538                 rcu_read_lock();
1539                 resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
1540                 rcu_read_unlock();
1541                 /* dependency chain ends here, no cycles. */
1542                 if (resync_after == -1)
1543                         return NO_ERROR;
1544
1545                 /* follow the dependency chain */
1546                 odev = minor_to_device(resync_after);
1547         }
1548 }
1549
/*
 * drbd_resync_after_changed() - re-evaluate pause/resume after a change
 * @device:     DRBD device handed to _drbd_pause_after() and
 *              _drbd_resume_next().
 *
 * Calls _drbd_pause_after() and then _drbd_resume_next(), and repeats the
 * pair until both return zero in the same round.
 *
 * The caller must hold global_state_lock.
 */
void drbd_resync_after_changed(struct drbd_device *device)
{
        int paused;
        int resumed;

        do {
                paused = _drbd_pause_after(device);
                resumed = _drbd_resume_next(device);
        } while (paused || resumed);
}
1560
1561 void drbd_rs_controller_reset(struct drbd_device *device)
1562 {
1563         struct fifo_buffer *plan;
1564
1565         atomic_set(&device->rs_sect_in, 0);
1566         atomic_set(&device->rs_sect_ev, 0);
1567         device->rs_in_flight = 0;
1568
1569         /* Updating the RCU protected object in place is necessary since
1570            this function gets called from atomic context.
1571            It is valid since all other updates also lead to an completely
1572            empty fifo */
1573         rcu_read_lock();
1574         plan = rcu_dereference(device->rs_plan_s);
1575         plan->total = 0;
1576         fifo_set(plan, 0);
1577         rcu_read_unlock();
1578 }
1579
1580 void start_resync_timer_fn(unsigned long data)
1581 {
1582         struct drbd_device *device = (struct drbd_device *) data;
1583
1584         drbd_queue_work(&device->connection->sender_work, &device->start_resync_work);
1585 }
1586
1587 int w_start_resync(struct drbd_work *w, int cancel)
1588 {
1589         struct drbd_device *device = w->device;
1590
1591         if (atomic_read(&device->unacked_cnt) || atomic_read(&device->rs_pending_cnt)) {
1592                 dev_warn(DEV, "w_start_resync later...\n");
1593                 device->start_resync_timer.expires = jiffies + HZ/10;
1594                 add_timer(&device->start_resync_timer);
1595                 return 0;
1596         }
1597
1598         drbd_start_resync(device, C_SYNC_SOURCE);
1599         clear_bit(AHEAD_TO_SYNC_SOURCE, &device->flags);
1600         return 0;
1601 }
1602
1603 /**
1604  * drbd_start_resync() - Start the resync process
1605  * @device:     DRBD device.
1606  * @side:       Either C_SYNC_SOURCE or C_SYNC_TARGET
1607  *
1608  * This function might bring you directly into one of the
1609  * C_PAUSED_SYNC_* states.
1610  */
1611 void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
1612 {
1613         union drbd_state ns;
1614         int r;
1615
1616         if (device->state.conn >= C_SYNC_SOURCE && device->state.conn < C_AHEAD) {
1617                 dev_err(DEV, "Resync already running!\n");
1618                 return;
1619         }
1620
1621         if (!test_bit(B_RS_H_DONE, &device->flags)) {
1622                 if (side == C_SYNC_TARGET) {
1623                         /* Since application IO was locked out during C_WF_BITMAP_T and
1624                            C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET
1625                            we check that we might make the data inconsistent. */
1626                         r = drbd_khelper(device, "before-resync-target");
1627                         r = (r >> 8) & 0xff;
1628                         if (r > 0) {
1629                                 dev_info(DEV, "before-resync-target handler returned %d, "
1630                                          "dropping connection.\n", r);
1631                                 conn_request_state(device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
1632                                 return;
1633                         }
1634                 } else /* C_SYNC_SOURCE */ {
1635                         r = drbd_khelper(device, "before-resync-source");
1636                         r = (r >> 8) & 0xff;
1637                         if (r > 0) {
1638                                 if (r == 3) {
1639                                         dev_info(DEV, "before-resync-source handler returned %d, "
1640                                                  "ignoring. Old userland tools?", r);
1641                                 } else {
1642                                         dev_info(DEV, "before-resync-source handler returned %d, "
1643                                                  "dropping connection.\n", r);
1644                                         conn_request_state(device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
1645                                         return;
1646                                 }
1647                         }
1648                 }
1649         }
1650
1651         if (current == device->connection->worker.task) {
1652                 /* The worker should not sleep waiting for state_mutex,
1653                    that can take long */
1654                 if (!mutex_trylock(device->state_mutex)) {
1655                         set_bit(B_RS_H_DONE, &device->flags);
1656                         device->start_resync_timer.expires = jiffies + HZ/5;
1657                         add_timer(&device->start_resync_timer);
1658                         return;
1659                 }
1660         } else {
1661                 mutex_lock(device->state_mutex);
1662         }
1663         clear_bit(B_RS_H_DONE, &device->flags);
1664
1665         write_lock_irq(&global_state_lock);
1666         /* Did some connection breakage or IO error race with us? */
1667         if (device->state.conn < C_CONNECTED
1668         || !get_ldev_if_state(device, D_NEGOTIATING)) {
1669                 write_unlock_irq(&global_state_lock);
1670                 mutex_unlock(device->state_mutex);
1671                 return;
1672         }
1673
1674         ns = drbd_read_state(device);
1675
1676         ns.aftr_isp = !_drbd_may_sync_now(device);
1677
1678         ns.conn = side;
1679
1680         if (side == C_SYNC_TARGET)
1681                 ns.disk = D_INCONSISTENT;
1682         else /* side == C_SYNC_SOURCE */
1683                 ns.pdsk = D_INCONSISTENT;
1684
1685         r = __drbd_set_state(device, ns, CS_VERBOSE, NULL);
1686         ns = drbd_read_state(device);
1687
1688         if (ns.conn < C_CONNECTED)
1689                 r = SS_UNKNOWN_ERROR;
1690
1691         if (r == SS_SUCCESS) {
1692                 unsigned long tw = drbd_bm_total_weight(device);
1693                 unsigned long now = jiffies;
1694                 int i;
1695
1696                 device->rs_failed    = 0;
1697                 device->rs_paused    = 0;
1698                 device->rs_same_csum = 0;
1699                 device->rs_last_events = 0;
1700                 device->rs_last_sect_ev = 0;
1701                 device->rs_total     = tw;
1702                 device->rs_start     = now;
1703                 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
1704                         device->rs_mark_left[i] = tw;
1705                         device->rs_mark_time[i] = now;
1706                 }
1707                 _drbd_pause_after(device);
1708         }
1709         write_unlock_irq(&global_state_lock);
1710
1711         if (r == SS_SUCCESS) {
1712                 /* reset rs_last_bcast when a resync or verify is started,
1713                  * to deal with potential jiffies wrap. */
1714                 device->rs_last_bcast = jiffies - HZ;
1715
1716                 dev_info(DEV, "Began resync as %s (will sync %lu KB [%lu bits set]).\n",
1717                      drbd_conn_str(ns.conn),
1718                      (unsigned long) device->rs_total << (BM_BLOCK_SHIFT-10),
1719                      (unsigned long) device->rs_total);
1720                 if (side == C_SYNC_TARGET)
1721                         device->bm_resync_fo = 0;
1722
1723                 /* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid
1724                  * with w_send_oos, or the sync target will get confused as to
1725                  * how much bits to resync.  We cannot do that always, because for an
1726                  * empty resync and protocol < 95, we need to do it here, as we call
1727                  * drbd_resync_finished from here in that case.
1728                  * We drbd_gen_and_send_sync_uuid here for protocol < 96,
1729                  * and from after_state_ch otherwise. */
1730                 if (side == C_SYNC_SOURCE && device->connection->agreed_pro_version < 96)
1731                         drbd_gen_and_send_sync_uuid(device);
1732
1733                 if (device->connection->agreed_pro_version < 95 && device->rs_total == 0) {
1734                         /* This still has a race (about when exactly the peers
1735                          * detect connection loss) that can lead to a full sync
1736                          * on next handshake. In 8.3.9 we fixed this with explicit
1737                          * resync-finished notifications, but the fix
1738                          * introduces a protocol change.  Sleeping for some
1739                          * time longer than the ping interval + timeout on the
1740                          * SyncSource, to give the SyncTarget the chance to
1741                          * detect connection loss, then waiting for a ping
1742                          * response (implicit in drbd_resync_finished) reduces
1743                          * the race considerably, but does not solve it. */
1744                         if (side == C_SYNC_SOURCE) {
1745                                 struct net_conf *nc;
1746                                 int timeo;
1747
1748                                 rcu_read_lock();
1749                                 nc = rcu_dereference(device->connection->net_conf);
1750                                 timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9;
1751                                 rcu_read_unlock();
1752                                 schedule_timeout_interruptible(timeo);
1753                         }
1754                         drbd_resync_finished(device);
1755                 }
1756
1757                 drbd_rs_controller_reset(device);
1758                 /* ns.conn may already be != device->state.conn,
1759                  * we may have been paused in between, or become paused until
1760                  * the timer triggers.
1761                  * No matter, that is handled in resync_timer_fn() */
1762                 if (ns.conn == C_SYNC_TARGET)
1763                         mod_timer(&device->resync_timer, jiffies);
1764
1765                 drbd_md_sync(device);
1766         }
1767         put_ldev(device);
1768         mutex_unlock(device->state_mutex);
1769 }
1770
1771 /* If the resource already closed the current epoch, but we did not
1772  * (because we have not yet seen new requests), we should send the
1773  * corresponding barrier now.  Must be checked within the same spinlock
1774  * that is used to check for new requests. */
1775 static bool need_to_send_barrier(struct drbd_connection *connection)
1776 {
1777         if (!connection->send.seen_any_write_yet)
1778                 return false;
1779
1780         /* Skip barriers that do not contain any writes.
1781          * This may happen during AHEAD mode. */
1782         if (!connection->send.current_epoch_writes)
1783                 return false;
1784
1785         /* ->req_lock is held when requests are queued on
1786          * connection->sender_work, and put into ->transfer_log.
1787          * It is also held when ->current_tle_nr is increased.
1788          * So either there are already new requests queued,
1789          * and corresponding barriers will be send there.
1790          * Or nothing new is queued yet, so the difference will be 1.
1791          */
1792         if (atomic_read(&connection->current_tle_nr) !=
1793             connection->send.current_epoch_nr + 1)
1794                 return false;
1795
1796         return true;
1797 }
1798
1799 static bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list)
1800 {
1801         spin_lock_irq(&queue->q_lock);
1802         list_splice_init(&queue->q, work_list);
1803         spin_unlock_irq(&queue->q_lock);
1804         return !list_empty(work_list);
1805 }
1806
1807 static bool dequeue_work_item(struct drbd_work_queue *queue, struct list_head *work_list)
1808 {
1809         spin_lock_irq(&queue->q_lock);
1810         if (!list_empty(&queue->q))
1811                 list_move(queue->q.next, work_list);
1812         spin_unlock_irq(&queue->q_lock);
1813         return !list_empty(work_list);
1814 }
1815
1816 static void wait_for_work(struct drbd_connection *connection, struct list_head *work_list)
1817 {
1818         DEFINE_WAIT(wait);
1819         struct net_conf *nc;
1820         int uncork, cork;
1821
1822         dequeue_work_item(&connection->sender_work, work_list);
1823         if (!list_empty(work_list))
1824                 return;
1825
1826         /* Still nothing to do?
1827          * Maybe we still need to close the current epoch,
1828          * even if no new requests are queued yet.
1829          *
1830          * Also, poke TCP, just in case.
1831          * Then wait for new work (or signal). */
1832         rcu_read_lock();
1833         nc = rcu_dereference(connection->net_conf);
1834         uncork = nc ? nc->tcp_cork : 0;
1835         rcu_read_unlock();
1836         if (uncork) {
1837                 mutex_lock(&connection->data.mutex);
1838                 if (connection->data.socket)
1839                         drbd_tcp_uncork(connection->data.socket);
1840                 mutex_unlock(&connection->data.mutex);
1841         }
1842
1843         for (;;) {
1844                 int send_barrier;
1845                 prepare_to_wait(&connection->sender_work.q_wait, &wait, TASK_INTERRUPTIBLE);
1846                 spin_lock_irq(&connection->req_lock);
1847                 spin_lock(&connection->sender_work.q_lock);     /* FIXME get rid of this one? */
1848                 /* dequeue single item only,
1849                  * we still use drbd_queue_work_front() in some places */
1850                 if (!list_empty(&connection->sender_work.q))
1851                         list_move(connection->sender_work.q.next, work_list);
1852                 spin_unlock(&connection->sender_work.q_lock);   /* FIXME get rid of this one? */
1853                 if (!list_empty(work_list) || signal_pending(current)) {
1854                         spin_unlock_irq(&connection->req_lock);
1855                         break;
1856                 }
1857                 send_barrier = need_to_send_barrier(connection);
1858                 spin_unlock_irq(&connection->req_lock);
1859                 if (send_barrier) {
1860                         drbd_send_barrier(connection);
1861                         connection->send.current_epoch_nr++;
1862                 }
1863                 schedule();
1864                 /* may be woken up for other things but new work, too,
1865                  * e.g. if the current epoch got closed.
1866                  * In which case we send the barrier above. */
1867         }
1868         finish_wait(&connection->sender_work.q_wait, &wait);
1869
1870         /* someone may have changed the config while we have been waiting above. */
1871         rcu_read_lock();
1872         nc = rcu_dereference(connection->net_conf);
1873         cork = nc ? nc->tcp_cork : 0;
1874         rcu_read_unlock();
1875         mutex_lock(&connection->data.mutex);
1876         if (connection->data.socket) {
1877                 if (cork)
1878                         drbd_tcp_cork(connection->data.socket);
1879                 else if (!uncork)
1880                         drbd_tcp_uncork(connection->data.socket);
1881         }
1882         mutex_unlock(&connection->data.mutex);
1883 }
1884
1885 int drbd_worker(struct drbd_thread *thi)
1886 {
1887         struct drbd_connection *connection = thi->connection;
1888         struct drbd_work *w = NULL;
1889         struct drbd_device *device;
1890         LIST_HEAD(work_list);
1891         int vnr;
1892
1893         while (get_t_state(thi) == RUNNING) {
1894                 drbd_thread_current_set_cpu(thi);
1895
1896                 /* as long as we use drbd_queue_work_front(),
1897                  * we may only dequeue single work items here, not batches. */
1898                 if (list_empty(&work_list))
1899                         wait_for_work(connection, &work_list);
1900
1901                 if (signal_pending(current)) {
1902                         flush_signals(current);
1903                         if (get_t_state(thi) == RUNNING) {
1904                                 conn_warn(connection, "Worker got an unexpected signal\n");
1905                                 continue;
1906                         }
1907                         break;
1908                 }
1909
1910                 if (get_t_state(thi) != RUNNING)
1911                         break;
1912
1913                 while (!list_empty(&work_list)) {
1914                         w = list_first_entry(&work_list, struct drbd_work, list);
1915                         list_del_init(&w->list);
1916                         if (w->cb(w, connection->cstate < C_WF_REPORT_PARAMS) == 0)
1917                                 continue;
1918                         if (connection->cstate >= C_WF_REPORT_PARAMS)
1919                                 conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
1920                 }
1921         }
1922
1923         do {
1924                 while (!list_empty(&work_list)) {
1925                         w = list_first_entry(&work_list, struct drbd_work, list);
1926                         list_del_init(&w->list);
1927                         w->cb(w, 1);
1928                 }
1929                 dequeue_work_batch(&connection->sender_work, &work_list);
1930         } while (!list_empty(&work_list));
1931
1932         rcu_read_lock();
1933         idr_for_each_entry(&connection->volumes, device, vnr) {
1934                 D_ASSERT(device->state.disk == D_DISKLESS && device->state.conn == C_STANDALONE);
1935                 kref_get(&device->kref);
1936                 rcu_read_unlock();
1937                 drbd_device_cleanup(device);
1938                 kref_put(&device->kref, &drbd_minor_destroy);
1939                 rcu_read_lock();
1940         }
1941         rcu_read_unlock();
1942
1943         return 0;
1944 }