drbd: Turn w_make_ov_request and make_resync_request into "normal" functions
drivers/block/drbd/drbd_worker.c
/*
   drbd_worker.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

*/

#include <linux/module.h>
#include <linux/drbd.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/scatterlist.h>

#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h"

static int make_ov_request(struct drbd_device *, int);
static int make_resync_request(struct drbd_device *, int);

/* endio handlers:
 *   drbd_md_io_complete (defined here)
 *   drbd_request_endio (defined here)
 *   drbd_peer_request_endio (defined here)
 *   bm_async_io_complete (defined in drbd_bitmap.c)
 *
 * For all these callbacks, note the following:
 * The callbacks will be called in irq context by the IDE drivers,
 * and in Softirqs/Tasklets/BH context by the SCSI drivers.
 * Try to get the locking right :)
 *
 */

/* About the global_state_lock
   Each state transition on a device holds a read lock. In case we have
   to evaluate the resync after dependencies, we grab a write lock, because
   we need stable states on all devices for that. */
rwlock_t global_state_lock;

/* used for synchronous meta data and bitmap IO
 * submitted by drbd_md_sync_page_io()
 */
void drbd_md_io_complete(struct bio *bio, int error)
{
	struct drbd_md_io *md_io;
	struct drbd_device *device;

	md_io = (struct drbd_md_io *)bio->bi_private;
	device = container_of(md_io, struct drbd_device, md_io);

	md_io->error = error;

	/* We grabbed an extra reference in _drbd_md_sync_page_io() to be able
	 * to timeout on the lower level device, and eventually detach from it.
	 * If this io completion runs after that timeout expired, this
	 * drbd_md_put_buffer() may allow us to finally try and re-attach.
	 * During normal operation, this only puts that extra reference
	 * down to 1 again.
	 * Make sure we first drop the reference, and only then signal
	 * completion, or we may (in drbd_al_read_log()) cycle so fast into the
	 * next drbd_md_sync_page_io(), that we trigger the
	 * ASSERT(atomic_read(&device->md_io_in_use) == 1) there.
	 */
	drbd_md_put_buffer(device);
	md_io->done = 1;
	wake_up(&device->misc_wait);
	bio_put(bio);
	if (device->ldev) /* special case: drbd_md_read() during drbd_adm_attach() */
		put_ldev(device);
}
96
97/* reads on behalf of the partner,
98 * "submitted" by the receiver
99 */
a186e478 100static void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(local)
101{
102 unsigned long flags = 0;
a8cd15ba 103 struct drbd_device *device = peer_req->peer_device->device;
b411b363 104
0500813f 105 spin_lock_irqsave(&device->resource->req_lock, flags);
b30ab791 106 device->read_cnt += peer_req->i.size >> 9;
a8cd15ba 107 list_del(&peer_req->w.list);
108 if (list_empty(&device->read_ee))
109 wake_up(&device->ee_wait);
db830c46 110 if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
b30ab791 111 __drbd_chk_io_error(device, DRBD_READ_ERROR);
0500813f 112 spin_unlock_irqrestore(&device->resource->req_lock, flags);
b411b363 113
84b8c06b 114 drbd_queue_work(&first_peer_device(device)->connection->sender_work,
a8cd15ba 115 &peer_req->w);
b30ab791 116 put_ldev(device);
117}
118
119/* writes on behalf of the partner, or resync writes,
45bb912b 120 * "submitted" by the receiver, final stage. */
db830c46 121static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local)
122{
123 unsigned long flags = 0;
a8cd15ba 124 struct drbd_device *device = peer_req->peer_device->device;
181286ad 125 struct drbd_interval i;
b411b363 126 int do_wake;
579b57ed 127 u64 block_id;
b411b363 128 int do_al_complete_io;
b411b363 129
db830c46 130 /* after we moved peer_req to done_ee,
131 * we may no longer access it,
132 * it may be freed/reused already!
133 * (as soon as we release the req_lock) */
181286ad 134 i = peer_req->i;
135 do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO;
136 block_id = peer_req->block_id;
b411b363 137
0500813f 138 spin_lock_irqsave(&device->resource->req_lock, flags);
b30ab791 139 device->writ_cnt += peer_req->i.size >> 9;
a8cd15ba 140 list_move_tail(&peer_req->w.list, &device->done_ee);
b411b363 141
bb3bfe96 142 /*
5e472264 143 * Do not remove from the write_requests tree here: we did not send the
144 * Ack yet and did not wake possibly waiting conflicting requests.
145 * Removed from the tree from "drbd_process_done_ee" within the
84b8c06b 146 * appropriate dw.cb (e_end_block/e_end_resync_block) or from
147 * _drbd_clear_done_ee.
148 */
b411b363 149
b30ab791 150 do_wake = list_empty(block_id == ID_SYNCER ? &device->sync_ee : &device->active_ee);
b411b363 151
db830c46 152 if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
b30ab791 153 __drbd_chk_io_error(device, DRBD_WRITE_ERROR);
0500813f 154 spin_unlock_irqrestore(&device->resource->req_lock, flags);
b411b363 155
579b57ed 156 if (block_id == ID_SYNCER)
b30ab791 157 drbd_rs_complete_io(device, i.sector);
158
159 if (do_wake)
b30ab791 160 wake_up(&device->ee_wait);
161
162 if (do_al_complete_io)
b30ab791 163 drbd_al_complete_io(device, &i);
b411b363 164
a6b32bc3 165 wake_asender(first_peer_device(device)->connection);
b30ab791 166 put_ldev(device);
45bb912b 167}
b411b363 168
169/* writes on behalf of the partner, or resync writes,
170 * "submitted" by the receiver.
171 */
fcefa62e 172void drbd_peer_request_endio(struct bio *bio, int error)
45bb912b 173{
db830c46 174 struct drbd_peer_request *peer_req = bio->bi_private;
a8cd15ba 175 struct drbd_device *device = peer_req->peer_device->device;
176 int uptodate = bio_flagged(bio, BIO_UPTODATE);
177 int is_write = bio_data_dir(bio) == WRITE;
178
07194272 179 if (error && __ratelimit(&drbd_ratelimit_state))
d0180171 180 drbd_warn(device, "%s: error=%d s=%llus\n",
45bb912b 181 is_write ? "write" : "read", error,
db830c46 182 (unsigned long long)peer_req->i.sector);
45bb912b 183 if (!error && !uptodate) {
07194272 184 if (__ratelimit(&drbd_ratelimit_state))
d0180171 185 drbd_warn(device, "%s: setting error to -EIO s=%llus\n",
07194272 186 is_write ? "write" : "read",
db830c46 187 (unsigned long long)peer_req->i.sector);
188 /* strange behavior of some lower level drivers...
189 * fail the request by clearing the uptodate flag,
190 * but do not return any error?! */
191 error = -EIO;
192 }
193
194 if (error)
db830c46 195 set_bit(__EE_WAS_ERROR, &peer_req->flags);
196
197 bio_put(bio); /* no need for the bio anymore */
db830c46 198 if (atomic_dec_and_test(&peer_req->pending_bios)) {
45bb912b 199 if (is_write)
db830c46 200 drbd_endio_write_sec_final(peer_req);
45bb912b 201 else
db830c46 202 drbd_endio_read_sec_final(peer_req);
45bb912b 203 }
204}
205
206/* read, readA or write requests on R_PRIMARY coming from drbd_make_request
207 */
fcefa62e 208void drbd_request_endio(struct bio *bio, int error)
b411b363 209{
a115413d 210 unsigned long flags;
b411b363 211 struct drbd_request *req = bio->bi_private;
84b8c06b 212 struct drbd_device *device = req->device;
a115413d 213 struct bio_and_error m;
214 enum drbd_req_event what;
215 int uptodate = bio_flagged(bio, BIO_UPTODATE);
216
b411b363 217 if (!error && !uptodate) {
d0180171 218 drbd_warn(device, "p %s: setting error to -EIO\n",
219 bio_data_dir(bio) == WRITE ? "write" : "read");
220 /* strange behavior of some lower level drivers...
221 * fail the request by clearing the uptodate flag,
222 * but do not return any error?! */
223 error = -EIO;
224 }
225
226
227 /* If this request was aborted locally before,
228 * but now was completed "successfully",
229 * chances are that this caused arbitrary data corruption.
230 *
231 * "aborting" requests, or force-detaching the disk, is intended for
232 * completely blocked/hung local backing devices which do no longer
233 * complete requests at all, not even do error completions. In this
234 * situation, usually a hard-reset and failover is the only way out.
235 *
236 * By "aborting", basically faking a local error-completion,
237 * we allow for a more graceful switchover by cleanly migrating services.
238 * Still the affected node has to be rebooted "soon".
239 *
240 * By completing these requests, we allow the upper layers to re-use
241 * the associated data pages.
242 *
243 * If later the local backing device "recovers", and now DMAs some data
244 * from disk into the original request pages, in the best case it will
245 * just put random data into unused pages; but typically it will corrupt
246 * meanwhile completely unrelated data, causing all sorts of damage.
247 *
248 * Which means delayed successful completion,
249 * especially for READ requests,
250 * is a reason to panic().
251 *
252 * We assume that a delayed *error* completion is OK,
253 * though we still will complain noisily about it.
254 */
255 if (unlikely(req->rq_state & RQ_LOCAL_ABORTED)) {
256 if (__ratelimit(&drbd_ratelimit_state))
d0180171 257 drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n");
258
259 if (!error)
260 panic("possible random memory corruption caused by delayed completion of aborted local request\n");
261 }
262
263 /* to avoid recursion in __req_mod */
264 if (unlikely(error)) {
265 what = (bio_data_dir(bio) == WRITE)
8554df1c 266 ? WRITE_COMPLETED_WITH_ERROR
5c3c7e64 267 : (bio_rw(bio) == READ)
268 ? READ_COMPLETED_WITH_ERROR
269 : READ_AHEAD_COMPLETED_WITH_ERROR;
b411b363 270 } else
8554df1c 271 what = COMPLETED_OK;
272
273 bio_put(req->private_bio);
274 req->private_bio = ERR_PTR(error);
275
a115413d 276 /* not req_mod(), we need irqsave here! */
0500813f 277 spin_lock_irqsave(&device->resource->req_lock, flags);
a115413d 278 __req_mod(req, what, &m);
0500813f 279 spin_unlock_irqrestore(&device->resource->req_lock, flags);
b30ab791 280 put_ldev(device);
281
282 if (m.bio)
b30ab791 283 complete_master_bio(device, &m);
284}
285
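/* Checksum helpers: drbd_csum_ee() hashes the page chain of a peer request
 * (all pages are fully used except possibly the last one), drbd_csum_bio()
 * hashes the segments of a bio.  Both feed the data into the hash transform
 * one single-entry scatterlist at a time. */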
79a3c8d3 286void drbd_csum_ee(struct crypto_hash *tfm, struct drbd_peer_request *peer_req, void *digest)
287{
288 struct hash_desc desc;
289 struct scatterlist sg;
db830c46 290 struct page *page = peer_req->pages;
291 struct page *tmp;
292 unsigned len;
293
294 desc.tfm = tfm;
295 desc.flags = 0;
296
297 sg_init_table(&sg, 1);
298 crypto_hash_init(&desc);
299
300 while ((tmp = page_chain_next(page))) {
301 /* all but the last page will be fully used */
302 sg_set_page(&sg, page, PAGE_SIZE, 0);
303 crypto_hash_update(&desc, &sg, sg.length);
304 page = tmp;
305 }
306 /* and now the last, possibly only partially used page */
db830c46 307 len = peer_req->i.size & (PAGE_SIZE - 1);
308 sg_set_page(&sg, page, len ?: PAGE_SIZE, 0);
309 crypto_hash_update(&desc, &sg, sg.length);
310 crypto_hash_final(&desc, digest);
311}
312
79a3c8d3 313void drbd_csum_bio(struct crypto_hash *tfm, struct bio *bio, void *digest)
314{
315 struct hash_desc desc;
316 struct scatterlist sg;
317 struct bio_vec bvec;
318 struct bvec_iter iter;
319
320 desc.tfm = tfm;
321 desc.flags = 0;
322
323 sg_init_table(&sg, 1);
324 crypto_hash_init(&desc);
325
326 bio_for_each_segment(bvec, bio, iter) {
327 sg_set_page(&sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset);
328 crypto_hash_update(&desc, &sg, sg.length);
329 }
330 crypto_hash_final(&desc, digest);
331}
332
9676c760 333/* MAYBE merge common code with w_e_end_ov_req */
99920dc5 334static int w_e_send_csum(struct drbd_work *w, int cancel)
b411b363 335{
336 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
337 struct drbd_device *device = peer_req->peer_device->device;
338 int digest_size;
339 void *digest;
99920dc5 340 int err = 0;
b411b363 341
342 if (unlikely(cancel))
343 goto out;
b411b363 344
9676c760 345 if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0))
53ea4331 346 goto out;
b411b363 347
a6b32bc3 348 digest_size = crypto_hash_digestsize(first_peer_device(device)->connection->csums_tfm);
349 digest = kmalloc(digest_size, GFP_NOIO);
350 if (digest) {
351 sector_t sector = peer_req->i.sector;
352 unsigned int size = peer_req->i.size;
79a3c8d3 353 drbd_csum_ee(first_peer_device(device)->connection->csums_tfm, peer_req, digest);
9676c760 354 /* Free peer_req and pages before send.
355 * In case we block on congestion, we could otherwise run into
356 * some distributed deadlock, if the other side blocks on
357 * congestion as well, because our receiver blocks in
c37c8ecf 358 * drbd_alloc_pages due to pp_in_use > max_buffers. */
b30ab791 359 drbd_free_peer_req(device, peer_req);
db830c46 360 peer_req = NULL;
b30ab791 361 inc_rs_pending(device);
69a22773 362 err = drbd_send_drequest_csum(first_peer_device(device), sector, size,
363 digest, digest_size,
364 P_CSUM_RS_REQUEST);
365 kfree(digest);
366 } else {
d0180171 367 drbd_err(device, "kmalloc() of digest failed.\n");
99920dc5 368 err = -ENOMEM;
53ea4331 369 }
b411b363 370
53ea4331 371out:
db830c46 372 if (peer_req)
b30ab791 373 drbd_free_peer_req(device, peer_req);
b411b363 374
99920dc5 375 if (unlikely(err))
d0180171 376 drbd_err(device, "drbd_send_drequest(..., csum) failed\n");
99920dc5 377 return err;
378}
379
380#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
381
69a22773 382static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector, int size)
b411b363 383{
69a22773 384 struct drbd_device *device = peer_device->device;
db830c46 385 struct drbd_peer_request *peer_req;
b411b363 386
b30ab791 387 if (!get_ldev(device))
80a40e43 388 return -EIO;
b411b363 389
b30ab791 390 if (drbd_rs_should_slow_down(device, sector))
391 goto defer;
392
393 /* GFP_TRY, because if there is no memory available right now, this may
394 * be rescheduled for later. It is "only" background resync, after all. */
69a22773 395 peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER /* unused */, sector,
0db55363 396 size, GFP_TRY);
db830c46 397 if (!peer_req)
80a40e43 398 goto defer;
b411b363 399
a8cd15ba 400 peer_req->w.cb = w_e_send_csum;
0500813f 401 spin_lock_irq(&device->resource->req_lock);
a8cd15ba 402 list_add(&peer_req->w.list, &device->read_ee);
0500813f 403 spin_unlock_irq(&device->resource->req_lock);
b411b363 404
405 atomic_add(size >> 9, &device->rs_sect_ev);
406 if (drbd_submit_peer_request(device, peer_req, READ, DRBD_FAULT_RS_RD) == 0)
80a40e43 407 return 0;
b411b363 408
409 /* If it failed because of ENOMEM, retry should help. If it failed
410 * because bio_add_page failed (probably broken lower level driver),
411 * retry may or may not help.
412 * If it does not, you may need to force disconnect. */
0500813f 413 spin_lock_irq(&device->resource->req_lock);
a8cd15ba 414 list_del(&peer_req->w.list);
0500813f 415 spin_unlock_irq(&device->resource->req_lock);
22cc37a9 416
b30ab791 417 drbd_free_peer_req(device, peer_req);
80a40e43 418defer:
b30ab791 419 put_ldev(device);
80a40e43 420 return -EAGAIN;
421}
422
99920dc5 423int w_resync_timer(struct drbd_work *w, int cancel)
b411b363 424{
425 struct drbd_device *device =
426 container_of(w, struct drbd_device, resync_work);
427
b30ab791 428 switch (device->state.conn) {
63106d3c 429 case C_VERIFY_S:
d448a2e1 430 make_ov_request(device, cancel);
431 break;
432 case C_SYNC_TARGET:
d448a2e1 433 make_resync_request(device, cancel);
63106d3c 434 break;
435 }
436
99920dc5 437 return 0;
438}
439
440void resync_timer_fn(unsigned long data)
441{
b30ab791 442 struct drbd_device *device = (struct drbd_device *) data;
794abb75 443
b30ab791 444 if (list_empty(&device->resync_work.list))
445 drbd_queue_work(&first_peer_device(device)->connection->sender_work,
446 &device->resync_work);
447}
448
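/* The fifo_buffer below holds the resync controller's "plan ahead" values,
 * one slot per future SLEEP_TIME step: fifo_push() returns what was planned
 * for the current step and queues a new value in its place, fifo_add_val()
 * spreads a correction evenly over all steps. */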
449static void fifo_set(struct fifo_buffer *fb, int value)
450{
451 int i;
452
453 for (i = 0; i < fb->size; i++)
f10f2623 454 fb->values[i] = value;
455}
456
457static int fifo_push(struct fifo_buffer *fb, int value)
458{
459 int ov;
460
461 ov = fb->values[fb->head_index];
462 fb->values[fb->head_index++] = value;
463
464 if (fb->head_index >= fb->size)
465 fb->head_index = 0;
466
467 return ov;
468}
469
470static void fifo_add_val(struct fifo_buffer *fb, int value)
471{
472 int i;
473
474 for (i = 0; i < fb->size; i++)
475 fb->values[i] += value;
476}
477
478struct fifo_buffer *fifo_alloc(int fifo_size)
479{
480 struct fifo_buffer *fb;
481
8747d30a 482 fb = kzalloc(sizeof(struct fifo_buffer) + sizeof(int) * fifo_size, GFP_NOIO);
483 if (!fb)
484 return NULL;
485
486 fb->head_index = 0;
487 fb->size = fifo_size;
488 fb->total = 0;
489
490 return fb;
491}
492
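/* Dynamic resync speed controller, driven by the c_* settings in disk_conf.
 * Once per SLEEP_TIME tick it compares the resync data still in flight with
 * the desired fill level (c_fill_target, or sect_in * c_delay_target if no
 * fill target is configured), spreads the difference as a correction over
 * the rs_plan_s fifo (roughly c_plan_ahead ticks), and clamps the result to
 * c_max_rate.  Returns the number of sectors to request in this turn. */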
b30ab791 493static int drbd_rs_controller(struct drbd_device *device)
778f271d 494{
daeda1cc 495 struct disk_conf *dc;
496 unsigned int sect_in; /* Number of sectors that came in since the last turn */
497 unsigned int want; /* The number of sectors we want in the proxy */
498 int req_sect; /* Number of sectors to request in this turn */
499 int correction; /* Number of sectors more we need in the proxy*/
500 int cps; /* correction per invocation of drbd_rs_controller() */
501 int steps; /* Number of time steps to plan ahead */
502 int curr_corr;
503 int max_sect;
813472ce 504 struct fifo_buffer *plan;
778f271d 505
506 sect_in = atomic_xchg(&device->rs_sect_in, 0); /* Number of sectors that came in */
507 device->rs_in_flight -= sect_in;
778f271d 508
509 dc = rcu_dereference(device->ldev->disk_conf);
510 plan = rcu_dereference(device->rs_plan_s);
778f271d 511
813472ce 512 steps = plan->size; /* (dc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; */
778f271d 513
b30ab791 514 if (device->rs_in_flight + sect_in == 0) { /* At start of resync */
daeda1cc 515 want = ((dc->resync_rate * 2 * SLEEP_TIME) / HZ) * steps;
778f271d 516 } else { /* normal path */
517 want = dc->c_fill_target ? dc->c_fill_target :
518 sect_in * dc->c_delay_target * HZ / (SLEEP_TIME * 10);
519 }
520
b30ab791 521 correction = want - device->rs_in_flight - plan->total;
522
523 /* Plan ahead */
524 cps = correction / steps;
525 fifo_add_val(plan, cps);
526 plan->total += cps * steps;
527
528 /* What we do in this step */
529 curr_corr = fifo_push(plan, 0);
530 plan->total -= curr_corr;
531
532 req_sect = sect_in + curr_corr;
533 if (req_sect < 0)
534 req_sect = 0;
535
daeda1cc 536 max_sect = (dc->c_max_rate * 2 * SLEEP_TIME) / HZ;
537 if (req_sect > max_sect)
538 req_sect = max_sect;
539
540 /*
d0180171 541 drbd_warn(device, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n",
542 sect_in, device->rs_in_flight, want, correction,
543 steps, cps, device->rs_planed, curr_corr, req_sect);
544 */
545
546 return req_sect;
547}
548
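/* Translate the current sync rate into a request budget for one SLEEP_TIME
 * tick: ask the controller above when a plan is configured, otherwise derive
 * it statically from c_sync_rate (KiB/s).  Assuming the usual 4 KiB bitmap
 * granularity (BM_BLOCK_SIZE) and a 100 ms SLEEP_TIME, the static path is
 * roughly
 *
 *	number = c_sync_rate / 40
 *
 * so e.g. a 40000 KiB/s resync rate yields about 1000 requests per tick. */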
b30ab791 549static int drbd_rs_number_requests(struct drbd_device *device)
550{
551 int number;
552
553 rcu_read_lock();
554 if (rcu_dereference(device->rs_plan_s)->size) {
555 number = drbd_rs_controller(device) >> (BM_BLOCK_SHIFT - 9);
556 device->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
e65f440d 557 } else {
558 device->c_sync_rate = rcu_dereference(device->ldev->disk_conf)->resync_rate;
559 number = SLEEP_TIME * device->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ);
e65f440d 560 }
813472ce 561 rcu_read_unlock();
e65f440d 562
563 /* ignore the amount of pending requests, the resync controller should
564 * throttle down to incoming reply rate soon enough anyways. */
565 return number;
566}
567
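/* Queue up to drbd_rs_number_requests() resync requests for this tick:
 * walk the bitmap with drbd_bm_find_next(), merge adjacent dirty bits up to
 * max_bio_size (kept aligned, to be prepared for striped lower devices),
 * and either read the block locally for a checksum based request (when
 * csums_tfm is configured) or send a plain P_RS_DATA_REQUEST.  Backs off
 * while the send buffer is more than half full and re-arms resync_timer
 * before returning. */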
d448a2e1 568static int make_resync_request(struct drbd_device *device, int cancel)
569{
570 unsigned long bit;
571 sector_t sector;
b30ab791 572 const sector_t capacity = drbd_get_capacity(device->this_bdev);
1816a2b4 573 int max_bio_size;
e65f440d 574 int number, rollback_i, size;
b411b363 575 int align, queued, sndbuf;
0f0601f4 576 int i = 0;
b411b363
PR
577
578 if (unlikely(cancel))
99920dc5 579 return 0;
b411b363 580
b30ab791 581 if (device->rs_total == 0) {
af85e8e8 582 /* empty resync? */
b30ab791 583 drbd_resync_finished(device);
99920dc5 584 return 0;
af85e8e8
LE
585 }
586
b30ab791
AG
587 if (!get_ldev(device)) {
588 /* Since we only need to access device->rsync a
589 get_ldev_if_state(device,D_FAILED) would be sufficient, but
b411b363
PR
590 to continue resync with a broken disk makes no sense at
591 all */
d0180171 592 drbd_err(device, "Disk broke down during resync!\n");
99920dc5 593 return 0;
b411b363
PR
594 }
595
b30ab791
AG
596 max_bio_size = queue_max_hw_sectors(device->rq_queue) << 9;
597 number = drbd_rs_number_requests(device);
e65f440d 598 if (number == 0)
0f0601f4 599 goto requeue;
b411b363 600
b411b363
PR
601 for (i = 0; i < number; i++) {
602 /* Stop generating RS requests, when half of the send buffer is filled */
a6b32bc3
AG
603 mutex_lock(&first_peer_device(device)->connection->data.mutex);
604 if (first_peer_device(device)->connection->data.socket) {
605 queued = first_peer_device(device)->connection->data.socket->sk->sk_wmem_queued;
606 sndbuf = first_peer_device(device)->connection->data.socket->sk->sk_sndbuf;
b411b363
PR
607 } else {
608 queued = 1;
609 sndbuf = 0;
610 }
a6b32bc3 611 mutex_unlock(&first_peer_device(device)->connection->data.mutex);
b411b363
PR
612 if (queued > sndbuf / 2)
613 goto requeue;
614
615next_sector:
616 size = BM_BLOCK_SIZE;
b30ab791 617 bit = drbd_bm_find_next(device, device->bm_resync_fo);
b411b363 618
4b0715f0 619 if (bit == DRBD_END_OF_BITMAP) {
b30ab791
AG
620 device->bm_resync_fo = drbd_bm_bits(device);
621 put_ldev(device);
99920dc5 622 return 0;
b411b363
PR
623 }
624
625 sector = BM_BIT_TO_SECT(bit);
626
b30ab791
AG
627 if (drbd_rs_should_slow_down(device, sector) ||
628 drbd_try_rs_begin_io(device, sector)) {
629 device->bm_resync_fo = bit;
b411b363
PR
630 goto requeue;
631 }
b30ab791 632 device->bm_resync_fo = bit + 1;
b411b363 633
b30ab791
AG
634 if (unlikely(drbd_bm_test_bit(device, bit) == 0)) {
635 drbd_rs_complete_io(device, sector);
b411b363
PR
636 goto next_sector;
637 }
638
1816a2b4 639#if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE
b411b363
PR
640 /* try to find some adjacent bits.
641 * we stop if we have already the maximum req size.
642 *
643 * Additionally always align bigger requests, in order to
644 * be prepared for all stripe sizes of software RAIDs.
b411b363
PR
645 */
646 align = 1;
d207450c 647 rollback_i = i;
b411b363 648 for (;;) {
1816a2b4 649 if (size + BM_BLOCK_SIZE > max_bio_size)
b411b363
PR
650 break;
651
652 /* Be always aligned */
653 if (sector & ((1<<(align+3))-1))
654 break;
655
656 /* do not cross extent boundaries */
657 if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0)
658 break;
659 /* now, is it actually dirty, after all?
660 * caution, drbd_bm_test_bit is tri-state for some
661 * obscure reason; ( b == 0 ) would get the out-of-band
662 * only accidentally right because of the "oddly sized"
663 * adjustment below */
b30ab791 664 if (drbd_bm_test_bit(device, bit+1) != 1)
b411b363
PR
665 break;
666 bit++;
667 size += BM_BLOCK_SIZE;
668 if ((BM_BLOCK_SIZE << align) <= size)
669 align++;
670 i++;
671 }
672 /* if we merged some,
673 * reset the offset to start the next drbd_bm_find_next from */
674 if (size > BM_BLOCK_SIZE)
b30ab791 675 device->bm_resync_fo = bit + 1;
b411b363
PR
676#endif
677
678 /* adjust very last sectors, in case we are oddly sized */
679 if (sector + (size>>9) > capacity)
680 size = (capacity-sector)<<9;
a6b32bc3
AG
681 if (first_peer_device(device)->connection->agreed_pro_version >= 89 &&
682 first_peer_device(device)->connection->csums_tfm) {
69a22773 683 switch (read_for_csum(first_peer_device(device), sector, size)) {
80a40e43 684 case -EIO: /* Disk failure */
b30ab791 685 put_ldev(device);
99920dc5 686 return -EIO;
80a40e43 687 case -EAGAIN: /* allocation failed, or ldev busy */
b30ab791
AG
688 drbd_rs_complete_io(device, sector);
689 device->bm_resync_fo = BM_SECT_TO_BIT(sector);
d207450c 690 i = rollback_i;
b411b363 691 goto requeue;
80a40e43
LE
692 case 0:
693 /* everything ok */
694 break;
695 default:
696 BUG();
b411b363
PR
697 }
698 } else {
99920dc5
AG
699 int err;
700
b30ab791 701 inc_rs_pending(device);
69a22773 702 err = drbd_send_drequest(first_peer_device(device), P_RS_DATA_REQUEST,
99920dc5
AG
703 sector, size, ID_SYNCER);
704 if (err) {
d0180171 705 drbd_err(device, "drbd_send_drequest() failed, aborting...\n");
b30ab791
AG
706 dec_rs_pending(device);
707 put_ldev(device);
99920dc5 708 return err;
b411b363
PR
709 }
710 }
711 }
712
b30ab791 713 if (device->bm_resync_fo >= drbd_bm_bits(device)) {
b411b363
PR
714 /* last syncer _request_ was sent,
715 * but the P_RS_DATA_REPLY not yet received. sync will end (and
716 * next sync group will resume), as soon as we receive the last
717 * resync data block, and the last bit is cleared.
718 * until then resync "work" is "inactive" ...
719 */
b30ab791 720 put_ldev(device);
99920dc5 721 return 0;
b411b363
PR
722 }
723
724 requeue:
b30ab791
AG
725 device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
726 mod_timer(&device->resync_timer, jiffies + SLEEP_TIME);
727 put_ldev(device);
99920dc5 728 return 0;
b411b363
PR
729}
730
d448a2e1 731static int make_ov_request(struct drbd_device *device, int cancel)
b411b363
PR
732{
733 int number, i, size;
734 sector_t sector;
b30ab791 735 const sector_t capacity = drbd_get_capacity(device->this_bdev);
58ffa580 736 bool stop_sector_reached = false;
b411b363
PR
737
738 if (unlikely(cancel))
739 return 1;
740
b30ab791 741 number = drbd_rs_number_requests(device);
b411b363 742
b30ab791 743 sector = device->ov_position;
b411b363 744 for (i = 0; i < number; i++) {
58ffa580 745 if (sector >= capacity)
b411b363 746 return 1;
58ffa580
LE
747
748 /* We check for "finished" only in the reply path:
749 * w_e_end_ov_reply().
750 * We need to send at least one request out. */
751 stop_sector_reached = i > 0
b30ab791
AG
752 && verify_can_do_stop_sector(device)
753 && sector >= device->ov_stop_sector;
58ffa580
LE
754 if (stop_sector_reached)
755 break;
b411b363
PR
756
757 size = BM_BLOCK_SIZE;
758
b30ab791
AG
759 if (drbd_rs_should_slow_down(device, sector) ||
760 drbd_try_rs_begin_io(device, sector)) {
761 device->ov_position = sector;
b411b363
PR
762 goto requeue;
763 }
764
765 if (sector + (size>>9) > capacity)
766 size = (capacity-sector)<<9;
767
b30ab791 768 inc_rs_pending(device);
69a22773 769 if (drbd_send_ov_request(first_peer_device(device), sector, size)) {
b30ab791 770 dec_rs_pending(device);
b411b363
PR
771 return 0;
772 }
773 sector += BM_SECT_PER_BIT;
774 }
b30ab791 775 device->ov_position = sector;
b411b363
PR
776
777 requeue:
b30ab791 778 device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
58ffa580 779 if (i == 0 || !stop_sector_reached)
b30ab791 780 mod_timer(&device->resync_timer, jiffies + SLEEP_TIME);
b411b363
PR
781 return 1;
782}
783
99920dc5 784int w_ov_finished(struct drbd_work *w, int cancel)
b411b363 785{
84b8c06b
AG
786 struct drbd_device_work *dw =
787 container_of(w, struct drbd_device_work, w);
788 struct drbd_device *device = dw->device;
789 kfree(dw);
b30ab791
AG
790 ov_out_of_sync_print(device);
791 drbd_resync_finished(device);
b411b363 792
99920dc5 793 return 0;
b411b363
PR
794}
795
99920dc5 796static int w_resync_finished(struct drbd_work *w, int cancel)
b411b363 797{
84b8c06b
AG
798 struct drbd_device_work *dw =
799 container_of(w, struct drbd_device_work, w);
800 struct drbd_device *device = dw->device;
801 kfree(dw);
b411b363 802
b30ab791 803 drbd_resync_finished(device);
b411b363 804
99920dc5 805 return 0;
b411b363
PR
806}
807
b30ab791 808static void ping_peer(struct drbd_device *device)
af85e8e8 809{
a6b32bc3 810 struct drbd_connection *connection = first_peer_device(device)->connection;
2a67d8b9 811
bde89a9e
AG
812 clear_bit(GOT_PING_ACK, &connection->flags);
813 request_ping(connection);
814 wait_event(connection->ping_wait,
815 test_bit(GOT_PING_ACK, &connection->flags) || device->state.conn < C_CONNECTED);
816}
817
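/* Wrap up a finished resync or online verify run: report the achieved rate,
 * print checksum statistics, update the UUID set and transition back to
 * C_CONNECTED.  The rate printed is Bit2KB(rs_total / elapsed); with 4 KiB
 * per bitmap bit, a 1 GiB resync (262144 bits) that took 32 seconds is
 * reported as roughly 32768 K/sec. */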
b30ab791 818int drbd_resync_finished(struct drbd_device *device)
819{
820 unsigned long db, dt, dbdt;
821 unsigned long n_oos;
822 union drbd_state os, ns;
84b8c06b 823 struct drbd_device_work *dw;
b411b363 824 char *khelper_cmd = NULL;
26525618 825 int verify_done = 0;
826
827 /* Remove all elements from the resync LRU. Since future actions
828 * might set bits in the (main) bitmap, then the entries in the
829 * resync LRU would be wrong. */
b30ab791 830 if (drbd_rs_del_all(device)) {
b411b363
PR
831 /* In case this is not possible now, most probably because
832 * there are P_RS_DATA_REPLY Packets lingering on the worker's
833 * queue (or even the read operations for those packets
834 * is not finished by now). Retry in 100ms. */
835
20ee6390 836 schedule_timeout_interruptible(HZ / 10);
84b8c06b
AG
837 dw = kmalloc(sizeof(struct drbd_device_work), GFP_ATOMIC);
838 if (dw) {
839 dw->w.cb = w_resync_finished;
840 dw->device = device;
841 drbd_queue_work(&first_peer_device(device)->connection->sender_work,
842 &dw->w);
b411b363
PR
843 return 1;
844 }
84b8c06b 845 drbd_err(device, "Warn failed to drbd_rs_del_all() and to kmalloc(dw).\n");
b411b363
PR
846 }
847
b30ab791 848 dt = (jiffies - device->rs_start - device->rs_paused) / HZ;
b411b363
PR
849 if (dt <= 0)
850 dt = 1;
84b8c06b 851
b30ab791 852 db = device->rs_total;
58ffa580 853 /* adjust for verify start and stop sectors, respective reached position */
b30ab791
AG
854 if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
855 db -= device->ov_left;
58ffa580 856
b411b363 857 dbdt = Bit2KB(db/dt);
b30ab791 858 device->rs_paused /= HZ;
b411b363 859
b30ab791 860 if (!get_ldev(device))
b411b363
PR
861 goto out;
862
b30ab791 863 ping_peer(device);
af85e8e8 864
0500813f 865 spin_lock_irq(&device->resource->req_lock);
b30ab791 866 os = drbd_read_state(device);
b411b363 867
26525618
LE
868 verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T);
869
b411b363
PR
870 /* This protects us against multiple calls (that can happen in the presence
871 of application IO), and against connectivity loss just before we arrive here. */
872 if (os.conn <= C_CONNECTED)
873 goto out_unlock;
874
875 ns = os;
876 ns.conn = C_CONNECTED;
877
d0180171 878 drbd_info(device, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
58ffa580 879 verify_done ? "Online verify" : "Resync",
b30ab791 880 dt + device->rs_paused, device->rs_paused, dbdt);
b411b363 881
b30ab791 882 n_oos = drbd_bm_total_weight(device);
b411b363
PR
883
884 if (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) {
885 if (n_oos) {
d0180171 886 drbd_alert(device, "Online verify found %lu %dk block out of sync!\n",
b411b363
PR
887 n_oos, Bit2KB(1));
888 khelper_cmd = "out-of-sync";
889 }
890 } else {
0b0ba1ef 891 D_ASSERT(device, (n_oos - device->rs_failed) == 0);
b411b363
PR
892
893 if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T)
894 khelper_cmd = "after-resync-target";
895
a6b32bc3 896 if (first_peer_device(device)->connection->csums_tfm && device->rs_total) {
b30ab791
AG
897 const unsigned long s = device->rs_same_csum;
898 const unsigned long t = device->rs_total;
b411b363
PR
899 const int ratio =
900 (t == 0) ? 0 :
901 (t < 100000) ? ((s*100)/t) : (s/(t/100));
d0180171 902 drbd_info(device, "%u %% had equal checksums, eliminated: %luK; "
b411b363
PR
903 "transferred %luK total %luK\n",
904 ratio,
b30ab791
AG
905 Bit2KB(device->rs_same_csum),
906 Bit2KB(device->rs_total - device->rs_same_csum),
907 Bit2KB(device->rs_total));
b411b363
PR
908 }
909 }
910
b30ab791 911 if (device->rs_failed) {
d0180171 912 drbd_info(device, " %lu failed blocks\n", device->rs_failed);
b411b363
PR
913
914 if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
915 ns.disk = D_INCONSISTENT;
916 ns.pdsk = D_UP_TO_DATE;
917 } else {
918 ns.disk = D_UP_TO_DATE;
919 ns.pdsk = D_INCONSISTENT;
920 }
921 } else {
922 ns.disk = D_UP_TO_DATE;
923 ns.pdsk = D_UP_TO_DATE;
924
925 if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
b30ab791 926 if (device->p_uuid) {
b411b363
PR
927 int i;
928 for (i = UI_BITMAP ; i <= UI_HISTORY_END ; i++)
b30ab791
AG
929 _drbd_uuid_set(device, i, device->p_uuid[i]);
930 drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_CURRENT]);
931 _drbd_uuid_set(device, UI_CURRENT, device->p_uuid[UI_CURRENT]);
b411b363 932 } else {
d0180171 933 drbd_err(device, "device->p_uuid is NULL! BUG\n");
b411b363
PR
934 }
935 }
936
62b0da3a
LE
937 if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) {
938 /* for verify runs, we don't update uuids here,
939 * so there would be nothing to report. */
b30ab791
AG
940 drbd_uuid_set_bm(device, 0UL);
941 drbd_print_uuids(device, "updated UUIDs");
942 if (device->p_uuid) {
62b0da3a
LE
943 /* Now the two UUID sets are equal, update what we
944 * know of the peer. */
945 int i;
946 for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
b30ab791 947 device->p_uuid[i] = device->ldev->md.uuid[i];
62b0da3a 948 }
b411b363
PR
949 }
950 }
951
b30ab791 952 _drbd_set_state(device, ns, CS_VERBOSE, NULL);
b411b363 953out_unlock:
0500813f 954 spin_unlock_irq(&device->resource->req_lock);
b30ab791 955 put_ldev(device);
b411b363 956out:
b30ab791
AG
957 device->rs_total = 0;
958 device->rs_failed = 0;
959 device->rs_paused = 0;
58ffa580
LE
960
961 /* reset start sector, if we reached end of device */
b30ab791
AG
962 if (verify_done && device->ov_left == 0)
963 device->ov_start_sector = 0;
b411b363 964
b30ab791 965 drbd_md_sync(device);
13d42685 966
b411b363 967 if (khelper_cmd)
b30ab791 968 drbd_khelper(device, khelper_cmd);
b411b363
PR
969
970 return 1;
971}
972
973/* helper */
b30ab791 974static void move_to_net_ee_or_free(struct drbd_device *device, struct drbd_peer_request *peer_req)
b411b363 975{
045417f7 976 if (drbd_peer_req_has_active_page(peer_req)) {
b411b363 977 /* This might happen if sendpage() has not finished */
db830c46 978 int i = (peer_req->i.size + PAGE_SIZE -1) >> PAGE_SHIFT;
b30ab791
AG
979 atomic_add(i, &device->pp_in_use_by_net);
980 atomic_sub(i, &device->pp_in_use);
0500813f 981 spin_lock_irq(&device->resource->req_lock);
a8cd15ba 982 list_add_tail(&peer_req->w.list, &device->net_ee);
0500813f 983 spin_unlock_irq(&device->resource->req_lock);
435f0740 984 wake_up(&drbd_pp_wait);
b411b363 985 } else
b30ab791 986 drbd_free_peer_req(device, peer_req);
b411b363
PR
987}
988
989/**
990 * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST
b30ab791 991 * @device: DRBD device.
b411b363
PR
992 * @w: work object.
993 * @cancel: The connection will be closed anyways
994 */
99920dc5 995int w_e_end_data_req(struct drbd_work *w, int cancel)
b411b363 996{
a8cd15ba
AG
997 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
998 struct drbd_device *device = peer_req->peer_device->device;
99920dc5 999 int err;
b411b363
PR
1000
1001 if (unlikely(cancel)) {
b30ab791
AG
1002 drbd_free_peer_req(device, peer_req);
1003 dec_unacked(device);
99920dc5 1004 return 0;
b411b363
PR
1005 }
1006
db830c46 1007 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
69a22773 1008 err = drbd_send_block(first_peer_device(device), P_DATA_REPLY, peer_req);
b411b363
PR
1009 } else {
1010 if (__ratelimit(&drbd_ratelimit_state))
d0180171 1011 drbd_err(device, "Sending NegDReply. sector=%llus.\n",
db830c46 1012 (unsigned long long)peer_req->i.sector);
b411b363 1013
69a22773 1014 err = drbd_send_ack(first_peer_device(device), P_NEG_DREPLY, peer_req);
b411b363
PR
1015 }
1016
b30ab791 1017 dec_unacked(device);
b411b363 1018
b30ab791 1019 move_to_net_ee_or_free(device, peer_req);
b411b363 1020
99920dc5 1021 if (unlikely(err))
d0180171 1022 drbd_err(device, "drbd_send_block() failed\n");
99920dc5 1023 return err;
b411b363
PR
1024}
1025
1026/**
a209b4ae 1027 * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST
b411b363
PR
1028 * @w: work object.
1029 * @cancel: The connection will be closed anyways
1030 */
99920dc5 1031int w_e_end_rsdata_req(struct drbd_work *w, int cancel)
b411b363 1032{
a8cd15ba
AG
1033 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
1034 struct drbd_device *device = peer_req->peer_device->device;
99920dc5 1035 int err;
b411b363
PR
1036
1037 if (unlikely(cancel)) {
b30ab791
AG
1038 drbd_free_peer_req(device, peer_req);
1039 dec_unacked(device);
99920dc5 1040 return 0;
b411b363
PR
1041 }
1042
b30ab791
AG
1043 if (get_ldev_if_state(device, D_FAILED)) {
1044 drbd_rs_complete_io(device, peer_req->i.sector);
1045 put_ldev(device);
b411b363
PR
1046 }
1047
b30ab791 1048 if (device->state.conn == C_AHEAD) {
69a22773 1049 err = drbd_send_ack(first_peer_device(device), P_RS_CANCEL, peer_req);
db830c46 1050 } else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
b30ab791
AG
1051 if (likely(device->state.pdsk >= D_INCONSISTENT)) {
1052 inc_rs_pending(device);
69a22773 1053 err = drbd_send_block(first_peer_device(device), P_RS_DATA_REPLY, peer_req);
b411b363
PR
1054 } else {
1055 if (__ratelimit(&drbd_ratelimit_state))
d0180171 1056 drbd_err(device, "Not sending RSDataReply, "
b411b363 1057 "partner DISKLESS!\n");
99920dc5 1058 err = 0;
b411b363
PR
1059 }
1060 } else {
1061 if (__ratelimit(&drbd_ratelimit_state))
d0180171 1062 drbd_err(device, "Sending NegRSDReply. sector %llus.\n",
db830c46 1063 (unsigned long long)peer_req->i.sector);
b411b363 1064
69a22773 1065 err = drbd_send_ack(first_peer_device(device), P_NEG_RS_DREPLY, peer_req);
b411b363
PR
1066
1067 /* update resync data with failure */
b30ab791 1068 drbd_rs_failed_io(device, peer_req->i.sector, peer_req->i.size);
b411b363
PR
1069 }
1070
b30ab791 1071 dec_unacked(device);
b411b363 1072
b30ab791 1073 move_to_net_ee_or_free(device, peer_req);
b411b363 1074
99920dc5 1075 if (unlikely(err))
d0180171 1076 drbd_err(device, "drbd_send_block() failed\n");
99920dc5 1077 return err;
b411b363
PR
1078}
1079
99920dc5 1080int w_e_end_csum_rs_req(struct drbd_work *w, int cancel)
b411b363 1081{
a8cd15ba
AG
1082 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
1083 struct drbd_device *device = peer_req->peer_device->device;
b411b363
PR
1084 struct digest_info *di;
1085 int digest_size;
1086 void *digest = NULL;
99920dc5 1087 int err, eq = 0;
b411b363
PR
1088
1089 if (unlikely(cancel)) {
b30ab791
AG
1090 drbd_free_peer_req(device, peer_req);
1091 dec_unacked(device);
99920dc5 1092 return 0;
b411b363
PR
1093 }
1094
b30ab791
AG
1095 if (get_ldev(device)) {
1096 drbd_rs_complete_io(device, peer_req->i.sector);
1097 put_ldev(device);
1d53f09e 1098 }
b411b363 1099
db830c46 1100 di = peer_req->digest;
b411b363 1101
db830c46 1102 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
b411b363
PR
1103 /* quick hack to try to avoid a race against reconfiguration.
1104 * a real fix would be much more involved,
1105 * introducing more locking mechanisms */
a6b32bc3
AG
1106 if (first_peer_device(device)->connection->csums_tfm) {
1107 digest_size = crypto_hash_digestsize(first_peer_device(device)->connection->csums_tfm);
0b0ba1ef 1108 D_ASSERT(device, digest_size == di->digest_size);
b411b363
PR
1109 digest = kmalloc(digest_size, GFP_NOIO);
1110 }
1111 if (digest) {
79a3c8d3 1112 drbd_csum_ee(first_peer_device(device)->connection->csums_tfm, peer_req, digest);
b411b363
PR
1113 eq = !memcmp(digest, di->digest, digest_size);
1114 kfree(digest);
1115 }
1116
1117 if (eq) {
b30ab791 1118 drbd_set_in_sync(device, peer_req->i.sector, peer_req->i.size);
676396d5 1119 /* rs_same_csums unit is BM_BLOCK_SIZE */
b30ab791 1120 device->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT;
69a22773 1121 err = drbd_send_ack(first_peer_device(device), P_RS_IS_IN_SYNC, peer_req);
b411b363 1122 } else {
b30ab791 1123 inc_rs_pending(device);
db830c46
AG
1124 peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */
1125 peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */
204bba99 1126 kfree(di);
69a22773 1127 err = drbd_send_block(first_peer_device(device), P_RS_DATA_REPLY, peer_req);
b411b363
PR
1128 }
1129 } else {
69a22773 1130 err = drbd_send_ack(first_peer_device(device), P_NEG_RS_DREPLY, peer_req);
b411b363 1131 if (__ratelimit(&drbd_ratelimit_state))
d0180171 1132 drbd_err(device, "Sending NegDReply. I guess it gets messy.\n");
b411b363
PR
1133 }
1134
b30ab791
AG
1135 dec_unacked(device);
1136 move_to_net_ee_or_free(device, peer_req);
b411b363 1137
99920dc5 1138 if (unlikely(err))
d0180171 1139 drbd_err(device, "drbd_send_block/ack() failed\n");
99920dc5 1140 return err;
b411b363
PR
1141}
1142
99920dc5 1143int w_e_end_ov_req(struct drbd_work *w, int cancel)
b411b363 1144{
a8cd15ba
AG
1145 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
1146 struct drbd_device *device = peer_req->peer_device->device;
db830c46
AG
1147 sector_t sector = peer_req->i.sector;
1148 unsigned int size = peer_req->i.size;
b411b363
PR
1149 int digest_size;
1150 void *digest;
99920dc5 1151 int err = 0;
b411b363
PR
1152
1153 if (unlikely(cancel))
1154 goto out;
1155
a6b32bc3 1156 digest_size = crypto_hash_digestsize(first_peer_device(device)->connection->verify_tfm);
b411b363 1157 digest = kmalloc(digest_size, GFP_NOIO);
8f21420e 1158 if (!digest) {
99920dc5 1159 err = 1; /* terminate the connection in case the allocation failed */
8f21420e 1160 goto out;
b411b363
PR
1161 }
1162
db830c46 1163 if (likely(!(peer_req->flags & EE_WAS_ERROR)))
79a3c8d3 1164 drbd_csum_ee(first_peer_device(device)->connection->verify_tfm, peer_req, digest);
8f21420e
PR
1165 else
1166 memset(digest, 0, digest_size);
1167
53ea4331
LE
1168 /* Free e and pages before send.
1169 * In case we block on congestion, we could otherwise run into
1170 * some distributed deadlock, if the other side blocks on
1171 * congestion as well, because our receiver blocks in
c37c8ecf 1172 * drbd_alloc_pages due to pp_in_use > max_buffers. */
b30ab791 1173 drbd_free_peer_req(device, peer_req);
db830c46 1174 peer_req = NULL;
b30ab791 1175 inc_rs_pending(device);
69a22773 1176 err = drbd_send_drequest_csum(first_peer_device(device), sector, size, digest, digest_size, P_OV_REPLY);
99920dc5 1177 if (err)
b30ab791 1178 dec_rs_pending(device);
8f21420e
PR
1179 kfree(digest);
1180
b411b363 1181out:
db830c46 1182 if (peer_req)
b30ab791
AG
1183 drbd_free_peer_req(device, peer_req);
1184 dec_unacked(device);
99920dc5 1185 return err;
b411b363
PR
1186}
1187
b30ab791 1188void drbd_ov_out_of_sync_found(struct drbd_device *device, sector_t sector, int size)
b411b363 1189{
b30ab791
AG
1190 if (device->ov_last_oos_start + device->ov_last_oos_size == sector) {
1191 device->ov_last_oos_size += size>>9;
b411b363 1192 } else {
b30ab791
AG
1193 device->ov_last_oos_start = sector;
1194 device->ov_last_oos_size = size>>9;
b411b363 1195 }
b30ab791 1196 drbd_set_out_of_sync(device, sector, size);
b411b363
PR
1197}
1198
99920dc5 1199int w_e_end_ov_reply(struct drbd_work *w, int cancel)
b411b363 1200{
a8cd15ba
AG
1201 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
1202 struct drbd_device *device = peer_req->peer_device->device;
b411b363 1203 struct digest_info *di;
b411b363 1204 void *digest;
db830c46
AG
1205 sector_t sector = peer_req->i.sector;
1206 unsigned int size = peer_req->i.size;
53ea4331 1207 int digest_size;
99920dc5 1208 int err, eq = 0;
58ffa580 1209 bool stop_sector_reached = false;
b411b363
PR
1210
1211 if (unlikely(cancel)) {
b30ab791
AG
1212 drbd_free_peer_req(device, peer_req);
1213 dec_unacked(device);
99920dc5 1214 return 0;
b411b363
PR
1215 }
1216
1217 /* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all
1218 * the resync lru has been cleaned up already */
b30ab791
AG
1219 if (get_ldev(device)) {
1220 drbd_rs_complete_io(device, peer_req->i.sector);
1221 put_ldev(device);
1d53f09e 1222 }
b411b363 1223
db830c46 1224 di = peer_req->digest;
b411b363 1225
db830c46 1226 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
a6b32bc3 1227 digest_size = crypto_hash_digestsize(first_peer_device(device)->connection->verify_tfm);
b411b363
PR
1228 digest = kmalloc(digest_size, GFP_NOIO);
1229 if (digest) {
79a3c8d3 1230 drbd_csum_ee(first_peer_device(device)->connection->verify_tfm, peer_req, digest);
b411b363 1231
0b0ba1ef 1232 D_ASSERT(device, digest_size == di->digest_size);
b411b363
PR
1233 eq = !memcmp(digest, di->digest, digest_size);
1234 kfree(digest);
1235 }
b411b363
PR
1236 }
1237
9676c760
LE
1238 /* Free peer_req and pages before send.
1239 * In case we block on congestion, we could otherwise run into
1240 * some distributed deadlock, if the other side blocks on
1241 * congestion as well, because our receiver blocks in
c37c8ecf 1242 * drbd_alloc_pages due to pp_in_use > max_buffers. */
b30ab791 1243 drbd_free_peer_req(device, peer_req);
b411b363 1244 if (!eq)
b30ab791 1245 drbd_ov_out_of_sync_found(device, sector, size);
b411b363 1246 else
b30ab791 1247 ov_out_of_sync_print(device);
b411b363 1248
69a22773 1249 err = drbd_send_ack_ex(first_peer_device(device), P_OV_RESULT, sector, size,
fa79abd8 1250 eq ? ID_IN_SYNC : ID_OUT_OF_SYNC);
b411b363 1251
b30ab791 1252 dec_unacked(device);
b411b363 1253
b30ab791 1254 --device->ov_left;
ea5442af
LE
1255
1256 /* let's advance progress step marks only for every other megabyte */
b30ab791
AG
1257 if ((device->ov_left & 0x200) == 0x200)
1258 drbd_advance_rs_marks(device, device->ov_left);
ea5442af 1259
b30ab791
AG
1260 stop_sector_reached = verify_can_do_stop_sector(device) &&
1261 (sector + (size>>9)) >= device->ov_stop_sector;
58ffa580 1262
b30ab791
AG
1263 if (device->ov_left == 0 || stop_sector_reached) {
1264 ov_out_of_sync_print(device);
1265 drbd_resync_finished(device);
b411b363
PR
1266 }
1267
99920dc5 1268 return err;
b411b363
PR
1269}
1270
b6dd1a89
LE
1271/* FIXME
1272 * We need to track the number of pending barrier acks,
1273 * and to be able to wait for them.
1274 * See also comment in drbd_adm_attach before drbd_suspend_io.
1275 */
bde89a9e 1276static int drbd_send_barrier(struct drbd_connection *connection)
b411b363 1277{
9f5bdc33 1278 struct p_barrier *p;
b6dd1a89 1279 struct drbd_socket *sock;
b411b363 1280
bde89a9e
AG
1281 sock = &connection->data;
1282 p = conn_prepare_command(connection, sock);
9f5bdc33
AG
1283 if (!p)
1284 return -EIO;
bde89a9e 1285 p->barrier = connection->send.current_epoch_nr;
b6dd1a89 1286 p->pad = 0;
bde89a9e 1287 connection->send.current_epoch_writes = 0;
b6dd1a89 1288
bde89a9e 1289 return conn_send_command(connection, sock, P_BARRIER, sizeof(*p), NULL, 0);
b411b363
PR
1290}
1291
99920dc5 1292int w_send_write_hint(struct drbd_work *w, int cancel)
b411b363 1293{
84b8c06b
AG
1294 struct drbd_device *device =
1295 container_of(w, struct drbd_device, unplug_work);
9f5bdc33
AG
1296 struct drbd_socket *sock;
1297
b411b363 1298 if (cancel)
99920dc5 1299 return 0;
a6b32bc3 1300 sock = &first_peer_device(device)->connection->data;
69a22773 1301 if (!drbd_prepare_command(first_peer_device(device), sock))
9f5bdc33 1302 return -EIO;
69a22773 1303 return drbd_send_command(first_peer_device(device), sock, P_UNPLUG_REMOTE, 0, NULL, 0);
1304}
1305
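/* Write epochs are delimited by P_BARRIER packets.  The helpers below keep
 * connection->send.current_epoch_nr/_writes in sync with what is actually
 * handed to the network: the first write initializes the bookkeeping, and
 * maybe_send_barrier() closes the previous epoch (if it contained writes)
 * as soon as a request belonging to a newer epoch is about to be sent. */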
bde89a9e 1306static void re_init_if_first_write(struct drbd_connection *connection, unsigned int epoch)
4eb9b3cb 1307{
1308 if (!connection->send.seen_any_write_yet) {
1309 connection->send.seen_any_write_yet = true;
1310 connection->send.current_epoch_nr = epoch;
1311 connection->send.current_epoch_writes = 0;
1312 }
1313}
1314
bde89a9e 1315static void maybe_send_barrier(struct drbd_connection *connection, unsigned int epoch)
1316{
1317 /* re-init if first write on this connection */
bde89a9e 1318 if (!connection->send.seen_any_write_yet)
4eb9b3cb 1319 return;
1320 if (connection->send.current_epoch_nr != epoch) {
1321 if (connection->send.current_epoch_writes)
1322 drbd_send_barrier(connection);
1323 connection->send.current_epoch_nr = epoch;
1324 }
1325}
1326
8f7bed77 1327int w_send_out_of_sync(struct drbd_work *w, int cancel)
73a01a18
PR
1328{
1329 struct drbd_request *req = container_of(w, struct drbd_request, w);
84b8c06b 1330 struct drbd_device *device = req->device;
a6b32bc3 1331 struct drbd_connection *connection = first_peer_device(device)->connection;
99920dc5 1332 int err;
73a01a18
PR
1333
1334 if (unlikely(cancel)) {
8554df1c 1335 req_mod(req, SEND_CANCELED);
99920dc5 1336 return 0;
73a01a18
PR
1337 }
1338
bde89a9e 1339 /* this time, no connection->send.current_epoch_writes++;
b6dd1a89
LE
1340 * If it was sent, it was the closing barrier for the last
1341 * replicated epoch, before we went into AHEAD mode.
1342 * No more barriers will be sent, until we leave AHEAD mode again. */
bde89a9e 1343 maybe_send_barrier(connection, req->epoch);
b6dd1a89 1344
69a22773 1345 err = drbd_send_out_of_sync(first_peer_device(device), req);
8554df1c 1346 req_mod(req, OOS_HANDED_TO_NETWORK);
73a01a18 1347
99920dc5 1348 return err;
73a01a18
PR
1349}
1350
b411b363
PR
1351/**
1352 * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request
b411b363
PR
1353 * @w: work object.
1354 * @cancel: The connection will be closed anyways
1355 */
99920dc5 1356int w_send_dblock(struct drbd_work *w, int cancel)
b411b363
PR
1357{
1358 struct drbd_request *req = container_of(w, struct drbd_request, w);
84b8c06b 1359 struct drbd_device *device = req->device;
a6b32bc3 1360 struct drbd_connection *connection = first_peer_device(device)->connection;
99920dc5 1361 int err;
b411b363
PR
1362
1363 if (unlikely(cancel)) {
8554df1c 1364 req_mod(req, SEND_CANCELED);
99920dc5 1365 return 0;
b411b363
PR
1366 }
1367
bde89a9e
AG
1368 re_init_if_first_write(connection, req->epoch);
1369 maybe_send_barrier(connection, req->epoch);
1370 connection->send.current_epoch_writes++;
b6dd1a89 1371
69a22773 1372 err = drbd_send_dblock(first_peer_device(device), req);
99920dc5 1373 req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
b411b363 1374
99920dc5 1375 return err;
b411b363
PR
1376}
1377
1378/**
1379 * w_send_read_req() - Worker callback to send a read request (P_DATA_REQUEST) packet
b411b363
PR
1380 * @w: work object.
1381 * @cancel: The connection will be closed anyways
1382 */
99920dc5 1383int w_send_read_req(struct drbd_work *w, int cancel)
b411b363
PR
1384{
1385 struct drbd_request *req = container_of(w, struct drbd_request, w);
84b8c06b 1386 struct drbd_device *device = req->device;
a6b32bc3 1387 struct drbd_connection *connection = first_peer_device(device)->connection;
99920dc5 1388 int err;
b411b363
PR
1389
1390 if (unlikely(cancel)) {
8554df1c 1391 req_mod(req, SEND_CANCELED);
99920dc5 1392 return 0;
b411b363
PR
1393 }
1394
b6dd1a89
LE
1395 /* Even read requests may close a write epoch,
1396 * if there was any yet. */
bde89a9e 1397 maybe_send_barrier(connection, req->epoch);
b6dd1a89 1398
69a22773 1399 err = drbd_send_drequest(first_peer_device(device), P_DATA_REQUEST, req->i.sector, req->i.size,
6c1005e7 1400 (unsigned long)req);
b411b363 1401
99920dc5 1402 req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
b411b363 1403
99920dc5 1404 return err;
b411b363
PR
1405}
1406
99920dc5 1407int w_restart_disk_io(struct drbd_work *w, int cancel)
265be2d0
PR
1408{
1409 struct drbd_request *req = container_of(w, struct drbd_request, w);
84b8c06b 1410 struct drbd_device *device = req->device;
265be2d0 1411
0778286a 1412 if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
b30ab791 1413 drbd_al_begin_io(device, &req->i, false);
265be2d0
PR
1414
1415 drbd_req_make_private_bio(req, req->master_bio);
b30ab791 1416 req->private_bio->bi_bdev = device->ldev->backing_bdev;
265be2d0
PR
1417 generic_make_request(req->private_bio);
1418
99920dc5 1419 return 0;
1420}
1421
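/* resync-after dependencies: a device with resync_after configured may only
 * resync once the minor it depends on is neither syncing nor paused.
 * _drbd_may_sync_now() walks that dependency chain; _drbd_pause_after() and
 * _drbd_resume_next() apply the result to all devices by toggling their
 * aftr_isp flag under the global_state_lock. */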
b30ab791 1422static int _drbd_may_sync_now(struct drbd_device *device)
b411b363 1423{
b30ab791 1424 struct drbd_device *odev = device;
95f8efd0 1425 int resync_after;
1426
1427 while (1) {
a3f8f7dc 1428 if (!odev->ldev || odev->state.disk == D_DISKLESS)
438c8374 1429 return 1;
daeda1cc 1430 rcu_read_lock();
95f8efd0 1431 resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
daeda1cc 1432 rcu_read_unlock();
95f8efd0 1433 if (resync_after == -1)
b411b363 1434 return 1;
b30ab791 1435 odev = minor_to_device(resync_after);
a3f8f7dc 1436 if (!odev)
841ce241 1437 return 1;
1438 if ((odev->state.conn >= C_SYNC_SOURCE &&
1439 odev->state.conn <= C_PAUSED_SYNC_T) ||
1440 odev->state.aftr_isp || odev->state.peer_isp ||
1441 odev->state.user_isp)
1442 return 0;
1443 }
1444}
1445
1446/**
1447 * _drbd_pause_after() - Pause resync on all devices that may not resync now
b30ab791 1448 * @device: DRBD device.
b411b363
PR
1449 *
1450 * Called from process context only (admin command and after_state_ch).
1451 */
b30ab791 1452static int _drbd_pause_after(struct drbd_device *device)
b411b363 1453{
54761697 1454 struct drbd_device *odev;
b411b363
PR
1455 int i, rv = 0;
1456
695d08fa 1457 rcu_read_lock();
05a10ec7 1458 idr_for_each_entry(&drbd_devices, odev, i) {
b411b363
PR
1459 if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1460 continue;
1461 if (!_drbd_may_sync_now(odev))
1462 rv |= (__drbd_set_state(_NS(odev, aftr_isp, 1), CS_HARD, NULL)
1463 != SS_NOTHING_TO_DO);
1464 }
695d08fa 1465 rcu_read_unlock();
b411b363
PR
1466
1467 return rv;
1468}
1469
1470/**
1471 * _drbd_resume_next() - Resume resync on all devices that may resync now
b30ab791 1472 * @device: DRBD device.
b411b363
PR
1473 *
1474 * Called from process context only (admin command and worker).
1475 */
b30ab791 1476static int _drbd_resume_next(struct drbd_device *device)
b411b363 1477{
54761697 1478 struct drbd_device *odev;
b411b363
PR
1479 int i, rv = 0;
1480
695d08fa 1481 rcu_read_lock();
05a10ec7 1482 idr_for_each_entry(&drbd_devices, odev, i) {
b411b363
PR
1483 if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1484 continue;
1485 if (odev->state.aftr_isp) {
1486 if (_drbd_may_sync_now(odev))
1487 rv |= (__drbd_set_state(_NS(odev, aftr_isp, 0),
1488 CS_HARD, NULL)
1489 != SS_NOTHING_TO_DO) ;
1490 }
1491 }
695d08fa 1492 rcu_read_unlock();
b411b363
PR
1493 return rv;
1494}
1495
b30ab791 1496void resume_next_sg(struct drbd_device *device)
b411b363
PR
1497{
1498 write_lock_irq(&global_state_lock);
b30ab791 1499 _drbd_resume_next(device);
b411b363
PR
1500 write_unlock_irq(&global_state_lock);
1501}
1502
b30ab791 1503void suspend_other_sg(struct drbd_device *device)
b411b363
PR
1504{
1505 write_lock_irq(&global_state_lock);
b30ab791 1506 _drbd_pause_after(device);
b411b363
PR
1507 write_unlock_irq(&global_state_lock);
1508}
1509
dc97b708 1510/* caller must hold global_state_lock */
b30ab791 1511enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor)
b411b363 1512{
54761697 1513 struct drbd_device *odev;
95f8efd0 1514 int resync_after;
b411b363
PR
1515
1516 if (o_minor == -1)
1517 return NO_ERROR;
a3f8f7dc 1518 if (o_minor < -1 || o_minor > MINORMASK)
95f8efd0 1519 return ERR_RESYNC_AFTER;
b411b363
PR
1520
1521 /* check for loops */
b30ab791 1522 odev = minor_to_device(o_minor);
b411b363 1523 while (1) {
b30ab791 1524 if (odev == device)
95f8efd0 1525 return ERR_RESYNC_AFTER_CYCLE;
b411b363 1526
a3f8f7dc
LE
1527 /* You are free to depend on diskless, non-existing,
1528 * or not yet/no longer existing minors.
1529 * We only reject dependency loops.
1530 * We cannot follow the dependency chain beyond a detached or
1531 * missing minor.
1532 */
1533 if (!odev || !odev->ldev || odev->state.disk == D_DISKLESS)
1534 return NO_ERROR;
1535
daeda1cc 1536 rcu_read_lock();
95f8efd0 1537 resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
daeda1cc 1538 rcu_read_unlock();
b411b363 1539 /* dependency chain ends here, no cycles. */
95f8efd0 1540 if (resync_after == -1)
b411b363
PR
1541 return NO_ERROR;
1542
1543 /* follow the dependency chain */
b30ab791 1544 odev = minor_to_device(resync_after);
b411b363
PR
1545 }
1546}
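/* Illustrative sketch (not part of the driver): the loop check above reduces to
 * "follow the proposed dependency chain and see whether it leads back to the
 * device we are configuring".  resync_after[] and MAX_MINORS are hypothetical;
 * out-of-range entries stand in for detached or missing minors, beyond which
 * the chain cannot be followed. */
#if 0	/* sketch only, never compiled */
#include <stdbool.h>

#define MAX_MINORS 16

static int resync_after[MAX_MINORS];	/* -1 == end of chain (hypothetical) */

/* Would "minor resyncs after o_minor" introduce a dependency loop? */
static bool creates_cycle(int minor, int o_minor)
{
	int odev = o_minor;

	while (odev >= 0 && odev < MAX_MINORS) {
		if (odev == minor)
			return true;	/* walked back to where we started */
		odev = resync_after[odev];
	}
	return false;			/* chain ended without revisiting us */
}
#endif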
1547
dc97b708 1548/* caller must hold global_state_lock */
b30ab791 1549void drbd_resync_after_changed(struct drbd_device *device)
b411b363
PR
1550{
1551 int changes;
b411b363 1552
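	/* Pausing one device can change the answer of _drbd_may_sync_now()
	 * for the devices that resync after it, and the same holds for
	 * resuming one, so iterate until a full pass changes nothing. */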
dc97b708 1553 do {
b30ab791
AG
1554 changes = _drbd_pause_after(device);
1555 changes |= _drbd_resume_next(device);
dc97b708 1556 } while (changes);
b411b363
PR
1557}
1558
b30ab791 1559void drbd_rs_controller_reset(struct drbd_device *device)
9bd28d3c 1560{
813472ce
PR
1561 struct fifo_buffer *plan;
1562
b30ab791
AG
1563 atomic_set(&device->rs_sect_in, 0);
1564 atomic_set(&device->rs_sect_ev, 0);
1565 device->rs_in_flight = 0;
813472ce
PR
1566
1567 /* Updating the RCU protected object in place is necessary since
1568 this function gets called from atomic context.
1569 It is valid since all other updates also lead to a completely
1570 empty fifo */
1571 rcu_read_lock();
b30ab791 1572 plan = rcu_dereference(device->rs_plan_s);
813472ce
PR
1573 plan->total = 0;
1574 fifo_set(plan, 0);
1575 rcu_read_unlock();
9bd28d3c
LE
1576}
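/* For contrast, a sketch (not part of the driver) of the usual RCU update that
 * the reset above deliberately avoids: allocate a replacement object, publish
 * it with rcu_assign_pointer(), wait a grace period, then free the old one.
 * That path may sleep (GFP_KERNEL allocation, synchronize_rcu()), which is not
 * allowed in the atomic context this reset runs in.  struct plan, current_plan
 * and update_plan() are hypothetical stand-ins. */
#if 0	/* sketch only, never compiled */
struct plan {
	unsigned int total;
	/* ... */
};

static struct plan __rcu *current_plan;

static int update_plan(unsigned int new_total)	/* caller serializes updates */
{
	struct plan *new_plan, *old_plan;

	new_plan = kzalloc(sizeof(*new_plan), GFP_KERNEL);	/* may sleep */
	if (!new_plan)
		return -ENOMEM;
	new_plan->total = new_total;

	old_plan = rcu_dereference_protected(current_plan, 1);
	rcu_assign_pointer(current_plan, new_plan);
	synchronize_rcu();					/* may sleep */
	kfree(old_plan);
	return 0;
}
#endif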
1577
1f04af33
PR
1578void start_resync_timer_fn(unsigned long data)
1579{
b30ab791 1580 struct drbd_device *device = (struct drbd_device *) data;
1f04af33 1581
84b8c06b
AG
1582 drbd_queue_work(&first_peer_device(device)->connection->sender_work,
1583 &device->start_resync_work);
1f04af33
PR
1584}
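/* Note: the timer callback above runs in softirq context, where it must not
 * sleep.  drbd_start_resync() can sleep (it may block on state_mutex and run
 * user-mode helpers), so the callback only queues start_resync_work and lets
 * the worker thread do the real work in process context. */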
1585
99920dc5 1586int w_start_resync(struct drbd_work *w, int cancel)
1f04af33 1587{
84b8c06b
AG
1588 struct drbd_device *device =
1589 container_of(w, struct drbd_device, start_resync_work);
00d56944 1590
b30ab791 1591 if (atomic_read(&device->unacked_cnt) || atomic_read(&device->rs_pending_cnt)) {
d0180171 1592 drbd_warn(device, "w_start_resync later...\n");
b30ab791
AG
1593 device->start_resync_timer.expires = jiffies + HZ/10;
1594 add_timer(&device->start_resync_timer);
99920dc5 1595 return 0;
1f04af33
PR
1596 }
1597
b30ab791
AG
1598 drbd_start_resync(device, C_SYNC_SOURCE);
1599 clear_bit(AHEAD_TO_SYNC_SOURCE, &device->flags);
99920dc5 1600 return 0;
1f04af33
PR
1601}
1602
b411b363
PR
1603/**
1604 * drbd_start_resync() - Start the resync process
b30ab791 1605 * @device: DRBD device.
b411b363
PR
1606 * @side: Either C_SYNC_SOURCE or C_SYNC_TARGET
1607 *
1608 * This function might bring you directly into one of the
1609 * C_PAUSED_SYNC_* states.
1610 */
b30ab791 1611void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
b411b363
PR
1612{
1613 union drbd_state ns;
1614 int r;
1615
b30ab791 1616 if (device->state.conn >= C_SYNC_SOURCE && device->state.conn < C_AHEAD) {
d0180171 1617 drbd_err(device, "Resync already running!\n");
b411b363
PR
1618 return;
1619 }
1620
b30ab791 1621 if (!test_bit(B_RS_H_DONE, &device->flags)) {
e64a3294
PR
1622 if (side == C_SYNC_TARGET) {
1623 /* Since application IO was locked out during C_WF_BITMAP_T and
1624 C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET
1625 we check whether we may make the data inconsistent. */
b30ab791 1626 r = drbd_khelper(device, "before-resync-target");
e64a3294
PR
1627 r = (r >> 8) & 0xff;
1628 if (r > 0) {
d0180171 1629 drbd_info(device, "before-resync-target handler returned %d, "
09b9e797 1630 "dropping connection.\n", r);
a6b32bc3 1631 conn_request_state(first_peer_device(device)->connection, NS(conn, C_DISCONNECTING), CS_HARD);
09b9e797
PR
1632 return;
1633 }
e64a3294 1634 } else /* C_SYNC_SOURCE */ {
b30ab791 1635 r = drbd_khelper(device, "before-resync-source");
e64a3294
PR
1636 r = (r >> 8) & 0xff;
1637 if (r > 0) {
1638 if (r == 3) {
d0180171 1639 drbd_info(device, "before-resync-source handler returned %d, "
e64a3294
PR
1640 "ignoring. Old userland tools?", r);
1641 } else {
d0180171 1642 drbd_info(device, "before-resync-source handler returned %d, "
e64a3294 1643 "dropping connection.\n", r);
a6b32bc3
AG
1644 conn_request_state(first_peer_device(device)->connection,
1645 NS(conn, C_DISCONNECTING), CS_HARD);
e64a3294
PR
1646 return;
1647 }
1648 }
09b9e797 1649 }
b411b363
PR
1650 }
1651
a6b32bc3 1652 if (current == first_peer_device(device)->connection->worker.task) {
dad20554 1653 /* The worker should not sleep waiting for state_mutex,
e64a3294 1654 that can take a long time */
b30ab791
AG
1655 if (!mutex_trylock(device->state_mutex)) {
1656 set_bit(B_RS_H_DONE, &device->flags);
1657 device->start_resync_timer.expires = jiffies + HZ/5;
1658 add_timer(&device->start_resync_timer);
e64a3294
PR
1659 return;
1660 }
1661 } else {
b30ab791 1662 mutex_lock(device->state_mutex);
e64a3294 1663 }
b30ab791 1664 clear_bit(B_RS_H_DONE, &device->flags);
b411b363 1665
0cfac5dd 1666 write_lock_irq(&global_state_lock);
a700471b 1667 /* Did some connection breakage or IO error race with us? */
b30ab791
AG
1668 if (device->state.conn < C_CONNECTED
1669 || !get_ldev_if_state(device, D_NEGOTIATING)) {
0cfac5dd 1670 write_unlock_irq(&global_state_lock);
b30ab791 1671 mutex_unlock(device->state_mutex);
b411b363
PR
1672 return;
1673 }
1674
b30ab791 1675 ns = drbd_read_state(device);
b411b363 1676
b30ab791 1677 ns.aftr_isp = !_drbd_may_sync_now(device);
b411b363
PR
1678
1679 ns.conn = side;
1680
1681 if (side == C_SYNC_TARGET)
1682 ns.disk = D_INCONSISTENT;
1683 else /* side == C_SYNC_SOURCE */
1684 ns.pdsk = D_INCONSISTENT;
1685
b30ab791
AG
1686 r = __drbd_set_state(device, ns, CS_VERBOSE, NULL);
1687 ns = drbd_read_state(device);
b411b363
PR
1688
1689 if (ns.conn < C_CONNECTED)
1690 r = SS_UNKNOWN_ERROR;
1691
1692 if (r == SS_SUCCESS) {
b30ab791 1693 unsigned long tw = drbd_bm_total_weight(device);
1d7734a0
LE
1694 unsigned long now = jiffies;
1695 int i;
1696
b30ab791
AG
1697 device->rs_failed = 0;
1698 device->rs_paused = 0;
1699 device->rs_same_csum = 0;
1700 device->rs_last_events = 0;
1701 device->rs_last_sect_ev = 0;
1702 device->rs_total = tw;
1703 device->rs_start = now;
1d7734a0 1704 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
b30ab791
AG
1705 device->rs_mark_left[i] = tw;
1706 device->rs_mark_time[i] = now;
1d7734a0 1707 }
b30ab791 1708 _drbd_pause_after(device);
b411b363
PR
1709 }
1710 write_unlock_irq(&global_state_lock);
5a22db89 1711
b411b363 1712 if (r == SS_SUCCESS) {
328e0f12
PR
1713 /* reset rs_last_bcast when a resync or verify is started,
1714 * to deal with potential jiffies wrap. */
b30ab791 1715 device->rs_last_bcast = jiffies - HZ;
328e0f12 1716
d0180171 1717 drbd_info(device, "Began resync as %s (will sync %lu KB [%lu bits set]).\n",
b411b363 1718 drbd_conn_str(ns.conn),
b30ab791
AG
1719 (unsigned long) device->rs_total << (BM_BLOCK_SHIFT-10),
1720 (unsigned long) device->rs_total);
6c922ed5 1721 if (side == C_SYNC_TARGET)
b30ab791 1722 device->bm_resync_fo = 0;
6c922ed5
LE
1723
1724 /* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid
1725 * with w_send_oos, or the sync target will get confused as to
1726 * how many bits to resync. We cannot do that always, because for an
1727 * empty resync and protocol < 95, we need to do it here, as we call
1728 * drbd_resync_finished from here in that case.
1729 * We drbd_gen_and_send_sync_uuid here for protocol < 96,
1730 * and from after_state_ch otherwise. */
a6b32bc3
AG
1731 if (side == C_SYNC_SOURCE &&
1732 first_peer_device(device)->connection->agreed_pro_version < 96)
69a22773 1733 drbd_gen_and_send_sync_uuid(first_peer_device(device));
b411b363 1734
a6b32bc3
AG
1735 if (first_peer_device(device)->connection->agreed_pro_version < 95 &&
1736 device->rs_total == 0) {
af85e8e8
LE
1737 /* This still has a race (about when exactly the peers
1738 * detect connection loss) that can lead to a full sync
1739 * on next handshake. In 8.3.9 we fixed this with explicit
1740 * resync-finished notifications, but the fix
1741 * introduces a protocol change. Sleeping for some
1742 * time longer than the ping interval + timeout on the
1743 * SyncSource, to give the SyncTarget the chance to
1744 * detect connection loss, then waiting for a ping
1745 * response (implicit in drbd_resync_finished) reduces
1746 * the race considerably, but does not solve it. */
44ed167d
PR
1747 if (side == C_SYNC_SOURCE) {
1748 struct net_conf *nc;
1749 int timeo;
1750
1751 rcu_read_lock();
a6b32bc3 1752 nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
44ed167d
PR
1753 timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9;
1754 rcu_read_unlock();
1755 schedule_timeout_interruptible(timeo);
1756 }
b30ab791 1757 drbd_resync_finished(device);
b411b363
PR
1758 }
1759
b30ab791
AG
1760 drbd_rs_controller_reset(device);
1761 /* ns.conn may already be != device->state.conn,
b411b363
PR
1762 * we may have been paused in between, or become paused until
1763 * the timer triggers.
1764 * No matter, that is handled in resync_timer_fn() */
1765 if (ns.conn == C_SYNC_TARGET)
b30ab791 1766 mod_timer(&device->resync_timer, jiffies);
b411b363 1767
b30ab791 1768 drbd_md_sync(device);
b411b363 1769 }
b30ab791
AG
1770 put_ldev(device);
1771 mutex_unlock(device->state_mutex);
b411b363
PR
1772}
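/* Aside on the "(r >> 8) & 0xff" above: the helper result is treated as a
 * wait()-style status word, so this extracts the handler's exit code, the same
 * value WEXITSTATUS() would yield in user space.  A small user-space analogue
 * (illustrative only, not part of the driver): */
#if 0	/* sketch only, never compiled */
#include <stdio.h>
#include <stdlib.h>
#include <sys/wait.h>

int main(void)
{
	int status = system("/bin/false");	/* wait()-style status word */

	if (status != -1 && WIFEXITED(status))
		printf("exit code: %d == %d\n",
		       WEXITSTATUS(status), (status >> 8) & 0xff);
	return 0;
}
#endif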
1773
b6dd1a89
LE
1774/* If the resource already closed the current epoch, but we did not
1775 * (because we have not yet seen new requests), we should send the
1776 * corresponding barrier now. Must be checked within the same spinlock
1777 * that is used to check for new requests. */
bde89a9e 1778static bool need_to_send_barrier(struct drbd_connection *connection)
b6dd1a89
LE
1779{
1780 if (!connection->send.seen_any_write_yet)
1781 return false;
1782
1783 /* Skip barriers that do not contain any writes.
1784 * This may happen during AHEAD mode. */
1785 if (!connection->send.current_epoch_writes)
1786 return false;
1787
1788 /* ->req_lock is held when requests are queued on
1789 * connection->sender_work, and put into ->transfer_log.
1790 * It is also held when ->current_tle_nr is increased.
1791 * So either there are already new requests queued,
1792 * and corresponding barriers will be sent there.
1793 * Or nothing new is queued yet, so the difference will be 1.
1794 */
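	/* Worked example with made-up numbers: the sender last opened epoch 7
	 * (send.current_epoch_nr == 7), the request layer has since closed it
	 * and moved on to epoch 8 (current_tle_nr == 8), and nothing new was
	 * queued -- the difference is exactly 1, so the barrier for epoch 7
	 * still has to be sent from here. */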
1795 if (atomic_read(&connection->current_tle_nr) !=
1796 connection->send.current_epoch_nr + 1)
1797 return false;
1798
1799 return true;
1800}
1801
a186e478 1802static bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list)
8c0785a5
LE
1803{
1804 spin_lock_irq(&queue->q_lock);
1805 list_splice_init(&queue->q, work_list);
1806 spin_unlock_irq(&queue->q_lock);
1807 return !list_empty(work_list);
1808}
1809
a186e478 1810static bool dequeue_work_item(struct drbd_work_queue *queue, struct list_head *work_list)
8c0785a5
LE
1811{
1812 spin_lock_irq(&queue->q_lock);
1813 if (!list_empty(&queue->q))
1814 list_move(queue->q.next, work_list);
1815 spin_unlock_irq(&queue->q_lock);
1816 return !list_empty(work_list);
1817}
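/* For context, a sketch (hypothetical; the real enqueue helper lives elsewhere
 * and may differ) of the matching producer side: add the work item under the
 * same q_lock and wake the q_wait waitqueue that wait_for_work() sleeps on. */
#if 0	/* sketch only, never compiled */
static void example_queue_work(struct drbd_work_queue *queue, struct drbd_work *w)
{
	unsigned long flags;

	spin_lock_irqsave(&queue->q_lock, flags);
	list_add_tail(&w->list, &queue->q);
	spin_unlock_irqrestore(&queue->q_lock, flags);

	wake_up(&queue->q_wait);	/* let the sender re-check its queue */
}
#endif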
1818
bde89a9e 1819static void wait_for_work(struct drbd_connection *connection, struct list_head *work_list)
b6dd1a89
LE
1820{
1821 DEFINE_WAIT(wait);
1822 struct net_conf *nc;
1823 int uncork, cork;
1824
1825 dequeue_work_item(&connection->sender_work, work_list);
1826 if (!list_empty(work_list))
1827 return;
1828
1829 /* Still nothing to do?
1830 * Maybe we still need to close the current epoch,
1831 * even if no new requests are queued yet.
1832 *
1833 * Also, poke TCP, just in case.
1834 * Then wait for new work (or signal). */
1835 rcu_read_lock();
1836 nc = rcu_dereference(connection->net_conf);
1837 uncork = nc ? nc->tcp_cork : 0;
1838 rcu_read_unlock();
1839 if (uncork) {
1840 mutex_lock(&connection->data.mutex);
1841 if (connection->data.socket)
1842 drbd_tcp_uncork(connection->data.socket);
1843 mutex_unlock(&connection->data.mutex);
1844 }
1845
1846 for (;;) {
1847 int send_barrier;
1848 prepare_to_wait(&connection->sender_work.q_wait, &wait, TASK_INTERRUPTIBLE);
0500813f 1849 spin_lock_irq(&connection->resource->req_lock);
b6dd1a89 1850 spin_lock(&connection->sender_work.q_lock); /* FIXME get rid of this one? */
bc317a9e
LE
1851 /* dequeue single item only,
1852 * we still use drbd_queue_work_front() in some places */
1853 if (!list_empty(&connection->sender_work.q))
1854 list_move(connection->sender_work.q.next, work_list);
b6dd1a89
LE
1855 spin_unlock(&connection->sender_work.q_lock); /* FIXME get rid of this one? */
1856 if (!list_empty(work_list) || signal_pending(current)) {
0500813f 1857 spin_unlock_irq(&connection->resource->req_lock);
b6dd1a89
LE
1858 break;
1859 }
1860 send_barrier = need_to_send_barrier(connection);
0500813f 1861 spin_unlock_irq(&connection->resource->req_lock);
b6dd1a89
LE
1862 if (send_barrier) {
1863 drbd_send_barrier(connection);
1864 connection->send.current_epoch_nr++;
1865 }
1866 schedule();
1867 /* We may be woken up for things other than new work, too,
1868 * e.g. if the current epoch got closed;
1869 * in that case we send the barrier above. */
1870 }
1871 finish_wait(&connection->sender_work.q_wait, &wait);
1872
1873 /* someone may have changed the config while we have been waiting above. */
1874 rcu_read_lock();
1875 nc = rcu_dereference(connection->net_conf);
1876 cork = nc ? nc->tcp_cork : 0;
1877 rcu_read_unlock();
1878 mutex_lock(&connection->data.mutex);
1879 if (connection->data.socket) {
1880 if (cork)
1881 drbd_tcp_cork(connection->data.socket);
1882 else if (!uncork)
1883 drbd_tcp_uncork(connection->data.socket);
1884 }
1885 mutex_unlock(&connection->data.mutex);
1886}
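/* The loop above is an instance of the classic prepare_to_wait() /
 * schedule() / finish_wait() idiom.  Stripped of the DRBD specifics it looks
 * roughly like this (wq and cond are hypothetical): */
#if 0	/* sketch only, never compiled */
static void wait_for_condition(wait_queue_head_t *wq, bool (*cond)(void))
{
	DEFINE_WAIT(wait);

	for (;;) {
		prepare_to_wait(wq, &wait, TASK_INTERRUPTIBLE);
		/* re-check the condition after registering on the waitqueue,
		 * so a wake-up between the check and schedule() is not lost */
		if (cond() || signal_pending(current))
			break;
		schedule();
	}
	finish_wait(wq, &wait);
}
#endif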
1887
b411b363
PR
1888int drbd_worker(struct drbd_thread *thi)
1889{
bde89a9e 1890 struct drbd_connection *connection = thi->connection;
84b8c06b 1891 struct drbd_device_work *dw = NULL;
c06ece6b 1892 struct drbd_peer_device *peer_device;
b411b363 1893 LIST_HEAD(work_list);
8c0785a5 1894 int vnr;
b411b363 1895
e77a0a5c 1896 while (get_t_state(thi) == RUNNING) {
80822284 1897 drbd_thread_current_set_cpu(thi);
b411b363 1898
8c0785a5
LE
1899 /* as long as we use drbd_queue_work_front(),
1900 * we may only dequeue single work items here, not batches. */
1901 if (list_empty(&work_list))
bde89a9e 1902 wait_for_work(connection, &work_list);
b411b363 1903
8c0785a5 1904 if (signal_pending(current)) {
b411b363 1905 flush_signals(current);
19393e10 1906 if (get_t_state(thi) == RUNNING) {
1ec861eb 1907 drbd_warn(connection, "Worker got an unexpected signal\n");
b411b363 1908 continue;
19393e10 1909 }
b411b363
PR
1910 break;
1911 }
1912
e77a0a5c 1913 if (get_t_state(thi) != RUNNING)
b411b363 1914 break;
b411b363 1915
8c0785a5 1916 while (!list_empty(&work_list)) {
84b8c06b
AG
1917 dw = list_first_entry(&work_list, struct drbd_device_work, w.list);
1918 list_del_init(&dw->w.list);
1919 if (dw->w.cb(&dw->w, connection->cstate < C_WF_REPORT_PARAMS) == 0)
8c0785a5 1920 continue;
bde89a9e
AG
1921 if (connection->cstate >= C_WF_REPORT_PARAMS)
1922 conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
b411b363
PR
1923 }
1924 }
b411b363 1925
8c0785a5 1926 do {
b411b363 1927 while (!list_empty(&work_list)) {
84b8c06b
AG
1928 dw = list_first_entry(&work_list, struct drbd_device_work, w.list);
1929 list_del_init(&dw->w.list);
1930 dw->w.cb(&dw->w, 1);
b411b363 1931 }
bde89a9e 1932 dequeue_work_batch(&connection->sender_work, &work_list);
8c0785a5 1933 } while (!list_empty(&work_list));
b411b363 1934
c141ebda 1935 rcu_read_lock();
c06ece6b
AG
1936 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1937 struct drbd_device *device = peer_device->device;
0b0ba1ef 1938 D_ASSERT(device, device->state.disk == D_DISKLESS && device->state.conn == C_STANDALONE);
b30ab791 1939 kref_get(&device->kref);
c141ebda 1940 rcu_read_unlock();
b30ab791 1941 drbd_device_cleanup(device);
05a10ec7 1942 kref_put(&device->kref, drbd_destroy_device);
c141ebda 1943 rcu_read_lock();
0e29d163 1944 }
c141ebda 1945 rcu_read_unlock();
b411b363
PR
1946
1947 return 0;
1948}