drbd: mutex_unlock "... must not be used in interrupt context"
[deliverable/linux.git] / drivers / block / drbd / drbd_worker.c
b411b363
PR
1/*
2 drbd_worker.c
3
4 This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6 Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7 Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8 Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9
10 drbd is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
14
15 drbd is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with drbd; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
24 */
25
b411b363 26#include <linux/module.h>
b411b363
PR
27#include <linux/drbd.h>
28#include <linux/sched.h>
b411b363
PR
29#include <linux/wait.h>
30#include <linux/mm.h>
31#include <linux/memcontrol.h>
32#include <linux/mm_inline.h>
33#include <linux/slab.h>
34#include <linux/random.h>
b411b363
PR
35#include <linux/string.h>
36#include <linux/scatterlist.h>
37
38#include "drbd_int.h"
39#include "drbd_req.h"
b411b363 40
00d56944 41static int w_make_ov_request(struct drbd_work *w, int cancel);
b411b363
PR
42
43
c5a91619
AG
44/* endio handlers:
45 * drbd_md_io_complete (defined here)
fcefa62e
AG
46 * drbd_request_endio (defined here)
47 * drbd_peer_request_endio (defined here)
c5a91619
AG
48 * bm_async_io_complete (defined in drbd_bitmap.c)
49 *
b411b363
PR
50 * For all these callbacks, note the following:
51 * The callbacks will be called in irq context by the IDE drivers,
52 * and in Softirqs/Tasklets/BH context by the SCSI drivers.
53 * Try to get the locking right :)
54 *
55 */
56
57
 58/* About the global_state_lock
 59 Each state transition on a device holds a read lock. In case we have
95f8efd0 60 to evaluate the resync after dependencies, we grab a write lock, because
b411b363 61 we need stable states on all devices for that. */
62rwlock_t global_state_lock;
63
64/* used for synchronous meta data and bitmap IO
65 * submitted by drbd_md_sync_page_io()
66 */
67void drbd_md_io_complete(struct bio *bio, int error)
68{
69 struct drbd_md_io *md_io;
cdfda633 70 struct drbd_conf *mdev;
b411b363
PR
71
72 md_io = (struct drbd_md_io *)bio->bi_private;
cdfda633
PR
73 mdev = container_of(md_io, struct drbd_conf, md_io);
74
b411b363
PR
75 md_io->error = error;
76
0cfac5dd
PR
77 /* We grabbed an extra reference in _drbd_md_sync_page_io() to be able
78 * to timeout on the lower level device, and eventually detach from it.
79 * If this io completion runs after that timeout expired, this
80 * drbd_md_put_buffer() may allow us to finally try and re-attach.
81 * During normal operation, this only puts that extra reference
82 * down to 1 again.
83 * Make sure we first drop the reference, and only then signal
84 * completion, or we may (in drbd_al_read_log()) cycle so fast into the
85 * next drbd_md_sync_page_io(), that we trigger the
86 * ASSERT(atomic_read(&mdev->md_io_in_use) == 1) there.
87 */
88 drbd_md_put_buffer(mdev);
cdfda633
PR
89 md_io->done = 1;
90 wake_up(&mdev->misc_wait);
91 bio_put(bio);
cdfda633 92 put_ldev(mdev);
b411b363
PR
93}
94
95/* reads on behalf of the partner,
96 * "submitted" by the receiver
97 */
db830c46 98void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(local)
b411b363
PR
99{
100 unsigned long flags = 0;
a21e9298 101 struct drbd_conf *mdev = peer_req->w.mdev;
b411b363 102
87eeee41 103 spin_lock_irqsave(&mdev->tconn->req_lock, flags);
db830c46
AG
104 mdev->read_cnt += peer_req->i.size >> 9;
105 list_del(&peer_req->w.list);
b411b363
PR
106 if (list_empty(&mdev->read_ee))
107 wake_up(&mdev->ee_wait);
db830c46 108 if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
0c849666 109 __drbd_chk_io_error(mdev, DRBD_IO_ERROR);
87eeee41 110 spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
b411b363 111
d5b27b01 112 drbd_queue_work(&mdev->tconn->sender_work, &peer_req->w);
b411b363 113 put_ldev(mdev);
b411b363
PR
114}
115
116/* writes on behalf of the partner, or resync writes,
45bb912b 117 * "submitted" by the receiver, final stage. */
db830c46 118static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local)
b411b363
PR
119{
120 unsigned long flags = 0;
a21e9298 121 struct drbd_conf *mdev = peer_req->w.mdev;
181286ad 122 struct drbd_interval i;
b411b363 123 int do_wake;
579b57ed 124 u64 block_id;
b411b363 125 int do_al_complete_io;
b411b363 126
db830c46 127 /* after we moved peer_req to done_ee,
b411b363
PR
128 * we may no longer access it,
129 * it may be freed/reused already!
130 * (as soon as we release the req_lock) */
181286ad 131 i = peer_req->i;
db830c46
AG
132 do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO;
133 block_id = peer_req->block_id;
b411b363 134
87eeee41 135 spin_lock_irqsave(&mdev->tconn->req_lock, flags);
db830c46
AG
136 mdev->writ_cnt += peer_req->i.size >> 9;
137 list_del(&peer_req->w.list); /* has been on active_ee or sync_ee */
138 list_add_tail(&peer_req->w.list, &mdev->done_ee);
b411b363 139
bb3bfe96 140 /*
5e472264 141 * Do not remove from the write_requests tree here: we did not send the
bb3bfe96
AG
142 * Ack yet and did not wake possibly waiting conflicting requests.
143 * Removed from the tree from "drbd_process_done_ee" within the
144 * appropriate w.cb (e_end_block/e_end_resync_block) or from
145 * _drbd_clear_done_ee.
146 */
b411b363 147
579b57ed 148 do_wake = list_empty(block_id == ID_SYNCER ? &mdev->sync_ee : &mdev->active_ee);
b411b363 149
db830c46 150 if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
0c849666 151 __drbd_chk_io_error(mdev, DRBD_IO_ERROR);
87eeee41 152 spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
b411b363 153
579b57ed 154 if (block_id == ID_SYNCER)
181286ad 155 drbd_rs_complete_io(mdev, i.sector);
b411b363
PR
156
157 if (do_wake)
158 wake_up(&mdev->ee_wait);
159
160 if (do_al_complete_io)
181286ad 161 drbd_al_complete_io(mdev, &i);
b411b363 162
0625ac19 163 wake_asender(mdev->tconn);
b411b363 164 put_ldev(mdev);
45bb912b 165}
b411b363 166
45bb912b
LE
167/* writes on behalf of the partner, or resync writes,
168 * "submitted" by the receiver.
169 */
fcefa62e 170void drbd_peer_request_endio(struct bio *bio, int error)
45bb912b 171{
db830c46 172 struct drbd_peer_request *peer_req = bio->bi_private;
a21e9298 173 struct drbd_conf *mdev = peer_req->w.mdev;
45bb912b
LE
174 int uptodate = bio_flagged(bio, BIO_UPTODATE);
175 int is_write = bio_data_dir(bio) == WRITE;
176
07194272 177 if (error && __ratelimit(&drbd_ratelimit_state))
45bb912b
LE
178 dev_warn(DEV, "%s: error=%d s=%llus\n",
179 is_write ? "write" : "read", error,
db830c46 180 (unsigned long long)peer_req->i.sector);
45bb912b 181 if (!error && !uptodate) {
07194272
LE
182 if (__ratelimit(&drbd_ratelimit_state))
183 dev_warn(DEV, "%s: setting error to -EIO s=%llus\n",
184 is_write ? "write" : "read",
db830c46 185 (unsigned long long)peer_req->i.sector);
45bb912b
LE
186 /* strange behavior of some lower level drivers...
187 * fail the request by clearing the uptodate flag,
188 * but do not return any error?! */
189 error = -EIO;
190 }
191
192 if (error)
db830c46 193 set_bit(__EE_WAS_ERROR, &peer_req->flags);
45bb912b
LE
194
195 bio_put(bio); /* no need for the bio anymore */
db830c46 196 if (atomic_dec_and_test(&peer_req->pending_bios)) {
45bb912b 197 if (is_write)
db830c46 198 drbd_endio_write_sec_final(peer_req);
45bb912b 199 else
db830c46 200 drbd_endio_read_sec_final(peer_req);
45bb912b 201 }
b411b363
PR
202}
203
204/* read, readA or write requests on R_PRIMARY coming from drbd_make_request
205 */
fcefa62e 206void drbd_request_endio(struct bio *bio, int error)
b411b363 207{
a115413d 208 unsigned long flags;
b411b363 209 struct drbd_request *req = bio->bi_private;
a21e9298 210 struct drbd_conf *mdev = req->w.mdev;
a115413d 211 struct bio_and_error m;
b411b363
PR
212 enum drbd_req_event what;
213 int uptodate = bio_flagged(bio, BIO_UPTODATE);
214
b411b363
PR
215 if (!error && !uptodate) {
216 dev_warn(DEV, "p %s: setting error to -EIO\n",
217 bio_data_dir(bio) == WRITE ? "write" : "read");
218 /* strange behavior of some lower level drivers...
219 * fail the request by clearing the uptodate flag,
220 * but do not return any error?! */
221 error = -EIO;
222 }
223
b411b363
PR
224 /* to avoid recursion in __req_mod */
225 if (unlikely(error)) {
226 what = (bio_data_dir(bio) == WRITE)
8554df1c 227 ? WRITE_COMPLETED_WITH_ERROR
5c3c7e64 228 : (bio_rw(bio) == READ)
8554df1c
AG
229 ? READ_COMPLETED_WITH_ERROR
230 : READ_AHEAD_COMPLETED_WITH_ERROR;
b411b363 231 } else
8554df1c 232 what = COMPLETED_OK;
b411b363
PR
233
234 bio_put(req->private_bio);
235 req->private_bio = ERR_PTR(error);
236
a115413d 237 /* not req_mod(), we need irqsave here! */
87eeee41 238 spin_lock_irqsave(&mdev->tconn->req_lock, flags);
a115413d 239 __req_mod(req, what, &m);
87eeee41 240 spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
2415308e 241 put_ldev(mdev);
a115413d
LE
242
243 if (m.bio)
244 complete_master_bio(mdev, &m);
b411b363
PR
245}
246
f6ffca9f 247void drbd_csum_ee(struct drbd_conf *mdev, struct crypto_hash *tfm,
db830c46 248 struct drbd_peer_request *peer_req, void *digest)
45bb912b
LE
249{
250 struct hash_desc desc;
251 struct scatterlist sg;
db830c46 252 struct page *page = peer_req->pages;
45bb912b
LE
253 struct page *tmp;
254 unsigned len;
255
256 desc.tfm = tfm;
257 desc.flags = 0;
258
259 sg_init_table(&sg, 1);
260 crypto_hash_init(&desc);
261
262 while ((tmp = page_chain_next(page))) {
263 /* all but the last page will be fully used */
264 sg_set_page(&sg, page, PAGE_SIZE, 0);
265 crypto_hash_update(&desc, &sg, sg.length);
266 page = tmp;
267 }
268 /* and now the last, possibly only partially used page */
db830c46 269 len = peer_req->i.size & (PAGE_SIZE - 1);
45bb912b
LE
270 sg_set_page(&sg, page, len ?: PAGE_SIZE, 0);
271 crypto_hash_update(&desc, &sg, sg.length);
272 crypto_hash_final(&desc, digest);
273}
274
275void drbd_csum_bio(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio *bio, void *digest)
b411b363
PR
276{
277 struct hash_desc desc;
278 struct scatterlist sg;
279 struct bio_vec *bvec;
280 int i;
281
282 desc.tfm = tfm;
283 desc.flags = 0;
284
285 sg_init_table(&sg, 1);
286 crypto_hash_init(&desc);
287
4b8514ee 288 bio_for_each_segment(bvec, bio, i) {
b411b363
PR
289 sg_set_page(&sg, bvec->bv_page, bvec->bv_len, bvec->bv_offset);
290 crypto_hash_update(&desc, &sg, sg.length);
291 }
292 crypto_hash_final(&desc, digest);
293}
294
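/* Worker callback for checksum-based resync on the sync target side:
 * compute the digest of the block just read and send it to the peer in a
 * P_CSUM_RS_REQUEST. The peer request is freed before sending to avoid a
 * distributed deadlock on pp_in_use (see the comment in the function body). */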
9676c760 295/* MAYBE merge common code with w_e_end_ov_req */
99920dc5 296static int w_e_send_csum(struct drbd_work *w, int cancel)
b411b363 297{
00d56944
PR
298 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
299 struct drbd_conf *mdev = w->mdev;
b411b363
PR
300 int digest_size;
301 void *digest;
99920dc5 302 int err = 0;
b411b363 303
53ea4331
LE
304 if (unlikely(cancel))
305 goto out;
b411b363 306
9676c760 307 if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0))
53ea4331 308 goto out;
b411b363 309
f399002e 310 digest_size = crypto_hash_digestsize(mdev->tconn->csums_tfm);
53ea4331
LE
311 digest = kmalloc(digest_size, GFP_NOIO);
312 if (digest) {
db830c46
AG
313 sector_t sector = peer_req->i.sector;
314 unsigned int size = peer_req->i.size;
f399002e 315 drbd_csum_ee(mdev, mdev->tconn->csums_tfm, peer_req, digest);
9676c760 316 /* Free peer_req and pages before send.
53ea4331
LE
317 * In case we block on congestion, we could otherwise run into
318 * some distributed deadlock, if the other side blocks on
319 * congestion as well, because our receiver blocks in
c37c8ecf 320 * drbd_alloc_pages due to pp_in_use > max_buffers. */
3967deb1 321 drbd_free_peer_req(mdev, peer_req);
db830c46 322 peer_req = NULL;
53ea4331 323 inc_rs_pending(mdev);
99920dc5 324 err = drbd_send_drequest_csum(mdev, sector, size,
db1b0b72
AG
325 digest, digest_size,
326 P_CSUM_RS_REQUEST);
53ea4331
LE
327 kfree(digest);
328 } else {
329 dev_err(DEV, "kmalloc() of digest failed.\n");
99920dc5 330 err = -ENOMEM;
53ea4331 331 }
b411b363 332
53ea4331 333out:
db830c46 334 if (peer_req)
3967deb1 335 drbd_free_peer_req(mdev, peer_req);
b411b363 336
99920dc5 337 if (unlikely(err))
b411b363 338 dev_err(DEV, "drbd_send_drequest(..., csum) failed\n");
99920dc5 339 return err;
b411b363
PR
340}
341
342#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
343
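/* For checksum-based resync: allocate a peer request and submit a local
 * read of the given area, so that w_e_send_csum() can later send its
 * digest instead of requesting the full data block. Returns -EAGAIN to let
 * the caller retry when throttled or out of memory, -EIO without a local disk. */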
344static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size)
345{
db830c46 346 struct drbd_peer_request *peer_req;
b411b363
PR
347
348 if (!get_ldev(mdev))
80a40e43 349 return -EIO;
b411b363 350
e3555d85 351 if (drbd_rs_should_slow_down(mdev, sector))
0f0601f4
LE
352 goto defer;
353
b411b363
PR
354 /* GFP_TRY, because if there is no memory available right now, this may
355 * be rescheduled for later. It is "only" background resync, after all. */
0db55363
AG
356 peer_req = drbd_alloc_peer_req(mdev, ID_SYNCER /* unused */, sector,
357 size, GFP_TRY);
db830c46 358 if (!peer_req)
80a40e43 359 goto defer;
b411b363 360
db830c46 361 peer_req->w.cb = w_e_send_csum;
87eeee41 362 spin_lock_irq(&mdev->tconn->req_lock);
db830c46 363 list_add(&peer_req->w.list, &mdev->read_ee);
87eeee41 364 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363 365
0f0601f4 366 atomic_add(size >> 9, &mdev->rs_sect_ev);
fbe29dec 367 if (drbd_submit_peer_request(mdev, peer_req, READ, DRBD_FAULT_RS_RD) == 0)
80a40e43 368 return 0;
b411b363 369
10f6d992
LE
370 /* If it failed because of ENOMEM, retry should help. If it failed
371 * because bio_add_page failed (probably broken lower level driver),
372 * retry may or may not help.
373 * If it does not, you may need to force disconnect. */
87eeee41 374 spin_lock_irq(&mdev->tconn->req_lock);
db830c46 375 list_del(&peer_req->w.list);
87eeee41 376 spin_unlock_irq(&mdev->tconn->req_lock);
22cc37a9 377
3967deb1 378 drbd_free_peer_req(mdev, peer_req);
80a40e43 379defer:
45bb912b 380 put_ldev(mdev);
80a40e43 381 return -EAGAIN;
b411b363
PR
382}
383
99920dc5 384int w_resync_timer(struct drbd_work *w, int cancel)
b411b363 385{
00d56944 386 struct drbd_conf *mdev = w->mdev;
63106d3c
PR
387 switch (mdev->state.conn) {
388 case C_VERIFY_S:
00d56944 389 w_make_ov_request(w, cancel);
63106d3c
PR
390 break;
391 case C_SYNC_TARGET:
00d56944 392 w_make_resync_request(w, cancel);
63106d3c 393 break;
b411b363
PR
394 }
395
99920dc5 396 return 0;
794abb75
PR
397}
398
399void resync_timer_fn(unsigned long data)
400{
401 struct drbd_conf *mdev = (struct drbd_conf *) data;
402
403 if (list_empty(&mdev->resync_work.list))
d5b27b01 404 drbd_queue_work(&mdev->tconn->sender_work, &mdev->resync_work);
b411b363
PR
405}
406
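/* Helpers for the resync-plan FIFO (struct fifo_buffer): fifo_set() fills
 * all slots with one value, fifo_push() advances the ring by one slot and
 * returns the value that drops out, fifo_add_val() adds a value to every
 * slot. drbd_rs_controller() uses it to spread corrections over the
 * planned-ahead steps. */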
778f271d
PR
407static void fifo_set(struct fifo_buffer *fb, int value)
408{
409 int i;
410
411 for (i = 0; i < fb->size; i++)
f10f2623 412 fb->values[i] = value;
778f271d
PR
413}
414
415static int fifo_push(struct fifo_buffer *fb, int value)
416{
417 int ov;
418
419 ov = fb->values[fb->head_index];
420 fb->values[fb->head_index++] = value;
421
422 if (fb->head_index >= fb->size)
423 fb->head_index = 0;
424
425 return ov;
426}
427
428static void fifo_add_val(struct fifo_buffer *fb, int value)
429{
430 int i;
431
432 for (i = 0; i < fb->size; i++)
433 fb->values[i] += value;
434}
435
9958c857
PR
436struct fifo_buffer *fifo_alloc(int fifo_size)
437{
438 struct fifo_buffer *fb;
439
440 fb = kzalloc(sizeof(struct fifo_buffer) + sizeof(int) * fifo_size, GFP_KERNEL);
441 if (!fb)
442 return NULL;
443
444 fb->head_index = 0;
445 fb->size = fifo_size;
446 fb->total = 0;
447
448 return fb;
449}
450
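/* drbd_rs_controller() implements the dynamic resync-speed controller:
 * based on the sectors that came back since the last call (rs_sect_in),
 * the configured fill/delay target and the amount still in flight, it
 * computes a correction, spreads it over the plan FIFO (c_plan_ahead
 * steps), and returns how many sectors to request in this SLEEP_TIME
 * interval, capped at c_max_rate. */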
9d77a5fe 451static int drbd_rs_controller(struct drbd_conf *mdev)
778f271d 452{
daeda1cc 453 struct disk_conf *dc;
778f271d
PR
454 unsigned int sect_in; /* Number of sectors that came in since the last turn */
455 unsigned int want; /* The number of sectors we want in the proxy */
456 int req_sect; /* Number of sectors to request in this turn */
457 int correction; /* Number of sectors more we need in the proxy*/
458 int cps; /* correction per invocation of drbd_rs_controller() */
459 int steps; /* Number of time steps to plan ahead */
460 int curr_corr;
461 int max_sect;
813472ce 462 struct fifo_buffer *plan;
778f271d
PR
463
464 sect_in = atomic_xchg(&mdev->rs_sect_in, 0); /* Number of sectors that came in */
465 mdev->rs_in_flight -= sect_in;
466
daeda1cc 467 dc = rcu_dereference(mdev->ldev->disk_conf);
813472ce 468 plan = rcu_dereference(mdev->rs_plan_s);
778f271d 469
813472ce 470 steps = plan->size; /* (dc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; */
778f271d
PR
471
472 if (mdev->rs_in_flight + sect_in == 0) { /* At start of resync */
daeda1cc 473 want = ((dc->resync_rate * 2 * SLEEP_TIME) / HZ) * steps;
778f271d 474 } else { /* normal path */
daeda1cc
PR
475 want = dc->c_fill_target ? dc->c_fill_target :
476 sect_in * dc->c_delay_target * HZ / (SLEEP_TIME * 10);
778f271d
PR
477 }
478
813472ce 479 correction = want - mdev->rs_in_flight - plan->total;
778f271d
PR
480
481 /* Plan ahead */
482 cps = correction / steps;
813472ce
PR
483 fifo_add_val(plan, cps);
484 plan->total += cps * steps;
778f271d
PR
485
486 /* What we do in this step */
813472ce
PR
487 curr_corr = fifo_push(plan, 0);
488 plan->total -= curr_corr;
778f271d
PR
489
490 req_sect = sect_in + curr_corr;
491 if (req_sect < 0)
492 req_sect = 0;
493
daeda1cc 494 max_sect = (dc->c_max_rate * 2 * SLEEP_TIME) / HZ;
778f271d
PR
495 if (req_sect > max_sect)
496 req_sect = max_sect;
497
498 /*
499 dev_warn(DEV, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n",
500 sect_in, mdev->rs_in_flight, want, correction,
501 steps, cps, mdev->rs_planed, curr_corr, req_sect);
502 */
503
504 return req_sect;
505}
506
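/* Translate the controller output (or the static resync_rate, if no plan
 * is configured) into the number of BM_BLOCK_SIZE requests to issue during
 * the next SLEEP_TIME interval, and update c_sync_rate accordingly. */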
9d77a5fe 507static int drbd_rs_number_requests(struct drbd_conf *mdev)
e65f440d
LE
508{
509 int number;
813472ce
PR
510
511 rcu_read_lock();
512 if (rcu_dereference(mdev->rs_plan_s)->size) {
e65f440d
LE
513 number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9);
514 mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
515 } else {
daeda1cc 516 mdev->c_sync_rate = rcu_dereference(mdev->ldev->disk_conf)->resync_rate;
e65f440d
LE
517 number = SLEEP_TIME * mdev->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ);
518 }
813472ce 519 rcu_read_unlock();
e65f440d 520
e65f440d
LE
521 /* ignore the amount of pending requests, the resync controller should
522 * throttle down to incoming reply rate soon enough anyways. */
523 return number;
524}
525
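/* Worker callback that generates resync requests: walk the out-of-sync
 * bitmap starting at bm_resync_fo, merge adjacent dirty bits up to
 * max_bio_size (keeping requests aligned for striped lower devices),
 * throttle against the send buffer and local disk activity, and either
 * send P_RS_DATA_REQUEST or, for checksum-based resync, first read the
 * block locally via read_for_csum(). */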
99920dc5 526int w_make_resync_request(struct drbd_work *w, int cancel)
b411b363 527{
00d56944 528 struct drbd_conf *mdev = w->mdev;
b411b363
PR
529 unsigned long bit;
530 sector_t sector;
531 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
1816a2b4 532 int max_bio_size;
e65f440d 533 int number, rollback_i, size;
b411b363 534 int align, queued, sndbuf;
0f0601f4 535 int i = 0;
b411b363
PR
536
537 if (unlikely(cancel))
99920dc5 538 return 0;
b411b363 539
af85e8e8
LE
540 if (mdev->rs_total == 0) {
541 /* empty resync? */
542 drbd_resync_finished(mdev);
99920dc5 543 return 0;
af85e8e8
LE
544 }
545
b411b363
PR
546 if (!get_ldev(mdev)) {
547 /* Since we only need to access mdev->rsync a
548 get_ldev_if_state(mdev,D_FAILED) would be sufficient, but
549 to continue resync with a broken disk makes no sense at
550 all */
551 dev_err(DEV, "Disk broke down during resync!\n");
99920dc5 552 return 0;
b411b363
PR
553 }
554
0cfdd247 555 max_bio_size = queue_max_hw_sectors(mdev->rq_queue) << 9;
e65f440d
LE
556 number = drbd_rs_number_requests(mdev);
557 if (number == 0)
0f0601f4 558 goto requeue;
b411b363 559
b411b363
PR
560 for (i = 0; i < number; i++) {
561 /* Stop generating RS requests, when half of the send buffer is filled */
e42325a5
PR
562 mutex_lock(&mdev->tconn->data.mutex);
563 if (mdev->tconn->data.socket) {
564 queued = mdev->tconn->data.socket->sk->sk_wmem_queued;
565 sndbuf = mdev->tconn->data.socket->sk->sk_sndbuf;
b411b363
PR
566 } else {
567 queued = 1;
568 sndbuf = 0;
569 }
e42325a5 570 mutex_unlock(&mdev->tconn->data.mutex);
b411b363
PR
571 if (queued > sndbuf / 2)
572 goto requeue;
573
574next_sector:
575 size = BM_BLOCK_SIZE;
576 bit = drbd_bm_find_next(mdev, mdev->bm_resync_fo);
577
4b0715f0 578 if (bit == DRBD_END_OF_BITMAP) {
b411b363 579 mdev->bm_resync_fo = drbd_bm_bits(mdev);
b411b363 580 put_ldev(mdev);
99920dc5 581 return 0;
b411b363
PR
582 }
583
584 sector = BM_BIT_TO_SECT(bit);
585
e3555d85
PR
586 if (drbd_rs_should_slow_down(mdev, sector) ||
587 drbd_try_rs_begin_io(mdev, sector)) {
b411b363
PR
588 mdev->bm_resync_fo = bit;
589 goto requeue;
590 }
591 mdev->bm_resync_fo = bit + 1;
592
593 if (unlikely(drbd_bm_test_bit(mdev, bit) == 0)) {
594 drbd_rs_complete_io(mdev, sector);
595 goto next_sector;
596 }
597
1816a2b4 598#if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE
b411b363
PR
599 /* try to find some adjacent bits.
600 * we stop if we have already the maximum req size.
601 *
602 * Additionally always align bigger requests, in order to
603 * be prepared for all stripe sizes of software RAIDs.
b411b363
PR
604 */
605 align = 1;
d207450c 606 rollback_i = i;
b411b363 607 for (;;) {
1816a2b4 608 if (size + BM_BLOCK_SIZE > max_bio_size)
b411b363
PR
609 break;
610
611 /* Be always aligned */
612 if (sector & ((1<<(align+3))-1))
613 break;
614
615 /* do not cross extent boundaries */
616 if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0)
617 break;
618 /* now, is it actually dirty, after all?
619 * caution, drbd_bm_test_bit is tri-state for some
620 * obscure reason; ( b == 0 ) would get the out-of-band
621 * only accidentally right because of the "oddly sized"
622 * adjustment below */
623 if (drbd_bm_test_bit(mdev, bit+1) != 1)
624 break;
625 bit++;
626 size += BM_BLOCK_SIZE;
627 if ((BM_BLOCK_SIZE << align) <= size)
628 align++;
629 i++;
630 }
631 /* if we merged some,
632 * reset the offset to start the next drbd_bm_find_next from */
633 if (size > BM_BLOCK_SIZE)
634 mdev->bm_resync_fo = bit + 1;
635#endif
636
637 /* adjust very last sectors, in case we are oddly sized */
638 if (sector + (size>>9) > capacity)
639 size = (capacity-sector)<<9;
f399002e 640 if (mdev->tconn->agreed_pro_version >= 89 && mdev->tconn->csums_tfm) {
b411b363 641 switch (read_for_csum(mdev, sector, size)) {
80a40e43 642 case -EIO: /* Disk failure */
b411b363 643 put_ldev(mdev);
99920dc5 644 return -EIO;
80a40e43 645 case -EAGAIN: /* allocation failed, or ldev busy */
b411b363
PR
646 drbd_rs_complete_io(mdev, sector);
647 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
d207450c 648 i = rollback_i;
b411b363 649 goto requeue;
80a40e43
LE
650 case 0:
651 /* everything ok */
652 break;
653 default:
654 BUG();
b411b363
PR
655 }
656 } else {
99920dc5
AG
657 int err;
658
b411b363 659 inc_rs_pending(mdev);
99920dc5
AG
660 err = drbd_send_drequest(mdev, P_RS_DATA_REQUEST,
661 sector, size, ID_SYNCER);
662 if (err) {
b411b363
PR
663 dev_err(DEV, "drbd_send_drequest() failed, aborting...\n");
664 dec_rs_pending(mdev);
665 put_ldev(mdev);
99920dc5 666 return err;
b411b363
PR
667 }
668 }
669 }
670
671 if (mdev->bm_resync_fo >= drbd_bm_bits(mdev)) {
672 /* last syncer _request_ was sent,
673 * but the P_RS_DATA_REPLY not yet received. sync will end (and
674 * next sync group will resume), as soon as we receive the last
675 * resync data block, and the last bit is cleared.
676 * until then resync "work" is "inactive" ...
677 */
b411b363 678 put_ldev(mdev);
99920dc5 679 return 0;
b411b363
PR
680 }
681
682 requeue:
778f271d 683 mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
b411b363
PR
684 mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
685 put_ldev(mdev);
99920dc5 686 return 0;
b411b363
PR
687}
688
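/* Online-verify counterpart of w_make_resync_request(): send up to
 * "number" P_OV_REQUEST packets starting at ov_position, honoring the
 * resync throttling and the optional stop sector. */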
00d56944 689static int w_make_ov_request(struct drbd_work *w, int cancel)
b411b363 690{
00d56944 691 struct drbd_conf *mdev = w->mdev;
b411b363
PR
692 int number, i, size;
693 sector_t sector;
694 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
58ffa580 695 bool stop_sector_reached = false;
b411b363
PR
696
697 if (unlikely(cancel))
698 return 1;
699
2649f080 700 number = drbd_rs_number_requests(mdev);
b411b363
PR
701
702 sector = mdev->ov_position;
703 for (i = 0; i < number; i++) {
58ffa580 704 if (sector >= capacity)
b411b363 705 return 1;
58ffa580
LE
706
707 /* We check for "finished" only in the reply path:
708 * w_e_end_ov_reply().
709 * We need to send at least one request out. */
710 stop_sector_reached = i > 0
711 && verify_can_do_stop_sector(mdev)
712 && sector >= mdev->ov_stop_sector;
713 if (stop_sector_reached)
714 break;
b411b363
PR
715
716 size = BM_BLOCK_SIZE;
717
e3555d85
PR
718 if (drbd_rs_should_slow_down(mdev, sector) ||
719 drbd_try_rs_begin_io(mdev, sector)) {
b411b363
PR
720 mdev->ov_position = sector;
721 goto requeue;
722 }
723
724 if (sector + (size>>9) > capacity)
725 size = (capacity-sector)<<9;
726
727 inc_rs_pending(mdev);
5b9f499c 728 if (drbd_send_ov_request(mdev, sector, size)) {
b411b363
PR
729 dec_rs_pending(mdev);
730 return 0;
731 }
732 sector += BM_SECT_PER_BIT;
733 }
734 mdev->ov_position = sector;
735
736 requeue:
2649f080 737 mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
58ffa580
LE
738 if (i == 0 || !stop_sector_reached)
739 mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
b411b363
PR
740 return 1;
741}
742
99920dc5 743int w_ov_finished(struct drbd_work *w, int cancel)
b411b363 744{
00d56944 745 struct drbd_conf *mdev = w->mdev;
b411b363 746 kfree(w);
8f7bed77 747 ov_out_of_sync_print(mdev);
b411b363
PR
748 drbd_resync_finished(mdev);
749
99920dc5 750 return 0;
b411b363
PR
751}
752
99920dc5 753static int w_resync_finished(struct drbd_work *w, int cancel)
b411b363 754{
00d56944 755 struct drbd_conf *mdev = w->mdev;
b411b363
PR
756 kfree(w);
757
758 drbd_resync_finished(mdev);
759
99920dc5 760 return 0;
b411b363
PR
761}
762
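/* Send a ping to the peer and wait until the ping ack arrives or the
 * connection is lost. */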
af85e8e8
LE
763static void ping_peer(struct drbd_conf *mdev)
764{
2a67d8b9
PR
765 struct drbd_tconn *tconn = mdev->tconn;
766
767 clear_bit(GOT_PING_ACK, &tconn->flags);
768 request_ping(tconn);
769 wait_event(tconn->ping_wait,
770 test_bit(GOT_PING_ACK, &tconn->flags) || mdev->state.conn < C_CONNECTED);
af85e8e8
LE
771}
772
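/* Called when a resync or online verify run ends: drains the resync LRU,
 * reports the achieved throughput, updates UUIDs and the local/peer disk
 * states under req_lock, and possibly triggers the "out-of-sync" or
 * "after-resync-target" helpers. */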
b411b363
PR
773int drbd_resync_finished(struct drbd_conf *mdev)
774{
775 unsigned long db, dt, dbdt;
776 unsigned long n_oos;
777 union drbd_state os, ns;
778 struct drbd_work *w;
779 char *khelper_cmd = NULL;
26525618 780 int verify_done = 0;
b411b363
PR
781
782 /* Remove all elements from the resync LRU. Since future actions
783 * might set bits in the (main) bitmap, then the entries in the
784 * resync LRU would be wrong. */
785 if (drbd_rs_del_all(mdev)) {
786 /* In case this is not possible now, most probably because
787 * there are P_RS_DATA_REPLY Packets lingering on the worker's
788 * queue (or even the read operations for those packets
789 * is not finished by now). Retry in 100ms. */
790
20ee6390 791 schedule_timeout_interruptible(HZ / 10);
b411b363
PR
792 w = kmalloc(sizeof(struct drbd_work), GFP_ATOMIC);
793 if (w) {
794 w->cb = w_resync_finished;
9b743da9 795 w->mdev = mdev;
d5b27b01 796 drbd_queue_work(&mdev->tconn->sender_work, w);
b411b363
PR
797 return 1;
798 }
799 dev_err(DEV, "Warn failed to drbd_rs_del_all() and to kmalloc(w).\n");
800 }
801
802 dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ;
803 if (dt <= 0)
804 dt = 1;
58ffa580 805
b411b363 806 db = mdev->rs_total;
58ffa580
LE
807 /* adjust for verify start and stop sectors, respective reached position */
808 if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
809 db -= mdev->ov_left;
810
b411b363
PR
811 dbdt = Bit2KB(db/dt);
812 mdev->rs_paused /= HZ;
813
814 if (!get_ldev(mdev))
815 goto out;
816
af85e8e8
LE
817 ping_peer(mdev);
818
87eeee41 819 spin_lock_irq(&mdev->tconn->req_lock);
78bae59b 820 os = drbd_read_state(mdev);
b411b363 821
26525618
LE
822 verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T);
823
b411b363
PR
824 /* This protects us against multiple calls (that can happen in the presence
825 of application IO), and against connectivity loss just before we arrive here. */
826 if (os.conn <= C_CONNECTED)
827 goto out_unlock;
828
829 ns = os;
830 ns.conn = C_CONNECTED;
831
832 dev_info(DEV, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
58ffa580 833 verify_done ? "Online verify" : "Resync",
b411b363
PR
834 dt + mdev->rs_paused, mdev->rs_paused, dbdt);
835
836 n_oos = drbd_bm_total_weight(mdev);
837
838 if (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) {
839 if (n_oos) {
840 dev_alert(DEV, "Online verify found %lu %dk block out of sync!\n",
841 n_oos, Bit2KB(1));
842 khelper_cmd = "out-of-sync";
843 }
844 } else {
845 D_ASSERT((n_oos - mdev->rs_failed) == 0);
846
847 if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T)
848 khelper_cmd = "after-resync-target";
849
f399002e 850 if (mdev->tconn->csums_tfm && mdev->rs_total) {
b411b363
PR
851 const unsigned long s = mdev->rs_same_csum;
852 const unsigned long t = mdev->rs_total;
853 const int ratio =
854 (t == 0) ? 0 :
855 (t < 100000) ? ((s*100)/t) : (s/(t/100));
24c4830c 856 dev_info(DEV, "%u %% had equal checksums, eliminated: %luK; "
b411b363
PR
857 "transferred %luK total %luK\n",
858 ratio,
859 Bit2KB(mdev->rs_same_csum),
860 Bit2KB(mdev->rs_total - mdev->rs_same_csum),
861 Bit2KB(mdev->rs_total));
862 }
863 }
864
865 if (mdev->rs_failed) {
866 dev_info(DEV, " %lu failed blocks\n", mdev->rs_failed);
867
868 if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
869 ns.disk = D_INCONSISTENT;
870 ns.pdsk = D_UP_TO_DATE;
871 } else {
872 ns.disk = D_UP_TO_DATE;
873 ns.pdsk = D_INCONSISTENT;
874 }
875 } else {
876 ns.disk = D_UP_TO_DATE;
877 ns.pdsk = D_UP_TO_DATE;
878
879 if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
880 if (mdev->p_uuid) {
881 int i;
882 for (i = UI_BITMAP ; i <= UI_HISTORY_END ; i++)
883 _drbd_uuid_set(mdev, i, mdev->p_uuid[i]);
884 drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_CURRENT]);
885 _drbd_uuid_set(mdev, UI_CURRENT, mdev->p_uuid[UI_CURRENT]);
886 } else {
887 dev_err(DEV, "mdev->p_uuid is NULL! BUG\n");
888 }
889 }
890
62b0da3a
LE
891 if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) {
892 /* for verify runs, we don't update uuids here,
893 * so there would be nothing to report. */
894 drbd_uuid_set_bm(mdev, 0UL);
895 drbd_print_uuids(mdev, "updated UUIDs");
896 if (mdev->p_uuid) {
897 /* Now the two UUID sets are equal, update what we
898 * know of the peer. */
899 int i;
900 for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
901 mdev->p_uuid[i] = mdev->ldev->md.uuid[i];
902 }
b411b363
PR
903 }
904 }
905
906 _drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
907out_unlock:
87eeee41 908 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363
PR
909 put_ldev(mdev);
910out:
911 mdev->rs_total = 0;
912 mdev->rs_failed = 0;
913 mdev->rs_paused = 0;
58ffa580
LE
914
915 /* reset start sector, if we reached end of device */
916 if (verify_done && mdev->ov_left == 0)
26525618 917 mdev->ov_start_sector = 0;
b411b363 918
13d42685
LE
919 drbd_md_sync(mdev);
920
b411b363
PR
921 if (khelper_cmd)
922 drbd_khelper(mdev, khelper_cmd);
923
924 return 1;
925}
926
 927/* helper: park peer requests whose pages are still in flight (sendpage) on net_ee, accounting them in pp_in_use_by_net; otherwise free them right away. */
db830c46 928static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct drbd_peer_request *peer_req)
b411b363 929{
045417f7 930 if (drbd_peer_req_has_active_page(peer_req)) {
b411b363 931 /* This might happen if sendpage() has not finished */
db830c46 932 int i = (peer_req->i.size + PAGE_SIZE -1) >> PAGE_SHIFT;
435f0740
LE
933 atomic_add(i, &mdev->pp_in_use_by_net);
934 atomic_sub(i, &mdev->pp_in_use);
87eeee41 935 spin_lock_irq(&mdev->tconn->req_lock);
db830c46 936 list_add_tail(&peer_req->w.list, &mdev->net_ee);
87eeee41 937 spin_unlock_irq(&mdev->tconn->req_lock);
435f0740 938 wake_up(&drbd_pp_wait);
b411b363 939 } else
3967deb1 940 drbd_free_peer_req(mdev, peer_req);
b411b363
PR
941}
942
943/**
944 * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST
945 * @mdev: DRBD device.
946 * @w: work object.
947 * @cancel: The connection will be closed anyways
948 */
99920dc5 949int w_e_end_data_req(struct drbd_work *w, int cancel)
b411b363 950{
db830c46 951 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
00d56944 952 struct drbd_conf *mdev = w->mdev;
99920dc5 953 int err;
b411b363
PR
954
955 if (unlikely(cancel)) {
3967deb1 956 drbd_free_peer_req(mdev, peer_req);
b411b363 957 dec_unacked(mdev);
99920dc5 958 return 0;
b411b363
PR
959 }
960
db830c46 961 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
99920dc5 962 err = drbd_send_block(mdev, P_DATA_REPLY, peer_req);
b411b363
PR
963 } else {
964 if (__ratelimit(&drbd_ratelimit_state))
965 dev_err(DEV, "Sending NegDReply. sector=%llus.\n",
db830c46 966 (unsigned long long)peer_req->i.sector);
b411b363 967
99920dc5 968 err = drbd_send_ack(mdev, P_NEG_DREPLY, peer_req);
b411b363
PR
969 }
970
971 dec_unacked(mdev);
972
db830c46 973 move_to_net_ee_or_free(mdev, peer_req);
b411b363 974
99920dc5 975 if (unlikely(err))
b411b363 976 dev_err(DEV, "drbd_send_block() failed\n");
99920dc5 977 return err;
b411b363
PR
978}
979
980/**
a209b4ae 981 * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST
b411b363
PR
982 * @mdev: DRBD device.
983 * @w: work object.
984 * @cancel: The connection will be closed anyways
985 */
99920dc5 986int w_e_end_rsdata_req(struct drbd_work *w, int cancel)
b411b363 987{
db830c46 988 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
00d56944 989 struct drbd_conf *mdev = w->mdev;
99920dc5 990 int err;
b411b363
PR
991
992 if (unlikely(cancel)) {
3967deb1 993 drbd_free_peer_req(mdev, peer_req);
b411b363 994 dec_unacked(mdev);
99920dc5 995 return 0;
b411b363
PR
996 }
997
998 if (get_ldev_if_state(mdev, D_FAILED)) {
db830c46 999 drbd_rs_complete_io(mdev, peer_req->i.sector);
b411b363
PR
1000 put_ldev(mdev);
1001 }
1002
d612d309 1003 if (mdev->state.conn == C_AHEAD) {
99920dc5 1004 err = drbd_send_ack(mdev, P_RS_CANCEL, peer_req);
db830c46 1005 } else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
b411b363
PR
1006 if (likely(mdev->state.pdsk >= D_INCONSISTENT)) {
1007 inc_rs_pending(mdev);
99920dc5 1008 err = drbd_send_block(mdev, P_RS_DATA_REPLY, peer_req);
b411b363
PR
1009 } else {
1010 if (__ratelimit(&drbd_ratelimit_state))
1011 dev_err(DEV, "Not sending RSDataReply, "
1012 "partner DISKLESS!\n");
99920dc5 1013 err = 0;
b411b363
PR
1014 }
1015 } else {
1016 if (__ratelimit(&drbd_ratelimit_state))
1017 dev_err(DEV, "Sending NegRSDReply. sector %llus.\n",
db830c46 1018 (unsigned long long)peer_req->i.sector);
b411b363 1019
99920dc5 1020 err = drbd_send_ack(mdev, P_NEG_RS_DREPLY, peer_req);
b411b363
PR
1021
1022 /* update resync data with failure */
db830c46 1023 drbd_rs_failed_io(mdev, peer_req->i.sector, peer_req->i.size);
b411b363
PR
1024 }
1025
1026 dec_unacked(mdev);
1027
db830c46 1028 move_to_net_ee_or_free(mdev, peer_req);
b411b363 1029
99920dc5 1030 if (unlikely(err))
b411b363 1031 dev_err(DEV, "drbd_send_block() failed\n");
99920dc5 1032 return err;
b411b363
PR
1033}
1034
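/* Worker callback on the sync source for P_CSUM_RS_REQUEST: recompute the
 * digest locally and either acknowledge the block as in sync
 * (P_RS_IS_IN_SYNC) or send the full data in a P_RS_DATA_REPLY. */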
99920dc5 1035int w_e_end_csum_rs_req(struct drbd_work *w, int cancel)
b411b363 1036{
db830c46 1037 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
00d56944 1038 struct drbd_conf *mdev = w->mdev;
b411b363
PR
1039 struct digest_info *di;
1040 int digest_size;
1041 void *digest = NULL;
99920dc5 1042 int err, eq = 0;
b411b363
PR
1043
1044 if (unlikely(cancel)) {
3967deb1 1045 drbd_free_peer_req(mdev, peer_req);
b411b363 1046 dec_unacked(mdev);
99920dc5 1047 return 0;
b411b363
PR
1048 }
1049
1d53f09e 1050 if (get_ldev(mdev)) {
db830c46 1051 drbd_rs_complete_io(mdev, peer_req->i.sector);
1d53f09e
LE
1052 put_ldev(mdev);
1053 }
b411b363 1054
db830c46 1055 di = peer_req->digest;
b411b363 1056
db830c46 1057 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
b411b363
PR
1058 /* quick hack to try to avoid a race against reconfiguration.
1059 * a real fix would be much more involved,
1060 * introducing more locking mechanisms */
f399002e
LE
1061 if (mdev->tconn->csums_tfm) {
1062 digest_size = crypto_hash_digestsize(mdev->tconn->csums_tfm);
b411b363
PR
1063 D_ASSERT(digest_size == di->digest_size);
1064 digest = kmalloc(digest_size, GFP_NOIO);
1065 }
1066 if (digest) {
f399002e 1067 drbd_csum_ee(mdev, mdev->tconn->csums_tfm, peer_req, digest);
b411b363
PR
1068 eq = !memcmp(digest, di->digest, digest_size);
1069 kfree(digest);
1070 }
1071
1072 if (eq) {
db830c46 1073 drbd_set_in_sync(mdev, peer_req->i.sector, peer_req->i.size);
676396d5 1074 /* rs_same_csums unit is BM_BLOCK_SIZE */
db830c46 1075 mdev->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT;
99920dc5 1076 err = drbd_send_ack(mdev, P_RS_IS_IN_SYNC, peer_req);
b411b363
PR
1077 } else {
1078 inc_rs_pending(mdev);
db830c46
AG
1079 peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */
1080 peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */
204bba99 1081 kfree(di);
99920dc5 1082 err = drbd_send_block(mdev, P_RS_DATA_REPLY, peer_req);
b411b363
PR
1083 }
1084 } else {
99920dc5 1085 err = drbd_send_ack(mdev, P_NEG_RS_DREPLY, peer_req);
b411b363
PR
1086 if (__ratelimit(&drbd_ratelimit_state))
1087 dev_err(DEV, "Sending NegDReply. I guess it gets messy.\n");
1088 }
1089
1090 dec_unacked(mdev);
db830c46 1091 move_to_net_ee_or_free(mdev, peer_req);
b411b363 1092
99920dc5 1093 if (unlikely(err))
b411b363 1094 dev_err(DEV, "drbd_send_block/ack() failed\n");
99920dc5 1095 return err;
b411b363
PR
1096}
1097
99920dc5 1098int w_e_end_ov_req(struct drbd_work *w, int cancel)
b411b363 1099{
db830c46 1100 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
00d56944 1101 struct drbd_conf *mdev = w->mdev;
db830c46
AG
1102 sector_t sector = peer_req->i.sector;
1103 unsigned int size = peer_req->i.size;
b411b363
PR
1104 int digest_size;
1105 void *digest;
99920dc5 1106 int err = 0;
b411b363
PR
1107
1108 if (unlikely(cancel))
1109 goto out;
1110
f399002e 1111 digest_size = crypto_hash_digestsize(mdev->tconn->verify_tfm);
b411b363 1112 digest = kmalloc(digest_size, GFP_NOIO);
8f21420e 1113 if (!digest) {
99920dc5 1114 err = 1; /* terminate the connection in case the allocation failed */
8f21420e 1115 goto out;
b411b363
PR
1116 }
1117
db830c46 1118 if (likely(!(peer_req->flags & EE_WAS_ERROR)))
f399002e 1119 drbd_csum_ee(mdev, mdev->tconn->verify_tfm, peer_req, digest);
8f21420e
PR
1120 else
1121 memset(digest, 0, digest_size);
1122
53ea4331
LE
1123 /* Free e and pages before send.
1124 * In case we block on congestion, we could otherwise run into
1125 * some distributed deadlock, if the other side blocks on
1126 * congestion as well, because our receiver blocks in
c37c8ecf 1127 * drbd_alloc_pages due to pp_in_use > max_buffers. */
3967deb1 1128 drbd_free_peer_req(mdev, peer_req);
db830c46 1129 peer_req = NULL;
8f21420e 1130 inc_rs_pending(mdev);
99920dc5
AG
1131 err = drbd_send_drequest_csum(mdev, sector, size, digest, digest_size, P_OV_REPLY);
1132 if (err)
8f21420e
PR
1133 dec_rs_pending(mdev);
1134 kfree(digest);
1135
b411b363 1136out:
db830c46 1137 if (peer_req)
3967deb1 1138 drbd_free_peer_req(mdev, peer_req);
b411b363 1139 dec_unacked(mdev);
99920dc5 1140 return err;
b411b363
PR
1141}
1142
8f7bed77 1143void drbd_ov_out_of_sync_found(struct drbd_conf *mdev, sector_t sector, int size)
b411b363
PR
1144{
1145 if (mdev->ov_last_oos_start + mdev->ov_last_oos_size == sector) {
1146 mdev->ov_last_oos_size += size>>9;
1147 } else {
1148 mdev->ov_last_oos_start = sector;
1149 mdev->ov_last_oos_size = size>>9;
1150 }
1151 drbd_set_out_of_sync(mdev, sector, size);
b411b363
PR
1152}
1153
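/* Worker callback for online-verify replies: compare the peer's digest
 * with a locally computed one, record any out-of-sync range, answer with
 * P_OV_RESULT, and finish the verify run once the last block or the
 * configured stop sector has been processed. */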
99920dc5 1154int w_e_end_ov_reply(struct drbd_work *w, int cancel)
b411b363 1155{
db830c46 1156 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
00d56944 1157 struct drbd_conf *mdev = w->mdev;
b411b363 1158 struct digest_info *di;
b411b363 1159 void *digest;
db830c46
AG
1160 sector_t sector = peer_req->i.sector;
1161 unsigned int size = peer_req->i.size;
53ea4331 1162 int digest_size;
99920dc5 1163 int err, eq = 0;
58ffa580 1164 bool stop_sector_reached = false;
b411b363
PR
1165
1166 if (unlikely(cancel)) {
3967deb1 1167 drbd_free_peer_req(mdev, peer_req);
b411b363 1168 dec_unacked(mdev);
99920dc5 1169 return 0;
b411b363
PR
1170 }
1171
1172 /* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all
1173 * the resync lru has been cleaned up already */
1d53f09e 1174 if (get_ldev(mdev)) {
db830c46 1175 drbd_rs_complete_io(mdev, peer_req->i.sector);
1d53f09e
LE
1176 put_ldev(mdev);
1177 }
b411b363 1178
db830c46 1179 di = peer_req->digest;
b411b363 1180
db830c46 1181 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
f399002e 1182 digest_size = crypto_hash_digestsize(mdev->tconn->verify_tfm);
b411b363
PR
1183 digest = kmalloc(digest_size, GFP_NOIO);
1184 if (digest) {
f399002e 1185 drbd_csum_ee(mdev, mdev->tconn->verify_tfm, peer_req, digest);
b411b363
PR
1186
1187 D_ASSERT(digest_size == di->digest_size);
1188 eq = !memcmp(digest, di->digest, digest_size);
1189 kfree(digest);
1190 }
b411b363
PR
1191 }
1192
9676c760
LE
1193 /* Free peer_req and pages before send.
1194 * In case we block on congestion, we could otherwise run into
1195 * some distributed deadlock, if the other side blocks on
1196 * congestion as well, because our receiver blocks in
c37c8ecf 1197 * drbd_alloc_pages due to pp_in_use > max_buffers. */
3967deb1 1198 drbd_free_peer_req(mdev, peer_req);
b411b363 1199 if (!eq)
8f7bed77 1200 drbd_ov_out_of_sync_found(mdev, sector, size);
b411b363 1201 else
8f7bed77 1202 ov_out_of_sync_print(mdev);
b411b363 1203
99920dc5 1204 err = drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size,
fa79abd8 1205 eq ? ID_IN_SYNC : ID_OUT_OF_SYNC);
b411b363 1206
53ea4331 1207 dec_unacked(mdev);
b411b363 1208
ea5442af
LE
1209 --mdev->ov_left;
1210
1211 /* let's advance progress step marks only for every other megabyte */
1212 if ((mdev->ov_left & 0x200) == 0x200)
1213 drbd_advance_rs_marks(mdev, mdev->ov_left);
1214
58ffa580
LE
1215 stop_sector_reached = verify_can_do_stop_sector(mdev) &&
1216 (sector + (size>>9)) >= mdev->ov_stop_sector;
1217
1218 if (mdev->ov_left == 0 || stop_sector_reached) {
8f7bed77 1219 ov_out_of_sync_print(mdev);
b411b363
PR
1220 drbd_resync_finished(mdev);
1221 }
1222
99920dc5 1223 return err;
b411b363
PR
1224}
1225
99920dc5 1226int w_prev_work_done(struct drbd_work *w, int cancel)
b411b363
PR
1227{
1228 struct drbd_wq_barrier *b = container_of(w, struct drbd_wq_barrier, w);
00d56944 1229
b411b363 1230 complete(&b->done);
99920dc5 1231 return 0;
b411b363
PR
1232}
1233
b6dd1a89
LE
1234/* FIXME
1235 * We need to track the number of pending barrier acks,
1236 * and to be able to wait for them.
1237 * See also comment in drbd_adm_attach before drbd_suspend_io.
1238 */
1239int drbd_send_barrier(struct drbd_tconn *tconn)
b411b363 1240{
9f5bdc33 1241 struct p_barrier *p;
b6dd1a89 1242 struct drbd_socket *sock;
b411b363 1243
b6dd1a89
LE
1244 sock = &tconn->data;
1245 p = conn_prepare_command(tconn, sock);
9f5bdc33
AG
1246 if (!p)
1247 return -EIO;
b6dd1a89
LE
1248 p->barrier = tconn->send.current_epoch_nr;
1249 p->pad = 0;
1250 tconn->send.current_epoch_writes = 0;
1251
1252 return conn_send_command(tconn, sock, P_BARRIER, sizeof(*p), NULL, 0);
b411b363
PR
1253}
1254
99920dc5 1255int w_send_write_hint(struct drbd_work *w, int cancel)
b411b363 1256{
00d56944 1257 struct drbd_conf *mdev = w->mdev;
9f5bdc33
AG
1258 struct drbd_socket *sock;
1259
b411b363 1260 if (cancel)
99920dc5 1261 return 0;
9f5bdc33
AG
1262 sock = &mdev->tconn->data;
1263 if (!drbd_prepare_command(mdev, sock))
1264 return -EIO;
e658983a 1265 return drbd_send_command(mdev, sock, P_UNPLUG_REMOTE, 0, NULL, 0);
b411b363
PR
1266}
1267
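/* Barrier/epoch bookkeeping for the sender: a P_BARRIER is sent only when
 * the epoch number changes and the epoch that is being closed actually
 * contained writes. */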
4eb9b3cb
LE
1268static void re_init_if_first_write(struct drbd_tconn *tconn, unsigned int epoch)
1269{
1270 if (!tconn->send.seen_any_write_yet) {
1271 tconn->send.seen_any_write_yet = true;
1272 tconn->send.current_epoch_nr = epoch;
1273 tconn->send.current_epoch_writes = 0;
1274 }
1275}
1276
1277static void maybe_send_barrier(struct drbd_tconn *tconn, unsigned int epoch)
1278{
1279 /* re-init if first write on this connection */
1280 if (!tconn->send.seen_any_write_yet)
1281 return;
1282 if (tconn->send.current_epoch_nr != epoch) {
1283 if (tconn->send.current_epoch_writes)
1284 drbd_send_barrier(tconn);
1285 tconn->send.current_epoch_nr = epoch;
1286 }
1287}
1288
8f7bed77 1289int w_send_out_of_sync(struct drbd_work *w, int cancel)
73a01a18
PR
1290{
1291 struct drbd_request *req = container_of(w, struct drbd_request, w);
00d56944 1292 struct drbd_conf *mdev = w->mdev;
b6dd1a89 1293 struct drbd_tconn *tconn = mdev->tconn;
99920dc5 1294 int err;
73a01a18
PR
1295
1296 if (unlikely(cancel)) {
8554df1c 1297 req_mod(req, SEND_CANCELED);
99920dc5 1298 return 0;
73a01a18
PR
1299 }
1300
b6dd1a89
LE
1301 /* this time, no tconn->send.current_epoch_writes++;
1302 * If it was sent, it was the closing barrier for the last
1303 * replicated epoch, before we went into AHEAD mode.
1304 * No more barriers will be sent, until we leave AHEAD mode again. */
4eb9b3cb 1305 maybe_send_barrier(tconn, req->epoch);
b6dd1a89 1306
8f7bed77 1307 err = drbd_send_out_of_sync(mdev, req);
8554df1c 1308 req_mod(req, OOS_HANDED_TO_NETWORK);
73a01a18 1309
99920dc5 1310 return err;
73a01a18
PR
1311}
1312
b411b363
PR
1313/**
1314 * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request
1315 * @mdev: DRBD device.
1316 * @w: work object.
1317 * @cancel: The connection will be closed anyways
1318 */
99920dc5 1319int w_send_dblock(struct drbd_work *w, int cancel)
b411b363
PR
1320{
1321 struct drbd_request *req = container_of(w, struct drbd_request, w);
00d56944 1322 struct drbd_conf *mdev = w->mdev;
b6dd1a89 1323 struct drbd_tconn *tconn = mdev->tconn;
99920dc5 1324 int err;
b411b363
PR
1325
1326 if (unlikely(cancel)) {
8554df1c 1327 req_mod(req, SEND_CANCELED);
99920dc5 1328 return 0;
b411b363
PR
1329 }
1330
4eb9b3cb
LE
1331 re_init_if_first_write(tconn, req->epoch);
1332 maybe_send_barrier(tconn, req->epoch);
b6dd1a89
LE
1333 tconn->send.current_epoch_writes++;
1334
99920dc5
AG
1335 err = drbd_send_dblock(mdev, req);
1336 req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
b411b363 1337
99920dc5 1338 return err;
b411b363
PR
1339}
1340
1341/**
1342 * w_send_read_req() - Worker callback to send a read request (P_DATA_REQUEST) packet
1343 * @mdev: DRBD device.
1344 * @w: work object.
1345 * @cancel: The connection will be closed anyways
1346 */
99920dc5 1347int w_send_read_req(struct drbd_work *w, int cancel)
b411b363
PR
1348{
1349 struct drbd_request *req = container_of(w, struct drbd_request, w);
00d56944 1350 struct drbd_conf *mdev = w->mdev;
b6dd1a89 1351 struct drbd_tconn *tconn = mdev->tconn;
99920dc5 1352 int err;
b411b363
PR
1353
1354 if (unlikely(cancel)) {
8554df1c 1355 req_mod(req, SEND_CANCELED);
99920dc5 1356 return 0;
b411b363
PR
1357 }
1358
b6dd1a89
LE
1359 /* Even read requests may close a write epoch,
1360 * if there was any yet. */
4eb9b3cb 1361 maybe_send_barrier(tconn, req->epoch);
b6dd1a89 1362
99920dc5 1363 err = drbd_send_drequest(mdev, P_DATA_REQUEST, req->i.sector, req->i.size,
6c1005e7 1364 (unsigned long)req);
b411b363 1365
99920dc5 1366 req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
b411b363 1367
99920dc5 1368 return err;
b411b363
PR
1369}
1370
99920dc5 1371int w_restart_disk_io(struct drbd_work *w, int cancel)
265be2d0
PR
1372{
1373 struct drbd_request *req = container_of(w, struct drbd_request, w);
00d56944 1374 struct drbd_conf *mdev = w->mdev;
265be2d0 1375
0778286a 1376 if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
181286ad 1377 drbd_al_begin_io(mdev, &req->i);
265be2d0
PR
1378
1379 drbd_req_make_private_bio(req, req->master_bio);
1380 req->private_bio->bi_bdev = mdev->ldev->backing_bdev;
1381 generic_make_request(req->private_bio);
1382
99920dc5 1383 return 0;
265be2d0
PR
1384}
1385
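/* Follow the resync-after dependency chain and return whether this device
 * is allowed to resync right now (i.e. no device it depends on is
 * currently syncing or paused). */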
b411b363
PR
1386static int _drbd_may_sync_now(struct drbd_conf *mdev)
1387{
1388 struct drbd_conf *odev = mdev;
95f8efd0 1389 int resync_after;
b411b363
PR
1390
1391 while (1) {
438c8374
PR
1392 if (!odev->ldev)
1393 return 1;
daeda1cc 1394 rcu_read_lock();
95f8efd0 1395 resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
daeda1cc 1396 rcu_read_unlock();
95f8efd0 1397 if (resync_after == -1)
b411b363 1398 return 1;
95f8efd0 1399 odev = minor_to_mdev(resync_after);
841ce241
AG
1400 if (!expect(odev))
1401 return 1;
b411b363
PR
1402 if ((odev->state.conn >= C_SYNC_SOURCE &&
1403 odev->state.conn <= C_PAUSED_SYNC_T) ||
1404 odev->state.aftr_isp || odev->state.peer_isp ||
1405 odev->state.user_isp)
1406 return 0;
1407 }
1408}
1409
1410/**
1411 * _drbd_pause_after() - Pause resync on all devices that may not resync now
1412 * @mdev: DRBD device.
1413 *
1414 * Called from process context only (admin command and after_state_ch).
1415 */
1416static int _drbd_pause_after(struct drbd_conf *mdev)
1417{
1418 struct drbd_conf *odev;
1419 int i, rv = 0;
1420
695d08fa 1421 rcu_read_lock();
81a5d60e 1422 idr_for_each_entry(&minors, odev, i) {
b411b363
PR
1423 if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1424 continue;
1425 if (!_drbd_may_sync_now(odev))
1426 rv |= (__drbd_set_state(_NS(odev, aftr_isp, 1), CS_HARD, NULL)
1427 != SS_NOTHING_TO_DO);
1428 }
695d08fa 1429 rcu_read_unlock();
b411b363
PR
1430
1431 return rv;
1432}
1433
1434/**
1435 * _drbd_resume_next() - Resume resync on all devices that may resync now
1436 * @mdev: DRBD device.
1437 *
1438 * Called from process context only (admin command and worker).
1439 */
1440static int _drbd_resume_next(struct drbd_conf *mdev)
1441{
1442 struct drbd_conf *odev;
1443 int i, rv = 0;
1444
695d08fa 1445 rcu_read_lock();
81a5d60e 1446 idr_for_each_entry(&minors, odev, i) {
b411b363
PR
1447 if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1448 continue;
1449 if (odev->state.aftr_isp) {
1450 if (_drbd_may_sync_now(odev))
1451 rv |= (__drbd_set_state(_NS(odev, aftr_isp, 0),
1452 CS_HARD, NULL)
1453 != SS_NOTHING_TO_DO) ;
1454 }
1455 }
695d08fa 1456 rcu_read_unlock();
b411b363
PR
1457 return rv;
1458}
1459
1460void resume_next_sg(struct drbd_conf *mdev)
1461{
1462 write_lock_irq(&global_state_lock);
1463 _drbd_resume_next(mdev);
1464 write_unlock_irq(&global_state_lock);
1465}
1466
1467void suspend_other_sg(struct drbd_conf *mdev)
1468{
1469 write_lock_irq(&global_state_lock);
1470 _drbd_pause_after(mdev);
1471 write_unlock_irq(&global_state_lock);
1472}
1473
dc97b708 1474/* caller must hold global_state_lock */
95f8efd0 1475enum drbd_ret_code drbd_resync_after_valid(struct drbd_conf *mdev, int o_minor)
b411b363
PR
1476{
1477 struct drbd_conf *odev;
95f8efd0 1478 int resync_after;
b411b363
PR
1479
1480 if (o_minor == -1)
1481 return NO_ERROR;
1482 if (o_minor < -1 || minor_to_mdev(o_minor) == NULL)
95f8efd0 1483 return ERR_RESYNC_AFTER;
b411b363
PR
1484
1485 /* check for loops */
1486 odev = minor_to_mdev(o_minor);
1487 while (1) {
1488 if (odev == mdev)
95f8efd0 1489 return ERR_RESYNC_AFTER_CYCLE;
b411b363 1490
daeda1cc 1491 rcu_read_lock();
95f8efd0 1492 resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
daeda1cc 1493 rcu_read_unlock();
b411b363 1494 /* dependency chain ends here, no cycles. */
95f8efd0 1495 if (resync_after == -1)
b411b363
PR
1496 return NO_ERROR;
1497
1498 /* follow the dependency chain */
95f8efd0 1499 odev = minor_to_mdev(resync_after);
b411b363
PR
1500 }
1501}
1502
dc97b708 1503/* caller must hold global_state_lock */
95f8efd0 1504void drbd_resync_after_changed(struct drbd_conf *mdev)
b411b363
PR
1505{
1506 int changes;
b411b363 1507
dc97b708
PR
1508 do {
1509 changes = _drbd_pause_after(mdev);
1510 changes |= _drbd_resume_next(mdev);
1511 } while (changes);
b411b363
PR
1512}
1513
9bd28d3c
LE
1514void drbd_rs_controller_reset(struct drbd_conf *mdev)
1515{
813472ce
PR
1516 struct fifo_buffer *plan;
1517
9bd28d3c
LE
1518 atomic_set(&mdev->rs_sect_in, 0);
1519 atomic_set(&mdev->rs_sect_ev, 0);
1520 mdev->rs_in_flight = 0;
813472ce
PR
1521
1522 /* Updating the RCU protected object in place is necessary since
1523 this function gets called from atomic context.
 1524 It is valid since all other updates also lead to a completely
1525 empty fifo */
1526 rcu_read_lock();
1527 plan = rcu_dereference(mdev->rs_plan_s);
1528 plan->total = 0;
1529 fifo_set(plan, 0);
1530 rcu_read_unlock();
9bd28d3c
LE
1531}
1532
1f04af33
PR
1533void start_resync_timer_fn(unsigned long data)
1534{
1535 struct drbd_conf *mdev = (struct drbd_conf *) data;
1536
d5b27b01 1537 drbd_queue_work(&mdev->tconn->sender_work, &mdev->start_resync_work);
1f04af33
PR
1538}
1539
99920dc5 1540int w_start_resync(struct drbd_work *w, int cancel)
1f04af33 1541{
00d56944
PR
1542 struct drbd_conf *mdev = w->mdev;
1543
1f04af33
PR
1544 if (atomic_read(&mdev->unacked_cnt) || atomic_read(&mdev->rs_pending_cnt)) {
1545 dev_warn(DEV, "w_start_resync later...\n");
1546 mdev->start_resync_timer.expires = jiffies + HZ/10;
1547 add_timer(&mdev->start_resync_timer);
99920dc5 1548 return 0;
1f04af33
PR
1549 }
1550
1551 drbd_start_resync(mdev, C_SYNC_SOURCE);
36baf611 1552 clear_bit(AHEAD_TO_SYNC_SOURCE, &mdev->flags);
99920dc5 1553 return 0;
1f04af33
PR
1554}
1555
b411b363
PR
1556/**
1557 * drbd_start_resync() - Start the resync process
1558 * @mdev: DRBD device.
1559 * @side: Either C_SYNC_SOURCE or C_SYNC_TARGET
1560 *
1561 * This function might bring you directly into one of the
1562 * C_PAUSED_SYNC_* states.
1563 */
1564void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
1565{
1566 union drbd_state ns;
1567 int r;
1568
c4752ef1 1569 if (mdev->state.conn >= C_SYNC_SOURCE && mdev->state.conn < C_AHEAD) {
b411b363
PR
1570 dev_err(DEV, "Resync already running!\n");
1571 return;
1572 }
1573
e64a3294
PR
1574 if (!test_bit(B_RS_H_DONE, &mdev->flags)) {
1575 if (side == C_SYNC_TARGET) {
1576 /* Since application IO was locked out during C_WF_BITMAP_T and
1577 C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET
1578 we check that we might make the data inconsistent. */
1579 r = drbd_khelper(mdev, "before-resync-target");
1580 r = (r >> 8) & 0xff;
1581 if (r > 0) {
1582 dev_info(DEV, "before-resync-target handler returned %d, "
09b9e797 1583 "dropping connection.\n", r);
38fa9988 1584 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
09b9e797
PR
1585 return;
1586 }
e64a3294
PR
1587 } else /* C_SYNC_SOURCE */ {
1588 r = drbd_khelper(mdev, "before-resync-source");
1589 r = (r >> 8) & 0xff;
1590 if (r > 0) {
1591 if (r == 3) {
1592 dev_info(DEV, "before-resync-source handler returned %d, "
1593 "ignoring. Old userland tools?", r);
1594 } else {
1595 dev_info(DEV, "before-resync-source handler returned %d, "
1596 "dropping connection.\n", r);
38fa9988 1597 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
e64a3294
PR
1598 return;
1599 }
1600 }
09b9e797 1601 }
b411b363
PR
1602 }
1603
e64a3294 1604 if (current == mdev->tconn->worker.task) {
dad20554 1605 /* The worker should not sleep waiting for state_mutex,
e64a3294 1606 that can take long */
8410da8f 1607 if (!mutex_trylock(mdev->state_mutex)) {
e64a3294
PR
1608 set_bit(B_RS_H_DONE, &mdev->flags);
1609 mdev->start_resync_timer.expires = jiffies + HZ/5;
1610 add_timer(&mdev->start_resync_timer);
1611 return;
1612 }
1613 } else {
8410da8f 1614 mutex_lock(mdev->state_mutex);
e64a3294
PR
1615 }
1616 clear_bit(B_RS_H_DONE, &mdev->flags);
b411b363 1617
0cfac5dd 1618 write_lock_irq(&global_state_lock);
b411b363 1619 if (!get_ldev_if_state(mdev, D_NEGOTIATING)) {
0cfac5dd 1620 write_unlock_irq(&global_state_lock);
8410da8f 1621 mutex_unlock(mdev->state_mutex);
b411b363
PR
1622 return;
1623 }
1624
78bae59b 1625 ns = drbd_read_state(mdev);
b411b363
PR
1626
1627 ns.aftr_isp = !_drbd_may_sync_now(mdev);
1628
1629 ns.conn = side;
1630
1631 if (side == C_SYNC_TARGET)
1632 ns.disk = D_INCONSISTENT;
1633 else /* side == C_SYNC_SOURCE */
1634 ns.pdsk = D_INCONSISTENT;
1635
1636 r = __drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
78bae59b 1637 ns = drbd_read_state(mdev);
b411b363
PR
1638
1639 if (ns.conn < C_CONNECTED)
1640 r = SS_UNKNOWN_ERROR;
1641
1642 if (r == SS_SUCCESS) {
1d7734a0
LE
1643 unsigned long tw = drbd_bm_total_weight(mdev);
1644 unsigned long now = jiffies;
1645 int i;
1646
b411b363
PR
1647 mdev->rs_failed = 0;
1648 mdev->rs_paused = 0;
b411b363 1649 mdev->rs_same_csum = 0;
0f0601f4
LE
1650 mdev->rs_last_events = 0;
1651 mdev->rs_last_sect_ev = 0;
1d7734a0
LE
1652 mdev->rs_total = tw;
1653 mdev->rs_start = now;
1654 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
1655 mdev->rs_mark_left[i] = tw;
1656 mdev->rs_mark_time[i] = now;
1657 }
b411b363
PR
1658 _drbd_pause_after(mdev);
1659 }
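	/* [Editor's note -- descriptive comment, added for clarity; the
	 * purpose of the marks ring is inferred, not stated in this file]
	 * On a successful state change the block above resets the resync
	 * bookkeeping: failure/pause counters, rs_total from the current
	 * bitmap weight, and the DRBD_SYNC_MARKS ring of (bits left,
	 * timestamp) samples that apparently feeds the sync progress and
	 * speed estimates. */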
1660 write_unlock_irq(&global_state_lock);
5a22db89 1661
b411b363
PR
1662 if (r == SS_SUCCESS) {
1663 dev_info(DEV, "Began resync as %s (will sync %lu KB [%lu bits set]).\n",
1664 drbd_conn_str(ns.conn),
1665 (unsigned long) mdev->rs_total << (BM_BLOCK_SHIFT-10),
1666 (unsigned long) mdev->rs_total);
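		/* [Editor's note -- worked example; assumes BM_BLOCK_SHIFT == 12,
		 * i.e. one bitmap bit per 4 KiB block, as defined in drbd_int.h]
		 * rs_total << (BM_BLOCK_SHIFT - 10) is then rs_total << 2, so
		 * e.g. 1000 dirty bits are reported as "will sync 4000 KB". */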
6c922ed5
LE
1667 if (side == C_SYNC_TARGET)
1668 mdev->bm_resync_fo = 0;
1669
1670 /* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid
1671 * with w_send_oos, or the sync target will get confused as to
 1672 * how many bits to resync. We cannot do that always, because for an
1673 * empty resync and protocol < 95, we need to do it here, as we call
1674 * drbd_resync_finished from here in that case.
1675 * We drbd_gen_and_send_sync_uuid here for protocol < 96,
1676 * and from after_state_ch otherwise. */
31890f4a 1677 if (side == C_SYNC_SOURCE && mdev->tconn->agreed_pro_version < 96)
6c922ed5 1678 drbd_gen_and_send_sync_uuid(mdev);
b411b363 1679
31890f4a 1680 if (mdev->tconn->agreed_pro_version < 95 && mdev->rs_total == 0) {
af85e8e8
LE
1681 /* This still has a race (about when exactly the peers
1682 * detect connection loss) that can lead to a full sync
1683 * on next handshake. In 8.3.9 we fixed this with explicit
1684 * resync-finished notifications, but the fix
1685 * introduces a protocol change. Sleeping for some
1686 * time longer than the ping interval + timeout on the
1687 * SyncSource, to give the SyncTarget the chance to
1688 * detect connection loss, then waiting for a ping
1689 * response (implicit in drbd_resync_finished) reduces
1690 * the race considerably, but does not solve it. */
44ed167d
PR
1691 if (side == C_SYNC_SOURCE) {
1692 struct net_conf *nc;
1693 int timeo;
1694
1695 rcu_read_lock();
1696 nc = rcu_dereference(mdev->tconn->net_conf);
1697 timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9;
1698 rcu_read_unlock();
1699 schedule_timeout_interruptible(timeo);
1700 }
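			/* [Editor's note -- worked example; assumes ping_int is
			 * configured in seconds and ping_timeo in tenths of a
			 * second, as in drbd.conf of this era]
			 * With ping_int = 10 and ping_timeo = 5 the sleep is
			 * 10*HZ + 5*HZ/9 jiffies, roughly 10.6 s -- slightly
			 * more than "ping interval + ping timeout" (note the
			 * division by 9 rather than 10), so the SyncTarget gets
			 * a chance to notice the lost connection first. */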
b411b363 1701 drbd_resync_finished(mdev);
b411b363
PR
1702 }
1703
9bd28d3c 1704 drbd_rs_controller_reset(mdev);
b411b363
PR
1705 /* ns.conn may already be != mdev->state.conn,
1706 * we may have been paused in between, or become paused until
1707 * the timer triggers.
1708 * No matter, that is handled in resync_timer_fn() */
1709 if (ns.conn == C_SYNC_TARGET)
1710 mod_timer(&mdev->resync_timer, jiffies);
1711
1712 drbd_md_sync(mdev);
1713 }
5a22db89 1714 put_ldev(mdev);
8410da8f 1715 mutex_unlock(mdev->state_mutex);
b411b363
PR
1716}
1717
b6dd1a89
LE
1718/* If the resource already closed the current epoch, but we did not
1719 * (because we have not yet seen new requests), we should send the
1720 * corresponding barrier now. Must be checked within the same spinlock
1721 * that is used to check for new requests. */
1722bool need_to_send_barrier(struct drbd_tconn *connection)
1723{
1724 if (!connection->send.seen_any_write_yet)
1725 return false;
1726
1727 /* Skip barriers that do not contain any writes.
1728 * This may happen during AHEAD mode. */
1729 if (!connection->send.current_epoch_writes)
1730 return false;
1731
1732 /* ->req_lock is held when requests are queued on
1733 * connection->sender_work, and put into ->transfer_log.
1734 * It is also held when ->current_tle_nr is increased.
1735 * So either there are already new requests queued,
 1736 * and corresponding barriers will be sent there.
1737 * Or nothing new is queued yet, so the difference will be 1.
1738 */
1739 if (atomic_read(&connection->current_tle_nr) !=
1740 connection->send.current_epoch_nr + 1)
1741 return false;
1742
1743 return true;
1744}
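/* [Editor's note -- descriptive comment, added for clarity]
 * need_to_send_barrier() returns true only when the resource's epoch
 * counter (current_tle_nr) is exactly one ahead of the last epoch the
 * sender announced (send.current_epoch_nr): the epoch was closed, it
 * contained writes, and nothing new has been queued that would carry the
 * barrier instead. wait_for_work() below increments send.current_epoch_nr
 * right after drbd_send_barrier(), keeping the two counters in step.
 */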
1745
8c0785a5
LE
1746bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list)
1747{
1748 spin_lock_irq(&queue->q_lock);
1749 list_splice_init(&queue->q, work_list);
1750 spin_unlock_irq(&queue->q_lock);
1751 return !list_empty(work_list);
1752}
1753
1754bool dequeue_work_item(struct drbd_work_queue *queue, struct list_head *work_list)
1755{
1756 spin_lock_irq(&queue->q_lock);
1757 if (!list_empty(&queue->q))
1758 list_move(queue->q.next, work_list);
1759 spin_unlock_irq(&queue->q_lock);
1760 return !list_empty(work_list);
1761}
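/* [Editor's note -- descriptive comment, added for clarity]
 * dequeue_work_batch() splices the whole queue onto the caller's list in
 * one go, while dequeue_work_item() moves only the first entry
 * (queue->q.next). The worker relies on the single-item variant (via
 * wait_for_work()) because drbd_queue_work_front() may still push urgent
 * work ahead of whatever is already queued.
 */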
1762
b6dd1a89
LE
1763void wait_for_work(struct drbd_tconn *connection, struct list_head *work_list)
1764{
1765 DEFINE_WAIT(wait);
1766 struct net_conf *nc;
1767 int uncork, cork;
1768
1769 dequeue_work_item(&connection->sender_work, work_list);
1770 if (!list_empty(work_list))
1771 return;
1772
1773 /* Still nothing to do?
1774 * Maybe we still need to close the current epoch,
1775 * even if no new requests are queued yet.
1776 *
1777 * Also, poke TCP, just in case.
1778 * Then wait for new work (or signal). */
1779 rcu_read_lock();
1780 nc = rcu_dereference(connection->net_conf);
1781 uncork = nc ? nc->tcp_cork : 0;
1782 rcu_read_unlock();
1783 if (uncork) {
1784 mutex_lock(&connection->data.mutex);
1785 if (connection->data.socket)
1786 drbd_tcp_uncork(connection->data.socket);
1787 mutex_unlock(&connection->data.mutex);
1788 }
1789
1790 for (;;) {
1791 int send_barrier;
1792 prepare_to_wait(&connection->sender_work.q_wait, &wait, TASK_INTERRUPTIBLE);
1793 spin_lock_irq(&connection->req_lock);
1794 spin_lock(&connection->sender_work.q_lock); /* FIXME get rid of this one? */
1795 list_splice_init(&connection->sender_work.q, work_list);
1796 spin_unlock(&connection->sender_work.q_lock); /* FIXME get rid of this one? */
1797 if (!list_empty(work_list) || signal_pending(current)) {
1798 spin_unlock_irq(&connection->req_lock);
1799 break;
1800 }
1801 send_barrier = need_to_send_barrier(connection);
1802 spin_unlock_irq(&connection->req_lock);
1803 if (send_barrier) {
1804 drbd_send_barrier(connection);
1805 connection->send.current_epoch_nr++;
1806 }
1807 schedule();
 1808 /* may be woken up for things other than new work, too,
 1809 * e.g. if the current epoch got closed,
 1810 * in which case we send the barrier above. */
1811 }
1812 finish_wait(&connection->sender_work.q_wait, &wait);
1813
1814 /* someone may have changed the config while we have been waiting above. */
1815 rcu_read_lock();
1816 nc = rcu_dereference(connection->net_conf);
1817 cork = nc ? nc->tcp_cork : 0;
1818 rcu_read_unlock();
1819 mutex_lock(&connection->data.mutex);
1820 if (connection->data.socket) {
1821 if (cork)
1822 drbd_tcp_cork(connection->data.socket);
1823 else if (!uncork)
1824 drbd_tcp_uncork(connection->data.socket);
1825 }
1826 mutex_unlock(&connection->data.mutex);
1827}
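/* [Editor's sketch -- userspace illustration, not part of drbd_worker.c]
 * wait_for_work() uncorks the data socket before sleeping so anything
 * already queued goes out, and re-corks it afterwards when tcp_cork is
 * enabled; drbd_tcp_cork()/drbd_tcp_uncork() presumably toggle the same
 * TCP_CORK socket option from kernel context. The userspace equivalent:
 */
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <stdio.h>
#include <sys/socket.h>

/* While TCP_CORK is set the kernel batches partial frames; clearing it
 * flushes whatever is still queued on the socket. */
void tcp_cork_set(int fd, int on)
{
	if (setsockopt(fd, IPPROTO_TCP, TCP_CORK, &on, sizeof(on)) < 0)
		perror("setsockopt(TCP_CORK)");
}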
1828
b411b363
PR
1829int drbd_worker(struct drbd_thread *thi)
1830{
392c8801 1831 struct drbd_tconn *tconn = thi->tconn;
b411b363 1832 struct drbd_work *w = NULL;
0e29d163 1833 struct drbd_conf *mdev;
b411b363 1834 LIST_HEAD(work_list);
8c0785a5 1835 int vnr;
b411b363 1836
e77a0a5c 1837 while (get_t_state(thi) == RUNNING) {
80822284 1838 drbd_thread_current_set_cpu(thi);
b411b363 1839
8c0785a5
LE
1840 /* as long as we use drbd_queue_work_front(),
1841 * we may only dequeue single work items here, not batches. */
1842 if (list_empty(&work_list))
b6dd1a89 1843 wait_for_work(tconn, &work_list);
b411b363 1844
8c0785a5 1845 if (signal_pending(current)) {
b411b363 1846 flush_signals(current);
19393e10
PR
1847 if (get_t_state(thi) == RUNNING) {
1848 conn_warn(tconn, "Worker got an unexpected signal\n");
b411b363 1849 continue;
19393e10 1850 }
b411b363
PR
1851 break;
1852 }
1853
e77a0a5c 1854 if (get_t_state(thi) != RUNNING)
b411b363 1855 break;
b411b363 1856
8c0785a5
LE
1857 while (!list_empty(&work_list)) {
1858 w = list_first_entry(&work_list, struct drbd_work, list);
1859 list_del_init(&w->list);
1860 if (w->cb(w, tconn->cstate < C_WF_REPORT_PARAMS) == 0)
1861 continue;
bbeb641c
PR
1862 if (tconn->cstate >= C_WF_REPORT_PARAMS)
1863 conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD);
b411b363
PR
1864 }
1865 }
b411b363 1866
8c0785a5 1867 do {
b411b363 1868 while (!list_empty(&work_list)) {
8c0785a5 1869 w = list_first_entry(&work_list, struct drbd_work, list);
b411b363 1870 list_del_init(&w->list);
00d56944 1871 w->cb(w, 1);
b411b363 1872 }
d5b27b01 1873 dequeue_work_batch(&tconn->sender_work, &work_list);
8c0785a5 1874 } while (!list_empty(&work_list));
b411b363 1875
c141ebda 1876 rcu_read_lock();
f399002e 1877 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
0e29d163 1878 D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE);
c141ebda
PR
1879 kref_get(&mdev->kref);
1880 rcu_read_unlock();
0e29d163 1881 drbd_mdev_cleanup(mdev);
c141ebda
PR
1882 kref_put(&mdev->kref, &drbd_minor_destroy);
1883 rcu_read_lock();
0e29d163 1884 }
c141ebda 1885 rcu_read_unlock();
b411b363
PR
1886
1887 return 0;
1888}
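/* [Editor's note -- descriptive comment, added for clarity]
 * On shutdown drbd_worker() first drains sender_work with the cancel
 * argument set (w->cb(w, 1)), so callbacks treat their work as cancelled,
 * and only then walks the connection's volumes to run drbd_mdev_cleanup()
 * on each of them, holding a kref so the device cannot disappear while
 * the RCU read lock is dropped around the cleanup.
 */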