drbd: move the drbd_work_queue from drbd_socket to drbd_connection
[deliverable/linux.git] drivers/block/drbd/drbd_worker.c
/*
   drbd_worker.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

 */

#include <linux/module.h>
#include <linux/drbd.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/scatterlist.h>

#include "drbd_int.h"
#include "drbd_req.h"

static int w_make_ov_request(struct drbd_work *w, int cancel);


/* endio handlers:
 *   drbd_md_io_complete (defined here)
 *   drbd_request_endio (defined here)
 *   drbd_peer_request_endio (defined here)
 *   bm_async_io_complete (defined in drbd_bitmap.c)
 *
 * For all these callbacks, note the following:
 * The callbacks will be called in irq context by the IDE drivers,
 * and in Softirqs/Tasklets/BH context by the SCSI drivers.
 * Try to get the locking right :)
 *
 */


/* About the global_state_lock
   Each state transition on a device holds a read lock. In case we have
   to evaluate the resync after dependencies, we grab a write lock, because
   we need stable states on all devices for that. */
rwlock_t global_state_lock;

/* used for synchronous meta data and bitmap IO
 * submitted by drbd_md_sync_page_io()
 */
void drbd_md_io_complete(struct bio *bio, int error)
{
	struct drbd_md_io *md_io;
	struct drbd_conf *mdev;

	md_io = (struct drbd_md_io *)bio->bi_private;
	mdev = container_of(md_io, struct drbd_conf, md_io);

	md_io->error = error;

	/* We grabbed an extra reference in _drbd_md_sync_page_io() to be able
	 * to timeout on the lower level device, and eventually detach from it.
	 * If this io completion runs after that timeout expired, this
	 * drbd_md_put_buffer() may allow us to finally try and re-attach.
	 * During normal operation, this only puts that extra reference
	 * down to 1 again.
	 * Make sure we first drop the reference, and only then signal
	 * completion, or we may (in drbd_al_read_log()) cycle so fast into the
	 * next drbd_md_sync_page_io(), that we trigger the
	 * ASSERT(atomic_read(&mdev->md_io_in_use) == 1) there.
	 */
	drbd_md_put_buffer(mdev);
	md_io->done = 1;
	wake_up(&mdev->misc_wait);
	bio_put(bio);
	put_ldev(mdev);
}

/* reads on behalf of the partner,
 * "submitted" by the receiver
 */
void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(local)
{
	unsigned long flags = 0;
	struct drbd_conf *mdev = peer_req->w.mdev;

	spin_lock_irqsave(&mdev->tconn->req_lock, flags);
	mdev->read_cnt += peer_req->i.size >> 9;
	list_del(&peer_req->w.list);
	if (list_empty(&mdev->read_ee))
		wake_up(&mdev->ee_wait);
	if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
		__drbd_chk_io_error(mdev, false);
	spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);

	drbd_queue_work(&mdev->tconn->sender_work, &peer_req->w);
	put_ldev(mdev);
}

/* writes on behalf of the partner, or resync writes,
 * "submitted" by the receiver, final stage.  */
static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local)
{
	unsigned long flags = 0;
	struct drbd_conf *mdev = peer_req->w.mdev;
	struct drbd_interval i;
	int do_wake;
	u64 block_id;
	int do_al_complete_io;

	/* after we moved peer_req to done_ee,
	 * we may no longer access it,
	 * it may be freed/reused already!
	 * (as soon as we release the req_lock) */
	i = peer_req->i;
	do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO;
	block_id = peer_req->block_id;

	spin_lock_irqsave(&mdev->tconn->req_lock, flags);
	mdev->writ_cnt += peer_req->i.size >> 9;
	list_del(&peer_req->w.list); /* has been on active_ee or sync_ee */
	list_add_tail(&peer_req->w.list, &mdev->done_ee);

	/*
	 * Do not remove from the write_requests tree here: we did not send the
	 * Ack yet and did not wake possibly waiting conflicting requests.
	 * Removed from the tree from "drbd_process_done_ee" within the
	 * appropriate w.cb (e_end_block/e_end_resync_block) or from
	 * _drbd_clear_done_ee.
	 */

	do_wake = list_empty(block_id == ID_SYNCER ? &mdev->sync_ee : &mdev->active_ee);

	if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
		__drbd_chk_io_error(mdev, false);
	spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);

	if (block_id == ID_SYNCER)
		drbd_rs_complete_io(mdev, i.sector);

	if (do_wake)
		wake_up(&mdev->ee_wait);

	if (do_al_complete_io)
		drbd_al_complete_io(mdev, &i);

	wake_asender(mdev->tconn);
	put_ldev(mdev);
}

/* writes on behalf of the partner, or resync writes,
 * "submitted" by the receiver.
 */
void drbd_peer_request_endio(struct bio *bio, int error)
{
	struct drbd_peer_request *peer_req = bio->bi_private;
	struct drbd_conf *mdev = peer_req->w.mdev;
	int uptodate = bio_flagged(bio, BIO_UPTODATE);
	int is_write = bio_data_dir(bio) == WRITE;

	if (error && __ratelimit(&drbd_ratelimit_state))
		dev_warn(DEV, "%s: error=%d s=%llus\n",
				is_write ? "write" : "read", error,
				(unsigned long long)peer_req->i.sector);
	if (!error && !uptodate) {
		if (__ratelimit(&drbd_ratelimit_state))
			dev_warn(DEV, "%s: setting error to -EIO s=%llus\n",
					is_write ? "write" : "read",
					(unsigned long long)peer_req->i.sector);
		/* strange behavior of some lower level drivers...
		 * fail the request by clearing the uptodate flag,
		 * but do not return any error?! */
		error = -EIO;
	}

	if (error)
		set_bit(__EE_WAS_ERROR, &peer_req->flags);

	bio_put(bio); /* no need for the bio anymore */
	if (atomic_dec_and_test(&peer_req->pending_bios)) {
		if (is_write)
			drbd_endio_write_sec_final(peer_req);
		else
			drbd_endio_read_sec_final(peer_req);
	}
}

/* read, readA or write requests on R_PRIMARY coming from drbd_make_request
 */
void drbd_request_endio(struct bio *bio, int error)
{
	unsigned long flags;
	struct drbd_request *req = bio->bi_private;
	struct drbd_conf *mdev = req->w.mdev;
	struct bio_and_error m;
	enum drbd_req_event what;
	int uptodate = bio_flagged(bio, BIO_UPTODATE);

	if (!error && !uptodate) {
		dev_warn(DEV, "p %s: setting error to -EIO\n",
			 bio_data_dir(bio) == WRITE ? "write" : "read");
		/* strange behavior of some lower level drivers...
		 * fail the request by clearing the uptodate flag,
		 * but do not return any error?! */
		error = -EIO;
	}

	/* to avoid recursion in __req_mod */
	if (unlikely(error)) {
		what = (bio_data_dir(bio) == WRITE)
			? WRITE_COMPLETED_WITH_ERROR
			: (bio_rw(bio) == READ)
			  ? READ_COMPLETED_WITH_ERROR
			  : READ_AHEAD_COMPLETED_WITH_ERROR;
	} else
		what = COMPLETED_OK;

	bio_put(req->private_bio);
	req->private_bio = ERR_PTR(error);

	/* not req_mod(), we need irqsave here! */
	spin_lock_irqsave(&mdev->tconn->req_lock, flags);
	__req_mod(req, what, &m);
	spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
	put_ldev(mdev);

	if (m.bio)
		complete_master_bio(mdev, &m);
}

void drbd_csum_ee(struct drbd_conf *mdev, struct crypto_hash *tfm,
		  struct drbd_peer_request *peer_req, void *digest)
{
	struct hash_desc desc;
	struct scatterlist sg;
	struct page *page = peer_req->pages;
	struct page *tmp;
	unsigned len;

	desc.tfm = tfm;
	desc.flags = 0;

	sg_init_table(&sg, 1);
	crypto_hash_init(&desc);

	while ((tmp = page_chain_next(page))) {
		/* all but the last page will be fully used */
		sg_set_page(&sg, page, PAGE_SIZE, 0);
		crypto_hash_update(&desc, &sg, sg.length);
		page = tmp;
	}
	/* and now the last, possibly only partially used page */
	len = peer_req->i.size & (PAGE_SIZE - 1);
	sg_set_page(&sg, page, len ?: PAGE_SIZE, 0);
	crypto_hash_update(&desc, &sg, sg.length);
	crypto_hash_final(&desc, digest);
}

void drbd_csum_bio(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio *bio, void *digest)
{
	struct hash_desc desc;
	struct scatterlist sg;
	struct bio_vec *bvec;
	int i;

	desc.tfm = tfm;
	desc.flags = 0;

	sg_init_table(&sg, 1);
	crypto_hash_init(&desc);

	bio_for_each_segment(bvec, bio, i) {
		sg_set_page(&sg, bvec->bv_page, bvec->bv_len, bvec->bv_offset);
		crypto_hash_update(&desc, &sg, sg.length);
	}
	crypto_hash_final(&desc, digest);
}
294
9676c760 295/* MAYBE merge common code with w_e_end_ov_req */
99920dc5 296static int w_e_send_csum(struct drbd_work *w, int cancel)
b411b363 297{
00d56944
PR
298 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
299 struct drbd_conf *mdev = w->mdev;
b411b363
PR
300 int digest_size;
301 void *digest;
99920dc5 302 int err = 0;
b411b363 303
53ea4331
LE
304 if (unlikely(cancel))
305 goto out;
b411b363 306
9676c760 307 if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0))
53ea4331 308 goto out;
b411b363 309
f399002e 310 digest_size = crypto_hash_digestsize(mdev->tconn->csums_tfm);
53ea4331
LE
311 digest = kmalloc(digest_size, GFP_NOIO);
312 if (digest) {
db830c46
AG
313 sector_t sector = peer_req->i.sector;
314 unsigned int size = peer_req->i.size;
f399002e 315 drbd_csum_ee(mdev, mdev->tconn->csums_tfm, peer_req, digest);
9676c760 316 /* Free peer_req and pages before send.
53ea4331
LE
317 * In case we block on congestion, we could otherwise run into
318 * some distributed deadlock, if the other side blocks on
319 * congestion as well, because our receiver blocks in
c37c8ecf 320 * drbd_alloc_pages due to pp_in_use > max_buffers. */
3967deb1 321 drbd_free_peer_req(mdev, peer_req);
db830c46 322 peer_req = NULL;
53ea4331 323 inc_rs_pending(mdev);
99920dc5 324 err = drbd_send_drequest_csum(mdev, sector, size,
db1b0b72
AG
325 digest, digest_size,
326 P_CSUM_RS_REQUEST);
53ea4331
LE
327 kfree(digest);
328 } else {
329 dev_err(DEV, "kmalloc() of digest failed.\n");
99920dc5 330 err = -ENOMEM;
53ea4331 331 }
b411b363 332
53ea4331 333out:
db830c46 334 if (peer_req)
3967deb1 335 drbd_free_peer_req(mdev, peer_req);
b411b363 336
99920dc5 337 if (unlikely(err))
b411b363 338 dev_err(DEV, "drbd_send_drequest(..., csum) failed\n");
99920dc5 339 return err;
b411b363
PR
340}
341
342#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
343
344static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size)
345{
db830c46 346 struct drbd_peer_request *peer_req;
b411b363
PR
347
348 if (!get_ldev(mdev))
80a40e43 349 return -EIO;
b411b363 350
e3555d85 351 if (drbd_rs_should_slow_down(mdev, sector))
0f0601f4
LE
352 goto defer;
353
b411b363
PR
354 /* GFP_TRY, because if there is no memory available right now, this may
355 * be rescheduled for later. It is "only" background resync, after all. */
0db55363
AG
356 peer_req = drbd_alloc_peer_req(mdev, ID_SYNCER /* unused */, sector,
357 size, GFP_TRY);
db830c46 358 if (!peer_req)
80a40e43 359 goto defer;
b411b363 360
db830c46 361 peer_req->w.cb = w_e_send_csum;
87eeee41 362 spin_lock_irq(&mdev->tconn->req_lock);
db830c46 363 list_add(&peer_req->w.list, &mdev->read_ee);
87eeee41 364 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363 365
0f0601f4 366 atomic_add(size >> 9, &mdev->rs_sect_ev);
fbe29dec 367 if (drbd_submit_peer_request(mdev, peer_req, READ, DRBD_FAULT_RS_RD) == 0)
80a40e43 368 return 0;
b411b363 369
10f6d992
LE
370 /* If it failed because of ENOMEM, retry should help. If it failed
371 * because bio_add_page failed (probably broken lower level driver),
372 * retry may or may not help.
373 * If it does not, you may need to force disconnect. */
87eeee41 374 spin_lock_irq(&mdev->tconn->req_lock);
db830c46 375 list_del(&peer_req->w.list);
87eeee41 376 spin_unlock_irq(&mdev->tconn->req_lock);
22cc37a9 377
3967deb1 378 drbd_free_peer_req(mdev, peer_req);
80a40e43 379defer:
45bb912b 380 put_ldev(mdev);
80a40e43 381 return -EAGAIN;
b411b363
PR
382}
383
99920dc5 384int w_resync_timer(struct drbd_work *w, int cancel)
b411b363 385{
00d56944 386 struct drbd_conf *mdev = w->mdev;
63106d3c
PR
387 switch (mdev->state.conn) {
388 case C_VERIFY_S:
00d56944 389 w_make_ov_request(w, cancel);
63106d3c
PR
390 break;
391 case C_SYNC_TARGET:
00d56944 392 w_make_resync_request(w, cancel);
63106d3c 393 break;
b411b363
PR
394 }
395
99920dc5 396 return 0;
794abb75
PR
397}
398
399void resync_timer_fn(unsigned long data)
400{
401 struct drbd_conf *mdev = (struct drbd_conf *) data;
402
403 if (list_empty(&mdev->resync_work.list))
d5b27b01 404 drbd_queue_work(&mdev->tconn->sender_work, &mdev->resync_work);
b411b363
PR
405}
406
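/* The fifo_buffer below implements the resync controller's plan: one slot
 * per planning step.  fifo_set() initializes every slot, fifo_push() writes
 * the newest value and returns the one that falls out of the current step,
 * and fifo_add_val() adds a value to every slot.  drbd_rs_controller()
 * combines these to plan how many sectors to request per SLEEP_TIME
 * interval. */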
407static void fifo_set(struct fifo_buffer *fb, int value)
408{
409 int i;
410
411 for (i = 0; i < fb->size; i++)
f10f2623 412 fb->values[i] = value;
778f271d
PR
413}
414
415static int fifo_push(struct fifo_buffer *fb, int value)
416{
417 int ov;
418
419 ov = fb->values[fb->head_index];
420 fb->values[fb->head_index++] = value;
421
422 if (fb->head_index >= fb->size)
423 fb->head_index = 0;
424
425 return ov;
426}
427
428static void fifo_add_val(struct fifo_buffer *fb, int value)
429{
430 int i;
431
432 for (i = 0; i < fb->size; i++)
433 fb->values[i] += value;
434}
435
9958c857
PR
436struct fifo_buffer *fifo_alloc(int fifo_size)
437{
438 struct fifo_buffer *fb;
439
440 fb = kzalloc(sizeof(struct fifo_buffer) + sizeof(int) * fifo_size, GFP_KERNEL);
441 if (!fb)
442 return NULL;
443
444 fb->head_index = 0;
445 fb->size = fifo_size;
446 fb->total = 0;
447
448 return fb;
449}
450
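/* Resync speed controller.  drbd_rs_number_requests() calls this once per
 * SLEEP_TIME interval whenever the plan fifo has a non-zero size (derived
 * from c_plan_ahead); otherwise the fixed resync_rate is used instead.
 * It compares the resync data currently in flight against a target fill
 * level (c_fill_target, or a value derived from c_delay_target and the
 * just-measured incoming rate), spreads the required correction over the
 * plan fifo, and returns the number of sectors to request in this step,
 * capped by c_max_rate.
 *
 * Illustrative numbers only (assumed, not from a real configuration):
 * with want = 1000 sectors, 600 sectors still in flight and an empty
 * 10-step plan, the correction is 400 sectors, i.e. 40 per step, so this
 * step requests roughly sect_in + 40 sectors. */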
9d77a5fe 451static int drbd_rs_controller(struct drbd_conf *mdev)
778f271d 452{
daeda1cc 453 struct disk_conf *dc;
778f271d
PR
454 unsigned int sect_in; /* Number of sectors that came in since the last turn */
455 unsigned int want; /* The number of sectors we want in the proxy */
456 int req_sect; /* Number of sectors to request in this turn */
457 int correction; /* Number of sectors more we need in the proxy*/
458 int cps; /* correction per invocation of drbd_rs_controller() */
459 int steps; /* Number of time steps to plan ahead */
460 int curr_corr;
461 int max_sect;
813472ce 462 struct fifo_buffer *plan;
778f271d
PR
463
464 sect_in = atomic_xchg(&mdev->rs_sect_in, 0); /* Number of sectors that came in */
465 mdev->rs_in_flight -= sect_in;
466
daeda1cc 467 dc = rcu_dereference(mdev->ldev->disk_conf);
813472ce 468 plan = rcu_dereference(mdev->rs_plan_s);
778f271d 469
813472ce 470 steps = plan->size; /* (dc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; */
778f271d
PR
471
472 if (mdev->rs_in_flight + sect_in == 0) { /* At start of resync */
daeda1cc 473 want = ((dc->resync_rate * 2 * SLEEP_TIME) / HZ) * steps;
778f271d 474 } else { /* normal path */
daeda1cc
PR
475 want = dc->c_fill_target ? dc->c_fill_target :
476 sect_in * dc->c_delay_target * HZ / (SLEEP_TIME * 10);
778f271d
PR
477 }
478
813472ce 479 correction = want - mdev->rs_in_flight - plan->total;
778f271d
PR
480
481 /* Plan ahead */
482 cps = correction / steps;
813472ce
PR
483 fifo_add_val(plan, cps);
484 plan->total += cps * steps;
778f271d
PR
485
486 /* What we do in this step */
813472ce
PR
487 curr_corr = fifo_push(plan, 0);
488 plan->total -= curr_corr;
778f271d
PR
489
490 req_sect = sect_in + curr_corr;
491 if (req_sect < 0)
492 req_sect = 0;
493
daeda1cc 494 max_sect = (dc->c_max_rate * 2 * SLEEP_TIME) / HZ;
778f271d
PR
495 if (req_sect > max_sect)
496 req_sect = max_sect;
497
498 /*
499 dev_warn(DEV, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n",
500 sect_in, mdev->rs_in_flight, want, correction,
501 steps, cps, mdev->rs_planed, curr_corr, req_sect);
502 */
503
504 return req_sect;
505}
506
9d77a5fe 507static int drbd_rs_number_requests(struct drbd_conf *mdev)
e65f440d
LE
508{
509 int number;
813472ce
PR
510
511 rcu_read_lock();
512 if (rcu_dereference(mdev->rs_plan_s)->size) {
e65f440d
LE
513 number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9);
514 mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
515 } else {
daeda1cc 516 mdev->c_sync_rate = rcu_dereference(mdev->ldev->disk_conf)->resync_rate;
e65f440d
LE
517 number = SLEEP_TIME * mdev->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ);
518 }
813472ce 519 rcu_read_unlock();
e65f440d 520
e65f440d
LE
521 /* ignore the amount of pending requests, the resync controller should
522 * throttle down to incoming reply rate soon enough anyways. */
523 return number;
524}
525
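/* Generate the next batch of resync requests, called from the worker when
 * the resync timer fires: ask drbd_rs_number_requests() how many requests
 * this SLEEP_TIME interval may carry, stop early once the send buffer is
 * half filled, walk the bitmap from bm_resync_fo, merge adjacent dirty bits
 * into larger aligned requests (up to max_bio_size), and then either read
 * the block locally for a checksum based request (csums_tfm set and agreed
 * protocol >= 89) or send a plain P_RS_DATA_REQUEST.  Finally the resync
 * timer is re-armed. */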
99920dc5 526int w_make_resync_request(struct drbd_work *w, int cancel)
b411b363 527{
00d56944 528 struct drbd_conf *mdev = w->mdev;
b411b363
PR
529 unsigned long bit;
530 sector_t sector;
531 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
1816a2b4 532 int max_bio_size;
e65f440d 533 int number, rollback_i, size;
b411b363 534 int align, queued, sndbuf;
0f0601f4 535 int i = 0;
b411b363
PR
536
537 if (unlikely(cancel))
99920dc5 538 return 0;
b411b363 539
af85e8e8
LE
540 if (mdev->rs_total == 0) {
541 /* empty resync? */
542 drbd_resync_finished(mdev);
99920dc5 543 return 0;
af85e8e8
LE
544 }
545
b411b363
PR
546 if (!get_ldev(mdev)) {
547 /* Since we only need to access mdev->rsync a
548 get_ldev_if_state(mdev,D_FAILED) would be sufficient, but
549 to continue resync with a broken disk makes no sense at
550 all */
551 dev_err(DEV, "Disk broke down during resync!\n");
99920dc5 552 return 0;
b411b363
PR
553 }
554
0cfdd247 555 max_bio_size = queue_max_hw_sectors(mdev->rq_queue) << 9;
e65f440d
LE
556 number = drbd_rs_number_requests(mdev);
557 if (number == 0)
0f0601f4 558 goto requeue;
b411b363 559
b411b363
PR
560 for (i = 0; i < number; i++) {
561 /* Stop generating RS requests, when half of the send buffer is filled */
e42325a5
PR
562 mutex_lock(&mdev->tconn->data.mutex);
563 if (mdev->tconn->data.socket) {
564 queued = mdev->tconn->data.socket->sk->sk_wmem_queued;
565 sndbuf = mdev->tconn->data.socket->sk->sk_sndbuf;
b411b363
PR
566 } else {
567 queued = 1;
568 sndbuf = 0;
569 }
e42325a5 570 mutex_unlock(&mdev->tconn->data.mutex);
b411b363
PR
571 if (queued > sndbuf / 2)
572 goto requeue;
573
574next_sector:
575 size = BM_BLOCK_SIZE;
576 bit = drbd_bm_find_next(mdev, mdev->bm_resync_fo);
577
4b0715f0 578 if (bit == DRBD_END_OF_BITMAP) {
b411b363 579 mdev->bm_resync_fo = drbd_bm_bits(mdev);
b411b363 580 put_ldev(mdev);
99920dc5 581 return 0;
b411b363
PR
582 }
583
584 sector = BM_BIT_TO_SECT(bit);
585
e3555d85
PR
586 if (drbd_rs_should_slow_down(mdev, sector) ||
587 drbd_try_rs_begin_io(mdev, sector)) {
b411b363
PR
588 mdev->bm_resync_fo = bit;
589 goto requeue;
590 }
591 mdev->bm_resync_fo = bit + 1;
592
593 if (unlikely(drbd_bm_test_bit(mdev, bit) == 0)) {
594 drbd_rs_complete_io(mdev, sector);
595 goto next_sector;
596 }
597
1816a2b4 598#if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE
b411b363
PR
599 /* try to find some adjacent bits.
600 * we stop if we have already the maximum req size.
601 *
602 * Additionally always align bigger requests, in order to
603 * be prepared for all stripe sizes of software RAIDs.
b411b363
PR
604 */
605 align = 1;
d207450c 606 rollback_i = i;
b411b363 607 for (;;) {
1816a2b4 608 if (size + BM_BLOCK_SIZE > max_bio_size)
b411b363
PR
609 break;
610
611 /* Be always aligned */
612 if (sector & ((1<<(align+3))-1))
613 break;
614
615 /* do not cross extent boundaries */
616 if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0)
617 break;
618 /* now, is it actually dirty, after all?
619 * caution, drbd_bm_test_bit is tri-state for some
620 * obscure reason; ( b == 0 ) would get the out-of-band
621 * only accidentally right because of the "oddly sized"
622 * adjustment below */
623 if (drbd_bm_test_bit(mdev, bit+1) != 1)
624 break;
625 bit++;
626 size += BM_BLOCK_SIZE;
627 if ((BM_BLOCK_SIZE << align) <= size)
628 align++;
629 i++;
630 }
631 /* if we merged some,
632 * reset the offset to start the next drbd_bm_find_next from */
633 if (size > BM_BLOCK_SIZE)
634 mdev->bm_resync_fo = bit + 1;
635#endif
636
637 /* adjust very last sectors, in case we are oddly sized */
638 if (sector + (size>>9) > capacity)
639 size = (capacity-sector)<<9;
f399002e 640 if (mdev->tconn->agreed_pro_version >= 89 && mdev->tconn->csums_tfm) {
b411b363 641 switch (read_for_csum(mdev, sector, size)) {
80a40e43 642 case -EIO: /* Disk failure */
b411b363 643 put_ldev(mdev);
99920dc5 644 return -EIO;
80a40e43 645 case -EAGAIN: /* allocation failed, or ldev busy */
b411b363
PR
646 drbd_rs_complete_io(mdev, sector);
647 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
d207450c 648 i = rollback_i;
b411b363 649 goto requeue;
80a40e43
LE
650 case 0:
651 /* everything ok */
652 break;
653 default:
654 BUG();
b411b363
PR
655 }
656 } else {
99920dc5
AG
657 int err;
658
b411b363 659 inc_rs_pending(mdev);
99920dc5
AG
660 err = drbd_send_drequest(mdev, P_RS_DATA_REQUEST,
661 sector, size, ID_SYNCER);
662 if (err) {
b411b363
PR
663 dev_err(DEV, "drbd_send_drequest() failed, aborting...\n");
664 dec_rs_pending(mdev);
665 put_ldev(mdev);
99920dc5 666 return err;
b411b363
PR
667 }
668 }
669 }
670
671 if (mdev->bm_resync_fo >= drbd_bm_bits(mdev)) {
672 /* last syncer _request_ was sent,
673 * but the P_RS_DATA_REPLY not yet received. sync will end (and
674 * next sync group will resume), as soon as we receive the last
675 * resync data block, and the last bit is cleared.
676 * until then resync "work" is "inactive" ...
677 */
b411b363 678 put_ldev(mdev);
99920dc5 679 return 0;
b411b363
PR
680 }
681
682 requeue:
778f271d 683 mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
b411b363
PR
684 mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
685 put_ldev(mdev);
99920dc5 686 return 0;
b411b363
PR
687}
688
00d56944 689static int w_make_ov_request(struct drbd_work *w, int cancel)
b411b363 690{
00d56944 691 struct drbd_conf *mdev = w->mdev;
b411b363
PR
692 int number, i, size;
693 sector_t sector;
694 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
695
696 if (unlikely(cancel))
697 return 1;
698
2649f080 699 number = drbd_rs_number_requests(mdev);
b411b363
PR
700
701 sector = mdev->ov_position;
702 for (i = 0; i < number; i++) {
703 if (sector >= capacity) {
b411b363
PR
704 return 1;
705 }
706
707 size = BM_BLOCK_SIZE;
708
e3555d85
PR
709 if (drbd_rs_should_slow_down(mdev, sector) ||
710 drbd_try_rs_begin_io(mdev, sector)) {
b411b363
PR
711 mdev->ov_position = sector;
712 goto requeue;
713 }
714
715 if (sector + (size>>9) > capacity)
716 size = (capacity-sector)<<9;
717
718 inc_rs_pending(mdev);
5b9f499c 719 if (drbd_send_ov_request(mdev, sector, size)) {
b411b363
PR
720 dec_rs_pending(mdev);
721 return 0;
722 }
723 sector += BM_SECT_PER_BIT;
724 }
725 mdev->ov_position = sector;
726
727 requeue:
2649f080 728 mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
b411b363
PR
729 mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
730 return 1;
731}
732
99920dc5 733int w_ov_finished(struct drbd_work *w, int cancel)
b411b363 734{
00d56944 735 struct drbd_conf *mdev = w->mdev;
b411b363 736 kfree(w);
8f7bed77 737 ov_out_of_sync_print(mdev);
b411b363
PR
738 drbd_resync_finished(mdev);
739
99920dc5 740 return 0;
b411b363
PR
741}
742
99920dc5 743static int w_resync_finished(struct drbd_work *w, int cancel)
b411b363 744{
00d56944 745 struct drbd_conf *mdev = w->mdev;
b411b363
PR
746 kfree(w);
747
748 drbd_resync_finished(mdev);
749
99920dc5 750 return 0;
b411b363
PR
751}
752
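/* Ping the peer and wait for the answer (or for the connection to drop
 * below C_CONNECTED), so that drbd_resync_finished() evaluates the final
 * state with the peer known to be reachable (or gone). */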
753static void ping_peer(struct drbd_conf *mdev)
754{
2a67d8b9
PR
755 struct drbd_tconn *tconn = mdev->tconn;
756
757 clear_bit(GOT_PING_ACK, &tconn->flags);
758 request_ping(tconn);
759 wait_event(tconn->ping_wait,
760 test_bit(GOT_PING_ACK, &tconn->flags) || mdev->state.conn < C_CONNECTED);
af85e8e8
LE
761}
762
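/* End of a resync or online verify run: drain the resync LRU (retrying via
 * a queued w_resync_finished if that is not yet possible), report the
 * achieved throughput, decide the new disk/pdsk states, and for a real
 * resync bring the UUID sets back into agreement.  May trigger the
 * "out-of-sync" or "after-resync-target" helpers. */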
763int drbd_resync_finished(struct drbd_conf *mdev)
764{
765 unsigned long db, dt, dbdt;
766 unsigned long n_oos;
767 union drbd_state os, ns;
768 struct drbd_work *w;
769 char *khelper_cmd = NULL;
26525618 770 int verify_done = 0;
b411b363
PR
771
772 /* Remove all elements from the resync LRU. Since future actions
773 * might set bits in the (main) bitmap, then the entries in the
774 * resync LRU would be wrong. */
775 if (drbd_rs_del_all(mdev)) {
776 /* In case this is not possible now, most probably because
777 * there are P_RS_DATA_REPLY Packets lingering on the worker's
778 * queue (or even the read operations for those packets
779 * is not finished by now). Retry in 100ms. */
780
20ee6390 781 schedule_timeout_interruptible(HZ / 10);
b411b363
PR
782 w = kmalloc(sizeof(struct drbd_work), GFP_ATOMIC);
783 if (w) {
784 w->cb = w_resync_finished;
9b743da9 785 w->mdev = mdev;
d5b27b01 786 drbd_queue_work(&mdev->tconn->sender_work, w);
b411b363
PR
787 return 1;
788 }
789 dev_err(DEV, "Warn failed to drbd_rs_del_all() and to kmalloc(w).\n");
790 }
791
792 dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ;
793 if (dt <= 0)
794 dt = 1;
795 db = mdev->rs_total;
796 dbdt = Bit2KB(db/dt);
797 mdev->rs_paused /= HZ;
798
799 if (!get_ldev(mdev))
800 goto out;
801
af85e8e8
LE
802 ping_peer(mdev);
803
87eeee41 804 spin_lock_irq(&mdev->tconn->req_lock);
78bae59b 805 os = drbd_read_state(mdev);
b411b363 806
26525618
LE
807 verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T);
808
b411b363
PR
809 /* This protects us against multiple calls (that can happen in the presence
810 of application IO), and against connectivity loss just before we arrive here. */
811 if (os.conn <= C_CONNECTED)
812 goto out_unlock;
813
814 ns = os;
815 ns.conn = C_CONNECTED;
816
817 dev_info(DEV, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
26525618 818 verify_done ? "Online verify " : "Resync",
b411b363
PR
819 dt + mdev->rs_paused, mdev->rs_paused, dbdt);
820
821 n_oos = drbd_bm_total_weight(mdev);
822
823 if (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) {
824 if (n_oos) {
825 dev_alert(DEV, "Online verify found %lu %dk block out of sync!\n",
826 n_oos, Bit2KB(1));
827 khelper_cmd = "out-of-sync";
828 }
829 } else {
830 D_ASSERT((n_oos - mdev->rs_failed) == 0);
831
832 if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T)
833 khelper_cmd = "after-resync-target";
834
f399002e 835 if (mdev->tconn->csums_tfm && mdev->rs_total) {
b411b363
PR
836 const unsigned long s = mdev->rs_same_csum;
837 const unsigned long t = mdev->rs_total;
838 const int ratio =
839 (t == 0) ? 0 :
840 (t < 100000) ? ((s*100)/t) : (s/(t/100));
24c4830c 841 dev_info(DEV, "%u %% had equal checksums, eliminated: %luK; "
b411b363
PR
842 "transferred %luK total %luK\n",
843 ratio,
844 Bit2KB(mdev->rs_same_csum),
845 Bit2KB(mdev->rs_total - mdev->rs_same_csum),
846 Bit2KB(mdev->rs_total));
847 }
848 }
849
850 if (mdev->rs_failed) {
851 dev_info(DEV, " %lu failed blocks\n", mdev->rs_failed);
852
853 if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
854 ns.disk = D_INCONSISTENT;
855 ns.pdsk = D_UP_TO_DATE;
856 } else {
857 ns.disk = D_UP_TO_DATE;
858 ns.pdsk = D_INCONSISTENT;
859 }
860 } else {
861 ns.disk = D_UP_TO_DATE;
862 ns.pdsk = D_UP_TO_DATE;
863
864 if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
865 if (mdev->p_uuid) {
866 int i;
867 for (i = UI_BITMAP ; i <= UI_HISTORY_END ; i++)
868 _drbd_uuid_set(mdev, i, mdev->p_uuid[i]);
869 drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_CURRENT]);
870 _drbd_uuid_set(mdev, UI_CURRENT, mdev->p_uuid[UI_CURRENT]);
871 } else {
872 dev_err(DEV, "mdev->p_uuid is NULL! BUG\n");
873 }
874 }
875
62b0da3a
LE
876 if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) {
877 /* for verify runs, we don't update uuids here,
878 * so there would be nothing to report. */
879 drbd_uuid_set_bm(mdev, 0UL);
880 drbd_print_uuids(mdev, "updated UUIDs");
881 if (mdev->p_uuid) {
882 /* Now the two UUID sets are equal, update what we
883 * know of the peer. */
884 int i;
885 for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
886 mdev->p_uuid[i] = mdev->ldev->md.uuid[i];
887 }
b411b363
PR
888 }
889 }
890
891 _drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
892out_unlock:
87eeee41 893 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363
PR
894 put_ldev(mdev);
895out:
896 mdev->rs_total = 0;
897 mdev->rs_failed = 0;
898 mdev->rs_paused = 0;
26525618
LE
899 if (verify_done)
900 mdev->ov_start_sector = 0;
b411b363 901
13d42685
LE
902 drbd_md_sync(mdev);
903
b411b363
PR
904 if (khelper_cmd)
905 drbd_khelper(mdev, khelper_cmd);
906
907 return 1;
908}
909
910/* helper */
db830c46 911static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct drbd_peer_request *peer_req)
b411b363 912{
045417f7 913 if (drbd_peer_req_has_active_page(peer_req)) {
b411b363 914 /* This might happen if sendpage() has not finished */
db830c46 915 int i = (peer_req->i.size + PAGE_SIZE -1) >> PAGE_SHIFT;
435f0740
LE
916 atomic_add(i, &mdev->pp_in_use_by_net);
917 atomic_sub(i, &mdev->pp_in_use);
87eeee41 918 spin_lock_irq(&mdev->tconn->req_lock);
db830c46 919 list_add_tail(&peer_req->w.list, &mdev->net_ee);
87eeee41 920 spin_unlock_irq(&mdev->tconn->req_lock);
435f0740 921 wake_up(&drbd_pp_wait);
b411b363 922 } else
3967deb1 923 drbd_free_peer_req(mdev, peer_req);
b411b363
PR
924}
925
926/**
927 * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST
928 * @mdev: DRBD device.
929 * @w: work object.
930 * @cancel: The connection will be closed anyways
931 */
99920dc5 932int w_e_end_data_req(struct drbd_work *w, int cancel)
b411b363 933{
db830c46 934 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
00d56944 935 struct drbd_conf *mdev = w->mdev;
99920dc5 936 int err;
b411b363
PR
937
938 if (unlikely(cancel)) {
3967deb1 939 drbd_free_peer_req(mdev, peer_req);
b411b363 940 dec_unacked(mdev);
99920dc5 941 return 0;
b411b363
PR
942 }
943
db830c46 944 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
99920dc5 945 err = drbd_send_block(mdev, P_DATA_REPLY, peer_req);
b411b363
PR
946 } else {
947 if (__ratelimit(&drbd_ratelimit_state))
948 dev_err(DEV, "Sending NegDReply. sector=%llus.\n",
db830c46 949 (unsigned long long)peer_req->i.sector);
b411b363 950
99920dc5 951 err = drbd_send_ack(mdev, P_NEG_DREPLY, peer_req);
b411b363
PR
952 }
953
954 dec_unacked(mdev);
955
db830c46 956 move_to_net_ee_or_free(mdev, peer_req);
b411b363 957
99920dc5 958 if (unlikely(err))
b411b363 959 dev_err(DEV, "drbd_send_block() failed\n");
99920dc5 960 return err;
b411b363
PR
961}
962
963/**
a209b4ae 964 * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST
b411b363
PR
965 * @mdev: DRBD device.
966 * @w: work object.
967 * @cancel: The connection will be closed anyways
968 */
99920dc5 969int w_e_end_rsdata_req(struct drbd_work *w, int cancel)
b411b363 970{
db830c46 971 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
00d56944 972 struct drbd_conf *mdev = w->mdev;
99920dc5 973 int err;
b411b363
PR
974
975 if (unlikely(cancel)) {
3967deb1 976 drbd_free_peer_req(mdev, peer_req);
b411b363 977 dec_unacked(mdev);
99920dc5 978 return 0;
b411b363
PR
979 }
980
981 if (get_ldev_if_state(mdev, D_FAILED)) {
db830c46 982 drbd_rs_complete_io(mdev, peer_req->i.sector);
b411b363
PR
983 put_ldev(mdev);
984 }
985
d612d309 986 if (mdev->state.conn == C_AHEAD) {
99920dc5 987 err = drbd_send_ack(mdev, P_RS_CANCEL, peer_req);
db830c46 988 } else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
b411b363
PR
989 if (likely(mdev->state.pdsk >= D_INCONSISTENT)) {
990 inc_rs_pending(mdev);
99920dc5 991 err = drbd_send_block(mdev, P_RS_DATA_REPLY, peer_req);
b411b363
PR
992 } else {
993 if (__ratelimit(&drbd_ratelimit_state))
994 dev_err(DEV, "Not sending RSDataReply, "
995 "partner DISKLESS!\n");
99920dc5 996 err = 0;
b411b363
PR
997 }
998 } else {
999 if (__ratelimit(&drbd_ratelimit_state))
1000 dev_err(DEV, "Sending NegRSDReply. sector %llus.\n",
db830c46 1001 (unsigned long long)peer_req->i.sector);
b411b363 1002
99920dc5 1003 err = drbd_send_ack(mdev, P_NEG_RS_DREPLY, peer_req);
b411b363
PR
1004
1005 /* update resync data with failure */
db830c46 1006 drbd_rs_failed_io(mdev, peer_req->i.sector, peer_req->i.size);
b411b363
PR
1007 }
1008
1009 dec_unacked(mdev);
1010
db830c46 1011 move_to_net_ee_or_free(mdev, peer_req);
b411b363 1012
99920dc5 1013 if (unlikely(err))
b411b363 1014 dev_err(DEV, "drbd_send_block() failed\n");
99920dc5 1015 return err;
b411b363
PR
1016}
1017
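/* Checksum based resync reply: peer_req->digest holds the digest the peer
 * computed for this block.  We hash our local copy; if the digests match we
 * only send P_RS_IS_IN_SYNC (and account it in rs_same_csum), otherwise we
 * send the whole block as P_RS_DATA_REPLY. */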
99920dc5 1018int w_e_end_csum_rs_req(struct drbd_work *w, int cancel)
b411b363 1019{
db830c46 1020 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
00d56944 1021 struct drbd_conf *mdev = w->mdev;
b411b363
PR
1022 struct digest_info *di;
1023 int digest_size;
1024 void *digest = NULL;
99920dc5 1025 int err, eq = 0;
b411b363
PR
1026
1027 if (unlikely(cancel)) {
3967deb1 1028 drbd_free_peer_req(mdev, peer_req);
b411b363 1029 dec_unacked(mdev);
99920dc5 1030 return 0;
b411b363
PR
1031 }
1032
1d53f09e 1033 if (get_ldev(mdev)) {
db830c46 1034 drbd_rs_complete_io(mdev, peer_req->i.sector);
1d53f09e
LE
1035 put_ldev(mdev);
1036 }
b411b363 1037
db830c46 1038 di = peer_req->digest;
b411b363 1039
db830c46 1040 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
b411b363
PR
1041 /* quick hack to try to avoid a race against reconfiguration.
1042 * a real fix would be much more involved,
1043 * introducing more locking mechanisms */
f399002e
LE
1044 if (mdev->tconn->csums_tfm) {
1045 digest_size = crypto_hash_digestsize(mdev->tconn->csums_tfm);
b411b363
PR
1046 D_ASSERT(digest_size == di->digest_size);
1047 digest = kmalloc(digest_size, GFP_NOIO);
1048 }
1049 if (digest) {
f399002e 1050 drbd_csum_ee(mdev, mdev->tconn->csums_tfm, peer_req, digest);
b411b363
PR
1051 eq = !memcmp(digest, di->digest, digest_size);
1052 kfree(digest);
1053 }
1054
1055 if (eq) {
db830c46 1056 drbd_set_in_sync(mdev, peer_req->i.sector, peer_req->i.size);
676396d5 1057 /* rs_same_csums unit is BM_BLOCK_SIZE */
db830c46 1058 mdev->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT;
99920dc5 1059 err = drbd_send_ack(mdev, P_RS_IS_IN_SYNC, peer_req);
b411b363
PR
1060 } else {
1061 inc_rs_pending(mdev);
db830c46
AG
1062 peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */
1063 peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */
204bba99 1064 kfree(di);
99920dc5 1065 err = drbd_send_block(mdev, P_RS_DATA_REPLY, peer_req);
b411b363
PR
1066 }
1067 } else {
99920dc5 1068 err = drbd_send_ack(mdev, P_NEG_RS_DREPLY, peer_req);
b411b363
PR
1069 if (__ratelimit(&drbd_ratelimit_state))
1070 dev_err(DEV, "Sending NegDReply. I guess it gets messy.\n");
1071 }
1072
1073 dec_unacked(mdev);
db830c46 1074 move_to_net_ee_or_free(mdev, peer_req);
b411b363 1075
99920dc5 1076 if (unlikely(err))
b411b363 1077 dev_err(DEV, "drbd_send_block/ack() failed\n");
99920dc5 1078 return err;
b411b363
PR
1079}
1080
99920dc5 1081int w_e_end_ov_req(struct drbd_work *w, int cancel)
b411b363 1082{
db830c46 1083 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
00d56944 1084 struct drbd_conf *mdev = w->mdev;
db830c46
AG
1085 sector_t sector = peer_req->i.sector;
1086 unsigned int size = peer_req->i.size;
b411b363
PR
1087 int digest_size;
1088 void *digest;
99920dc5 1089 int err = 0;
b411b363
PR
1090
1091 if (unlikely(cancel))
1092 goto out;
1093
f399002e 1094 digest_size = crypto_hash_digestsize(mdev->tconn->verify_tfm);
b411b363 1095 digest = kmalloc(digest_size, GFP_NOIO);
8f21420e 1096 if (!digest) {
99920dc5 1097 err = 1; /* terminate the connection in case the allocation failed */
8f21420e 1098 goto out;
b411b363
PR
1099 }
1100
db830c46 1101 if (likely(!(peer_req->flags & EE_WAS_ERROR)))
f399002e 1102 drbd_csum_ee(mdev, mdev->tconn->verify_tfm, peer_req, digest);
8f21420e
PR
1103 else
1104 memset(digest, 0, digest_size);
1105
53ea4331
LE
1106 /* Free e and pages before send.
1107 * In case we block on congestion, we could otherwise run into
1108 * some distributed deadlock, if the other side blocks on
1109 * congestion as well, because our receiver blocks in
c37c8ecf 1110 * drbd_alloc_pages due to pp_in_use > max_buffers. */
3967deb1 1111 drbd_free_peer_req(mdev, peer_req);
db830c46 1112 peer_req = NULL;
8f21420e 1113 inc_rs_pending(mdev);
99920dc5
AG
1114 err = drbd_send_drequest_csum(mdev, sector, size, digest, digest_size, P_OV_REPLY);
1115 if (err)
8f21420e
PR
1116 dec_rs_pending(mdev);
1117 kfree(digest);
1118
b411b363 1119out:
db830c46 1120 if (peer_req)
3967deb1 1121 drbd_free_peer_req(mdev, peer_req);
b411b363 1122 dec_unacked(mdev);
99920dc5 1123 return err;
b411b363
PR
1124}
1125
8f7bed77 1126void drbd_ov_out_of_sync_found(struct drbd_conf *mdev, sector_t sector, int size)
b411b363
PR
1127{
1128 if (mdev->ov_last_oos_start + mdev->ov_last_oos_size == sector) {
1129 mdev->ov_last_oos_size += size>>9;
1130 } else {
1131 mdev->ov_last_oos_start = sector;
1132 mdev->ov_last_oos_size = size>>9;
1133 }
1134 drbd_set_out_of_sync(mdev, sector, size);
b411b363
PR
1135}
1136
99920dc5 1137int w_e_end_ov_reply(struct drbd_work *w, int cancel)
b411b363 1138{
db830c46 1139 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
00d56944 1140 struct drbd_conf *mdev = w->mdev;
b411b363 1141 struct digest_info *di;
b411b363 1142 void *digest;
db830c46
AG
1143 sector_t sector = peer_req->i.sector;
1144 unsigned int size = peer_req->i.size;
53ea4331 1145 int digest_size;
99920dc5 1146 int err, eq = 0;
b411b363
PR
1147
1148 if (unlikely(cancel)) {
3967deb1 1149 drbd_free_peer_req(mdev, peer_req);
b411b363 1150 dec_unacked(mdev);
99920dc5 1151 return 0;
b411b363
PR
1152 }
1153
1154 /* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all
1155 * the resync lru has been cleaned up already */
1d53f09e 1156 if (get_ldev(mdev)) {
db830c46 1157 drbd_rs_complete_io(mdev, peer_req->i.sector);
1d53f09e
LE
1158 put_ldev(mdev);
1159 }
b411b363 1160
db830c46 1161 di = peer_req->digest;
b411b363 1162
db830c46 1163 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
f399002e 1164 digest_size = crypto_hash_digestsize(mdev->tconn->verify_tfm);
b411b363
PR
1165 digest = kmalloc(digest_size, GFP_NOIO);
1166 if (digest) {
f399002e 1167 drbd_csum_ee(mdev, mdev->tconn->verify_tfm, peer_req, digest);
b411b363
PR
1168
1169 D_ASSERT(digest_size == di->digest_size);
1170 eq = !memcmp(digest, di->digest, digest_size);
1171 kfree(digest);
1172 }
b411b363
PR
1173 }
1174
9676c760
LE
1175 /* Free peer_req and pages before send.
1176 * In case we block on congestion, we could otherwise run into
1177 * some distributed deadlock, if the other side blocks on
1178 * congestion as well, because our receiver blocks in
c37c8ecf 1179 * drbd_alloc_pages due to pp_in_use > max_buffers. */
3967deb1 1180 drbd_free_peer_req(mdev, peer_req);
b411b363 1181 if (!eq)
8f7bed77 1182 drbd_ov_out_of_sync_found(mdev, sector, size);
b411b363 1183 else
8f7bed77 1184 ov_out_of_sync_print(mdev);
b411b363 1185
99920dc5 1186 err = drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size,
fa79abd8 1187 eq ? ID_IN_SYNC : ID_OUT_OF_SYNC);
b411b363 1188
53ea4331 1189 dec_unacked(mdev);
b411b363 1190
ea5442af
LE
1191 --mdev->ov_left;
1192
1193 /* let's advance progress step marks only for every other megabyte */
1194 if ((mdev->ov_left & 0x200) == 0x200)
1195 drbd_advance_rs_marks(mdev, mdev->ov_left);
1196
1197 if (mdev->ov_left == 0) {
8f7bed77 1198 ov_out_of_sync_print(mdev);
b411b363
PR
1199 drbd_resync_finished(mdev);
1200 }
1201
99920dc5 1202 return err;
b411b363
PR
1203}
1204
99920dc5 1205int w_prev_work_done(struct drbd_work *w, int cancel)
b411b363
PR
1206{
1207 struct drbd_wq_barrier *b = container_of(w, struct drbd_wq_barrier, w);
00d56944 1208
b411b363 1209 complete(&b->done);
99920dc5 1210 return 0;
b411b363
PR
1211}
1212
99920dc5 1213int w_send_barrier(struct drbd_work *w, int cancel)
b411b363 1214{
9f5bdc33 1215 struct drbd_socket *sock;
b411b363 1216 struct drbd_tl_epoch *b = container_of(w, struct drbd_tl_epoch, w);
00d56944 1217 struct drbd_conf *mdev = w->mdev;
9f5bdc33 1218 struct p_barrier *p;
b411b363
PR
1219
1220 /* really avoid racing with tl_clear. w.cb may have been referenced
1221 * just before it was reassigned and re-queued, so double check that.
1222 * actually, this race was harmless, since we only try to send the
1223 * barrier packet here, and otherwise do nothing with the object.
1224 * but compare with the head of w_clear_epoch */
87eeee41 1225 spin_lock_irq(&mdev->tconn->req_lock);
b411b363
PR
1226 if (w->cb != w_send_barrier || mdev->state.conn < C_CONNECTED)
1227 cancel = 1;
87eeee41 1228 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363 1229 if (cancel)
b411b363 1230 return 0;
99920dc5 1231
9f5bdc33
AG
1232 sock = &mdev->tconn->data;
1233 p = drbd_prepare_command(mdev, sock);
1234 if (!p)
1235 return -EIO;
b411b363
PR
1236 p->barrier = b->br_number;
1237 /* inc_ap_pending was done where this was queued.
1238 * dec_ap_pending will be done in got_BarrierAck
1239 * or (on connection loss) in w_clear_epoch. */
9f5bdc33 1240 return drbd_send_command(mdev, sock, P_BARRIER, sizeof(*p), NULL, 0);
b411b363
PR
1241}
1242
99920dc5 1243int w_send_write_hint(struct drbd_work *w, int cancel)
b411b363 1244{
00d56944 1245 struct drbd_conf *mdev = w->mdev;
9f5bdc33
AG
1246 struct drbd_socket *sock;
1247
b411b363 1248 if (cancel)
99920dc5 1249 return 0;
9f5bdc33
AG
1250 sock = &mdev->tconn->data;
1251 if (!drbd_prepare_command(mdev, sock))
1252 return -EIO;
e658983a 1253 return drbd_send_command(mdev, sock, P_UNPLUG_REMOTE, 0, NULL, 0);
b411b363
PR
1254}
1255
8f7bed77 1256int w_send_out_of_sync(struct drbd_work *w, int cancel)
73a01a18
PR
1257{
1258 struct drbd_request *req = container_of(w, struct drbd_request, w);
00d56944 1259 struct drbd_conf *mdev = w->mdev;
99920dc5 1260 int err;
73a01a18
PR
1261
1262 if (unlikely(cancel)) {
8554df1c 1263 req_mod(req, SEND_CANCELED);
99920dc5 1264 return 0;
73a01a18
PR
1265 }
1266
8f7bed77 1267 err = drbd_send_out_of_sync(mdev, req);
8554df1c 1268 req_mod(req, OOS_HANDED_TO_NETWORK);
73a01a18 1269
99920dc5 1270 return err;
73a01a18
PR
1271}
1272
b411b363
PR
1273/**
1274 * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request
1275 * @mdev: DRBD device.
1276 * @w: work object.
1277 * @cancel: The connection will be closed anyways
1278 */
99920dc5 1279int w_send_dblock(struct drbd_work *w, int cancel)
b411b363
PR
1280{
1281 struct drbd_request *req = container_of(w, struct drbd_request, w);
00d56944 1282 struct drbd_conf *mdev = w->mdev;
99920dc5 1283 int err;
b411b363
PR
1284
1285 if (unlikely(cancel)) {
8554df1c 1286 req_mod(req, SEND_CANCELED);
99920dc5 1287 return 0;
b411b363
PR
1288 }
1289
99920dc5
AG
1290 err = drbd_send_dblock(mdev, req);
1291 req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
b411b363 1292
99920dc5 1293 return err;
b411b363
PR
1294}
1295
1296/**
1297 * w_send_read_req() - Worker callback to send a read request (P_DATA_REQUEST) packet
1298 * @mdev: DRBD device.
1299 * @w: work object.
1300 * @cancel: The connection will be closed anyways
1301 */
99920dc5 1302int w_send_read_req(struct drbd_work *w, int cancel)
b411b363
PR
1303{
1304 struct drbd_request *req = container_of(w, struct drbd_request, w);
00d56944 1305 struct drbd_conf *mdev = w->mdev;
99920dc5 1306 int err;
b411b363
PR
1307
1308 if (unlikely(cancel)) {
8554df1c 1309 req_mod(req, SEND_CANCELED);
99920dc5 1310 return 0;
b411b363
PR
1311 }
1312
99920dc5 1313 err = drbd_send_drequest(mdev, P_DATA_REQUEST, req->i.sector, req->i.size,
6c1005e7 1314 (unsigned long)req);
b411b363 1315
99920dc5 1316 req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
b411b363 1317
99920dc5 1318 return err;
b411b363
PR
1319}
1320
99920dc5 1321int w_restart_disk_io(struct drbd_work *w, int cancel)
265be2d0
PR
1322{
1323 struct drbd_request *req = container_of(w, struct drbd_request, w);
00d56944 1324 struct drbd_conf *mdev = w->mdev;
265be2d0 1325
0778286a 1326 if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
181286ad 1327 drbd_al_begin_io(mdev, &req->i);
265be2d0
PR
1328
1329 drbd_req_make_private_bio(req, req->master_bio);
1330 req->private_bio->bi_bdev = mdev->ldev->backing_bdev;
1331 generic_make_request(req->private_bio);
1332
99920dc5 1333 return 0;
265be2d0
PR
1334}
1335
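/* Walk the resync-after dependency chain (disk_conf->resync_after names
 * another minor) and return 0 while any device we depend on is itself
 * resyncing or paused.  Used by _drbd_pause_after()/_drbd_resume_next()
 * under global_state_lock to serialize resyncs in the configured order. */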
1336static int _drbd_may_sync_now(struct drbd_conf *mdev)
1337{
1338 struct drbd_conf *odev = mdev;
95f8efd0 1339 int resync_after;
b411b363
PR
1340
1341 while (1) {
438c8374
PR
1342 if (!odev->ldev)
1343 return 1;
daeda1cc 1344 rcu_read_lock();
95f8efd0 1345 resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
daeda1cc 1346 rcu_read_unlock();
95f8efd0 1347 if (resync_after == -1)
b411b363 1348 return 1;
95f8efd0 1349 odev = minor_to_mdev(resync_after);
841ce241
AG
1350 if (!expect(odev))
1351 return 1;
b411b363
PR
1352 if ((odev->state.conn >= C_SYNC_SOURCE &&
1353 odev->state.conn <= C_PAUSED_SYNC_T) ||
1354 odev->state.aftr_isp || odev->state.peer_isp ||
1355 odev->state.user_isp)
1356 return 0;
1357 }
1358}
1359
1360/**
1361 * _drbd_pause_after() - Pause resync on all devices that may not resync now
1362 * @mdev: DRBD device.
1363 *
1364 * Called from process context only (admin command and after_state_ch).
1365 */
1366static int _drbd_pause_after(struct drbd_conf *mdev)
1367{
1368 struct drbd_conf *odev;
1369 int i, rv = 0;
1370
695d08fa 1371 rcu_read_lock();
81a5d60e 1372 idr_for_each_entry(&minors, odev, i) {
b411b363
PR
1373 if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1374 continue;
1375 if (!_drbd_may_sync_now(odev))
1376 rv |= (__drbd_set_state(_NS(odev, aftr_isp, 1), CS_HARD, NULL)
1377 != SS_NOTHING_TO_DO);
1378 }
695d08fa 1379 rcu_read_unlock();
b411b363
PR
1380
1381 return rv;
1382}
1383
1384/**
1385 * _drbd_resume_next() - Resume resync on all devices that may resync now
1386 * @mdev: DRBD device.
1387 *
1388 * Called from process context only (admin command and worker).
1389 */
1390static int _drbd_resume_next(struct drbd_conf *mdev)
1391{
1392 struct drbd_conf *odev;
1393 int i, rv = 0;
1394
695d08fa 1395 rcu_read_lock();
81a5d60e 1396 idr_for_each_entry(&minors, odev, i) {
b411b363
PR
1397 if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1398 continue;
1399 if (odev->state.aftr_isp) {
1400 if (_drbd_may_sync_now(odev))
1401 rv |= (__drbd_set_state(_NS(odev, aftr_isp, 0),
1402 CS_HARD, NULL)
1403 != SS_NOTHING_TO_DO) ;
1404 }
1405 }
695d08fa 1406 rcu_read_unlock();
b411b363
PR
1407 return rv;
1408}
1409
1410void resume_next_sg(struct drbd_conf *mdev)
1411{
1412 write_lock_irq(&global_state_lock);
1413 _drbd_resume_next(mdev);
1414 write_unlock_irq(&global_state_lock);
1415}
1416
1417void suspend_other_sg(struct drbd_conf *mdev)
1418{
1419 write_lock_irq(&global_state_lock);
1420 _drbd_pause_after(mdev);
1421 write_unlock_irq(&global_state_lock);
1422}
1423
dc97b708 1424/* caller must hold global_state_lock */
95f8efd0 1425enum drbd_ret_code drbd_resync_after_valid(struct drbd_conf *mdev, int o_minor)
b411b363
PR
1426{
1427 struct drbd_conf *odev;
95f8efd0 1428 int resync_after;
b411b363
PR
1429
1430 if (o_minor == -1)
1431 return NO_ERROR;
1432 if (o_minor < -1 || minor_to_mdev(o_minor) == NULL)
95f8efd0 1433 return ERR_RESYNC_AFTER;
b411b363
PR
1434
1435 /* check for loops */
1436 odev = minor_to_mdev(o_minor);
1437 while (1) {
1438 if (odev == mdev)
95f8efd0 1439 return ERR_RESYNC_AFTER_CYCLE;
b411b363 1440
daeda1cc 1441 rcu_read_lock();
95f8efd0 1442 resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
daeda1cc 1443 rcu_read_unlock();
b411b363 1444 /* dependency chain ends here, no cycles. */
95f8efd0 1445 if (resync_after == -1)
b411b363
PR
1446 return NO_ERROR;
1447
1448 /* follow the dependency chain */
95f8efd0 1449 odev = minor_to_mdev(resync_after);
b411b363
PR
1450 }
1451}
1452
dc97b708 1453/* caller must hold global_state_lock */
95f8efd0 1454void drbd_resync_after_changed(struct drbd_conf *mdev)
b411b363
PR
1455{
1456 int changes;
b411b363 1457
dc97b708
PR
1458 do {
1459 changes = _drbd_pause_after(mdev);
1460 changes |= _drbd_resume_next(mdev);
1461 } while (changes);
b411b363
PR
1462}
1463
9bd28d3c
LE
1464void drbd_rs_controller_reset(struct drbd_conf *mdev)
1465{
813472ce
PR
1466 struct fifo_buffer *plan;
1467
9bd28d3c
LE
1468 atomic_set(&mdev->rs_sect_in, 0);
1469 atomic_set(&mdev->rs_sect_ev, 0);
1470 mdev->rs_in_flight = 0;
813472ce
PR
1471
1472 /* Updating the RCU protected object in place is necessary since
1473 this function gets called from atomic context.
1474 It is valid since all other updates also lead to an completely
1475 empty fifo */
1476 rcu_read_lock();
1477 plan = rcu_dereference(mdev->rs_plan_s);
1478 plan->total = 0;
1479 fifo_set(plan, 0);
1480 rcu_read_unlock();
9bd28d3c
LE
1481}
1482
1f04af33
PR
1483void start_resync_timer_fn(unsigned long data)
1484{
1485 struct drbd_conf *mdev = (struct drbd_conf *) data;
1486
d5b27b01 1487 drbd_queue_work(&mdev->tconn->sender_work, &mdev->start_resync_work);
1f04af33
PR
1488}
1489
99920dc5 1490int w_start_resync(struct drbd_work *w, int cancel)
1f04af33 1491{
00d56944
PR
1492 struct drbd_conf *mdev = w->mdev;
1493
1f04af33
PR
1494 if (atomic_read(&mdev->unacked_cnt) || atomic_read(&mdev->rs_pending_cnt)) {
1495 dev_warn(DEV, "w_start_resync later...\n");
1496 mdev->start_resync_timer.expires = jiffies + HZ/10;
1497 add_timer(&mdev->start_resync_timer);
99920dc5 1498 return 0;
1f04af33
PR
1499 }
1500
1501 drbd_start_resync(mdev, C_SYNC_SOURCE);
36baf611 1502 clear_bit(AHEAD_TO_SYNC_SOURCE, &mdev->flags);
99920dc5 1503 return 0;
1f04af33
PR
1504}
1505
b411b363
PR
1506/**
1507 * drbd_start_resync() - Start the resync process
1508 * @mdev: DRBD device.
1509 * @side: Either C_SYNC_SOURCE or C_SYNC_TARGET
1510 *
1511 * This function might bring you directly into one of the
1512 * C_PAUSED_SYNC_* states.
1513 */
1514void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
1515{
1516 union drbd_state ns;
1517 int r;
1518
c4752ef1 1519 if (mdev->state.conn >= C_SYNC_SOURCE && mdev->state.conn < C_AHEAD) {
b411b363
PR
1520 dev_err(DEV, "Resync already running!\n");
1521 return;
1522 }
1523
59817f4f
PR
1524 if (mdev->state.conn < C_AHEAD) {
1525 /* In case a previous resync run was aborted by an IO error/detach on the peer. */
1526 drbd_rs_cancel_all(mdev);
1527 /* This should be done when we abort the resync. We definitely do not
1528 want to have this for connections going back and forth between
1529 Ahead/Behind and SyncSource/SyncTarget */
1530 }
b411b363 1531
e64a3294
PR
1532 if (!test_bit(B_RS_H_DONE, &mdev->flags)) {
1533 if (side == C_SYNC_TARGET) {
1534 /* Since application IO was locked out during C_WF_BITMAP_T and
1535 C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET
1536 we check that we might make the data inconsistent. */
1537 r = drbd_khelper(mdev, "before-resync-target");
1538 r = (r >> 8) & 0xff;
1539 if (r > 0) {
1540 dev_info(DEV, "before-resync-target handler returned %d, "
09b9e797 1541 "dropping connection.\n", r);
38fa9988 1542 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
09b9e797
PR
1543 return;
1544 }
e64a3294
PR
1545 } else /* C_SYNC_SOURCE */ {
1546 r = drbd_khelper(mdev, "before-resync-source");
1547 r = (r >> 8) & 0xff;
1548 if (r > 0) {
1549 if (r == 3) {
1550 dev_info(DEV, "before-resync-source handler returned %d, "
1551 "ignoring. Old userland tools?", r);
1552 } else {
1553 dev_info(DEV, "before-resync-source handler returned %d, "
1554 "dropping connection.\n", r);
38fa9988 1555 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
e64a3294
PR
1556 return;
1557 }
1558 }
09b9e797 1559 }
b411b363
PR
1560 }
1561
e64a3294 1562 if (current == mdev->tconn->worker.task) {
dad20554 1563 /* The worker should not sleep waiting for state_mutex,
e64a3294 1564 that can take long */
8410da8f 1565 if (!mutex_trylock(mdev->state_mutex)) {
e64a3294
PR
1566 set_bit(B_RS_H_DONE, &mdev->flags);
1567 mdev->start_resync_timer.expires = jiffies + HZ/5;
1568 add_timer(&mdev->start_resync_timer);
1569 return;
1570 }
1571 } else {
8410da8f 1572 mutex_lock(mdev->state_mutex);
e64a3294
PR
1573 }
1574 clear_bit(B_RS_H_DONE, &mdev->flags);
b411b363 1575
0cfac5dd 1576 write_lock_irq(&global_state_lock);
b411b363 1577 if (!get_ldev_if_state(mdev, D_NEGOTIATING)) {
0cfac5dd 1578 write_unlock_irq(&global_state_lock);
8410da8f 1579 mutex_unlock(mdev->state_mutex);
b411b363
PR
1580 return;
1581 }
1582
78bae59b 1583 ns = drbd_read_state(mdev);
b411b363
PR
1584
1585 ns.aftr_isp = !_drbd_may_sync_now(mdev);
1586
1587 ns.conn = side;
1588
1589 if (side == C_SYNC_TARGET)
1590 ns.disk = D_INCONSISTENT;
1591 else /* side == C_SYNC_SOURCE */
1592 ns.pdsk = D_INCONSISTENT;
1593
1594 r = __drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
78bae59b 1595 ns = drbd_read_state(mdev);
b411b363
PR
1596
1597 if (ns.conn < C_CONNECTED)
1598 r = SS_UNKNOWN_ERROR;
1599
1600 if (r == SS_SUCCESS) {
1d7734a0
LE
1601 unsigned long tw = drbd_bm_total_weight(mdev);
1602 unsigned long now = jiffies;
1603 int i;
1604
b411b363
PR
1605 mdev->rs_failed = 0;
1606 mdev->rs_paused = 0;
b411b363 1607 mdev->rs_same_csum = 0;
0f0601f4
LE
1608 mdev->rs_last_events = 0;
1609 mdev->rs_last_sect_ev = 0;
1d7734a0
LE
1610 mdev->rs_total = tw;
1611 mdev->rs_start = now;
1612 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
1613 mdev->rs_mark_left[i] = tw;
1614 mdev->rs_mark_time[i] = now;
1615 }
b411b363
PR
1616 _drbd_pause_after(mdev);
1617 }
1618 write_unlock_irq(&global_state_lock);
5a22db89 1619
b411b363
PR
1620 if (r == SS_SUCCESS) {
1621 dev_info(DEV, "Began resync as %s (will sync %lu KB [%lu bits set]).\n",
1622 drbd_conn_str(ns.conn),
1623 (unsigned long) mdev->rs_total << (BM_BLOCK_SHIFT-10),
1624 (unsigned long) mdev->rs_total);
6c922ed5
LE
1625 if (side == C_SYNC_TARGET)
1626 mdev->bm_resync_fo = 0;
1627
1628 /* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid
1629 * with w_send_oos, or the sync target will get confused as to
1630 * how much bits to resync. We cannot do that always, because for an
1631 * empty resync and protocol < 95, we need to do it here, as we call
1632 * drbd_resync_finished from here in that case.
1633 * We drbd_gen_and_send_sync_uuid here for protocol < 96,
1634 * and from after_state_ch otherwise. */
31890f4a 1635 if (side == C_SYNC_SOURCE && mdev->tconn->agreed_pro_version < 96)
6c922ed5 1636 drbd_gen_and_send_sync_uuid(mdev);
b411b363 1637
31890f4a 1638 if (mdev->tconn->agreed_pro_version < 95 && mdev->rs_total == 0) {
af85e8e8
LE
1639 /* This still has a race (about when exactly the peers
1640 * detect connection loss) that can lead to a full sync
1641 * on next handshake. In 8.3.9 we fixed this with explicit
1642 * resync-finished notifications, but the fix
1643 * introduces a protocol change. Sleeping for some
1644 * time longer than the ping interval + timeout on the
1645 * SyncSource, to give the SyncTarget the chance to
1646 * detect connection loss, then waiting for a ping
1647 * response (implicit in drbd_resync_finished) reduces
1648 * the race considerably, but does not solve it. */
44ed167d
PR
1649 if (side == C_SYNC_SOURCE) {
1650 struct net_conf *nc;
1651 int timeo;
1652
1653 rcu_read_lock();
1654 nc = rcu_dereference(mdev->tconn->net_conf);
1655 timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9;
1656 rcu_read_unlock();
1657 schedule_timeout_interruptible(timeo);
1658 }
b411b363 1659 drbd_resync_finished(mdev);
b411b363
PR
1660 }
1661
9bd28d3c 1662 drbd_rs_controller_reset(mdev);
b411b363
PR
1663 /* ns.conn may already be != mdev->state.conn,
1664 * we may have been paused in between, or become paused until
1665 * the timer triggers.
1666 * No matter, that is handled in resync_timer_fn() */
1667 if (ns.conn == C_SYNC_TARGET)
1668 mod_timer(&mdev->resync_timer, jiffies);
1669
1670 drbd_md_sync(mdev);
1671 }
5a22db89 1672 put_ldev(mdev);
8410da8f 1673 mutex_unlock(mdev->state_mutex);
b411b363
PR
1674}
1675
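/* Two ways to take work off a drbd_work_queue: dequeue_work_batch() splices
 * the whole queue onto the caller's list, dequeue_work_item() moves only the
 * first entry.  The worker uses the single-item variant while running,
 * because drbd_queue_work_front() may still add items that have to be
 * processed first (see the comment in drbd_worker() below). */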
1676bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list)
1677{
1678 spin_lock_irq(&queue->q_lock);
1679 list_splice_init(&queue->q, work_list);
1680 spin_unlock_irq(&queue->q_lock);
1681 return !list_empty(work_list);
1682}
1683
1684bool dequeue_work_item(struct drbd_work_queue *queue, struct list_head *work_list)
1685{
1686 spin_lock_irq(&queue->q_lock);
1687 if (!list_empty(&queue->q))
1688 list_move(queue->q.next, work_list);
1689 spin_unlock_irq(&queue->q_lock);
1690 return !list_empty(work_list);
1691}
1692
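/* Main loop of the per-connection worker thread: dequeue single work items
 * from tconn->sender_work, uncork/cork the data socket around idle periods
 * (if tcp_cork is configured), and run each callback with "cancel" set once
 * the connection state drops below C_WF_REPORT_PARAMS.  On shutdown the
 * remaining queue is drained with cancel = 1 and each volume is cleaned up
 * via drbd_mdev_cleanup(). */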
1693int drbd_worker(struct drbd_thread *thi)
1694{
392c8801 1695 struct drbd_tconn *tconn = thi->tconn;
b411b363 1696 struct drbd_work *w = NULL;
0e29d163 1697 struct drbd_conf *mdev;
44ed167d 1698 struct net_conf *nc;
b411b363 1699 LIST_HEAD(work_list);
8c0785a5 1700 int vnr;
44ed167d 1701 int cork;
b411b363 1702
e77a0a5c 1703 while (get_t_state(thi) == RUNNING) {
80822284 1704 drbd_thread_current_set_cpu(thi);
b411b363 1705
8c0785a5
LE
1706 /* as long as we use drbd_queue_work_front(),
1707 * we may only dequeue single work items here, not batches. */
1708 if (list_empty(&work_list))
d5b27b01 1709 dequeue_work_item(&tconn->sender_work, &work_list);
44ed167d 1710
8c0785a5
LE
1711 /* Still nothing to do? Poke TCP, just in case,
1712 * then wait for new work (or signal). */
1713 if (list_empty(&work_list)) {
1714 mutex_lock(&tconn->data.mutex);
44ed167d
PR
1715 rcu_read_lock();
1716 nc = rcu_dereference(tconn->net_conf);
bb77d34e 1717 cork = nc ? nc->tcp_cork : 0;
44ed167d
PR
1718 rcu_read_unlock();
1719
1720 if (tconn->data.socket && cork)
19393e10
PR
1721 drbd_tcp_uncork(tconn->data.socket);
1722 mutex_unlock(&tconn->data.mutex);
b411b363 1723
d5b27b01
LE
1724 wait_event_interruptible(tconn->sender_work.q_wait,
1725 dequeue_work_item(&tconn->sender_work, &work_list));
b411b363 1726
19393e10 1727 mutex_lock(&tconn->data.mutex);
8c0785a5 1728 if (tconn->data.socket && cork)
19393e10
PR
1729 drbd_tcp_cork(tconn->data.socket);
1730 mutex_unlock(&tconn->data.mutex);
b411b363
PR
1731 }
1732
8c0785a5 1733 if (signal_pending(current)) {
b411b363 1734 flush_signals(current);
19393e10
PR
1735 if (get_t_state(thi) == RUNNING) {
1736 conn_warn(tconn, "Worker got an unexpected signal\n");
b411b363 1737 continue;
19393e10 1738 }
b411b363
PR
1739 break;
1740 }
1741
e77a0a5c 1742 if (get_t_state(thi) != RUNNING)
b411b363 1743 break;
b411b363 1744
8c0785a5
LE
1745 while (!list_empty(&work_list)) {
1746 w = list_first_entry(&work_list, struct drbd_work, list);
1747 list_del_init(&w->list);
1748 if (w->cb(w, tconn->cstate < C_WF_REPORT_PARAMS) == 0)
1749 continue;
bbeb641c
PR
1750 if (tconn->cstate >= C_WF_REPORT_PARAMS)
1751 conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD);
b411b363
PR
1752 }
1753 }
b411b363 1754
8c0785a5 1755 do {
b411b363 1756 while (!list_empty(&work_list)) {
8c0785a5 1757 w = list_first_entry(&work_list, struct drbd_work, list);
b411b363 1758 list_del_init(&w->list);
00d56944 1759 w->cb(w, 1);
b411b363 1760 }
d5b27b01 1761 dequeue_work_batch(&tconn->sender_work, &work_list);
8c0785a5 1762 } while (!list_empty(&work_list));
b411b363 1763
c141ebda 1764 rcu_read_lock();
f399002e 1765 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
0e29d163 1766 D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE);
c141ebda
PR
1767 kref_get(&mdev->kref);
1768 rcu_read_unlock();
0e29d163 1769 drbd_mdev_cleanup(mdev);
c141ebda
PR
1770 kref_put(&mdev->kref, &drbd_minor_destroy);
1771 rcu_read_lock();
0e29d163 1772 }
c141ebda 1773 rcu_read_unlock();
b411b363
PR
1774
1775 return 0;
1776}