drbd: Silenced compiler warnings
deliverable/linux.git: drivers/block/drbd/drbd_worker.c
/*
   drbd_worker.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

 */

#include <linux/module.h>
#include <linux/drbd.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/scatterlist.h>

#include "drbd_int.h"
#include "drbd_req.h"

static int w_make_ov_request(struct drbd_work *w, int cancel);


/* endio handlers:
 *   drbd_md_io_complete (defined here)
 *   drbd_request_endio (defined here)
 *   drbd_peer_request_endio (defined here)
 *   bm_async_io_complete (defined in drbd_bitmap.c)
 *
 * For all these callbacks, note the following:
 * The callbacks will be called in irq context by the IDE drivers,
 * and in Softirqs/Tasklets/BH context by the SCSI drivers.
 * Try to get the locking right :)
 *
 */


/* About the global_state_lock
   Each state transition on a device holds a read lock. In case we have
   to evaluate the resync after dependencies, we grab a write lock, because
   we need stable states on all devices for that.  */
rwlock_t global_state_lock;

/* used for synchronous meta data and bitmap IO
 * submitted by drbd_md_sync_page_io()
 */
void drbd_md_io_complete(struct bio *bio, int error)
{
        struct drbd_md_io *md_io;
        struct drbd_conf *mdev;

        md_io = (struct drbd_md_io *)bio->bi_private;
        mdev = container_of(md_io, struct drbd_conf, md_io);

        md_io->error = error;

        md_io->done = 1;
        wake_up(&mdev->misc_wait);
        bio_put(bio);
        drbd_md_put_buffer(mdev);
        put_ldev(mdev);
}

/* reads on behalf of the partner,
 * "submitted" by the receiver
 */
void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(local)
{
        unsigned long flags = 0;
        struct drbd_conf *mdev = peer_req->w.mdev;

        spin_lock_irqsave(&mdev->tconn->req_lock, flags);
        mdev->read_cnt += peer_req->i.size >> 9;
        list_del(&peer_req->w.list);
        if (list_empty(&mdev->read_ee))
                wake_up(&mdev->ee_wait);
        if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
                __drbd_chk_io_error(mdev, false);
        spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);

        drbd_queue_work(&mdev->tconn->data.work, &peer_req->w);
        put_ldev(mdev);
}

/* writes on behalf of the partner, or resync writes,
 * "submitted" by the receiver, final stage.  */
static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local)
{
        unsigned long flags = 0;
        struct drbd_conf *mdev = peer_req->w.mdev;
        struct drbd_interval i;
        int do_wake;
        u64 block_id;
        int do_al_complete_io;

        /* after we moved peer_req to done_ee,
         * we may no longer access it,
         * it may be freed/reused already!
         * (as soon as we release the req_lock) */
        i = peer_req->i;
        do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO;
        block_id = peer_req->block_id;

        spin_lock_irqsave(&mdev->tconn->req_lock, flags);
        mdev->writ_cnt += peer_req->i.size >> 9;
        list_del(&peer_req->w.list); /* has been on active_ee or sync_ee */
        list_add_tail(&peer_req->w.list, &mdev->done_ee);

        /*
         * Do not remove from the write_requests tree here: we did not send the
         * Ack yet and did not wake possibly waiting conflicting requests.
         * Removed from the tree from "drbd_process_done_ee" within the
         * appropriate w.cb (e_end_block/e_end_resync_block) or from
         * _drbd_clear_done_ee.
         */

        do_wake = list_empty(block_id == ID_SYNCER ? &mdev->sync_ee : &mdev->active_ee);

        if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
                __drbd_chk_io_error(mdev, false);
        spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);

        if (block_id == ID_SYNCER)
                drbd_rs_complete_io(mdev, i.sector);

        if (do_wake)
                wake_up(&mdev->ee_wait);

        if (do_al_complete_io)
                drbd_al_complete_io(mdev, &i);

        wake_asender(mdev->tconn);
        put_ldev(mdev);
}

/* writes on behalf of the partner, or resync writes,
 * "submitted" by the receiver.
 */
void drbd_peer_request_endio(struct bio *bio, int error)
{
        struct drbd_peer_request *peer_req = bio->bi_private;
        struct drbd_conf *mdev = peer_req->w.mdev;
        int uptodate = bio_flagged(bio, BIO_UPTODATE);
        int is_write = bio_data_dir(bio) == WRITE;

        if (error && __ratelimit(&drbd_ratelimit_state))
                dev_warn(DEV, "%s: error=%d s=%llus\n",
                                is_write ? "write" : "read", error,
                                (unsigned long long)peer_req->i.sector);
        if (!error && !uptodate) {
                if (__ratelimit(&drbd_ratelimit_state))
                        dev_warn(DEV, "%s: setting error to -EIO s=%llus\n",
                                        is_write ? "write" : "read",
                                        (unsigned long long)peer_req->i.sector);
                /* strange behavior of some lower level drivers...
                 * fail the request by clearing the uptodate flag,
                 * but do not return any error?! */
                error = -EIO;
        }

        if (error)
                set_bit(__EE_WAS_ERROR, &peer_req->flags);

        bio_put(bio); /* no need for the bio anymore */
        if (atomic_dec_and_test(&peer_req->pending_bios)) {
                if (is_write)
                        drbd_endio_write_sec_final(peer_req);
                else
                        drbd_endio_read_sec_final(peer_req);
        }
}

/* read, readA or write requests on R_PRIMARY coming from drbd_make_request
 */
void drbd_request_endio(struct bio *bio, int error)
{
        unsigned long flags;
        struct drbd_request *req = bio->bi_private;
        struct drbd_conf *mdev = req->w.mdev;
        struct bio_and_error m;
        enum drbd_req_event what;
        int uptodate = bio_flagged(bio, BIO_UPTODATE);

        if (!error && !uptodate) {
                dev_warn(DEV, "p %s: setting error to -EIO\n",
                         bio_data_dir(bio) == WRITE ? "write" : "read");
                /* strange behavior of some lower level drivers...
                 * fail the request by clearing the uptodate flag,
                 * but do not return any error?! */
                error = -EIO;
        }

        /* to avoid recursion in __req_mod */
        if (unlikely(error)) {
                what = (bio_data_dir(bio) == WRITE)
                        ? WRITE_COMPLETED_WITH_ERROR
                        : (bio_rw(bio) == READ)
                          ? READ_COMPLETED_WITH_ERROR
                          : READ_AHEAD_COMPLETED_WITH_ERROR;
        } else
                what = COMPLETED_OK;

        bio_put(req->private_bio);
        req->private_bio = ERR_PTR(error);

        /* not req_mod(), we need irqsave here! */
        spin_lock_irqsave(&mdev->tconn->req_lock, flags);
        __req_mod(req, what, &m);
        spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);

        if (m.bio)
                complete_master_bio(mdev, &m);
}

int w_read_retry_remote(struct drbd_work *w, int cancel)
{
        struct drbd_request *req = container_of(w, struct drbd_request, w);
        struct drbd_conf *mdev = w->mdev;

        /* We should not detach for read io-error,
         * but try to WRITE the P_DATA_REPLY to the failed location,
         * to give the disk the chance to relocate that block */

        spin_lock_irq(&mdev->tconn->req_lock);
        if (cancel || mdev->state.pdsk != D_UP_TO_DATE) {
                _req_mod(req, READ_RETRY_REMOTE_CANCELED);
                spin_unlock_irq(&mdev->tconn->req_lock);
                return 0;
        }
        spin_unlock_irq(&mdev->tconn->req_lock);

        return w_send_read_req(w, 0);
}

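/* Checksum helpers: drbd_csum_ee() hashes the payload of a peer request by
 * walking its page chain, while drbd_csum_bio() below hashes a bio segment
 * by segment.  Both feed the same crypto_hash transform, so digests
 * computed over the two representations of the same data are comparable. */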
void drbd_csum_ee(struct drbd_conf *mdev, struct crypto_hash *tfm,
                  struct drbd_peer_request *peer_req, void *digest)
{
        struct hash_desc desc;
        struct scatterlist sg;
        struct page *page = peer_req->pages;
        struct page *tmp;
        unsigned len;

        desc.tfm = tfm;
        desc.flags = 0;

        sg_init_table(&sg, 1);
        crypto_hash_init(&desc);

        while ((tmp = page_chain_next(page))) {
                /* all but the last page will be fully used */
                sg_set_page(&sg, page, PAGE_SIZE, 0);
                crypto_hash_update(&desc, &sg, sg.length);
                page = tmp;
        }
        /* and now the last, possibly only partially used page */
        len = peer_req->i.size & (PAGE_SIZE - 1);
        sg_set_page(&sg, page, len ?: PAGE_SIZE, 0);
        crypto_hash_update(&desc, &sg, sg.length);
        crypto_hash_final(&desc, digest);
}

void drbd_csum_bio(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio *bio, void *digest)
{
        struct hash_desc desc;
        struct scatterlist sg;
        struct bio_vec *bvec;
        int i;

        desc.tfm = tfm;
        desc.flags = 0;

        sg_init_table(&sg, 1);
        crypto_hash_init(&desc);

        __bio_for_each_segment(bvec, bio, i, 0) {
                sg_set_page(&sg, bvec->bv_page, bvec->bv_len, bvec->bv_offset);
                crypto_hash_update(&desc, &sg, sg.length);
        }
        crypto_hash_final(&desc, digest);
}

/* MAYBE merge common code with w_e_end_ov_req */
static int w_e_send_csum(struct drbd_work *w, int cancel)
{
        struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
        struct drbd_conf *mdev = w->mdev;
        int digest_size;
        void *digest;
        int err = 0;

        if (unlikely(cancel))
                goto out;

        if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0))
                goto out;

        digest_size = crypto_hash_digestsize(mdev->tconn->csums_tfm);
        digest = kmalloc(digest_size, GFP_NOIO);
        if (digest) {
                sector_t sector = peer_req->i.sector;
                unsigned int size = peer_req->i.size;
                drbd_csum_ee(mdev, mdev->tconn->csums_tfm, peer_req, digest);
                /* Free peer_req and pages before send.
                 * In case we block on congestion, we could otherwise run into
                 * some distributed deadlock, if the other side blocks on
                 * congestion as well, because our receiver blocks in
                 * drbd_alloc_pages due to pp_in_use > max_buffers. */
                drbd_free_peer_req(mdev, peer_req);
                peer_req = NULL;
                inc_rs_pending(mdev);
                err = drbd_send_drequest_csum(mdev, sector, size,
                                              digest, digest_size,
                                              P_CSUM_RS_REQUEST);
                kfree(digest);
        } else {
                dev_err(DEV, "kmalloc() of digest failed.\n");
                err = -ENOMEM;
        }

out:
        if (peer_req)
                drbd_free_peer_req(mdev, peer_req);

        if (unlikely(err))
                dev_err(DEV, "drbd_send_drequest(..., csum) failed\n");
        return err;
}

#define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)

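/* Submit a local read of [sector, sector + size) on behalf of the
 * checksum-based resync: once the read completes, w_e_send_csum() hashes
 * the data and sends a P_CSUM_RS_REQUEST instead of the full block.
 * Returns 0 on success, -EAGAIN to defer (throttled, or allocation/submit
 * failed), and -EIO if the local disk is gone. */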
static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size)
{
        struct drbd_peer_request *peer_req;

        if (!get_ldev(mdev))
                return -EIO;

        if (drbd_rs_should_slow_down(mdev, sector))
                goto defer;

        /* GFP_TRY, because if there is no memory available right now, this may
         * be rescheduled for later. It is "only" background resync, after all. */
        peer_req = drbd_alloc_peer_req(mdev, ID_SYNCER /* unused */, sector,
                                       size, GFP_TRY);
        if (!peer_req)
                goto defer;

        peer_req->w.cb = w_e_send_csum;
        spin_lock_irq(&mdev->tconn->req_lock);
        list_add(&peer_req->w.list, &mdev->read_ee);
        spin_unlock_irq(&mdev->tconn->req_lock);

        atomic_add(size >> 9, &mdev->rs_sect_ev);
        if (drbd_submit_peer_request(mdev, peer_req, READ, DRBD_FAULT_RS_RD) == 0)
                return 0;

        /* If it failed because of ENOMEM, retry should help.  If it failed
         * because bio_add_page failed (probably broken lower level driver),
         * retry may or may not help.
         * If it does not, you may need to force disconnect. */
        spin_lock_irq(&mdev->tconn->req_lock);
        list_del(&peer_req->w.list);
        spin_unlock_irq(&mdev->tconn->req_lock);

        drbd_free_peer_req(mdev, peer_req);
defer:
        put_ldev(mdev);
        return -EAGAIN;
}

int w_resync_timer(struct drbd_work *w, int cancel)
{
        struct drbd_conf *mdev = w->mdev;
        switch (mdev->state.conn) {
        case C_VERIFY_S:
                w_make_ov_request(w, cancel);
                break;
        case C_SYNC_TARGET:
                w_make_resync_request(w, cancel);
                break;
        }

        return 0;
}

void resync_timer_fn(unsigned long data)
{
        struct drbd_conf *mdev = (struct drbd_conf *) data;

        if (list_empty(&mdev->resync_work.list))
                drbd_queue_work(&mdev->tconn->data.work, &mdev->resync_work);
}

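/* The fifo_buffer below is a ring buffer of planned correction values for
 * the resync controller.  fifo_push() returns the oldest entry (the
 * correction planned "steps" invocations ago, now due) and stores the new
 * value in its slot; fifo_add_val() spreads a correction over all slots.
 * Example: with size = 3, head_index = 0 and values {4, 2, 0},
 * fifo_push(fb, 0) returns 4 and leaves {0, 2, 0} with the head advanced. */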
static void fifo_set(struct fifo_buffer *fb, int value)
{
        int i;

        for (i = 0; i < fb->size; i++)
                fb->values[i] = value;
}

static int fifo_push(struct fifo_buffer *fb, int value)
{
        int ov;

        ov = fb->values[fb->head_index];
        fb->values[fb->head_index++] = value;

        if (fb->head_index >= fb->size)
                fb->head_index = 0;

        return ov;
}

static void fifo_add_val(struct fifo_buffer *fb, int value)
{
        int i;

        for (i = 0; i < fb->size; i++)
                fb->values[i] += value;
}

struct fifo_buffer *fifo_alloc(int fifo_size)
{
        struct fifo_buffer *fb;

        fb = kzalloc(sizeof(struct fifo_buffer) + sizeof(int) * fifo_size, GFP_KERNEL);
        if (!fb)
                return NULL;

        fb->head_index = 0;
        fb->size = fifo_size;
        fb->total = 0;

        return fb;
}

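/* drbd_rs_controller() decides how many sectors to request during the next
 * SLEEP_TIME tick so that roughly "want" sectors stay in flight:
 *
 *     correction = want - rs_in_flight - plan->total
 *
 * and that correction is spread over "steps" future ticks via the plan fifo.
 * Illustrative numbers (not from the source; assuming c_fill_target = 1000
 * sectors, steps = 20, rs_in_flight = 800 and an empty plan): correction is
 * 200, cps is 10, so each of the next 20 ticks requests the sectors that
 * completed in the meantime (sect_in) plus 10 extra, capped by c_max_rate. */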
static int drbd_rs_controller(struct drbd_conf *mdev)
{
        struct disk_conf *dc;
        unsigned int sect_in;  /* Number of sectors that came in since the last turn */
        unsigned int want;     /* The number of sectors we want in the proxy */
        int req_sect; /* Number of sectors to request in this turn */
        int correction; /* Number of sectors more we need in the proxy*/
        int cps; /* correction per invocation of drbd_rs_controller() */
        int steps; /* Number of time steps to plan ahead */
        int curr_corr;
        int max_sect;
        struct fifo_buffer *plan;

        sect_in = atomic_xchg(&mdev->rs_sect_in, 0); /* Number of sectors that came in */
        mdev->rs_in_flight -= sect_in;

        dc = rcu_dereference(mdev->ldev->disk_conf);
        plan = rcu_dereference(mdev->rs_plan_s);

        steps = plan->size; /* (dc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; */

        if (mdev->rs_in_flight + sect_in == 0) { /* At start of resync */
                want = ((dc->resync_rate * 2 * SLEEP_TIME) / HZ) * steps;
        } else { /* normal path */
                want = dc->c_fill_target ? dc->c_fill_target :
                        sect_in * dc->c_delay_target * HZ / (SLEEP_TIME * 10);
        }

        correction = want - mdev->rs_in_flight - plan->total;

        /* Plan ahead */
        cps = correction / steps;
        fifo_add_val(plan, cps);
        plan->total += cps * steps;

        /* What we do in this step */
        curr_corr = fifo_push(plan, 0);
        plan->total -= curr_corr;

        req_sect = sect_in + curr_corr;
        if (req_sect < 0)
                req_sect = 0;

        max_sect = (dc->c_max_rate * 2 * SLEEP_TIME) / HZ;
        if (req_sect > max_sect)
                req_sect = max_sect;

        /*
        dev_warn(DEV, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n",
                 sect_in, mdev->rs_in_flight, want, correction,
                 steps, cps, mdev->rs_planed, curr_corr, req_sect);
        */

        return req_sect;
}

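/* Convert the controller output (512-byte sectors per SLEEP_TIME tick) into
 * a number of BM_BLOCK_SIZE-sized requests, and record the effective rate
 * in c_sync_rate (KiB/s).  Worked example for the fixed-rate branch,
 * assuming SLEEP_TIME is 100 ms (HZ/10) and BM_BLOCK_SIZE is 4 KiB:
 * resync_rate = 400 KiB/s gives 400 / 10 / 4 = 10 requests per tick. */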
static int drbd_rs_number_requests(struct drbd_conf *mdev)
{
        int number;

        rcu_read_lock();
        if (rcu_dereference(mdev->rs_plan_s)->size) {
                number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9);
                mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
        } else {
                mdev->c_sync_rate = rcu_dereference(mdev->ldev->disk_conf)->resync_rate;
                number = SLEEP_TIME * mdev->c_sync_rate  / ((BM_BLOCK_SIZE / 1024) * HZ);
        }
        rcu_read_unlock();

        /* ignore the amount of pending requests, the resync controller should
         * throttle down to incoming reply rate soon enough anyways. */
        return number;
}
533
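/* w_make_resync_request(): issue up to drbd_rs_number_requests() resync data
 * requests per resync timer tick, walking the out-of-sync bitmap from
 * bm_resync_fo, merging adjacent dirty bits into larger aligned requests
 * (up to max_bio_size), and re-arming the resync timer when it requeues. */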
99920dc5 534int w_make_resync_request(struct drbd_work *w, int cancel)
b411b363 535{
00d56944 536 struct drbd_conf *mdev = w->mdev;
b411b363
PR
537 unsigned long bit;
538 sector_t sector;
539 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
1816a2b4 540 int max_bio_size;
e65f440d 541 int number, rollback_i, size;
b411b363 542 int align, queued, sndbuf;
0f0601f4 543 int i = 0;
b411b363
PR
544
545 if (unlikely(cancel))
99920dc5 546 return 0;
b411b363 547
af85e8e8
LE
548 if (mdev->rs_total == 0) {
549 /* empty resync? */
550 drbd_resync_finished(mdev);
99920dc5 551 return 0;
af85e8e8
LE
552 }
553
b411b363
PR
554 if (!get_ldev(mdev)) {
555 /* Since we only need to access mdev->rsync a
556 get_ldev_if_state(mdev,D_FAILED) would be sufficient, but
557 to continue resync with a broken disk makes no sense at
558 all */
559 dev_err(DEV, "Disk broke down during resync!\n");
99920dc5 560 return 0;
b411b363
PR
561 }
562
0cfdd247 563 max_bio_size = queue_max_hw_sectors(mdev->rq_queue) << 9;
e65f440d
LE
564 number = drbd_rs_number_requests(mdev);
565 if (number == 0)
0f0601f4 566 goto requeue;
b411b363 567
b411b363
PR
568 for (i = 0; i < number; i++) {
569 /* Stop generating RS requests, when half of the send buffer is filled */
e42325a5
PR
570 mutex_lock(&mdev->tconn->data.mutex);
571 if (mdev->tconn->data.socket) {
572 queued = mdev->tconn->data.socket->sk->sk_wmem_queued;
573 sndbuf = mdev->tconn->data.socket->sk->sk_sndbuf;
b411b363
PR
574 } else {
575 queued = 1;
576 sndbuf = 0;
577 }
e42325a5 578 mutex_unlock(&mdev->tconn->data.mutex);
b411b363
PR
579 if (queued > sndbuf / 2)
580 goto requeue;
581
582next_sector:
583 size = BM_BLOCK_SIZE;
584 bit = drbd_bm_find_next(mdev, mdev->bm_resync_fo);
585
4b0715f0 586 if (bit == DRBD_END_OF_BITMAP) {
b411b363 587 mdev->bm_resync_fo = drbd_bm_bits(mdev);
b411b363 588 put_ldev(mdev);
99920dc5 589 return 0;
b411b363
PR
590 }
591
592 sector = BM_BIT_TO_SECT(bit);
593
e3555d85
PR
594 if (drbd_rs_should_slow_down(mdev, sector) ||
595 drbd_try_rs_begin_io(mdev, sector)) {
b411b363
PR
596 mdev->bm_resync_fo = bit;
597 goto requeue;
598 }
599 mdev->bm_resync_fo = bit + 1;
600
601 if (unlikely(drbd_bm_test_bit(mdev, bit) == 0)) {
602 drbd_rs_complete_io(mdev, sector);
603 goto next_sector;
604 }
605
1816a2b4 606#if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE
b411b363
PR
607 /* try to find some adjacent bits.
608 * we stop if we have already the maximum req size.
609 *
610 * Additionally always align bigger requests, in order to
611 * be prepared for all stripe sizes of software RAIDs.
b411b363
PR
612 */
613 align = 1;
d207450c 614 rollback_i = i;
b411b363 615 for (;;) {
1816a2b4 616 if (size + BM_BLOCK_SIZE > max_bio_size)
b411b363
PR
617 break;
618
619 /* Be always aligned */
620 if (sector & ((1<<(align+3))-1))
621 break;
622
623 /* do not cross extent boundaries */
624 if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0)
625 break;
626 /* now, is it actually dirty, after all?
627 * caution, drbd_bm_test_bit is tri-state for some
628 * obscure reason; ( b == 0 ) would get the out-of-band
629 * only accidentally right because of the "oddly sized"
630 * adjustment below */
631 if (drbd_bm_test_bit(mdev, bit+1) != 1)
632 break;
633 bit++;
634 size += BM_BLOCK_SIZE;
635 if ((BM_BLOCK_SIZE << align) <= size)
636 align++;
637 i++;
638 }
639 /* if we merged some,
640 * reset the offset to start the next drbd_bm_find_next from */
641 if (size > BM_BLOCK_SIZE)
642 mdev->bm_resync_fo = bit + 1;
643#endif
644
645 /* adjust very last sectors, in case we are oddly sized */
646 if (sector + (size>>9) > capacity)
647 size = (capacity-sector)<<9;
f399002e 648 if (mdev->tconn->agreed_pro_version >= 89 && mdev->tconn->csums_tfm) {
b411b363 649 switch (read_for_csum(mdev, sector, size)) {
80a40e43 650 case -EIO: /* Disk failure */
b411b363 651 put_ldev(mdev);
99920dc5 652 return -EIO;
80a40e43 653 case -EAGAIN: /* allocation failed, or ldev busy */
b411b363
PR
654 drbd_rs_complete_io(mdev, sector);
655 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
d207450c 656 i = rollback_i;
b411b363 657 goto requeue;
80a40e43
LE
658 case 0:
659 /* everything ok */
660 break;
661 default:
662 BUG();
b411b363
PR
663 }
664 } else {
99920dc5
AG
665 int err;
666
b411b363 667 inc_rs_pending(mdev);
99920dc5
AG
668 err = drbd_send_drequest(mdev, P_RS_DATA_REQUEST,
669 sector, size, ID_SYNCER);
670 if (err) {
b411b363
PR
671 dev_err(DEV, "drbd_send_drequest() failed, aborting...\n");
672 dec_rs_pending(mdev);
673 put_ldev(mdev);
99920dc5 674 return err;
b411b363
PR
675 }
676 }
677 }
678
679 if (mdev->bm_resync_fo >= drbd_bm_bits(mdev)) {
680 /* last syncer _request_ was sent,
681 * but the P_RS_DATA_REPLY not yet received. sync will end (and
682 * next sync group will resume), as soon as we receive the last
683 * resync data block, and the last bit is cleared.
684 * until then resync "work" is "inactive" ...
685 */
b411b363 686 put_ldev(mdev);
99920dc5 687 return 0;
b411b363
PR
688 }
689
690 requeue:
778f271d 691 mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
b411b363
PR
692 mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
693 put_ldev(mdev);
99920dc5 694 return 0;
b411b363
PR
695}
696
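/* w_make_ov_request(): the online-verify counterpart of the above; it walks
 * sectors linearly from ov_position instead of the bitmap and sends
 * P_OV_REQUEST packets, throttled by the same resync controller. */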
00d56944 697static int w_make_ov_request(struct drbd_work *w, int cancel)
b411b363 698{
00d56944 699 struct drbd_conf *mdev = w->mdev;
b411b363
PR
700 int number, i, size;
701 sector_t sector;
702 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
703
704 if (unlikely(cancel))
705 return 1;
706
2649f080 707 number = drbd_rs_number_requests(mdev);
b411b363
PR
708
709 sector = mdev->ov_position;
710 for (i = 0; i < number; i++) {
711 if (sector >= capacity) {
b411b363
PR
712 return 1;
713 }
714
715 size = BM_BLOCK_SIZE;
716
e3555d85
PR
717 if (drbd_rs_should_slow_down(mdev, sector) ||
718 drbd_try_rs_begin_io(mdev, sector)) {
b411b363
PR
719 mdev->ov_position = sector;
720 goto requeue;
721 }
722
723 if (sector + (size>>9) > capacity)
724 size = (capacity-sector)<<9;
725
726 inc_rs_pending(mdev);
5b9f499c 727 if (drbd_send_ov_request(mdev, sector, size)) {
b411b363
PR
728 dec_rs_pending(mdev);
729 return 0;
730 }
731 sector += BM_SECT_PER_BIT;
732 }
733 mdev->ov_position = sector;
734
735 requeue:
2649f080 736 mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
b411b363
PR
737 mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
738 return 1;
739}
740
99920dc5 741int w_ov_finished(struct drbd_work *w, int cancel)
b411b363 742{
00d56944 743 struct drbd_conf *mdev = w->mdev;
b411b363 744 kfree(w);
8f7bed77 745 ov_out_of_sync_print(mdev);
b411b363
PR
746 drbd_resync_finished(mdev);
747
99920dc5 748 return 0;
b411b363
PR
749}
750
99920dc5 751static int w_resync_finished(struct drbd_work *w, int cancel)
b411b363 752{
00d56944 753 struct drbd_conf *mdev = w->mdev;
b411b363
PR
754 kfree(w);
755
756 drbd_resync_finished(mdev);
757
99920dc5 758 return 0;
b411b363
PR
759}
760
af85e8e8
LE
761static void ping_peer(struct drbd_conf *mdev)
762{
2a67d8b9
PR
763 struct drbd_tconn *tconn = mdev->tconn;
764
765 clear_bit(GOT_PING_ACK, &tconn->flags);
766 request_ping(tconn);
767 wait_event(tconn->ping_wait,
768 test_bit(GOT_PING_ACK, &tconn->flags) || mdev->state.conn < C_CONNECTED);
af85e8e8
LE
769}
770
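/* drbd_resync_finished(): compute the average throughput (dbdt, in K/sec,
 * from rs_total bits over the elapsed non-paused seconds), report it, and
 * move the disk/peer-disk states and UUIDs forward depending on whether
 * this was a resync or an online verify and whether any blocks failed. */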
b411b363
PR
771int drbd_resync_finished(struct drbd_conf *mdev)
772{
773 unsigned long db, dt, dbdt;
774 unsigned long n_oos;
775 union drbd_state os, ns;
776 struct drbd_work *w;
777 char *khelper_cmd = NULL;
26525618 778 int verify_done = 0;
b411b363
PR
779
780 /* Remove all elements from the resync LRU. Since future actions
781 * might set bits in the (main) bitmap, then the entries in the
782 * resync LRU would be wrong. */
783 if (drbd_rs_del_all(mdev)) {
784 /* In case this is not possible now, most probably because
785 * there are P_RS_DATA_REPLY Packets lingering on the worker's
786 * queue (or even the read operations for those packets
787 * is not finished by now). Retry in 100ms. */
788
20ee6390 789 schedule_timeout_interruptible(HZ / 10);
b411b363
PR
790 w = kmalloc(sizeof(struct drbd_work), GFP_ATOMIC);
791 if (w) {
792 w->cb = w_resync_finished;
9b743da9 793 w->mdev = mdev;
e42325a5 794 drbd_queue_work(&mdev->tconn->data.work, w);
b411b363
PR
795 return 1;
796 }
797 dev_err(DEV, "Warn failed to drbd_rs_del_all() and to kmalloc(w).\n");
798 }
799
800 dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ;
801 if (dt <= 0)
802 dt = 1;
803 db = mdev->rs_total;
804 dbdt = Bit2KB(db/dt);
805 mdev->rs_paused /= HZ;
806
807 if (!get_ldev(mdev))
808 goto out;
809
af85e8e8
LE
810 ping_peer(mdev);
811
87eeee41 812 spin_lock_irq(&mdev->tconn->req_lock);
78bae59b 813 os = drbd_read_state(mdev);
b411b363 814
26525618
LE
815 verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T);
816
b411b363
PR
817 /* This protects us against multiple calls (that can happen in the presence
818 of application IO), and against connectivity loss just before we arrive here. */
819 if (os.conn <= C_CONNECTED)
820 goto out_unlock;
821
822 ns = os;
823 ns.conn = C_CONNECTED;
824
825 dev_info(DEV, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
26525618 826 verify_done ? "Online verify " : "Resync",
b411b363
PR
827 dt + mdev->rs_paused, mdev->rs_paused, dbdt);
828
829 n_oos = drbd_bm_total_weight(mdev);
830
831 if (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) {
832 if (n_oos) {
833 dev_alert(DEV, "Online verify found %lu %dk block out of sync!\n",
834 n_oos, Bit2KB(1));
835 khelper_cmd = "out-of-sync";
836 }
837 } else {
838 D_ASSERT((n_oos - mdev->rs_failed) == 0);
839
840 if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T)
841 khelper_cmd = "after-resync-target";
842
f399002e 843 if (mdev->tconn->csums_tfm && mdev->rs_total) {
b411b363
PR
844 const unsigned long s = mdev->rs_same_csum;
845 const unsigned long t = mdev->rs_total;
846 const int ratio =
847 (t == 0) ? 0 :
848 (t < 100000) ? ((s*100)/t) : (s/(t/100));
24c4830c 849 dev_info(DEV, "%u %% had equal checksums, eliminated: %luK; "
b411b363
PR
850 "transferred %luK total %luK\n",
851 ratio,
852 Bit2KB(mdev->rs_same_csum),
853 Bit2KB(mdev->rs_total - mdev->rs_same_csum),
854 Bit2KB(mdev->rs_total));
855 }
856 }
857
858 if (mdev->rs_failed) {
859 dev_info(DEV, " %lu failed blocks\n", mdev->rs_failed);
860
861 if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
862 ns.disk = D_INCONSISTENT;
863 ns.pdsk = D_UP_TO_DATE;
864 } else {
865 ns.disk = D_UP_TO_DATE;
866 ns.pdsk = D_INCONSISTENT;
867 }
868 } else {
869 ns.disk = D_UP_TO_DATE;
870 ns.pdsk = D_UP_TO_DATE;
871
872 if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
873 if (mdev->p_uuid) {
874 int i;
875 for (i = UI_BITMAP ; i <= UI_HISTORY_END ; i++)
876 _drbd_uuid_set(mdev, i, mdev->p_uuid[i]);
877 drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_CURRENT]);
878 _drbd_uuid_set(mdev, UI_CURRENT, mdev->p_uuid[UI_CURRENT]);
879 } else {
880 dev_err(DEV, "mdev->p_uuid is NULL! BUG\n");
881 }
882 }
883
62b0da3a
LE
884 if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) {
885 /* for verify runs, we don't update uuids here,
886 * so there would be nothing to report. */
887 drbd_uuid_set_bm(mdev, 0UL);
888 drbd_print_uuids(mdev, "updated UUIDs");
889 if (mdev->p_uuid) {
890 /* Now the two UUID sets are equal, update what we
891 * know of the peer. */
892 int i;
893 for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
894 mdev->p_uuid[i] = mdev->ldev->md.uuid[i];
895 }
b411b363
PR
896 }
897 }
898
899 _drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
900out_unlock:
87eeee41 901 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363
PR
902 put_ldev(mdev);
903out:
904 mdev->rs_total = 0;
905 mdev->rs_failed = 0;
906 mdev->rs_paused = 0;
26525618
LE
907 if (verify_done)
908 mdev->ov_start_sector = 0;
b411b363 909
13d42685
LE
910 drbd_md_sync(mdev);
911
b411b363
PR
912 if (khelper_cmd)
913 drbd_khelper(mdev, khelper_cmd);
914
915 return 1;
916}
917
918/* helper */
db830c46 919static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct drbd_peer_request *peer_req)
b411b363 920{
045417f7 921 if (drbd_peer_req_has_active_page(peer_req)) {
b411b363 922 /* This might happen if sendpage() has not finished */
db830c46 923 int i = (peer_req->i.size + PAGE_SIZE -1) >> PAGE_SHIFT;
435f0740
LE
924 atomic_add(i, &mdev->pp_in_use_by_net);
925 atomic_sub(i, &mdev->pp_in_use);
87eeee41 926 spin_lock_irq(&mdev->tconn->req_lock);
db830c46 927 list_add_tail(&peer_req->w.list, &mdev->net_ee);
87eeee41 928 spin_unlock_irq(&mdev->tconn->req_lock);
435f0740 929 wake_up(&drbd_pp_wait);
b411b363 930 } else
3967deb1 931 drbd_free_peer_req(mdev, peer_req);
b411b363
PR
932}
933
934/**
935 * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST
936 * @mdev: DRBD device.
937 * @w: work object.
938 * @cancel: The connection will be closed anyways
939 */
99920dc5 940int w_e_end_data_req(struct drbd_work *w, int cancel)
b411b363 941{
db830c46 942 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
00d56944 943 struct drbd_conf *mdev = w->mdev;
99920dc5 944 int err;
b411b363
PR
945
946 if (unlikely(cancel)) {
3967deb1 947 drbd_free_peer_req(mdev, peer_req);
b411b363 948 dec_unacked(mdev);
99920dc5 949 return 0;
b411b363
PR
950 }
951
db830c46 952 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
99920dc5 953 err = drbd_send_block(mdev, P_DATA_REPLY, peer_req);
b411b363
PR
954 } else {
955 if (__ratelimit(&drbd_ratelimit_state))
956 dev_err(DEV, "Sending NegDReply. sector=%llus.\n",
db830c46 957 (unsigned long long)peer_req->i.sector);
b411b363 958
99920dc5 959 err = drbd_send_ack(mdev, P_NEG_DREPLY, peer_req);
b411b363
PR
960 }
961
962 dec_unacked(mdev);
963
db830c46 964 move_to_net_ee_or_free(mdev, peer_req);
b411b363 965
99920dc5 966 if (unlikely(err))
b411b363 967 dev_err(DEV, "drbd_send_block() failed\n");
99920dc5 968 return err;
b411b363
PR
969}
970
971/**
a209b4ae 972 * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST
b411b363
PR
973 * @mdev: DRBD device.
974 * @w: work object.
975 * @cancel: The connection will be closed anyways
976 */
99920dc5 977int w_e_end_rsdata_req(struct drbd_work *w, int cancel)
b411b363 978{
db830c46 979 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
00d56944 980 struct drbd_conf *mdev = w->mdev;
99920dc5 981 int err;
b411b363
PR
982
983 if (unlikely(cancel)) {
3967deb1 984 drbd_free_peer_req(mdev, peer_req);
b411b363 985 dec_unacked(mdev);
99920dc5 986 return 0;
b411b363
PR
987 }
988
989 if (get_ldev_if_state(mdev, D_FAILED)) {
db830c46 990 drbd_rs_complete_io(mdev, peer_req->i.sector);
b411b363
PR
991 put_ldev(mdev);
992 }
993
d612d309 994 if (mdev->state.conn == C_AHEAD) {
99920dc5 995 err = drbd_send_ack(mdev, P_RS_CANCEL, peer_req);
db830c46 996 } else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
b411b363
PR
997 if (likely(mdev->state.pdsk >= D_INCONSISTENT)) {
998 inc_rs_pending(mdev);
99920dc5 999 err = drbd_send_block(mdev, P_RS_DATA_REPLY, peer_req);
b411b363
PR
1000 } else {
1001 if (__ratelimit(&drbd_ratelimit_state))
1002 dev_err(DEV, "Not sending RSDataReply, "
1003 "partner DISKLESS!\n");
99920dc5 1004 err = 0;
b411b363
PR
1005 }
1006 } else {
1007 if (__ratelimit(&drbd_ratelimit_state))
1008 dev_err(DEV, "Sending NegRSDReply. sector %llus.\n",
db830c46 1009 (unsigned long long)peer_req->i.sector);
b411b363 1010
99920dc5 1011 err = drbd_send_ack(mdev, P_NEG_RS_DREPLY, peer_req);
b411b363
PR
1012
1013 /* update resync data with failure */
db830c46 1014 drbd_rs_failed_io(mdev, peer_req->i.sector, peer_req->i.size);
b411b363
PR
1015 }
1016
1017 dec_unacked(mdev);
1018
db830c46 1019 move_to_net_ee_or_free(mdev, peer_req);
b411b363 1020
99920dc5 1021 if (unlikely(err))
b411b363 1022 dev_err(DEV, "drbd_send_block() failed\n");
99920dc5 1023 return err;
b411b363
PR
1024}
1025
99920dc5 1026int w_e_end_csum_rs_req(struct drbd_work *w, int cancel)
b411b363 1027{
db830c46 1028 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
00d56944 1029 struct drbd_conf *mdev = w->mdev;
b411b363
PR
1030 struct digest_info *di;
1031 int digest_size;
1032 void *digest = NULL;
99920dc5 1033 int err, eq = 0;
b411b363
PR
1034
1035 if (unlikely(cancel)) {
3967deb1 1036 drbd_free_peer_req(mdev, peer_req);
b411b363 1037 dec_unacked(mdev);
99920dc5 1038 return 0;
b411b363
PR
1039 }
1040
1d53f09e 1041 if (get_ldev(mdev)) {
db830c46 1042 drbd_rs_complete_io(mdev, peer_req->i.sector);
1d53f09e
LE
1043 put_ldev(mdev);
1044 }
b411b363 1045
db830c46 1046 di = peer_req->digest;
b411b363 1047
db830c46 1048 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
b411b363
PR
1049 /* quick hack to try to avoid a race against reconfiguration.
1050 * a real fix would be much more involved,
1051 * introducing more locking mechanisms */
f399002e
LE
1052 if (mdev->tconn->csums_tfm) {
1053 digest_size = crypto_hash_digestsize(mdev->tconn->csums_tfm);
b411b363
PR
1054 D_ASSERT(digest_size == di->digest_size);
1055 digest = kmalloc(digest_size, GFP_NOIO);
1056 }
1057 if (digest) {
f399002e 1058 drbd_csum_ee(mdev, mdev->tconn->csums_tfm, peer_req, digest);
b411b363
PR
1059 eq = !memcmp(digest, di->digest, digest_size);
1060 kfree(digest);
1061 }
1062
1063 if (eq) {
db830c46 1064 drbd_set_in_sync(mdev, peer_req->i.sector, peer_req->i.size);
676396d5 1065 /* rs_same_csums unit is BM_BLOCK_SIZE */
db830c46 1066 mdev->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT;
99920dc5 1067 err = drbd_send_ack(mdev, P_RS_IS_IN_SYNC, peer_req);
b411b363
PR
1068 } else {
1069 inc_rs_pending(mdev);
db830c46
AG
1070 peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */
1071 peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */
204bba99 1072 kfree(di);
99920dc5 1073 err = drbd_send_block(mdev, P_RS_DATA_REPLY, peer_req);
b411b363
PR
1074 }
1075 } else {
99920dc5 1076 err = drbd_send_ack(mdev, P_NEG_RS_DREPLY, peer_req);
b411b363
PR
1077 if (__ratelimit(&drbd_ratelimit_state))
1078 dev_err(DEV, "Sending NegDReply. I guess it gets messy.\n");
1079 }
1080
1081 dec_unacked(mdev);
db830c46 1082 move_to_net_ee_or_free(mdev, peer_req);
b411b363 1083
99920dc5 1084 if (unlikely(err))
b411b363 1085 dev_err(DEV, "drbd_send_block/ack() failed\n");
99920dc5 1086 return err;
b411b363
PR
1087}
1088
99920dc5 1089int w_e_end_ov_req(struct drbd_work *w, int cancel)
b411b363 1090{
db830c46 1091 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
00d56944 1092 struct drbd_conf *mdev = w->mdev;
db830c46
AG
1093 sector_t sector = peer_req->i.sector;
1094 unsigned int size = peer_req->i.size;
b411b363
PR
1095 int digest_size;
1096 void *digest;
99920dc5 1097 int err = 0;
b411b363
PR
1098
1099 if (unlikely(cancel))
1100 goto out;
1101
f399002e 1102 digest_size = crypto_hash_digestsize(mdev->tconn->verify_tfm);
b411b363 1103 digest = kmalloc(digest_size, GFP_NOIO);
8f21420e 1104 if (!digest) {
99920dc5 1105 err = 1; /* terminate the connection in case the allocation failed */
8f21420e 1106 goto out;
b411b363
PR
1107 }
1108
db830c46 1109 if (likely(!(peer_req->flags & EE_WAS_ERROR)))
f399002e 1110 drbd_csum_ee(mdev, mdev->tconn->verify_tfm, peer_req, digest);
8f21420e
PR
1111 else
1112 memset(digest, 0, digest_size);
1113
53ea4331
LE
1114 /* Free e and pages before send.
1115 * In case we block on congestion, we could otherwise run into
1116 * some distributed deadlock, if the other side blocks on
1117 * congestion as well, because our receiver blocks in
c37c8ecf 1118 * drbd_alloc_pages due to pp_in_use > max_buffers. */
3967deb1 1119 drbd_free_peer_req(mdev, peer_req);
db830c46 1120 peer_req = NULL;
8f21420e 1121 inc_rs_pending(mdev);
99920dc5
AG
1122 err = drbd_send_drequest_csum(mdev, sector, size, digest, digest_size, P_OV_REPLY);
1123 if (err)
8f21420e
PR
1124 dec_rs_pending(mdev);
1125 kfree(digest);
1126
b411b363 1127out:
db830c46 1128 if (peer_req)
3967deb1 1129 drbd_free_peer_req(mdev, peer_req);
b411b363 1130 dec_unacked(mdev);
99920dc5 1131 return err;
b411b363
PR
1132}
1133
8f7bed77 1134void drbd_ov_out_of_sync_found(struct drbd_conf *mdev, sector_t sector, int size)
b411b363
PR
1135{
1136 if (mdev->ov_last_oos_start + mdev->ov_last_oos_size == sector) {
1137 mdev->ov_last_oos_size += size>>9;
1138 } else {
1139 mdev->ov_last_oos_start = sector;
1140 mdev->ov_last_oos_size = size>>9;
1141 }
1142 drbd_set_out_of_sync(mdev, sector, size);
b411b363
PR
1143}
1144
99920dc5 1145int w_e_end_ov_reply(struct drbd_work *w, int cancel)
b411b363 1146{
db830c46 1147 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
00d56944 1148 struct drbd_conf *mdev = w->mdev;
b411b363 1149 struct digest_info *di;
b411b363 1150 void *digest;
db830c46
AG
1151 sector_t sector = peer_req->i.sector;
1152 unsigned int size = peer_req->i.size;
53ea4331 1153 int digest_size;
99920dc5 1154 int err, eq = 0;
b411b363
PR
1155
1156 if (unlikely(cancel)) {
3967deb1 1157 drbd_free_peer_req(mdev, peer_req);
b411b363 1158 dec_unacked(mdev);
99920dc5 1159 return 0;
b411b363
PR
1160 }
1161
1162 /* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all
1163 * the resync lru has been cleaned up already */
1d53f09e 1164 if (get_ldev(mdev)) {
db830c46 1165 drbd_rs_complete_io(mdev, peer_req->i.sector);
1d53f09e
LE
1166 put_ldev(mdev);
1167 }
b411b363 1168
db830c46 1169 di = peer_req->digest;
b411b363 1170
db830c46 1171 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
f399002e 1172 digest_size = crypto_hash_digestsize(mdev->tconn->verify_tfm);
b411b363
PR
1173 digest = kmalloc(digest_size, GFP_NOIO);
1174 if (digest) {
f399002e 1175 drbd_csum_ee(mdev, mdev->tconn->verify_tfm, peer_req, digest);
b411b363
PR
1176
1177 D_ASSERT(digest_size == di->digest_size);
1178 eq = !memcmp(digest, di->digest, digest_size);
1179 kfree(digest);
1180 }
b411b363
PR
1181 }
1182
9676c760
LE
1183 /* Free peer_req and pages before send.
1184 * In case we block on congestion, we could otherwise run into
1185 * some distributed deadlock, if the other side blocks on
1186 * congestion as well, because our receiver blocks in
c37c8ecf 1187 * drbd_alloc_pages due to pp_in_use > max_buffers. */
3967deb1 1188 drbd_free_peer_req(mdev, peer_req);
b411b363 1189 if (!eq)
8f7bed77 1190 drbd_ov_out_of_sync_found(mdev, sector, size);
b411b363 1191 else
8f7bed77 1192 ov_out_of_sync_print(mdev);
b411b363 1193
99920dc5 1194 err = drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size,
fa79abd8 1195 eq ? ID_IN_SYNC : ID_OUT_OF_SYNC);
b411b363 1196
53ea4331 1197 dec_unacked(mdev);
b411b363 1198
ea5442af
LE
1199 --mdev->ov_left;
1200
1201 /* let's advance progress step marks only for every other megabyte */
1202 if ((mdev->ov_left & 0x200) == 0x200)
1203 drbd_advance_rs_marks(mdev, mdev->ov_left);
1204
1205 if (mdev->ov_left == 0) {
8f7bed77 1206 ov_out_of_sync_print(mdev);
b411b363
PR
1207 drbd_resync_finished(mdev);
1208 }
1209
99920dc5 1210 return err;
b411b363
PR
1211}
1212
99920dc5 1213int w_prev_work_done(struct drbd_work *w, int cancel)
b411b363
PR
1214{
1215 struct drbd_wq_barrier *b = container_of(w, struct drbd_wq_barrier, w);
00d56944 1216
b411b363 1217 complete(&b->done);
99920dc5 1218 return 0;
b411b363
PR
1219}
1220
99920dc5 1221int w_send_barrier(struct drbd_work *w, int cancel)
b411b363 1222{
9f5bdc33 1223 struct drbd_socket *sock;
b411b363 1224 struct drbd_tl_epoch *b = container_of(w, struct drbd_tl_epoch, w);
00d56944 1225 struct drbd_conf *mdev = w->mdev;
9f5bdc33 1226 struct p_barrier *p;
b411b363
PR
1227
1228 /* really avoid racing with tl_clear. w.cb may have been referenced
1229 * just before it was reassigned and re-queued, so double check that.
1230 * actually, this race was harmless, since we only try to send the
1231 * barrier packet here, and otherwise do nothing with the object.
1232 * but compare with the head of w_clear_epoch */
87eeee41 1233 spin_lock_irq(&mdev->tconn->req_lock);
b411b363
PR
1234 if (w->cb != w_send_barrier || mdev->state.conn < C_CONNECTED)
1235 cancel = 1;
87eeee41 1236 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363 1237 if (cancel)
b411b363 1238 return 0;
99920dc5 1239
9f5bdc33
AG
1240 sock = &mdev->tconn->data;
1241 p = drbd_prepare_command(mdev, sock);
1242 if (!p)
1243 return -EIO;
b411b363
PR
1244 p->barrier = b->br_number;
1245 /* inc_ap_pending was done where this was queued.
1246 * dec_ap_pending will be done in got_BarrierAck
1247 * or (on connection loss) in w_clear_epoch. */
9f5bdc33 1248 return drbd_send_command(mdev, sock, P_BARRIER, sizeof(*p), NULL, 0);
b411b363
PR
1249}
1250
99920dc5 1251int w_send_write_hint(struct drbd_work *w, int cancel)
b411b363 1252{
00d56944 1253 struct drbd_conf *mdev = w->mdev;
9f5bdc33
AG
1254 struct drbd_socket *sock;
1255
b411b363 1256 if (cancel)
99920dc5 1257 return 0;
9f5bdc33
AG
1258 sock = &mdev->tconn->data;
1259 if (!drbd_prepare_command(mdev, sock))
1260 return -EIO;
e658983a 1261 return drbd_send_command(mdev, sock, P_UNPLUG_REMOTE, 0, NULL, 0);
b411b363
PR
1262}
1263
8f7bed77 1264int w_send_out_of_sync(struct drbd_work *w, int cancel)
73a01a18
PR
1265{
1266 struct drbd_request *req = container_of(w, struct drbd_request, w);
00d56944 1267 struct drbd_conf *mdev = w->mdev;
99920dc5 1268 int err;
73a01a18
PR
1269
1270 if (unlikely(cancel)) {
8554df1c 1271 req_mod(req, SEND_CANCELED);
99920dc5 1272 return 0;
73a01a18
PR
1273 }
1274
8f7bed77 1275 err = drbd_send_out_of_sync(mdev, req);
8554df1c 1276 req_mod(req, OOS_HANDED_TO_NETWORK);
73a01a18 1277
99920dc5 1278 return err;
73a01a18
PR
1279}
1280
b411b363
PR
1281/**
1282 * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request
1283 * @mdev: DRBD device.
1284 * @w: work object.
1285 * @cancel: The connection will be closed anyways
1286 */
99920dc5 1287int w_send_dblock(struct drbd_work *w, int cancel)
b411b363
PR
1288{
1289 struct drbd_request *req = container_of(w, struct drbd_request, w);
00d56944 1290 struct drbd_conf *mdev = w->mdev;
99920dc5 1291 int err;
b411b363
PR
1292
1293 if (unlikely(cancel)) {
8554df1c 1294 req_mod(req, SEND_CANCELED);
99920dc5 1295 return 0;
b411b363
PR
1296 }
1297
99920dc5
AG
1298 err = drbd_send_dblock(mdev, req);
1299 req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
b411b363 1300
99920dc5 1301 return err;
b411b363
PR
1302}
1303
1304/**
1305 * w_send_read_req() - Worker callback to send a read request (P_DATA_REQUEST) packet
1306 * @mdev: DRBD device.
1307 * @w: work object.
1308 * @cancel: The connection will be closed anyways
1309 */
99920dc5 1310int w_send_read_req(struct drbd_work *w, int cancel)
b411b363
PR
1311{
1312 struct drbd_request *req = container_of(w, struct drbd_request, w);
00d56944 1313 struct drbd_conf *mdev = w->mdev;
99920dc5 1314 int err;
b411b363
PR
1315
1316 if (unlikely(cancel)) {
8554df1c 1317 req_mod(req, SEND_CANCELED);
99920dc5 1318 return 0;
b411b363
PR
1319 }
1320
99920dc5 1321 err = drbd_send_drequest(mdev, P_DATA_REQUEST, req->i.sector, req->i.size,
6c1005e7 1322 (unsigned long)req);
b411b363 1323
99920dc5 1324 req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
b411b363 1325
99920dc5 1326 return err;
b411b363
PR
1327}
1328
99920dc5 1329int w_restart_disk_io(struct drbd_work *w, int cancel)
265be2d0
PR
1330{
1331 struct drbd_request *req = container_of(w, struct drbd_request, w);
00d56944 1332 struct drbd_conf *mdev = w->mdev;
265be2d0 1333
0778286a 1334 if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
181286ad 1335 drbd_al_begin_io(mdev, &req->i);
265be2d0
PR
1336
1337 drbd_req_make_private_bio(req, req->master_bio);
1338 req->private_bio->bi_bdev = mdev->ldev->backing_bdev;
1339 generic_make_request(req->private_bio);
1340
99920dc5 1341 return 0;
265be2d0
PR
1342}
1343
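/* Resync-after dependencies: each device may name another minor it must
 * wait for (disk_conf->resync_after).  _drbd_may_sync_now() walks that
 * chain until it ends (-1) and returns 0 if any device along it is
 * currently syncing or paused, in which case _drbd_pause_after() sets
 * aftr_isp on the dependent devices and _drbd_resume_next() clears it. */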
b411b363
PR
1344static int _drbd_may_sync_now(struct drbd_conf *mdev)
1345{
1346 struct drbd_conf *odev = mdev;
95f8efd0 1347 int resync_after;
b411b363
PR
1348
1349 while (1) {
438c8374
PR
1350 if (!odev->ldev)
1351 return 1;
daeda1cc 1352 rcu_read_lock();
95f8efd0 1353 resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
daeda1cc 1354 rcu_read_unlock();
95f8efd0 1355 if (resync_after == -1)
b411b363 1356 return 1;
95f8efd0 1357 odev = minor_to_mdev(resync_after);
841ce241
AG
1358 if (!expect(odev))
1359 return 1;
b411b363
PR
1360 if ((odev->state.conn >= C_SYNC_SOURCE &&
1361 odev->state.conn <= C_PAUSED_SYNC_T) ||
1362 odev->state.aftr_isp || odev->state.peer_isp ||
1363 odev->state.user_isp)
1364 return 0;
1365 }
1366}
1367
1368/**
1369 * _drbd_pause_after() - Pause resync on all devices that may not resync now
1370 * @mdev: DRBD device.
1371 *
1372 * Called from process context only (admin command and after_state_ch).
1373 */
1374static int _drbd_pause_after(struct drbd_conf *mdev)
1375{
1376 struct drbd_conf *odev;
1377 int i, rv = 0;
1378
695d08fa 1379 rcu_read_lock();
81a5d60e 1380 idr_for_each_entry(&minors, odev, i) {
b411b363
PR
1381 if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1382 continue;
1383 if (!_drbd_may_sync_now(odev))
1384 rv |= (__drbd_set_state(_NS(odev, aftr_isp, 1), CS_HARD, NULL)
1385 != SS_NOTHING_TO_DO);
1386 }
695d08fa 1387 rcu_read_unlock();
b411b363
PR
1388
1389 return rv;
1390}
1391
1392/**
1393 * _drbd_resume_next() - Resume resync on all devices that may resync now
1394 * @mdev: DRBD device.
1395 *
1396 * Called from process context only (admin command and worker).
1397 */
1398static int _drbd_resume_next(struct drbd_conf *mdev)
1399{
1400 struct drbd_conf *odev;
1401 int i, rv = 0;
1402
695d08fa 1403 rcu_read_lock();
81a5d60e 1404 idr_for_each_entry(&minors, odev, i) {
b411b363
PR
1405 if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1406 continue;
1407 if (odev->state.aftr_isp) {
1408 if (_drbd_may_sync_now(odev))
1409 rv |= (__drbd_set_state(_NS(odev, aftr_isp, 0),
1410 CS_HARD, NULL)
1411 != SS_NOTHING_TO_DO) ;
1412 }
1413 }
695d08fa 1414 rcu_read_unlock();
b411b363
PR
1415 return rv;
1416}
1417
1418void resume_next_sg(struct drbd_conf *mdev)
1419{
1420 write_lock_irq(&global_state_lock);
1421 _drbd_resume_next(mdev);
1422 write_unlock_irq(&global_state_lock);
1423}
1424
1425void suspend_other_sg(struct drbd_conf *mdev)
1426{
1427 write_lock_irq(&global_state_lock);
1428 _drbd_pause_after(mdev);
1429 write_unlock_irq(&global_state_lock);
1430}
1431
dc97b708 1432/* caller must hold global_state_lock */
95f8efd0 1433enum drbd_ret_code drbd_resync_after_valid(struct drbd_conf *mdev, int o_minor)
b411b363
PR
1434{
1435 struct drbd_conf *odev;
95f8efd0 1436 int resync_after;
b411b363
PR
1437
1438 if (o_minor == -1)
1439 return NO_ERROR;
1440 if (o_minor < -1 || minor_to_mdev(o_minor) == NULL)
95f8efd0 1441 return ERR_RESYNC_AFTER;
b411b363
PR
1442
1443 /* check for loops */
1444 odev = minor_to_mdev(o_minor);
1445 while (1) {
1446 if (odev == mdev)
95f8efd0 1447 return ERR_RESYNC_AFTER_CYCLE;
b411b363 1448
daeda1cc 1449 rcu_read_lock();
95f8efd0 1450 resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
daeda1cc 1451 rcu_read_unlock();
b411b363 1452 /* dependency chain ends here, no cycles. */
95f8efd0 1453 if (resync_after == -1)
b411b363
PR
1454 return NO_ERROR;
1455
1456 /* follow the dependency chain */
95f8efd0 1457 odev = minor_to_mdev(resync_after);
b411b363
PR
1458 }
1459}
1460
dc97b708 1461/* caller must hold global_state_lock */
95f8efd0 1462void drbd_resync_after_changed(struct drbd_conf *mdev)
b411b363
PR
1463{
1464 int changes;
b411b363 1465
dc97b708
PR
1466 do {
1467 changes = _drbd_pause_after(mdev);
1468 changes |= _drbd_resume_next(mdev);
1469 } while (changes);
b411b363
PR
1470}
1471
9bd28d3c
LE
1472void drbd_rs_controller_reset(struct drbd_conf *mdev)
1473{
813472ce
PR
1474 struct fifo_buffer *plan;
1475
9bd28d3c
LE
1476 atomic_set(&mdev->rs_sect_in, 0);
1477 atomic_set(&mdev->rs_sect_ev, 0);
1478 mdev->rs_in_flight = 0;
813472ce
PR
1479
1480 /* Updating the RCU protected object in place is necessary since
1481 this function gets called from atomic context.
1482 It is valid since all other updates also lead to an completely
1483 empty fifo */
1484 rcu_read_lock();
1485 plan = rcu_dereference(mdev->rs_plan_s);
1486 plan->total = 0;
1487 fifo_set(plan, 0);
1488 rcu_read_unlock();
9bd28d3c
LE
1489}
1490
1f04af33
PR
1491void start_resync_timer_fn(unsigned long data)
1492{
1493 struct drbd_conf *mdev = (struct drbd_conf *) data;
1494
1495 drbd_queue_work(&mdev->tconn->data.work, &mdev->start_resync_work);
1496}
1497
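/* w_start_resync(): start the resync as sync source once all outstanding
 * acks and pending resync replies have drained; otherwise re-arm the
 * start_resync timer and retry in 100 ms. */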
99920dc5 1498int w_start_resync(struct drbd_work *w, int cancel)
1f04af33 1499{
00d56944
PR
1500 struct drbd_conf *mdev = w->mdev;
1501
1f04af33
PR
1502 if (atomic_read(&mdev->unacked_cnt) || atomic_read(&mdev->rs_pending_cnt)) {
1503 dev_warn(DEV, "w_start_resync later...\n");
1504 mdev->start_resync_timer.expires = jiffies + HZ/10;
1505 add_timer(&mdev->start_resync_timer);
99920dc5 1506 return 0;
1f04af33
PR
1507 }
1508
1509 drbd_start_resync(mdev, C_SYNC_SOURCE);
1510 clear_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags);
99920dc5 1511 return 0;
1f04af33
PR
1512}
1513
b411b363
PR
1514/**
1515 * drbd_start_resync() - Start the resync process
1516 * @mdev: DRBD device.
1517 * @side: Either C_SYNC_SOURCE or C_SYNC_TARGET
1518 *
1519 * This function might bring you directly into one of the
1520 * C_PAUSED_SYNC_* states.
1521 */
1522void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
1523{
1524 union drbd_state ns;
1525 int r;
1526
c4752ef1 1527 if (mdev->state.conn >= C_SYNC_SOURCE && mdev->state.conn < C_AHEAD) {
b411b363
PR
1528 dev_err(DEV, "Resync already running!\n");
1529 return;
1530 }
1531
59817f4f
PR
1532 if (mdev->state.conn < C_AHEAD) {
1533 /* In case a previous resync run was aborted by an IO error/detach on the peer. */
1534 drbd_rs_cancel_all(mdev);
1535 /* This should be done when we abort the resync. We definitely do not
1536 want to have this for connections going back and forth between
1537 Ahead/Behind and SyncSource/SyncTarget */
1538 }
b411b363 1539
e64a3294
PR
1540 if (!test_bit(B_RS_H_DONE, &mdev->flags)) {
1541 if (side == C_SYNC_TARGET) {
1542 /* Since application IO was locked out during C_WF_BITMAP_T and
1543 C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET
1544 we check that we might make the data inconsistent. */
1545 r = drbd_khelper(mdev, "before-resync-target");
1546 r = (r >> 8) & 0xff;
1547 if (r > 0) {
1548 dev_info(DEV, "before-resync-target handler returned %d, "
09b9e797 1549 "dropping connection.\n", r);
38fa9988 1550 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
09b9e797
PR
1551 return;
1552 }
e64a3294
PR
1553 } else /* C_SYNC_SOURCE */ {
1554 r = drbd_khelper(mdev, "before-resync-source");
1555 r = (r >> 8) & 0xff;
1556 if (r > 0) {
1557 if (r == 3) {
1558 dev_info(DEV, "before-resync-source handler returned %d, "
1559 "ignoring. Old userland tools?", r);
1560 } else {
1561 dev_info(DEV, "before-resync-source handler returned %d, "
1562 "dropping connection.\n", r);
38fa9988 1563 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
e64a3294
PR
1564 return;
1565 }
1566 }
09b9e797 1567 }
b411b363
PR
1568 }
1569
e64a3294 1570 if (current == mdev->tconn->worker.task) {
dad20554 1571 /* The worker should not sleep waiting for state_mutex,
e64a3294 1572 that can take long */
8410da8f 1573 if (!mutex_trylock(mdev->state_mutex)) {
e64a3294
PR
1574 set_bit(B_RS_H_DONE, &mdev->flags);
1575 mdev->start_resync_timer.expires = jiffies + HZ/5;
1576 add_timer(&mdev->start_resync_timer);
1577 return;
1578 }
1579 } else {
8410da8f 1580 mutex_lock(mdev->state_mutex);
e64a3294
PR
1581 }
1582 clear_bit(B_RS_H_DONE, &mdev->flags);
b411b363
PR
1583
1584 if (!get_ldev_if_state(mdev, D_NEGOTIATING)) {
8410da8f 1585 mutex_unlock(mdev->state_mutex);
b411b363
PR
1586 return;
1587 }
1588
b411b363 1589 write_lock_irq(&global_state_lock);
78bae59b 1590 ns = drbd_read_state(mdev);
b411b363
PR
1591
1592 ns.aftr_isp = !_drbd_may_sync_now(mdev);
1593
1594 ns.conn = side;
1595
1596 if (side == C_SYNC_TARGET)
1597 ns.disk = D_INCONSISTENT;
1598 else /* side == C_SYNC_SOURCE */
1599 ns.pdsk = D_INCONSISTENT;
1600
1601 r = __drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
78bae59b 1602 ns = drbd_read_state(mdev);
b411b363
PR
1603
1604 if (ns.conn < C_CONNECTED)
1605 r = SS_UNKNOWN_ERROR;
1606
1607 if (r == SS_SUCCESS) {
1d7734a0
LE
1608 unsigned long tw = drbd_bm_total_weight(mdev);
1609 unsigned long now = jiffies;
1610 int i;
1611
b411b363
PR
1612 mdev->rs_failed = 0;
1613 mdev->rs_paused = 0;
b411b363 1614 mdev->rs_same_csum = 0;
0f0601f4
LE
1615 mdev->rs_last_events = 0;
1616 mdev->rs_last_sect_ev = 0;
1d7734a0
LE
1617 mdev->rs_total = tw;
1618 mdev->rs_start = now;
1619 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
1620 mdev->rs_mark_left[i] = tw;
1621 mdev->rs_mark_time[i] = now;
1622 }
b411b363
PR
1623 _drbd_pause_after(mdev);
1624 }
1625 write_unlock_irq(&global_state_lock);
5a22db89 1626
b411b363
PR
1627 if (r == SS_SUCCESS) {
1628 dev_info(DEV, "Began resync as %s (will sync %lu KB [%lu bits set]).\n",
1629 drbd_conn_str(ns.conn),
1630 (unsigned long) mdev->rs_total << (BM_BLOCK_SHIFT-10),
1631 (unsigned long) mdev->rs_total);
6c922ed5
LE
1632 if (side == C_SYNC_TARGET)
1633 mdev->bm_resync_fo = 0;
1634
1635 /* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid
1636 * with w_send_oos, or the sync target will get confused as to
1637 * how much bits to resync. We cannot do that always, because for an
1638 * empty resync and protocol < 95, we need to do it here, as we call
1639 * drbd_resync_finished from here in that case.
1640 * We drbd_gen_and_send_sync_uuid here for protocol < 96,
1641 * and from after_state_ch otherwise. */
31890f4a 1642 if (side == C_SYNC_SOURCE && mdev->tconn->agreed_pro_version < 96)
6c922ed5 1643 drbd_gen_and_send_sync_uuid(mdev);
b411b363 1644
31890f4a 1645 if (mdev->tconn->agreed_pro_version < 95 && mdev->rs_total == 0) {
af85e8e8
LE
1646 /* This still has a race (about when exactly the peers
1647 * detect connection loss) that can lead to a full sync
1648 * on next handshake. In 8.3.9 we fixed this with explicit
1649 * resync-finished notifications, but the fix
1650 * introduces a protocol change. Sleeping for some
1651 * time longer than the ping interval + timeout on the
1652 * SyncSource, to give the SyncTarget the chance to
1653 * detect connection loss, then waiting for a ping
1654 * response (implicit in drbd_resync_finished) reduces
1655 * the race considerably, but does not solve it. */
44ed167d
PR
1656 if (side == C_SYNC_SOURCE) {
1657 struct net_conf *nc;
1658 int timeo;
1659
1660 rcu_read_lock();
1661 nc = rcu_dereference(mdev->tconn->net_conf);
1662 timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9;
1663 rcu_read_unlock();
1664 schedule_timeout_interruptible(timeo);
1665 }
b411b363 1666 drbd_resync_finished(mdev);
b411b363
PR
1667 }
1668
9bd28d3c 1669 drbd_rs_controller_reset(mdev);
b411b363
PR
1670 /* ns.conn may already be != mdev->state.conn,
1671 * we may have been paused in between, or become paused until
1672 * the timer triggers.
1673 * No matter, that is handled in resync_timer_fn() */
1674 if (ns.conn == C_SYNC_TARGET)
1675 mod_timer(&mdev->resync_timer, jiffies);
1676
1677 drbd_md_sync(mdev);
1678 }
5a22db89 1679 put_ldev(mdev);
8410da8f 1680 mutex_unlock(mdev->state_mutex);
b411b363
PR
1681}
1682
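/* drbd_worker(): per-connection worker thread.  It sleeps on the work queue
 * semaphore (uncorking the data socket while idle and re-corking when work
 * arrives, if tcp_cork is enabled), runs each queued work callback, and on
 * exit cancels the remaining work and cleans up all volumes. */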
1683int drbd_worker(struct drbd_thread *thi)
1684{
392c8801 1685 struct drbd_tconn *tconn = thi->tconn;
b411b363 1686 struct drbd_work *w = NULL;
0e29d163 1687 struct drbd_conf *mdev;
44ed167d 1688 struct net_conf *nc;
b411b363 1689 LIST_HEAD(work_list);
f399002e 1690 int vnr, intr = 0;
44ed167d 1691 int cork;
b411b363 1692
e77a0a5c 1693 while (get_t_state(thi) == RUNNING) {
80822284 1694 drbd_thread_current_set_cpu(thi);
b411b363 1695
19393e10
PR
1696 if (down_trylock(&tconn->data.work.s)) {
1697 mutex_lock(&tconn->data.mutex);
44ed167d
PR
1698
1699 rcu_read_lock();
1700 nc = rcu_dereference(tconn->net_conf);
bb77d34e 1701 cork = nc ? nc->tcp_cork : 0;
44ed167d
PR
1702 rcu_read_unlock();
1703
1704 if (tconn->data.socket && cork)
19393e10
PR
1705 drbd_tcp_uncork(tconn->data.socket);
1706 mutex_unlock(&tconn->data.mutex);
b411b363 1707
19393e10 1708 intr = down_interruptible(&tconn->data.work.s);
b411b363 1709
19393e10 1710 mutex_lock(&tconn->data.mutex);
44ed167d 1711 if (tconn->data.socket && cork)
19393e10
PR
1712 drbd_tcp_cork(tconn->data.socket);
1713 mutex_unlock(&tconn->data.mutex);
b411b363
PR
1714 }
1715
1716 if (intr) {
b411b363 1717 flush_signals(current);
19393e10
PR
1718 if (get_t_state(thi) == RUNNING) {
1719 conn_warn(tconn, "Worker got an unexpected signal\n");
b411b363 1720 continue;
19393e10 1721 }
b411b363
PR
1722 break;
1723 }
1724
e77a0a5c 1725 if (get_t_state(thi) != RUNNING)
b411b363
PR
1726 break;
1727 /* With this break, we have done a down() but not consumed
1728 the entry from the list. The cleanup code takes care of
1729 this... */
1730
1731 w = NULL;
19393e10
PR
1732 spin_lock_irq(&tconn->data.work.q_lock);
1733 if (list_empty(&tconn->data.work.q)) {
b411b363
PR
1734 /* something terribly wrong in our logic.
1735 * we were able to down() the semaphore,
1736 * but the list is empty... doh.
1737 *
1738 * what is the best thing to do now?
1739 * try again from scratch, restarting the receiver,
1740 * asender, whatnot? could break even more ugly,
1741 * e.g. when we are primary, but no good local data.
1742 *
1743 * I'll try to get away just starting over this loop.
1744 */
19393e10
PR
1745 conn_warn(tconn, "Work list unexpectedly empty\n");
1746 spin_unlock_irq(&tconn->data.work.q_lock);
b411b363
PR
1747 continue;
1748 }
19393e10 1749 w = list_entry(tconn->data.work.q.next, struct drbd_work, list);
b411b363 1750 list_del_init(&w->list);
19393e10 1751 spin_unlock_irq(&tconn->data.work.q_lock);
b411b363 1752
99920dc5 1753 if (w->cb(w, tconn->cstate < C_WF_REPORT_PARAMS)) {
b411b363 1754 /* dev_warn(DEV, "worker: a callback failed! \n"); */
bbeb641c
PR
1755 if (tconn->cstate >= C_WF_REPORT_PARAMS)
1756 conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD);
b411b363
PR
1757 }
1758 }
b411b363 1759
19393e10
PR
1760 spin_lock_irq(&tconn->data.work.q_lock);
1761 while (!list_empty(&tconn->data.work.q)) {
1762 list_splice_init(&tconn->data.work.q, &work_list);
1763 spin_unlock_irq(&tconn->data.work.q_lock);
b411b363
PR
1764
1765 while (!list_empty(&work_list)) {
1766 w = list_entry(work_list.next, struct drbd_work, list);
1767 list_del_init(&w->list);
00d56944 1768 w->cb(w, 1);
b411b363
PR
1769 }
1770
19393e10 1771 spin_lock_irq(&tconn->data.work.q_lock);
b411b363 1772 }
19393e10 1773 sema_init(&tconn->data.work.s, 0);
b411b363
PR
1774 /* DANGEROUS race: if someone did queue his work within the spinlock,
1775 * but up() ed outside the spinlock, we could get an up() on the
1776 * semaphore without corresponding list entry.
1777 * So don't do that.
1778 */
19393e10 1779 spin_unlock_irq(&tconn->data.work.q_lock);
b411b363 1780
c141ebda 1781 rcu_read_lock();
f399002e 1782 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
0e29d163 1783 D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE);
c141ebda
PR
1784 kref_get(&mdev->kref);
1785 rcu_read_unlock();
0e29d163 1786 drbd_mdev_cleanup(mdev);
c141ebda
PR
1787 kref_put(&mdev->kref, &drbd_minor_destroy);
1788 rcu_read_lock();
0e29d163 1789 }
c141ebda 1790 rcu_read_unlock();
b411b363
PR
1791
1792 return 0;
1793}