drbd: Refer to connect-int consistently throughout the code
drivers/block/drbd/drbd_worker.c
1/*
2 drbd_worker.c
3
4 This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6 Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7 Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8 Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9
10 drbd is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
14
15 drbd is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with drbd; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
24 */
25
b411b363 26#include <linux/module.h>
b411b363
PR
27#include <linux/drbd.h>
28#include <linux/sched.h>
b411b363
PR
29#include <linux/wait.h>
30#include <linux/mm.h>
31#include <linux/memcontrol.h>
32#include <linux/mm_inline.h>
33#include <linux/slab.h>
34#include <linux/random.h>
b411b363
PR
35#include <linux/string.h>
36#include <linux/scatterlist.h>
37
38#include "drbd_int.h"
39#include "drbd_req.h"
b411b363 40
00d56944 41static int w_make_ov_request(struct drbd_work *w, int cancel);
b411b363
PR
42
43
c5a91619
AG
44/* endio handlers:
45 * drbd_md_io_complete (defined here)
fcefa62e
AG
46 * drbd_request_endio (defined here)
47 * drbd_peer_request_endio (defined here)
c5a91619
AG
48 * bm_async_io_complete (defined in drbd_bitmap.c)
49 *
b411b363
PR
50 * For all these callbacks, note the following:
51 * The callbacks will be called in irq context by the IDE drivers,
52 * and in Softirqs/Tasklets/BH context by the SCSI drivers.
53 * Try to get the locking right :)
54 *
55 */
56
57
58/* About the global_state_lock
59 Each state transition on a device holds a read lock. In case we have
60 to evaluate the sync after dependencies, we grab a write lock, because
61 we need stable states on all devices for that. */
62rwlock_t global_state_lock;
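A minimal sketch of the locking pattern described above, using hypothetical helper names that are not part of this file: a per-device state transition takes the read side, while evaluating the resync-after dependencies takes the write side to get stable states on all devices.

static void __maybe_unused example_single_device_transition(void)
{
	read_lock_irq(&global_state_lock);
	/* change the state of one device; many such transitions may run concurrently */
	read_unlock_irq(&global_state_lock);
}

static void __maybe_unused example_evaluate_sync_after(void)
{
	write_lock_irq(&global_state_lock);
	/* needs stable states on all devices, so all readers are excluded here */
	write_unlock_irq(&global_state_lock);
}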
63
64/* used for synchronous meta data and bitmap IO
65 * submitted by drbd_md_sync_page_io()
66 */
67void drbd_md_io_complete(struct bio *bio, int error)
68{
69 struct drbd_md_io *md_io;
70
71 md_io = (struct drbd_md_io *)bio->bi_private;
72 md_io->error = error;
73
b411b363
PR
74 complete(&md_io->event);
75}
76
77/* reads on behalf of the partner,
78 * "submitted" by the receiver
79 */
db830c46 80void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(local)
b411b363
PR
81{
82 unsigned long flags = 0;
a21e9298 83 struct drbd_conf *mdev = peer_req->w.mdev;
b411b363 84
87eeee41 85 spin_lock_irqsave(&mdev->tconn->req_lock, flags);
db830c46
AG
86 mdev->read_cnt += peer_req->i.size >> 9;
87 list_del(&peer_req->w.list);
b411b363
PR
88 if (list_empty(&mdev->read_ee))
89 wake_up(&mdev->ee_wait);
db830c46 90 if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
81e84650 91 __drbd_chk_io_error(mdev, false);
87eeee41 92 spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
b411b363 93
db830c46 94 drbd_queue_work(&mdev->tconn->data.work, &peer_req->w);
b411b363 95 put_ldev(mdev);
b411b363
PR
96}
97
98/* writes on behalf of the partner, or resync writes,
45bb912b 99 * "submitted" by the receiver, final stage. */
db830c46 100static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local)
b411b363
PR
101{
102 unsigned long flags = 0;
a21e9298 103 struct drbd_conf *mdev = peer_req->w.mdev;
181286ad 104 struct drbd_interval i;
b411b363 105 int do_wake;
579b57ed 106 u64 block_id;
b411b363 107 int do_al_complete_io;
b411b363 108
db830c46 109 /* after we moved peer_req to done_ee,
b411b363
PR
110 * we may no longer access it,
111 * it may be freed/reused already!
112 * (as soon as we release the req_lock) */
181286ad 113 i = peer_req->i;
db830c46
AG
114 do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO;
115 block_id = peer_req->block_id;
b411b363 116
87eeee41 117 spin_lock_irqsave(&mdev->tconn->req_lock, flags);
db830c46
AG
118 mdev->writ_cnt += peer_req->i.size >> 9;
119 list_del(&peer_req->w.list); /* has been on active_ee or sync_ee */
120 list_add_tail(&peer_req->w.list, &mdev->done_ee);
b411b363 121
bb3bfe96 122 /*
5e472264 123 * Do not remove from the write_requests tree here: we did not send the
bb3bfe96
AG
124 * Ack yet and did not wake possibly waiting conflicting requests.
125 * Removed from the tree from "drbd_process_done_ee" within the
126 * appropriate w.cb (e_end_block/e_end_resync_block) or from
127 * _drbd_clear_done_ee.
128 */
b411b363 129
579b57ed 130 do_wake = list_empty(block_id == ID_SYNCER ? &mdev->sync_ee : &mdev->active_ee);
b411b363 131
db830c46 132 if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
81e84650 133 __drbd_chk_io_error(mdev, false);
87eeee41 134 spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
b411b363 135
579b57ed 136 if (block_id == ID_SYNCER)
181286ad 137 drbd_rs_complete_io(mdev, i.sector);
b411b363
PR
138
139 if (do_wake)
140 wake_up(&mdev->ee_wait);
141
142 if (do_al_complete_io)
181286ad 143 drbd_al_complete_io(mdev, &i);
b411b363 144
0625ac19 145 wake_asender(mdev->tconn);
b411b363 146 put_ldev(mdev);
45bb912b 147}
b411b363 148
45bb912b
LE
149/* writes on behalf of the partner, or resync writes,
150 * "submitted" by the receiver.
151 */
fcefa62e 152void drbd_peer_request_endio(struct bio *bio, int error)
45bb912b 153{
db830c46 154 struct drbd_peer_request *peer_req = bio->bi_private;
a21e9298 155 struct drbd_conf *mdev = peer_req->w.mdev;
45bb912b
LE
156 int uptodate = bio_flagged(bio, BIO_UPTODATE);
157 int is_write = bio_data_dir(bio) == WRITE;
158
07194272 159 if (error && __ratelimit(&drbd_ratelimit_state))
45bb912b
LE
160 dev_warn(DEV, "%s: error=%d s=%llus\n",
161 is_write ? "write" : "read", error,
db830c46 162 (unsigned long long)peer_req->i.sector);
45bb912b 163 if (!error && !uptodate) {
07194272
LE
164 if (__ratelimit(&drbd_ratelimit_state))
165 dev_warn(DEV, "%s: setting error to -EIO s=%llus\n",
166 is_write ? "write" : "read",
db830c46 167 (unsigned long long)peer_req->i.sector);
45bb912b
LE
168 /* strange behavior of some lower level drivers...
169 * fail the request by clearing the uptodate flag,
170 * but do not return any error?! */
171 error = -EIO;
172 }
173
174 if (error)
db830c46 175 set_bit(__EE_WAS_ERROR, &peer_req->flags);
45bb912b
LE
176
177 bio_put(bio); /* no need for the bio anymore */
db830c46 178 if (atomic_dec_and_test(&peer_req->pending_bios)) {
45bb912b 179 if (is_write)
db830c46 180 drbd_endio_write_sec_final(peer_req);
45bb912b 181 else
db830c46 182 drbd_endio_read_sec_final(peer_req);
45bb912b 183 }
b411b363
PR
184}
185
186/* read, readA or write requests on R_PRIMARY coming from drbd_make_request
187 */
fcefa62e 188void drbd_request_endio(struct bio *bio, int error)
b411b363 189{
a115413d 190 unsigned long flags;
b411b363 191 struct drbd_request *req = bio->bi_private;
a21e9298 192 struct drbd_conf *mdev = req->w.mdev;
a115413d 193 struct bio_and_error m;
b411b363
PR
194 enum drbd_req_event what;
195 int uptodate = bio_flagged(bio, BIO_UPTODATE);
196
b411b363
PR
197 if (!error && !uptodate) {
198 dev_warn(DEV, "p %s: setting error to -EIO\n",
199 bio_data_dir(bio) == WRITE ? "write" : "read");
200 /* strange behavior of some lower level drivers...
201 * fail the request by clearing the uptodate flag,
202 * but do not return any error?! */
203 error = -EIO;
204 }
205
b411b363
PR
206 /* to avoid recursion in __req_mod */
207 if (unlikely(error)) {
208 what = (bio_data_dir(bio) == WRITE)
8554df1c 209 ? WRITE_COMPLETED_WITH_ERROR
5c3c7e64 210 : (bio_rw(bio) == READ)
8554df1c
AG
211 ? READ_COMPLETED_WITH_ERROR
212 : READ_AHEAD_COMPLETED_WITH_ERROR;
b411b363 213 } else
8554df1c 214 what = COMPLETED_OK;
b411b363
PR
215
216 bio_put(req->private_bio);
217 req->private_bio = ERR_PTR(error);
218
a115413d 219 /* not req_mod(), we need irqsave here! */
87eeee41 220 spin_lock_irqsave(&mdev->tconn->req_lock, flags);
a115413d 221 __req_mod(req, what, &m);
87eeee41 222 spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
a115413d
LE
223
224 if (m.bio)
225 complete_master_bio(mdev, &m);
b411b363
PR
226}
227
99920dc5 228int w_read_retry_remote(struct drbd_work *w, int cancel)
b411b363
PR
229{
230 struct drbd_request *req = container_of(w, struct drbd_request, w);
00d56944 231 struct drbd_conf *mdev = w->mdev;
b411b363
PR
232
233 /* We should not detach for read io-error,
234 * but try to WRITE the P_DATA_REPLY to the failed location,
235 * to give the disk the chance to relocate that block */
236
87eeee41 237 spin_lock_irq(&mdev->tconn->req_lock);
d255e5ff 238 if (cancel || mdev->state.pdsk != D_UP_TO_DATE) {
8554df1c 239 _req_mod(req, READ_RETRY_REMOTE_CANCELED);
87eeee41 240 spin_unlock_irq(&mdev->tconn->req_lock);
99920dc5 241 return 0;
b411b363 242 }
87eeee41 243 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363 244
00d56944 245 return w_send_read_req(w, 0);
b411b363
PR
246}
247
f6ffca9f 248void drbd_csum_ee(struct drbd_conf *mdev, struct crypto_hash *tfm,
db830c46 249 struct drbd_peer_request *peer_req, void *digest)
45bb912b
LE
250{
251 struct hash_desc desc;
252 struct scatterlist sg;
db830c46 253 struct page *page = peer_req->pages;
45bb912b
LE
254 struct page *tmp;
255 unsigned len;
256
257 desc.tfm = tfm;
258 desc.flags = 0;
259
260 sg_init_table(&sg, 1);
261 crypto_hash_init(&desc);
262
263 while ((tmp = page_chain_next(page))) {
264 /* all but the last page will be fully used */
265 sg_set_page(&sg, page, PAGE_SIZE, 0);
266 crypto_hash_update(&desc, &sg, sg.length);
267 page = tmp;
268 }
269 /* and now the last, possibly only partially used page */
db830c46 270 len = peer_req->i.size & (PAGE_SIZE - 1);
45bb912b
LE
271 sg_set_page(&sg, page, len ?: PAGE_SIZE, 0);
272 crypto_hash_update(&desc, &sg, sg.length);
273 crypto_hash_final(&desc, digest);
274}
275
276void drbd_csum_bio(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio *bio, void *digest)
b411b363
PR
277{
278 struct hash_desc desc;
279 struct scatterlist sg;
280 struct bio_vec *bvec;
281 int i;
282
283 desc.tfm = tfm;
284 desc.flags = 0;
285
286 sg_init_table(&sg, 1);
287 crypto_hash_init(&desc);
288
289 __bio_for_each_segment(bvec, bio, i, 0) {
290 sg_set_page(&sg, bvec->bv_page, bvec->bv_len, bvec->bv_offset);
291 crypto_hash_update(&desc, &sg, sg.length);
292 }
293 crypto_hash_final(&desc, digest);
294}
295
9676c760 296/* MAYBE merge common code with w_e_end_ov_req */
99920dc5 297static int w_e_send_csum(struct drbd_work *w, int cancel)
b411b363 298{
00d56944
PR
299 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
300 struct drbd_conf *mdev = w->mdev;
b411b363
PR
301 int digest_size;
302 void *digest;
99920dc5 303 int err = 0;
b411b363 304
53ea4331
LE
305 if (unlikely(cancel))
306 goto out;
b411b363 307
9676c760 308 if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0))
53ea4331 309 goto out;
b411b363 310
f399002e 311 digest_size = crypto_hash_digestsize(mdev->tconn->csums_tfm);
53ea4331
LE
312 digest = kmalloc(digest_size, GFP_NOIO);
313 if (digest) {
db830c46
AG
314 sector_t sector = peer_req->i.sector;
315 unsigned int size = peer_req->i.size;
f399002e 316 drbd_csum_ee(mdev, mdev->tconn->csums_tfm, peer_req, digest);
9676c760 317 /* Free peer_req and pages before send.
53ea4331
LE
318 * In case we block on congestion, we could otherwise run into
319 * some distributed deadlock, if the other side blocks on
320 * congestion as well, because our receiver blocks in
c37c8ecf 321 * drbd_alloc_pages due to pp_in_use > max_buffers. */
3967deb1 322 drbd_free_peer_req(mdev, peer_req);
db830c46 323 peer_req = NULL;
53ea4331 324 inc_rs_pending(mdev);
99920dc5 325 err = drbd_send_drequest_csum(mdev, sector, size,
db1b0b72
AG
326 digest, digest_size,
327 P_CSUM_RS_REQUEST);
53ea4331
LE
328 kfree(digest);
329 } else {
330 dev_err(DEV, "kmalloc() of digest failed.\n");
99920dc5 331 err = -ENOMEM;
53ea4331 332 }
b411b363 333
53ea4331 334out:
db830c46 335 if (peer_req)
3967deb1 336 drbd_free_peer_req(mdev, peer_req);
b411b363 337
99920dc5 338 if (unlikely(err))
b411b363 339 dev_err(DEV, "drbd_send_drequest(..., csum) failed\n");
99920dc5 340 return err;
b411b363
PR
341}
342
343#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
344
345static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size)
346{
db830c46 347 struct drbd_peer_request *peer_req;
b411b363
PR
348
349 if (!get_ldev(mdev))
80a40e43 350 return -EIO;
b411b363 351
e3555d85 352 if (drbd_rs_should_slow_down(mdev, sector))
0f0601f4
LE
353 goto defer;
354
b411b363
PR
355 /* GFP_TRY, because if there is no memory available right now, this may
356 * be rescheduled for later. It is "only" background resync, after all. */
0db55363
AG
357 peer_req = drbd_alloc_peer_req(mdev, ID_SYNCER /* unused */, sector,
358 size, GFP_TRY);
db830c46 359 if (!peer_req)
80a40e43 360 goto defer;
b411b363 361
db830c46 362 peer_req->w.cb = w_e_send_csum;
87eeee41 363 spin_lock_irq(&mdev->tconn->req_lock);
db830c46 364 list_add(&peer_req->w.list, &mdev->read_ee);
87eeee41 365 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363 366
0f0601f4 367 atomic_add(size >> 9, &mdev->rs_sect_ev);
fbe29dec 368 if (drbd_submit_peer_request(mdev, peer_req, READ, DRBD_FAULT_RS_RD) == 0)
80a40e43 369 return 0;
b411b363 370
10f6d992
LE
371 /* If it failed because of ENOMEM, retry should help. If it failed
372 * because bio_add_page failed (probably broken lower level driver),
373 * retry may or may not help.
374 * If it does not, you may need to force disconnect. */
87eeee41 375 spin_lock_irq(&mdev->tconn->req_lock);
db830c46 376 list_del(&peer_req->w.list);
87eeee41 377 spin_unlock_irq(&mdev->tconn->req_lock);
22cc37a9 378
3967deb1 379 drbd_free_peer_req(mdev, peer_req);
80a40e43 380defer:
45bb912b 381 put_ldev(mdev);
80a40e43 382 return -EAGAIN;
b411b363
PR
383}
384
99920dc5 385int w_resync_timer(struct drbd_work *w, int cancel)
b411b363 386{
00d56944 387 struct drbd_conf *mdev = w->mdev;
63106d3c
PR
388 switch (mdev->state.conn) {
389 case C_VERIFY_S:
00d56944 390 w_make_ov_request(w, cancel);
63106d3c
PR
391 break;
392 case C_SYNC_TARGET:
00d56944 393 w_make_resync_request(w, cancel);
63106d3c 394 break;
b411b363
PR
395 }
396
99920dc5 397 return 0;
794abb75
PR
398}
399
400void resync_timer_fn(unsigned long data)
401{
402 struct drbd_conf *mdev = (struct drbd_conf *) data;
403
404 if (list_empty(&mdev->resync_work.list))
e42325a5 405 drbd_queue_work(&mdev->tconn->data.work, &mdev->resync_work);
b411b363
PR
406}
407
778f271d
PR
408static void fifo_set(struct fifo_buffer *fb, int value)
409{
410 int i;
411
412 for (i = 0; i < fb->size; i++)
f10f2623 413 fb->values[i] = value;
778f271d
PR
414}
415
416static int fifo_push(struct fifo_buffer *fb, int value)
417{
418 int ov;
419
420 ov = fb->values[fb->head_index];
421 fb->values[fb->head_index++] = value;
422
423 if (fb->head_index >= fb->size)
424 fb->head_index = 0;
425
426 return ov;
427}
428
429static void fifo_add_val(struct fifo_buffer *fb, int value)
430{
431 int i;
432
433 for (i = 0; i < fb->size; i++)
434 fb->values[i] += value;
435}
436
9958c857
PR
437struct fifo_buffer *fifo_alloc(int fifo_size)
438{
439 struct fifo_buffer *fb;
440
441 fb = kzalloc(sizeof(struct fifo_buffer) + sizeof(int) * fifo_size, GFP_KERNEL);
442 if (!fb)
443 return NULL;
444
445 fb->head_index = 0;
446 fb->size = fifo_size;
447 fb->total = 0;
448
449 return fb;
450}
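A small usage sketch with made-up values: fifo_push() stores the new entry and returns whatever sat in the slot being overwritten, i.e. the value pushed fb->size calls earlier, which is how the resync controller below consumes its plan.

static void __maybe_unused example_fifo_roundtrip(void)
{
	struct fifo_buffer *plan = fifo_alloc(3);

	if (!plan)
		return;
	fifo_push(plan, 10);	/* returns 0: slots start zeroed by fifo_alloc() */
	fifo_push(plan, 20);	/* returns 0 */
	fifo_push(plan, 30);	/* returns 0 */
	fifo_push(plan, 40);	/* returns 10, the value pushed three calls earlier */
	kfree(plan);
}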
451
9d77a5fe 452static int drbd_rs_controller(struct drbd_conf *mdev)
778f271d 453{
daeda1cc 454 struct disk_conf *dc;
778f271d
PR
455 unsigned int sect_in; /* Number of sectors that came in since the last turn */
456 unsigned int want; /* The number of sectors we want in the proxy */
457 int req_sect; /* Number of sectors to request in this turn */
458 int correction; /* Number of sectors more we need in the proxy*/
459 int cps; /* correction per invocation of drbd_rs_controller() */
460 int steps; /* Number of time steps to plan ahead */
461 int curr_corr;
462 int max_sect;
813472ce 463 struct fifo_buffer *plan;
778f271d
PR
464
465 sect_in = atomic_xchg(&mdev->rs_sect_in, 0); /* Number of sectors that came in */
466 mdev->rs_in_flight -= sect_in;
467
daeda1cc 468 dc = rcu_dereference(mdev->ldev->disk_conf);
813472ce 469 plan = rcu_dereference(mdev->rs_plan_s);
778f271d 470
813472ce 471 steps = plan->size; /* (dc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; */
778f271d
PR
472
473 if (mdev->rs_in_flight + sect_in == 0) { /* At start of resync */
daeda1cc 474 want = ((dc->resync_rate * 2 * SLEEP_TIME) / HZ) * steps;
778f271d 475 } else { /* normal path */
daeda1cc
PR
476 want = dc->c_fill_target ? dc->c_fill_target :
477 sect_in * dc->c_delay_target * HZ / (SLEEP_TIME * 10);
778f271d
PR
478 }
479
813472ce 480 correction = want - mdev->rs_in_flight - plan->total;
778f271d
PR
481
482 /* Plan ahead */
483 cps = correction / steps;
813472ce
PR
484 fifo_add_val(plan, cps);
485 plan->total += cps * steps;
778f271d
PR
486
487 /* What we do in this step */
813472ce
PR
488 curr_corr = fifo_push(plan, 0);
489 plan->total -= curr_corr;
778f271d
PR
490
491 req_sect = sect_in + curr_corr;
492 if (req_sect < 0)
493 req_sect = 0;
494
daeda1cc 495 max_sect = (dc->c_max_rate * 2 * SLEEP_TIME) / HZ;
778f271d
PR
496 if (req_sect > max_sect)
497 req_sect = max_sect;
498
499 /*
500 dev_warn(DEV, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n",
501 sect_in, mdev->rs_in_flight, want, correction,
502 steps, cps, mdev->rs_planed, curr_corr, req_sect);
503 */
504
505 return req_sect;
506}
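To make the controller arithmetic concrete, a worked example with made-up numbers; the SLEEP_TIME and configuration values below are assumptions for illustration, not taken from this file.

/*
 * Assume SLEEP_TIME is 100ms worth of jiffies (HZ = 1000, SLEEP_TIME = 100),
 * c_fill_target is 0 so the delay-target branch is used, and:
 *   sect_in        = 2000 sectors acknowledged since the last step
 *   c_delay_target = 10 (tenths of a second)
 *   want           = 2000 * 10 * 1000 / (100 * 10) = 20000 sectors in flight
 *   rs_in_flight   = 8000, plan->total = 4000
 *   correction     = 20000 - 8000 - 4000 = 8000 sectors,
 * spread as cps = 8000 / steps across the plan fifo and then consumed one
 * step at a time by fifo_push() above.
 */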
507
9d77a5fe 508static int drbd_rs_number_requests(struct drbd_conf *mdev)
e65f440d
LE
509{
510 int number;
813472ce
PR
511
512 rcu_read_lock();
513 if (rcu_dereference(mdev->rs_plan_s)->size) {
e65f440d
LE
514 number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9);
515 mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
516 } else {
daeda1cc 517 mdev->c_sync_rate = rcu_dereference(mdev->ldev->disk_conf)->resync_rate;
e65f440d
LE
518 number = SLEEP_TIME * mdev->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ);
519 }
813472ce 520 rcu_read_unlock();
e65f440d 521
e65f440d
LE
522 /* ignore the amount of pending requests, the resync controller should
523 * throttle down to incoming reply rate soon enough anyways. */
524 return number;
525}
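A worked example of the fixed-rate branch above, assuming BM_BLOCK_SIZE is 4 KiB and SLEEP_TIME is HZ/10 (both assumptions for illustration):

/*
 * With resync_rate = c_sync_rate = 10240 KiB/s:
 *   number = SLEEP_TIME * 10240 / ((4096 / 1024) * HZ)
 *          = (HZ / 10) * 10240 / (4 * HZ) = 256
 * i.e. 256 requests of 4 KiB per 100ms slice, which adds up to the
 * requested 10240 KiB/s.
 */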
526
99920dc5 527int w_make_resync_request(struct drbd_work *w, int cancel)
b411b363 528{
00d56944 529 struct drbd_conf *mdev = w->mdev;
b411b363
PR
530 unsigned long bit;
531 sector_t sector;
532 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
1816a2b4 533 int max_bio_size;
e65f440d 534 int number, rollback_i, size;
b411b363 535 int align, queued, sndbuf;
0f0601f4 536 int i = 0;
b411b363
PR
537
538 if (unlikely(cancel))
99920dc5 539 return 0;
b411b363 540
af85e8e8
LE
541 if (mdev->rs_total == 0) {
542 /* empty resync? */
543 drbd_resync_finished(mdev);
99920dc5 544 return 0;
af85e8e8
LE
545 }
546
b411b363
PR
547 if (!get_ldev(mdev)) {
548 /* Since we only need to access mdev->rsync, a
549 get_ldev_if_state(mdev,D_FAILED) would be sufficient, but
550 to continue resync with a broken disk makes no sense at
551 all */
552 dev_err(DEV, "Disk broke down during resync!\n");
99920dc5 553 return 0;
b411b363
PR
554 }
555
0cfdd247 556 max_bio_size = queue_max_hw_sectors(mdev->rq_queue) << 9;
e65f440d
LE
557 number = drbd_rs_number_requests(mdev);
558 if (number == 0)
0f0601f4 559 goto requeue;
b411b363 560
b411b363
PR
561 for (i = 0; i < number; i++) {
562 /* Stop generating RS requests, when half of the send buffer is filled */
e42325a5
PR
563 mutex_lock(&mdev->tconn->data.mutex);
564 if (mdev->tconn->data.socket) {
565 queued = mdev->tconn->data.socket->sk->sk_wmem_queued;
566 sndbuf = mdev->tconn->data.socket->sk->sk_sndbuf;
b411b363
PR
567 } else {
568 queued = 1;
569 sndbuf = 0;
570 }
e42325a5 571 mutex_unlock(&mdev->tconn->data.mutex);
b411b363
PR
572 if (queued > sndbuf / 2)
573 goto requeue;
574
575next_sector:
576 size = BM_BLOCK_SIZE;
577 bit = drbd_bm_find_next(mdev, mdev->bm_resync_fo);
578
4b0715f0 579 if (bit == DRBD_END_OF_BITMAP) {
b411b363 580 mdev->bm_resync_fo = drbd_bm_bits(mdev);
b411b363 581 put_ldev(mdev);
99920dc5 582 return 0;
b411b363
PR
583 }
584
585 sector = BM_BIT_TO_SECT(bit);
586
e3555d85
PR
587 if (drbd_rs_should_slow_down(mdev, sector) ||
588 drbd_try_rs_begin_io(mdev, sector)) {
b411b363
PR
589 mdev->bm_resync_fo = bit;
590 goto requeue;
591 }
592 mdev->bm_resync_fo = bit + 1;
593
594 if (unlikely(drbd_bm_test_bit(mdev, bit) == 0)) {
595 drbd_rs_complete_io(mdev, sector);
596 goto next_sector;
597 }
598
1816a2b4 599#if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE
b411b363
PR
600 /* try to find some adjacent bits.
601 * we stop if we have already the maximum req size.
602 *
603 * Additionally always align bigger requests, in order to
604 * be prepared for all stripe sizes of software RAIDs.
b411b363
PR
605 */
606 align = 1;
d207450c 607 rollback_i = i;
b411b363 608 for (;;) {
1816a2b4 609 if (size + BM_BLOCK_SIZE > max_bio_size)
b411b363
PR
610 break;
611
612 /* Be always aligned */
613 if (sector & ((1<<(align+3))-1))
614 break;
615
616 /* do not cross extent boundaries */
617 if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0)
618 break;
619 /* now, is it actually dirty, after all?
620 * caution, drbd_bm_test_bit is tri-state for some
621 * obscure reason; ( b == 0 ) would get the out-of-band
622 * only accidentally right because of the "oddly sized"
623 * adjustment below */
624 if (drbd_bm_test_bit(mdev, bit+1) != 1)
625 break;
626 bit++;
627 size += BM_BLOCK_SIZE;
628 if ((BM_BLOCK_SIZE << align) <= size)
629 align++;
630 i++;
631 }
632 /* if we merged some,
633 * reset the offset to start the next drbd_bm_find_next from */
634 if (size > BM_BLOCK_SIZE)
635 mdev->bm_resync_fo = bit + 1;
636#endif
637
638 /* adjust very last sectors, in case we are oddly sized */
639 if (sector + (size>>9) > capacity)
640 size = (capacity-sector)<<9;
f399002e 641 if (mdev->tconn->agreed_pro_version >= 89 && mdev->tconn->csums_tfm) {
b411b363 642 switch (read_for_csum(mdev, sector, size)) {
80a40e43 643 case -EIO: /* Disk failure */
b411b363 644 put_ldev(mdev);
99920dc5 645 return -EIO;
80a40e43 646 case -EAGAIN: /* allocation failed, or ldev busy */
b411b363
PR
647 drbd_rs_complete_io(mdev, sector);
648 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
d207450c 649 i = rollback_i;
b411b363 650 goto requeue;
80a40e43
LE
651 case 0:
652 /* everything ok */
653 break;
654 default:
655 BUG();
b411b363
PR
656 }
657 } else {
99920dc5
AG
658 int err;
659
b411b363 660 inc_rs_pending(mdev);
99920dc5
AG
661 err = drbd_send_drequest(mdev, P_RS_DATA_REQUEST,
662 sector, size, ID_SYNCER);
663 if (err) {
b411b363
PR
664 dev_err(DEV, "drbd_send_drequest() failed, aborting...\n");
665 dec_rs_pending(mdev);
666 put_ldev(mdev);
99920dc5 667 return err;
b411b363
PR
668 }
669 }
670 }
671
672 if (mdev->bm_resync_fo >= drbd_bm_bits(mdev)) {
673 /* last syncer _request_ was sent,
674 * but the P_RS_DATA_REPLY not yet received. sync will end (and
675 * next sync group will resume), as soon as we receive the last
676 * resync data block, and the last bit is cleared.
677 * until then resync "work" is "inactive" ...
678 */
b411b363 679 put_ldev(mdev);
99920dc5 680 return 0;
b411b363
PR
681 }
682
683 requeue:
778f271d 684 mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
b411b363
PR
685 mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
686 put_ldev(mdev);
99920dc5 687 return 0;
b411b363
PR
688}
689
00d56944 690static int w_make_ov_request(struct drbd_work *w, int cancel)
b411b363 691{
00d56944 692 struct drbd_conf *mdev = w->mdev;
b411b363
PR
693 int number, i, size;
694 sector_t sector;
695 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
696
697 if (unlikely(cancel))
698 return 1;
699
2649f080 700 number = drbd_rs_number_requests(mdev);
b411b363
PR
701
702 sector = mdev->ov_position;
703 for (i = 0; i < number; i++) {
704 if (sector >= capacity) {
b411b363
PR
705 return 1;
706 }
707
708 size = BM_BLOCK_SIZE;
709
e3555d85
PR
710 if (drbd_rs_should_slow_down(mdev, sector) ||
711 drbd_try_rs_begin_io(mdev, sector)) {
b411b363
PR
712 mdev->ov_position = sector;
713 goto requeue;
714 }
715
716 if (sector + (size>>9) > capacity)
717 size = (capacity-sector)<<9;
718
719 inc_rs_pending(mdev);
5b9f499c 720 if (drbd_send_ov_request(mdev, sector, size)) {
b411b363
PR
721 dec_rs_pending(mdev);
722 return 0;
723 }
724 sector += BM_SECT_PER_BIT;
725 }
726 mdev->ov_position = sector;
727
728 requeue:
2649f080 729 mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
b411b363
PR
730 mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
731 return 1;
732}
733
99920dc5 734int w_ov_finished(struct drbd_work *w, int cancel)
b411b363 735{
00d56944 736 struct drbd_conf *mdev = w->mdev;
b411b363 737 kfree(w);
8f7bed77 738 ov_out_of_sync_print(mdev);
b411b363
PR
739 drbd_resync_finished(mdev);
740
99920dc5 741 return 0;
b411b363
PR
742}
743
99920dc5 744static int w_resync_finished(struct drbd_work *w, int cancel)
b411b363 745{
00d56944 746 struct drbd_conf *mdev = w->mdev;
b411b363
PR
747 kfree(w);
748
749 drbd_resync_finished(mdev);
750
99920dc5 751 return 0;
b411b363
PR
752}
753
af85e8e8
LE
754static void ping_peer(struct drbd_conf *mdev)
755{
2a67d8b9
PR
756 struct drbd_tconn *tconn = mdev->tconn;
757
758 clear_bit(GOT_PING_ACK, &tconn->flags);
759 request_ping(tconn);
760 wait_event(tconn->ping_wait,
761 test_bit(GOT_PING_ACK, &tconn->flags) || mdev->state.conn < C_CONNECTED);
af85e8e8
LE
762}
763
b411b363
PR
764int drbd_resync_finished(struct drbd_conf *mdev)
765{
766 unsigned long db, dt, dbdt;
767 unsigned long n_oos;
768 union drbd_state os, ns;
769 struct drbd_work *w;
770 char *khelper_cmd = NULL;
26525618 771 int verify_done = 0;
b411b363
PR
772
773 /* Remove all elements from the resync LRU. Since future actions
774 * might set bits in the (main) bitmap, then the entries in the
775 * resync LRU would be wrong. */
776 if (drbd_rs_del_all(mdev)) {
777 /* In case this is not possible now, most probably because
778 * there are P_RS_DATA_REPLY Packets lingering on the worker's
779 * queue (or even the read operations for those packets
780 * are not finished by now). Retry in 100ms. */
781
20ee6390 782 schedule_timeout_interruptible(HZ / 10);
b411b363
PR
783 w = kmalloc(sizeof(struct drbd_work), GFP_ATOMIC);
784 if (w) {
785 w->cb = w_resync_finished;
e42325a5 786 drbd_queue_work(&mdev->tconn->data.work, w);
b411b363
PR
787 return 1;
788 }
789 dev_err(DEV, "Warn failed to drbd_rs_del_all() and to kmalloc(w).\n");
790 }
791
792 dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ;
793 if (dt <= 0)
794 dt = 1;
795 db = mdev->rs_total;
796 dbdt = Bit2KB(db/dt);
797 mdev->rs_paused /= HZ;
798
799 if (!get_ldev(mdev))
800 goto out;
801
af85e8e8
LE
802 ping_peer(mdev);
803
87eeee41 804 spin_lock_irq(&mdev->tconn->req_lock);
78bae59b 805 os = drbd_read_state(mdev);
b411b363 806
26525618
LE
807 verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T);
808
b411b363
PR
809 /* This protects us against multiple calls (that can happen in the presence
810 of application IO), and against connectivity loss just before we arrive here. */
811 if (os.conn <= C_CONNECTED)
812 goto out_unlock;
813
814 ns = os;
815 ns.conn = C_CONNECTED;
816
817 dev_info(DEV, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
26525618 818 verify_done ? "Online verify " : "Resync",
b411b363
PR
819 dt + mdev->rs_paused, mdev->rs_paused, dbdt);
820
821 n_oos = drbd_bm_total_weight(mdev);
822
823 if (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) {
824 if (n_oos) {
825 dev_alert(DEV, "Online verify found %lu %dk block out of sync!\n",
826 n_oos, Bit2KB(1));
827 khelper_cmd = "out-of-sync";
828 }
829 } else {
830 D_ASSERT((n_oos - mdev->rs_failed) == 0);
831
832 if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T)
833 khelper_cmd = "after-resync-target";
834
f399002e 835 if (mdev->tconn->csums_tfm && mdev->rs_total) {
b411b363
PR
836 const unsigned long s = mdev->rs_same_csum;
837 const unsigned long t = mdev->rs_total;
838 const int ratio =
839 (t == 0) ? 0 :
840 (t < 100000) ? ((s*100)/t) : (s/(t/100));
24c4830c 841 dev_info(DEV, "%u %% had equal checksums, eliminated: %luK; "
b411b363
PR
842 "transferred %luK total %luK\n",
843 ratio,
844 Bit2KB(mdev->rs_same_csum),
845 Bit2KB(mdev->rs_total - mdev->rs_same_csum),
846 Bit2KB(mdev->rs_total));
847 }
848 }
849
850 if (mdev->rs_failed) {
851 dev_info(DEV, " %lu failed blocks\n", mdev->rs_failed);
852
853 if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
854 ns.disk = D_INCONSISTENT;
855 ns.pdsk = D_UP_TO_DATE;
856 } else {
857 ns.disk = D_UP_TO_DATE;
858 ns.pdsk = D_INCONSISTENT;
859 }
860 } else {
861 ns.disk = D_UP_TO_DATE;
862 ns.pdsk = D_UP_TO_DATE;
863
864 if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
865 if (mdev->p_uuid) {
866 int i;
867 for (i = UI_BITMAP ; i <= UI_HISTORY_END ; i++)
868 _drbd_uuid_set(mdev, i, mdev->p_uuid[i]);
869 drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_CURRENT]);
870 _drbd_uuid_set(mdev, UI_CURRENT, mdev->p_uuid[UI_CURRENT]);
871 } else {
872 dev_err(DEV, "mdev->p_uuid is NULL! BUG\n");
873 }
874 }
875
62b0da3a
LE
876 if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) {
877 /* for verify runs, we don't update uuids here,
878 * so there would be nothing to report. */
879 drbd_uuid_set_bm(mdev, 0UL);
880 drbd_print_uuids(mdev, "updated UUIDs");
881 if (mdev->p_uuid) {
882 /* Now the two UUID sets are equal, update what we
883 * know of the peer. */
884 int i;
885 for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
886 mdev->p_uuid[i] = mdev->ldev->md.uuid[i];
887 }
b411b363
PR
888 }
889 }
890
891 _drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
892out_unlock:
87eeee41 893 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363
PR
894 put_ldev(mdev);
895out:
896 mdev->rs_total = 0;
897 mdev->rs_failed = 0;
898 mdev->rs_paused = 0;
26525618
LE
899 if (verify_done)
900 mdev->ov_start_sector = 0;
b411b363 901
13d42685
LE
902 drbd_md_sync(mdev);
903
b411b363
PR
904 if (khelper_cmd)
905 drbd_khelper(mdev, khelper_cmd);
906
907 return 1;
908}
909
910/* helper */
db830c46 911static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct drbd_peer_request *peer_req)
b411b363 912{
045417f7 913 if (drbd_peer_req_has_active_page(peer_req)) {
b411b363 914 /* This might happen if sendpage() has not finished */
db830c46 915 int i = (peer_req->i.size + PAGE_SIZE -1) >> PAGE_SHIFT;
435f0740
LE
916 atomic_add(i, &mdev->pp_in_use_by_net);
917 atomic_sub(i, &mdev->pp_in_use);
87eeee41 918 spin_lock_irq(&mdev->tconn->req_lock);
db830c46 919 list_add_tail(&peer_req->w.list, &mdev->net_ee);
87eeee41 920 spin_unlock_irq(&mdev->tconn->req_lock);
435f0740 921 wake_up(&drbd_pp_wait);
b411b363 922 } else
3967deb1 923 drbd_free_peer_req(mdev, peer_req);
b411b363
PR
924}
925
926/**
927 * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST
928 * @mdev: DRBD device.
929 * @w: work object.
930 * @cancel: The connection will be closed anyways
931 */
99920dc5 932int w_e_end_data_req(struct drbd_work *w, int cancel)
b411b363 933{
db830c46 934 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
00d56944 935 struct drbd_conf *mdev = w->mdev;
99920dc5 936 int err;
b411b363
PR
937
938 if (unlikely(cancel)) {
3967deb1 939 drbd_free_peer_req(mdev, peer_req);
b411b363 940 dec_unacked(mdev);
99920dc5 941 return 0;
b411b363
PR
942 }
943
db830c46 944 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
99920dc5 945 err = drbd_send_block(mdev, P_DATA_REPLY, peer_req);
b411b363
PR
946 } else {
947 if (__ratelimit(&drbd_ratelimit_state))
948 dev_err(DEV, "Sending NegDReply. sector=%llus.\n",
db830c46 949 (unsigned long long)peer_req->i.sector);
b411b363 950
99920dc5 951 err = drbd_send_ack(mdev, P_NEG_DREPLY, peer_req);
b411b363
PR
952 }
953
954 dec_unacked(mdev);
955
db830c46 956 move_to_net_ee_or_free(mdev, peer_req);
b411b363 957
99920dc5 958 if (unlikely(err))
b411b363 959 dev_err(DEV, "drbd_send_block() failed\n");
99920dc5 960 return err;
b411b363
PR
961}
962
963/**
964 * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST
965 * @mdev: DRBD device.
966 * @w: work object.
967 * @cancel: The connection will be closed anyways
968 */
99920dc5 969int w_e_end_rsdata_req(struct drbd_work *w, int cancel)
b411b363 970{
db830c46 971 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
00d56944 972 struct drbd_conf *mdev = w->mdev;
99920dc5 973 int err;
b411b363
PR
974
975 if (unlikely(cancel)) {
3967deb1 976 drbd_free_peer_req(mdev, peer_req);
b411b363 977 dec_unacked(mdev);
99920dc5 978 return 0;
b411b363
PR
979 }
980
981 if (get_ldev_if_state(mdev, D_FAILED)) {
db830c46 982 drbd_rs_complete_io(mdev, peer_req->i.sector);
b411b363
PR
983 put_ldev(mdev);
984 }
985
d612d309 986 if (mdev->state.conn == C_AHEAD) {
99920dc5 987 err = drbd_send_ack(mdev, P_RS_CANCEL, peer_req);
db830c46 988 } else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
b411b363
PR
989 if (likely(mdev->state.pdsk >= D_INCONSISTENT)) {
990 inc_rs_pending(mdev);
99920dc5 991 err = drbd_send_block(mdev, P_RS_DATA_REPLY, peer_req);
b411b363
PR
992 } else {
993 if (__ratelimit(&drbd_ratelimit_state))
994 dev_err(DEV, "Not sending RSDataReply, "
995 "partner DISKLESS!\n");
99920dc5 996 err = 0;
b411b363
PR
997 }
998 } else {
999 if (__ratelimit(&drbd_ratelimit_state))
1000 dev_err(DEV, "Sending NegRSDReply. sector %llus.\n",
db830c46 1001 (unsigned long long)peer_req->i.sector);
b411b363 1002
99920dc5 1003 err = drbd_send_ack(mdev, P_NEG_RS_DREPLY, peer_req);
b411b363
PR
1004
1005 /* update resync data with failure */
db830c46 1006 drbd_rs_failed_io(mdev, peer_req->i.sector, peer_req->i.size);
b411b363
PR
1007 }
1008
1009 dec_unacked(mdev);
1010
db830c46 1011 move_to_net_ee_or_free(mdev, peer_req);
b411b363 1012
99920dc5 1013 if (unlikely(err))
b411b363 1014 dev_err(DEV, "drbd_send_block() failed\n");
99920dc5 1015 return err;
b411b363
PR
1016}
1017
99920dc5 1018int w_e_end_csum_rs_req(struct drbd_work *w, int cancel)
b411b363 1019{
db830c46 1020 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
00d56944 1021 struct drbd_conf *mdev = w->mdev;
b411b363
PR
1022 struct digest_info *di;
1023 int digest_size;
1024 void *digest = NULL;
99920dc5 1025 int err, eq = 0;
b411b363
PR
1026
1027 if (unlikely(cancel)) {
3967deb1 1028 drbd_free_peer_req(mdev, peer_req);
b411b363 1029 dec_unacked(mdev);
99920dc5 1030 return 0;
b411b363
PR
1031 }
1032
1d53f09e 1033 if (get_ldev(mdev)) {
db830c46 1034 drbd_rs_complete_io(mdev, peer_req->i.sector);
1d53f09e
LE
1035 put_ldev(mdev);
1036 }
b411b363 1037
db830c46 1038 di = peer_req->digest;
b411b363 1039
db830c46 1040 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
b411b363
PR
1041 /* quick hack to try to avoid a race against reconfiguration.
1042 * a real fix would be much more involved,
1043 * introducing more locking mechanisms */
f399002e
LE
1044 if (mdev->tconn->csums_tfm) {
1045 digest_size = crypto_hash_digestsize(mdev->tconn->csums_tfm);
b411b363
PR
1046 D_ASSERT(digest_size == di->digest_size);
1047 digest = kmalloc(digest_size, GFP_NOIO);
1048 }
1049 if (digest) {
f399002e 1050 drbd_csum_ee(mdev, mdev->tconn->csums_tfm, peer_req, digest);
b411b363
PR
1051 eq = !memcmp(digest, di->digest, digest_size);
1052 kfree(digest);
1053 }
1054
1055 if (eq) {
db830c46 1056 drbd_set_in_sync(mdev, peer_req->i.sector, peer_req->i.size);
676396d5 1057 /* rs_same_csums unit is BM_BLOCK_SIZE */
db830c46 1058 mdev->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT;
99920dc5 1059 err = drbd_send_ack(mdev, P_RS_IS_IN_SYNC, peer_req);
b411b363
PR
1060 } else {
1061 inc_rs_pending(mdev);
db830c46
AG
1062 peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */
1063 peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */
204bba99 1064 kfree(di);
99920dc5 1065 err = drbd_send_block(mdev, P_RS_DATA_REPLY, peer_req);
b411b363
PR
1066 }
1067 } else {
99920dc5 1068 err = drbd_send_ack(mdev, P_NEG_RS_DREPLY, peer_req);
b411b363
PR
1069 if (__ratelimit(&drbd_ratelimit_state))
1070 dev_err(DEV, "Sending NegDReply. I guess it gets messy.\n");
1071 }
1072
1073 dec_unacked(mdev);
db830c46 1074 move_to_net_ee_or_free(mdev, peer_req);
b411b363 1075
99920dc5 1076 if (unlikely(err))
b411b363 1077 dev_err(DEV, "drbd_send_block/ack() failed\n");
99920dc5 1078 return err;
b411b363
PR
1079}
1080
99920dc5 1081int w_e_end_ov_req(struct drbd_work *w, int cancel)
b411b363 1082{
db830c46 1083 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
00d56944 1084 struct drbd_conf *mdev = w->mdev;
db830c46
AG
1085 sector_t sector = peer_req->i.sector;
1086 unsigned int size = peer_req->i.size;
b411b363
PR
1087 int digest_size;
1088 void *digest;
99920dc5 1089 int err = 0;
b411b363
PR
1090
1091 if (unlikely(cancel))
1092 goto out;
1093
f399002e 1094 digest_size = crypto_hash_digestsize(mdev->tconn->verify_tfm);
b411b363 1095 digest = kmalloc(digest_size, GFP_NOIO);
8f21420e 1096 if (!digest) {
99920dc5 1097 err = 1; /* terminate the connection in case the allocation failed */
8f21420e 1098 goto out;
b411b363
PR
1099 }
1100
db830c46 1101 if (likely(!(peer_req->flags & EE_WAS_ERROR)))
f399002e 1102 drbd_csum_ee(mdev, mdev->tconn->verify_tfm, peer_req, digest);
8f21420e
PR
1103 else
1104 memset(digest, 0, digest_size);
1105
53ea4331
LE
1106 /* Free e and pages before send.
1107 * In case we block on congestion, we could otherwise run into
1108 * some distributed deadlock, if the other side blocks on
1109 * congestion as well, because our receiver blocks in
c37c8ecf 1110 * drbd_alloc_pages due to pp_in_use > max_buffers. */
3967deb1 1111 drbd_free_peer_req(mdev, peer_req);
db830c46 1112 peer_req = NULL;
8f21420e 1113 inc_rs_pending(mdev);
99920dc5
AG
1114 err = drbd_send_drequest_csum(mdev, sector, size, digest, digest_size, P_OV_REPLY);
1115 if (err)
8f21420e
PR
1116 dec_rs_pending(mdev);
1117 kfree(digest);
1118
b411b363 1119out:
db830c46 1120 if (peer_req)
3967deb1 1121 drbd_free_peer_req(mdev, peer_req);
b411b363 1122 dec_unacked(mdev);
99920dc5 1123 return err;
b411b363
PR
1124}
1125
8f7bed77 1126void drbd_ov_out_of_sync_found(struct drbd_conf *mdev, sector_t sector, int size)
b411b363
PR
1127{
1128 if (mdev->ov_last_oos_start + mdev->ov_last_oos_size == sector) {
1129 mdev->ov_last_oos_size += size>>9;
1130 } else {
1131 mdev->ov_last_oos_start = sector;
1132 mdev->ov_last_oos_size = size>>9;
1133 }
1134 drbd_set_out_of_sync(mdev, sector, size);
b411b363
PR
1135}
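A short worked example of the range merging above, with made-up sector numbers:

/*
 * A first 4 KiB mismatch at sector 1000 sets ov_last_oos_start = 1000 and
 * ov_last_oos_size = 8 (sectors).  A second mismatch at sector 1008
 * satisfies 1000 + 8 == 1008, so only ov_last_oos_size grows to 16;
 * a mismatch anywhere else starts a new range instead.
 */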
1136
99920dc5 1137int w_e_end_ov_reply(struct drbd_work *w, int cancel)
b411b363 1138{
db830c46 1139 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
00d56944 1140 struct drbd_conf *mdev = w->mdev;
b411b363 1141 struct digest_info *di;
b411b363 1142 void *digest;
db830c46
AG
1143 sector_t sector = peer_req->i.sector;
1144 unsigned int size = peer_req->i.size;
53ea4331 1145 int digest_size;
99920dc5 1146 int err, eq = 0;
b411b363
PR
1147
1148 if (unlikely(cancel)) {
3967deb1 1149 drbd_free_peer_req(mdev, peer_req);
b411b363 1150 dec_unacked(mdev);
99920dc5 1151 return 0;
b411b363
PR
1152 }
1153
1154 /* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all
1155 * the resync lru has been cleaned up already */
1d53f09e 1156 if (get_ldev(mdev)) {
db830c46 1157 drbd_rs_complete_io(mdev, peer_req->i.sector);
1d53f09e
LE
1158 put_ldev(mdev);
1159 }
b411b363 1160
db830c46 1161 di = peer_req->digest;
b411b363 1162
db830c46 1163 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
f399002e 1164 digest_size = crypto_hash_digestsize(mdev->tconn->verify_tfm);
b411b363
PR
1165 digest = kmalloc(digest_size, GFP_NOIO);
1166 if (digest) {
f399002e 1167 drbd_csum_ee(mdev, mdev->tconn->verify_tfm, peer_req, digest);
b411b363
PR
1168
1169 D_ASSERT(digest_size == di->digest_size);
1170 eq = !memcmp(digest, di->digest, digest_size);
1171 kfree(digest);
1172 }
b411b363
PR
1173 }
1174
9676c760
LE
1175 /* Free peer_req and pages before send.
1176 * In case we block on congestion, we could otherwise run into
1177 * some distributed deadlock, if the other side blocks on
1178 * congestion as well, because our receiver blocks in
c37c8ecf 1179 * drbd_alloc_pages due to pp_in_use > max_buffers. */
3967deb1 1180 drbd_free_peer_req(mdev, peer_req);
b411b363 1181 if (!eq)
8f7bed77 1182 drbd_ov_out_of_sync_found(mdev, sector, size);
b411b363 1183 else
8f7bed77 1184 ov_out_of_sync_print(mdev);
b411b363 1185
99920dc5 1186 err = drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size,
fa79abd8 1187 eq ? ID_IN_SYNC : ID_OUT_OF_SYNC);
b411b363 1188
53ea4331 1189 dec_unacked(mdev);
b411b363 1190
ea5442af
LE
1191 --mdev->ov_left;
1192
1193 /* let's advance progress step marks only for every other megabyte */
1194 if ((mdev->ov_left & 0x200) == 0x200)
1195 drbd_advance_rs_marks(mdev, mdev->ov_left);
1196
1197 if (mdev->ov_left == 0) {
8f7bed77 1198 ov_out_of_sync_print(mdev);
b411b363
PR
1199 drbd_resync_finished(mdev);
1200 }
1201
99920dc5 1202 return err;
b411b363
PR
1203}
1204
99920dc5 1205int w_prev_work_done(struct drbd_work *w, int cancel)
b411b363
PR
1206{
1207 struct drbd_wq_barrier *b = container_of(w, struct drbd_wq_barrier, w);
00d56944 1208
b411b363 1209 complete(&b->done);
99920dc5 1210 return 0;
b411b363
PR
1211}
1212
99920dc5 1213int w_send_barrier(struct drbd_work *w, int cancel)
b411b363 1214{
9f5bdc33 1215 struct drbd_socket *sock;
b411b363 1216 struct drbd_tl_epoch *b = container_of(w, struct drbd_tl_epoch, w);
00d56944 1217 struct drbd_conf *mdev = w->mdev;
9f5bdc33 1218 struct p_barrier *p;
b411b363
PR
1219
1220 /* really avoid racing with tl_clear. w.cb may have been referenced
1221 * just before it was reassigned and re-queued, so double check that.
1222 * actually, this race was harmless, since we only try to send the
1223 * barrier packet here, and otherwise do nothing with the object.
1224 * but compare with the head of w_clear_epoch */
87eeee41 1225 spin_lock_irq(&mdev->tconn->req_lock);
b411b363
PR
1226 if (w->cb != w_send_barrier || mdev->state.conn < C_CONNECTED)
1227 cancel = 1;
87eeee41 1228 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363 1229 if (cancel)
b411b363 1230 return 0;
99920dc5 1231
9f5bdc33
AG
1232 sock = &mdev->tconn->data;
1233 p = drbd_prepare_command(mdev, sock);
1234 if (!p)
1235 return -EIO;
b411b363
PR
1236 p->barrier = b->br_number;
1237 /* inc_ap_pending was done where this was queued.
1238 * dec_ap_pending will be done in got_BarrierAck
1239 * or (on connection loss) in w_clear_epoch. */
9f5bdc33 1240 return drbd_send_command(mdev, sock, P_BARRIER, sizeof(*p), NULL, 0);
b411b363
PR
1241}
1242
99920dc5 1243int w_send_write_hint(struct drbd_work *w, int cancel)
b411b363 1244{
00d56944 1245 struct drbd_conf *mdev = w->mdev;
9f5bdc33
AG
1246 struct drbd_socket *sock;
1247
b411b363 1248 if (cancel)
99920dc5 1249 return 0;
9f5bdc33
AG
1250 sock = &mdev->tconn->data;
1251 if (!drbd_prepare_command(mdev, sock))
1252 return -EIO;
e658983a 1253 return drbd_send_command(mdev, sock, P_UNPLUG_REMOTE, 0, NULL, 0);
b411b363
PR
1254}
1255
8f7bed77 1256int w_send_out_of_sync(struct drbd_work *w, int cancel)
73a01a18
PR
1257{
1258 struct drbd_request *req = container_of(w, struct drbd_request, w);
00d56944 1259 struct drbd_conf *mdev = w->mdev;
99920dc5 1260 int err;
73a01a18
PR
1261
1262 if (unlikely(cancel)) {
8554df1c 1263 req_mod(req, SEND_CANCELED);
99920dc5 1264 return 0;
73a01a18
PR
1265 }
1266
8f7bed77 1267 err = drbd_send_out_of_sync(mdev, req);
8554df1c 1268 req_mod(req, OOS_HANDED_TO_NETWORK);
73a01a18 1269
99920dc5 1270 return err;
73a01a18
PR
1271}
1272
b411b363
PR
1273/**
1274 * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request
1275 * @mdev: DRBD device.
1276 * @w: work object.
1277 * @cancel: The connection will be closed anyways
1278 */
99920dc5 1279int w_send_dblock(struct drbd_work *w, int cancel)
b411b363
PR
1280{
1281 struct drbd_request *req = container_of(w, struct drbd_request, w);
00d56944 1282 struct drbd_conf *mdev = w->mdev;
99920dc5 1283 int err;
b411b363
PR
1284
1285 if (unlikely(cancel)) {
8554df1c 1286 req_mod(req, SEND_CANCELED);
99920dc5 1287 return 0;
b411b363
PR
1288 }
1289
99920dc5
AG
1290 err = drbd_send_dblock(mdev, req);
1291 req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
b411b363 1292
99920dc5 1293 return err;
b411b363
PR
1294}
1295
1296/**
1297 * w_send_read_req() - Worker callback to send a read request (P_DATA_REQUEST) packet
1298 * @mdev: DRBD device.
1299 * @w: work object.
1300 * @cancel: The connection will be closed anyways
1301 */
99920dc5 1302int w_send_read_req(struct drbd_work *w, int cancel)
b411b363
PR
1303{
1304 struct drbd_request *req = container_of(w, struct drbd_request, w);
00d56944 1305 struct drbd_conf *mdev = w->mdev;
99920dc5 1306 int err;
b411b363
PR
1307
1308 if (unlikely(cancel)) {
8554df1c 1309 req_mod(req, SEND_CANCELED);
99920dc5 1310 return 0;
b411b363
PR
1311 }
1312
99920dc5 1313 err = drbd_send_drequest(mdev, P_DATA_REQUEST, req->i.sector, req->i.size,
6c1005e7 1314 (unsigned long)req);
b411b363 1315
99920dc5 1316 req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
b411b363 1317
99920dc5 1318 return err;
b411b363
PR
1319}
1320
99920dc5 1321int w_restart_disk_io(struct drbd_work *w, int cancel)
265be2d0
PR
1322{
1323 struct drbd_request *req = container_of(w, struct drbd_request, w);
00d56944 1324 struct drbd_conf *mdev = w->mdev;
265be2d0 1325
0778286a 1326 if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
181286ad 1327 drbd_al_begin_io(mdev, &req->i);
265be2d0
PR
1328 /* Calling drbd_al_begin_io() out of the worker might deadlock
1329 theoretically. Practically it cannot deadlock, since this is
1330 only used when unfreezing IOs. All the extents of the requests
1331 that made it into the TL are already active */
1332
1333 drbd_req_make_private_bio(req, req->master_bio);
1334 req->private_bio->bi_bdev = mdev->ldev->backing_bdev;
1335 generic_make_request(req->private_bio);
1336
99920dc5 1337 return 0;
265be2d0
PR
1338}
1339
b411b363
PR
1340static int _drbd_may_sync_now(struct drbd_conf *mdev)
1341{
1342 struct drbd_conf *odev = mdev;
daeda1cc 1343 int ra;
b411b363
PR
1344
1345 while (1) {
438c8374
PR
1346 if (!odev->ldev)
1347 return 1;
daeda1cc
PR
1348 rcu_read_lock();
1349 ra = rcu_dereference(odev->ldev->disk_conf)->resync_after;
1350 rcu_read_unlock();
1351 if (ra == -1)
b411b363 1352 return 1;
daeda1cc 1353 odev = minor_to_mdev(ra);
841ce241
AG
1354 if (!expect(odev))
1355 return 1;
b411b363
PR
1356 if ((odev->state.conn >= C_SYNC_SOURCE &&
1357 odev->state.conn <= C_PAUSED_SYNC_T) ||
1358 odev->state.aftr_isp || odev->state.peer_isp ||
1359 odev->state.user_isp)
1360 return 0;
1361 }
1362}
1363
1364/**
1365 * _drbd_pause_after() - Pause resync on all devices that may not resync now
1366 * @mdev: DRBD device.
1367 *
1368 * Called from process context only (admin command and after_state_ch).
1369 */
1370static int _drbd_pause_after(struct drbd_conf *mdev)
1371{
1372 struct drbd_conf *odev;
1373 int i, rv = 0;
1374
695d08fa 1375 rcu_read_lock();
81a5d60e 1376 idr_for_each_entry(&minors, odev, i) {
b411b363
PR
1377 if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1378 continue;
1379 if (!_drbd_may_sync_now(odev))
1380 rv |= (__drbd_set_state(_NS(odev, aftr_isp, 1), CS_HARD, NULL)
1381 != SS_NOTHING_TO_DO);
1382 }
695d08fa 1383 rcu_read_unlock();
b411b363
PR
1384
1385 return rv;
1386}
1387
1388/**
1389 * _drbd_resume_next() - Resume resync on all devices that may resync now
1390 * @mdev: DRBD device.
1391 *
1392 * Called from process context only (admin command and worker).
1393 */
1394static int _drbd_resume_next(struct drbd_conf *mdev)
1395{
1396 struct drbd_conf *odev;
1397 int i, rv = 0;
1398
695d08fa 1399 rcu_read_lock();
81a5d60e 1400 idr_for_each_entry(&minors, odev, i) {
b411b363
PR
1401 if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1402 continue;
1403 if (odev->state.aftr_isp) {
1404 if (_drbd_may_sync_now(odev))
1405 rv |= (__drbd_set_state(_NS(odev, aftr_isp, 0),
1406 CS_HARD, NULL)
1407 != SS_NOTHING_TO_DO) ;
1408 }
1409 }
695d08fa 1410 rcu_read_unlock();
b411b363
PR
1411 return rv;
1412}
1413
1414void resume_next_sg(struct drbd_conf *mdev)
1415{
1416 write_lock_irq(&global_state_lock);
1417 _drbd_resume_next(mdev);
1418 write_unlock_irq(&global_state_lock);
1419}
1420
1421void suspend_other_sg(struct drbd_conf *mdev)
1422{
1423 write_lock_irq(&global_state_lock);
1424 _drbd_pause_after(mdev);
1425 write_unlock_irq(&global_state_lock);
1426}
1427
dc97b708
PR
1428/* caller must hold global_state_lock */
1429enum drbd_ret_code drbd_sync_after_valid(struct drbd_conf *mdev, int o_minor)
b411b363
PR
1430{
1431 struct drbd_conf *odev;
daeda1cc 1432 int ra;
b411b363
PR
1433
1434 if (o_minor == -1)
1435 return NO_ERROR;
1436 if (o_minor < -1 || minor_to_mdev(o_minor) == NULL)
1437 return ERR_SYNC_AFTER;
1438
1439 /* check for loops */
1440 odev = minor_to_mdev(o_minor);
1441 while (1) {
1442 if (odev == mdev)
1443 return ERR_SYNC_AFTER_CYCLE;
1444
daeda1cc
PR
1445 rcu_read_lock();
1446 ra = rcu_dereference(odev->ldev->disk_conf)->resync_after;
1447 rcu_read_unlock();
b411b363 1448 /* dependency chain ends here, no cycles. */
daeda1cc 1449 if (ra == -1)
b411b363
PR
1450 return NO_ERROR;
1451
1452 /* follow the dependency chain */
daeda1cc 1453 odev = minor_to_mdev(ra);
b411b363
PR
1454 }
1455}
1456
dc97b708
PR
1457/* caller must hold global_state_lock */
1458void drbd_sync_after_changed(struct drbd_conf *mdev)
b411b363
PR
1459{
1460 int changes;
b411b363 1461
dc97b708
PR
1462 do {
1463 changes = _drbd_pause_after(mdev);
1464 changes |= _drbd_resume_next(mdev);
1465 } while (changes);
b411b363
PR
1466}
1467
9bd28d3c
LE
1468void drbd_rs_controller_reset(struct drbd_conf *mdev)
1469{
813472ce
PR
1470 struct fifo_buffer *plan;
1471
9bd28d3c
LE
1472 atomic_set(&mdev->rs_sect_in, 0);
1473 atomic_set(&mdev->rs_sect_ev, 0);
1474 mdev->rs_in_flight = 0;
813472ce
PR
1475
1476 /* Updating the RCU protected object in place is necessary since
1477 this function gets called from atomic context.
1478 It is valid since all other updates also lead to a completely
1479 empty fifo */
1480 rcu_read_lock();
1481 plan = rcu_dereference(mdev->rs_plan_s);
1482 plan->total = 0;
1483 fifo_set(plan, 0);
1484 rcu_read_unlock();
9bd28d3c
LE
1485}
1486
1f04af33
PR
1487void start_resync_timer_fn(unsigned long data)
1488{
1489 struct drbd_conf *mdev = (struct drbd_conf *) data;
1490
1491 drbd_queue_work(&mdev->tconn->data.work, &mdev->start_resync_work);
1492}
1493
99920dc5 1494int w_start_resync(struct drbd_work *w, int cancel)
1f04af33 1495{
00d56944
PR
1496 struct drbd_conf *mdev = w->mdev;
1497
1f04af33
PR
1498 if (atomic_read(&mdev->unacked_cnt) || atomic_read(&mdev->rs_pending_cnt)) {
1499 dev_warn(DEV, "w_start_resync later...\n");
1500 mdev->start_resync_timer.expires = jiffies + HZ/10;
1501 add_timer(&mdev->start_resync_timer);
99920dc5 1502 return 0;
1f04af33
PR
1503 }
1504
1505 drbd_start_resync(mdev, C_SYNC_SOURCE);
1506 clear_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags);
99920dc5 1507 return 0;
1f04af33
PR
1508}
1509
b411b363
PR
1510/**
1511 * drbd_start_resync() - Start the resync process
1512 * @mdev: DRBD device.
1513 * @side: Either C_SYNC_SOURCE or C_SYNC_TARGET
1514 *
1515 * This function might bring you directly into one of the
1516 * C_PAUSED_SYNC_* states.
1517 */
1518void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
1519{
1520 union drbd_state ns;
1521 int r;
1522
c4752ef1 1523 if (mdev->state.conn >= C_SYNC_SOURCE && mdev->state.conn < C_AHEAD) {
b411b363
PR
1524 dev_err(DEV, "Resync already running!\n");
1525 return;
1526 }
1527
59817f4f
PR
1528 if (mdev->state.conn < C_AHEAD) {
1529 /* In case a previous resync run was aborted by an IO error/detach on the peer. */
1530 drbd_rs_cancel_all(mdev);
1531 /* This should be done when we abort the resync. We definitely do not
1532 want to have this for connections going back and forth between
1533 Ahead/Behind and SyncSource/SyncTarget */
1534 }
b411b363 1535
e64a3294
PR
1536 if (!test_bit(B_RS_H_DONE, &mdev->flags)) {
1537 if (side == C_SYNC_TARGET) {
1538 /* Since application IO was locked out during C_WF_BITMAP_T and
1539 C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET
1540 we check that we might make the data inconsistent. */
1541 r = drbd_khelper(mdev, "before-resync-target");
1542 r = (r >> 8) & 0xff;
1543 if (r > 0) {
1544 dev_info(DEV, "before-resync-target handler returned %d, "
09b9e797 1545 "dropping connection.\n", r);
38fa9988 1546 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
09b9e797
PR
1547 return;
1548 }
e64a3294
PR
1549 } else /* C_SYNC_SOURCE */ {
1550 r = drbd_khelper(mdev, "before-resync-source");
1551 r = (r >> 8) & 0xff;
1552 if (r > 0) {
1553 if (r == 3) {
1554 dev_info(DEV, "before-resync-source handler returned %d, "
1555 "ignoring. Old userland tools?", r);
1556 } else {
1557 dev_info(DEV, "before-resync-source handler returned %d, "
1558 "dropping connection.\n", r);
38fa9988 1559 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
e64a3294
PR
1560 return;
1561 }
1562 }
09b9e797 1563 }
b411b363
PR
1564 }
1565
e64a3294 1566 if (current == mdev->tconn->worker.task) {
dad20554 1567 /* The worker should not sleep waiting for state_mutex,
e64a3294 1568 that can take long */
8410da8f 1569 if (!mutex_trylock(mdev->state_mutex)) {
e64a3294
PR
1570 set_bit(B_RS_H_DONE, &mdev->flags);
1571 mdev->start_resync_timer.expires = jiffies + HZ/5;
1572 add_timer(&mdev->start_resync_timer);
1573 return;
1574 }
1575 } else {
8410da8f 1576 mutex_lock(mdev->state_mutex);
e64a3294
PR
1577 }
1578 clear_bit(B_RS_H_DONE, &mdev->flags);
b411b363
PR
1579
1580 if (!get_ldev_if_state(mdev, D_NEGOTIATING)) {
8410da8f 1581 mutex_unlock(mdev->state_mutex);
b411b363
PR
1582 return;
1583 }
1584
b411b363 1585 write_lock_irq(&global_state_lock);
78bae59b 1586 ns = drbd_read_state(mdev);
b411b363
PR
1587
1588 ns.aftr_isp = !_drbd_may_sync_now(mdev);
1589
1590 ns.conn = side;
1591
1592 if (side == C_SYNC_TARGET)
1593 ns.disk = D_INCONSISTENT;
1594 else /* side == C_SYNC_SOURCE */
1595 ns.pdsk = D_INCONSISTENT;
1596
1597 r = __drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
78bae59b 1598 ns = drbd_read_state(mdev);
b411b363
PR
1599
1600 if (ns.conn < C_CONNECTED)
1601 r = SS_UNKNOWN_ERROR;
1602
1603 if (r == SS_SUCCESS) {
1d7734a0
LE
1604 unsigned long tw = drbd_bm_total_weight(mdev);
1605 unsigned long now = jiffies;
1606 int i;
1607
b411b363
PR
1608 mdev->rs_failed = 0;
1609 mdev->rs_paused = 0;
b411b363 1610 mdev->rs_same_csum = 0;
0f0601f4
LE
1611 mdev->rs_last_events = 0;
1612 mdev->rs_last_sect_ev = 0;
1d7734a0
LE
1613 mdev->rs_total = tw;
1614 mdev->rs_start = now;
1615 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
1616 mdev->rs_mark_left[i] = tw;
1617 mdev->rs_mark_time[i] = now;
1618 }
b411b363
PR
1619 _drbd_pause_after(mdev);
1620 }
1621 write_unlock_irq(&global_state_lock);
5a22db89 1622
b411b363
PR
1623 if (r == SS_SUCCESS) {
1624 dev_info(DEV, "Began resync as %s (will sync %lu KB [%lu bits set]).\n",
1625 drbd_conn_str(ns.conn),
1626 (unsigned long) mdev->rs_total << (BM_BLOCK_SHIFT-10),
1627 (unsigned long) mdev->rs_total);
6c922ed5
LE
1628 if (side == C_SYNC_TARGET)
1629 mdev->bm_resync_fo = 0;
1630
1631 /* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid
1632 * with w_send_oos, or the sync target will get confused as to
1633 * how many bits to resync. We cannot do that always, because for an
1634 * empty resync and protocol < 95, we need to do it here, as we call
1635 * drbd_resync_finished from here in that case.
1636 * We drbd_gen_and_send_sync_uuid here for protocol < 96,
1637 * and from after_state_ch otherwise. */
31890f4a 1638 if (side == C_SYNC_SOURCE && mdev->tconn->agreed_pro_version < 96)
6c922ed5 1639 drbd_gen_and_send_sync_uuid(mdev);
b411b363 1640
31890f4a 1641 if (mdev->tconn->agreed_pro_version < 95 && mdev->rs_total == 0) {
af85e8e8
LE
1642 /* This still has a race (about when exactly the peers
1643 * detect connection loss) that can lead to a full sync
1644 * on next handshake. In 8.3.9 we fixed this with explicit
1645 * resync-finished notifications, but the fix
1646 * introduces a protocol change. Sleeping for some
1647 * time longer than the ping interval + timeout on the
1648 * SyncSource, to give the SyncTarget the chance to
1649 * detect connection loss, then waiting for a ping
1650 * response (implicit in drbd_resync_finished) reduces
1651 * the race considerably, but does not solve it. */
44ed167d
PR
1652 if (side == C_SYNC_SOURCE) {
1653 struct net_conf *nc;
1654 int timeo;
1655
1656 rcu_read_lock();
1657 nc = rcu_dereference(mdev->tconn->net_conf);
1658 timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9;
1659 rcu_read_unlock();
1660 schedule_timeout_interruptible(timeo);
1661 }
b411b363 1662 drbd_resync_finished(mdev);
b411b363
PR
1663 }
1664
9bd28d3c 1665 drbd_rs_controller_reset(mdev);
b411b363
PR
1666 /* ns.conn may already be != mdev->state.conn,
1667 * we may have been paused in between, or become paused until
1668 * the timer triggers.
1669 * No matter, that is handled in resync_timer_fn() */
1670 if (ns.conn == C_SYNC_TARGET)
1671 mod_timer(&mdev->resync_timer, jiffies);
1672
1673 drbd_md_sync(mdev);
1674 }
5a22db89 1675 put_ldev(mdev);
8410da8f 1676 mutex_unlock(mdev->state_mutex);
b411b363
PR
1677}
1678
1679int drbd_worker(struct drbd_thread *thi)
1680{
392c8801 1681 struct drbd_tconn *tconn = thi->tconn;
b411b363 1682 struct drbd_work *w = NULL;
0e29d163 1683 struct drbd_conf *mdev;
44ed167d 1684 struct net_conf *nc;
b411b363 1685 LIST_HEAD(work_list);
f399002e 1686 int vnr, intr = 0;
44ed167d 1687 int cork;
b411b363 1688
e77a0a5c 1689 while (get_t_state(thi) == RUNNING) {
80822284 1690 drbd_thread_current_set_cpu(thi);
b411b363 1691
19393e10
PR
1692 if (down_trylock(&tconn->data.work.s)) {
1693 mutex_lock(&tconn->data.mutex);
44ed167d
PR
1694
1695 rcu_read_lock();
1696 nc = rcu_dereference(tconn->net_conf);
bb77d34e 1697 cork = nc ? nc->tcp_cork : 0;
44ed167d
PR
1698 rcu_read_unlock();
1699
1700 if (tconn->data.socket && cork)
19393e10
PR
1701 drbd_tcp_uncork(tconn->data.socket);
1702 mutex_unlock(&tconn->data.mutex);
b411b363 1703
19393e10 1704 intr = down_interruptible(&tconn->data.work.s);
b411b363 1705
19393e10 1706 mutex_lock(&tconn->data.mutex);
44ed167d 1707 if (tconn->data.socket && cork)
19393e10
PR
1708 drbd_tcp_cork(tconn->data.socket);
1709 mutex_unlock(&tconn->data.mutex);
b411b363
PR
1710 }
1711
1712 if (intr) {
b411b363 1713 flush_signals(current);
19393e10
PR
1714 if (get_t_state(thi) == RUNNING) {
1715 conn_warn(tconn, "Worker got an unexpected signal\n");
b411b363 1716 continue;
19393e10 1717 }
b411b363
PR
1718 break;
1719 }
1720
e77a0a5c 1721 if (get_t_state(thi) != RUNNING)
b411b363
PR
1722 break;
1723 /* With this break, we have done a down() but not consumed
1724 the entry from the list. The cleanup code takes care of
1725 this... */
1726
1727 w = NULL;
19393e10
PR
1728 spin_lock_irq(&tconn->data.work.q_lock);
1729 if (list_empty(&tconn->data.work.q)) {
b411b363
PR
1730 /* something terribly wrong in our logic.
1731 * we were able to down() the semaphore,
1732 * but the list is empty... doh.
1733 *
1734 * what is the best thing to do now?
1735 * try again from scratch, restarting the receiver,
1736 * asender, whatnot? could break even more ugly,
1737 * e.g. when we are primary, but no good local data.
1738 *
1739 * I'll try to get away just starting over this loop.
1740 */
19393e10
PR
1741 conn_warn(tconn, "Work list unexpectedly empty\n");
1742 spin_unlock_irq(&tconn->data.work.q_lock);
b411b363
PR
1743 continue;
1744 }
19393e10 1745 w = list_entry(tconn->data.work.q.next, struct drbd_work, list);
b411b363 1746 list_del_init(&w->list);
19393e10 1747 spin_unlock_irq(&tconn->data.work.q_lock);
b411b363 1748
99920dc5 1749 if (w->cb(w, tconn->cstate < C_WF_REPORT_PARAMS)) {
b411b363 1750 /* dev_warn(DEV, "worker: a callback failed! \n"); */
bbeb641c
PR
1751 if (tconn->cstate >= C_WF_REPORT_PARAMS)
1752 conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD);
b411b363
PR
1753 }
1754 }
b411b363 1755
19393e10
PR
1756 spin_lock_irq(&tconn->data.work.q_lock);
1757 while (!list_empty(&tconn->data.work.q)) {
1758 list_splice_init(&tconn->data.work.q, &work_list);
1759 spin_unlock_irq(&tconn->data.work.q_lock);
b411b363
PR
1760
1761 while (!list_empty(&work_list)) {
1762 w = list_entry(work_list.next, struct drbd_work, list);
1763 list_del_init(&w->list);
00d56944 1764 w->cb(w, 1);
b411b363
PR
1765 }
1766
19393e10 1767 spin_lock_irq(&tconn->data.work.q_lock);
b411b363 1768 }
19393e10 1769 sema_init(&tconn->data.work.s, 0);
b411b363
PR
1770 /* DANGEROUS race: if someone did queue his work within the spinlock,
1771 * but up() ed outside the spinlock, we could get an up() on the
1772 * semaphore without corresponding list entry.
1773 * So don't do that.
1774 */
19393e10 1775 spin_unlock_irq(&tconn->data.work.q_lock);
b411b363 1776
c141ebda 1777 rcu_read_lock();
f399002e 1778 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
0e29d163 1779 D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE);
c141ebda
PR
1780 kref_get(&mdev->kref);
1781 rcu_read_unlock();
0e29d163 1782 drbd_mdev_cleanup(mdev);
c141ebda
PR
1783 kref_put(&mdev->kref, &drbd_minor_destroy);
1784 rcu_read_lock();
0e29d163 1785 }
c141ebda 1786 rcu_read_unlock();
b411b363
PR
1787
1788 return 0;
1789}