drbd: Update some outdated comments to match the code
drivers/block/drbd/drbd_req.c
/*
   drbd_req.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

 */

#include <linux/module.h>

#include <linux/slab.h>
#include <linux/drbd.h>
#include "drbd_int.h"
#include "drbd_req.h"

/* Update disk stats at start of I/O request */
static void _drbd_start_io_acct(struct drbd_conf *mdev, struct drbd_request *req, struct bio *bio)
{
	const int rw = bio_data_dir(bio);
	int cpu;
	cpu = part_stat_lock();
	part_stat_inc(cpu, &mdev->vdisk->part0, ios[rw]);
	part_stat_add(cpu, &mdev->vdisk->part0, sectors[rw], bio_sectors(bio));
	part_inc_in_flight(&mdev->vdisk->part0, rw);
	part_stat_unlock();
}

/* Update disk stats when completing request upwards */
static void _drbd_end_io_acct(struct drbd_conf *mdev, struct drbd_request *req)
{
	int rw = bio_data_dir(req->master_bio);
	unsigned long duration = jiffies - req->start_time;
	int cpu;
	cpu = part_stat_lock();
	part_stat_add(cpu, &mdev->vdisk->part0, ticks[rw], duration);
	part_round_stats(cpu, &mdev->vdisk->part0);
	part_dec_in_flight(&mdev->vdisk->part0, rw);
	part_stat_unlock();
}

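/* Allocate a drbd_request from the mempool and initialize it to track the
 * given master bio: set up the private bio, record direction, sector and
 * size in req->i, and initialize the transfer-log and work list heads. */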
static struct drbd_request *drbd_req_new(struct drbd_conf *mdev,
					 struct bio *bio_src)
{
	struct drbd_request *req;

	req = mempool_alloc(drbd_request_mempool, GFP_NOIO);
	if (!req)
		return NULL;

	drbd_req_make_private_bio(req, bio_src);
	req->rq_state = bio_data_dir(bio_src) == WRITE ? RQ_WRITE : 0;
	req->w.mdev = mdev;
	req->master_bio = bio_src;
	req->epoch = 0;

	drbd_clear_interval(&req->i);
	req->i.sector = bio_src->bi_sector;
	req->i.size = bio_src->bi_size;
	req->i.local = true;
	req->i.waiting = false;

	INIT_LIST_HEAD(&req->tl_requests);
	INIT_LIST_HEAD(&req->w.list);

	return req;
}

static void drbd_req_free(struct drbd_request *req)
{
	mempool_free(req, drbd_request_mempool);
}

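/* Final cleanup of a request: remove it from the transfer log, update the
 * bitmap for writes that did not make it to both sides, release the activity
 * log reference if we held one, and free the request object. */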
/* rw is bio_data_dir(), only READ or WRITE */
static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const int rw)
{
	const unsigned long s = req->rq_state;

	/* remove it from the transfer log.
	 * well, only if it had been there in the first
	 * place... if it had not (local only or conflicting
	 * and never sent), it should still be "empty" as
	 * initialized in drbd_req_new(), so we can list_del() it
	 * here unconditionally */
	list_del(&req->tl_requests);

	/* if it was a write, we may have to set the corresponding
	 * bit(s) out-of-sync first. If it had a local part, we need to
	 * release the reference to the activity log. */
	if (rw == WRITE) {
		/* Set out-of-sync unless both OK flags are set
		 * (local only or remote failed).
		 * Other places where we set out-of-sync:
		 * READ with local io-error */
		if (!(s & RQ_NET_OK) || !(s & RQ_LOCAL_OK))
			drbd_set_out_of_sync(mdev, req->i.sector, req->i.size);

		if ((s & RQ_NET_OK) && (s & RQ_LOCAL_OK) && (s & RQ_NET_SIS))
			drbd_set_in_sync(mdev, req->i.sector, req->i.size);

		/* one might be tempted to move the drbd_al_complete_io
		 * to the local io completion callback drbd_request_endio.
		 * but, if this was a mirror write, we may only
		 * drbd_al_complete_io after this is RQ_NET_DONE,
		 * otherwise the extent could be dropped from the al
		 * before it has actually been written on the peer.
		 * if we crash before our peer knows about the request,
		 * but after the extent has been dropped from the al,
		 * we would forget to resync the corresponding extent.
		 */
		if (s & RQ_LOCAL_MASK) {
			if (get_ldev_if_state(mdev, D_FAILED)) {
				if (s & RQ_IN_ACT_LOG)
					drbd_al_complete_io(mdev, &req->i);
				put_ldev(mdev);
			} else if (__ratelimit(&drbd_ratelimit_state)) {
				dev_warn(DEV, "Should have called drbd_al_complete_io(, %llu, %u), "
					 "but my Disk seems to have failed :(\n",
					 (unsigned long long) req->i.sector, req->i.size);
			}
		}
	}

	drbd_req_free(req);
}

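/* Queue the barrier work item that closes the current epoch, unless one is
 * already queued for this epoch (indicated by the CREATE_BARRIER bit). */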
static void queue_barrier(struct drbd_conf *mdev)
{
	struct drbd_tl_epoch *b;

	/* We are within the req_lock. Once we queued the barrier for sending,
	 * we set the CREATE_BARRIER bit. It is cleared as soon as a new
	 * barrier/epoch object is added. This is the only place this bit is
	 * set. It indicates that the barrier for this epoch is already queued,
	 * and no new epoch has been created yet. */
	if (test_bit(CREATE_BARRIER, &mdev->flags))
		return;

	b = mdev->tconn->newest_tle;
	b->w.cb = w_send_barrier;
	b->w.mdev = mdev;
	/* inc_ap_pending done here, so we won't
	 * get imbalanced on connection loss.
	 * dec_ap_pending will be done in got_BarrierAck
	 * or (on connection loss) in tl_clear. */
	inc_ap_pending(mdev);
	drbd_queue_work(&mdev->tconn->data.work, &b->w);
	set_bit(CREATE_BARRIER, &mdev->flags);
}

static void _about_to_complete_local_write(struct drbd_conf *mdev,
					   struct drbd_request *req)
{
	const unsigned long s = req->rq_state;

	/* Before we can signal completion to the upper layers,
	 * we may need to close the current epoch.
	 * We can skip this, if this request has not even been sent, because we
	 * did not have a fully established connection yet/anymore, during
	 * bitmap exchange, or while we are C_AHEAD due to congestion policy.
	 */
	if (mdev->state.conn >= C_CONNECTED &&
	    (s & RQ_NET_SENT) != 0 &&
	    req->epoch == mdev->tconn->newest_tle->br_number)
		queue_barrier(mdev);
}

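/* Complete the master bio towards the upper layers with the collected error
 * code, and drop the application bio reference count. */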
void complete_master_bio(struct drbd_conf *mdev,
		struct bio_and_error *m)
{
	bio_endio(m->bio, m->error);
	dec_ap_bio(mdev);
}

static void drbd_remove_request_interval(struct rb_root *root,
					 struct drbd_request *req)
{
	struct drbd_conf *mdev = req->w.mdev;
	struct drbd_interval *i = &req->i;

	drbd_remove_interval(root, i);

	/* Wake up any processes waiting for this request to complete. */
	if (i->waiting)
		wake_up(&mdev->misc_wait);
}

/* Helper for __req_mod().
 * Set m->bio to the master bio, if it is fit to be completed,
 * or leave it alone (it is initialized to NULL in __req_mod),
 * if it has already been completed, or cannot be completed yet.
 * If m->bio is set, the error status to be returned is placed in m->error.
 */
void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m)
{
	const unsigned long s = req->rq_state;
	struct drbd_conf *mdev = req->w.mdev;
	int rw = req->rq_state & RQ_WRITE ? WRITE : READ;

	/* we must not complete the master bio, while it is
	 *	still being processed by _drbd_send_zc_bio (drbd_send_dblock)
	 *	not yet acknowledged by the peer
	 *	not yet completed by the local io subsystem
	 * these flags may get cleared in any order by
	 *	the worker,
	 *	the receiver,
	 *	the bio_endio completion callbacks.
	 */
	if (s & RQ_LOCAL_PENDING && !(s & RQ_LOCAL_ABORTED))
		return;
	if (req->i.waiting) {
		/* Retry all conflicting peer requests. */
		wake_up(&mdev->misc_wait);
	}
	if (s & RQ_NET_QUEUED)
		return;
	if (s & RQ_NET_PENDING)
		return;

	if (req->master_bio) {
		/* this is DATA_RECEIVED (remote read)
		 * or protocol C P_WRITE_ACK
		 * or protocol B P_RECV_ACK
		 * or protocol A "HANDED_OVER_TO_NETWORK" (SendAck)
		 * or canceled or failed,
		 * or killed from the transfer log due to connection loss.
		 */

		/*
		 * figure out whether to report success or failure.
		 *
		 * report success when at least one of the operations succeeded.
		 * or, to put the other way,
		 * only report failure, when both operations failed.
		 *
		 * what to do about the failures is handled elsewhere.
		 * what we need to do here is just: complete the master_bio.
		 *
		 * local completion error, if any, has been stored as ERR_PTR
		 * in private_bio within drbd_request_endio.
		 */
		int ok = (s & RQ_LOCAL_OK) || (s & RQ_NET_OK);
		int error = PTR_ERR(req->private_bio);

		/* remove the request from the conflict detection
		 * respective block_id verification hash */
		if (!drbd_interval_empty(&req->i)) {
			struct rb_root *root;

			if (rw == WRITE)
				root = &mdev->write_requests;
			else
				root = &mdev->read_requests;
			drbd_remove_request_interval(root, req);
		} else if (!(s & RQ_POSTPONED))
			D_ASSERT((s & (RQ_NET_MASK & ~RQ_NET_DONE)) == 0);

		/* for writes we need to do some extra housekeeping */
		if (rw == WRITE)
			_about_to_complete_local_write(mdev, req);

		/* Update disk stats */
		_drbd_end_io_acct(mdev, req);

		if (!(s & RQ_POSTPONED)) {
			m->error = ok ? 0 : (error ?: -EIO);
			m->bio = req->master_bio;
		}
		req->master_bio = NULL;
	}

	if (s & RQ_LOCAL_PENDING)
		return;

	if ((s & RQ_NET_MASK) == 0 || (s & RQ_NET_DONE)) {
		/* this is disconnected (local only) operation,
		 * or protocol A, B, or C P_BARRIER_ACK,
		 * or killed from the transfer log due to connection loss. */
		_req_is_done(mdev, req, rw);
	}
	/* else: network part and not DONE yet. that is
	 * protocol A, B, or C, barrier ack still pending... */
}

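/* Like _req_may_be_done(), but only complete the request while IO on this
 * device is not suspended. */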
static void _req_may_be_done_not_susp(struct drbd_request *req, struct bio_and_error *m)
{
	struct drbd_conf *mdev = req->w.mdev;

	if (!drbd_suspended(mdev))
		_req_may_be_done(req, m);
}

/* obviously this could be coded as many single functions
 * instead of one huge switch,
 * or by putting the code directly in the respective locations
 * (as it has been before).
 *
 * but having it this way
 * enforces that it is all in this one place, where it is easier to audit,
 * it makes it obvious that whatever "event" "happens" to a request should
 * happen "atomically" within the req_lock,
 * and it enforces that we have to think in a very structured manner
 * about the "events" that may happen to a request during its life time ...
 */
int __req_mod(struct drbd_request *req, enum drbd_req_event what,
		struct bio_and_error *m)
{
	struct drbd_conf *mdev = req->w.mdev;
	struct net_conf *nc;
	int p, rv = 0;

	if (m)
		m->bio = NULL;

	switch (what) {
	default:
		dev_err(DEV, "LOGIC BUG in %s:%u\n", __FILE__ , __LINE__);
		break;

	/* does not happen...
	 * initialization done in drbd_req_new
	case CREATED:
		break;
	*/

	case TO_BE_SENT: /* via network */
		/* reached via __drbd_make_request
		 * and from w_read_retry_remote */
		D_ASSERT(!(req->rq_state & RQ_NET_MASK));
		req->rq_state |= RQ_NET_PENDING;
		rcu_read_lock();
		nc = rcu_dereference(mdev->tconn->net_conf);
		p = nc->wire_protocol;
		rcu_read_unlock();
		req->rq_state |=
			p == DRBD_PROT_C ? RQ_EXP_WRITE_ACK :
			p == DRBD_PROT_B ? RQ_EXP_RECEIVE_ACK : 0;
		inc_ap_pending(mdev);
		break;

	case TO_BE_SUBMITTED: /* locally */
		/* reached via __drbd_make_request */
		D_ASSERT(!(req->rq_state & RQ_LOCAL_MASK));
		req->rq_state |= RQ_LOCAL_PENDING;
		break;

	case COMPLETED_OK:
		if (req->rq_state & RQ_WRITE)
			mdev->writ_cnt += req->i.size >> 9;
		else
			mdev->read_cnt += req->i.size >> 9;

		req->rq_state |= (RQ_LOCAL_COMPLETED|RQ_LOCAL_OK);
		req->rq_state &= ~RQ_LOCAL_PENDING;

		_req_may_be_done_not_susp(req, m);
		put_ldev(mdev);
		break;

	case ABORT_DISK_IO:
		req->rq_state |= RQ_LOCAL_ABORTED;
		if (req->rq_state & RQ_WRITE)
			_req_may_be_done_not_susp(req, m);
		else
			goto goto_queue_for_net_read;
		break;

	case WRITE_COMPLETED_WITH_ERROR:
		req->rq_state |= RQ_LOCAL_COMPLETED;
		req->rq_state &= ~RQ_LOCAL_PENDING;

		__drbd_chk_io_error(mdev, false);
		_req_may_be_done_not_susp(req, m);
		put_ldev(mdev);
		break;

	case READ_AHEAD_COMPLETED_WITH_ERROR:
		/* it is legal to fail READA */
		req->rq_state |= RQ_LOCAL_COMPLETED;
		req->rq_state &= ~RQ_LOCAL_PENDING;
		_req_may_be_done_not_susp(req, m);
		put_ldev(mdev);
		break;

	case READ_COMPLETED_WITH_ERROR:
		drbd_set_out_of_sync(mdev, req->i.sector, req->i.size);

		req->rq_state |= RQ_LOCAL_COMPLETED;
		req->rq_state &= ~RQ_LOCAL_PENDING;

		D_ASSERT(!(req->rq_state & RQ_NET_MASK));

		__drbd_chk_io_error(mdev, false);
		put_ldev(mdev);

	goto_queue_for_net_read:

		/* no point in retrying if there is no good remote data,
		 * or we have no connection. */
		if (mdev->state.pdsk != D_UP_TO_DATE) {
			_req_may_be_done_not_susp(req, m);
			break;
		}

		/* _req_mod(req,TO_BE_SENT); oops, recursion... */
		req->rq_state |= RQ_NET_PENDING;
		inc_ap_pending(mdev);
		/* fall through: _req_mod(req,QUEUE_FOR_NET_READ); */

	case QUEUE_FOR_NET_READ:
		/* READ or READA, and
		 * no local disk,
		 * or target area marked as invalid,
		 * or just got an io-error. */
		/* from __drbd_make_request
		 * or from bio_endio during read io-error recovery */

		/* so we can verify the handle in the answer packet
		 * corresponding hlist_del is in _req_may_be_done() */
		D_ASSERT(drbd_interval_empty(&req->i));
		drbd_insert_interval(&mdev->read_requests, &req->i);

		set_bit(UNPLUG_REMOTE, &mdev->flags);

		D_ASSERT(req->rq_state & RQ_NET_PENDING);
		req->rq_state |= RQ_NET_QUEUED;
		req->w.cb = (req->rq_state & RQ_LOCAL_MASK)
			? w_read_retry_remote
			: w_send_read_req;
		drbd_queue_work(&mdev->tconn->data.work, &req->w);
		break;

	case QUEUE_FOR_NET_WRITE:
		/* assert something? */
		/* from __drbd_make_request only */

		/* corresponding hlist_del is in _req_may_be_done() */
		D_ASSERT(drbd_interval_empty(&req->i));
		drbd_insert_interval(&mdev->write_requests, &req->i);

		/* NOTE
		 * In case the req ended up on the transfer log before being
		 * queued on the worker, it could lead to this request being
		 * missed during cleanup after connection loss.
		 * So we have to do both operations here,
		 * within the same lock that protects the transfer log.
		 *
		 * _req_add_to_epoch(req); this has to be after the
		 * _maybe_start_new_epoch(req); which happened in
		 * __drbd_make_request, because we now may set the bit
		 * again ourselves to close the current epoch.
		 *
		 * Add req to the (now) current epoch (barrier). */

		/* otherwise we may lose an unplug, which may cause some remote
		 * io-scheduler timeout to expire, increasing maximum latency,
		 * hurting performance. */
		set_bit(UNPLUG_REMOTE, &mdev->flags);

		/* see __drbd_make_request,
		 * just after it grabs the req_lock */
		D_ASSERT(test_bit(CREATE_BARRIER, &mdev->flags) == 0);

		req->epoch = mdev->tconn->newest_tle->br_number;

		/* increment size of current epoch */
		mdev->tconn->newest_tle->n_writes++;

		/* queue work item to send data */
		D_ASSERT(req->rq_state & RQ_NET_PENDING);
		req->rq_state |= RQ_NET_QUEUED;
		req->w.cb = w_send_dblock;
		drbd_queue_work(&mdev->tconn->data.work, &req->w);

		/* close the epoch, in case it outgrew the limit */
		rcu_read_lock();
		nc = rcu_dereference(mdev->tconn->net_conf);
		p = nc->max_epoch_size;
		rcu_read_unlock();
		if (mdev->tconn->newest_tle->n_writes >= p)
			queue_barrier(mdev);

		break;

	case QUEUE_FOR_SEND_OOS:
		req->rq_state |= RQ_NET_QUEUED;
		req->w.cb = w_send_out_of_sync;
		drbd_queue_work(&mdev->tconn->data.work, &req->w);
		break;

	case OOS_HANDED_TO_NETWORK:
		/* actually the same */
	case SEND_CANCELED:
		/* treat it the same */
	case SEND_FAILED:
		/* real cleanup will be done from tl_clear.  just update flags
		 * so it is no longer marked as on the worker queue */
		req->rq_state &= ~RQ_NET_QUEUED;
		/* if we did it right, tl_clear should be scheduled only after
		 * this, so this should not be necessary! */
		_req_may_be_done_not_susp(req, m);
		break;

	case HANDED_OVER_TO_NETWORK:
		/* assert something? */
		if (bio_data_dir(req->master_bio) == WRITE)
			atomic_add(req->i.size >> 9, &mdev->ap_in_flight);

		if (bio_data_dir(req->master_bio) == WRITE &&
		    !(req->rq_state & (RQ_EXP_RECEIVE_ACK | RQ_EXP_WRITE_ACK))) {
			/* this is what is dangerous about protocol A:
			 * pretend it was successfully written on the peer. */
			if (req->rq_state & RQ_NET_PENDING) {
				dec_ap_pending(mdev);
				req->rq_state &= ~RQ_NET_PENDING;
				req->rq_state |= RQ_NET_OK;
			} /* else: neg-ack was faster... */
			/* it is still not yet RQ_NET_DONE until the
			 * corresponding epoch barrier got acked as well,
			 * so we know what to dirty on connection loss */
		}
		req->rq_state &= ~RQ_NET_QUEUED;
		req->rq_state |= RQ_NET_SENT;
		/* because _drbd_send_zc_bio could sleep, and may want to
		 * dereference the bio even after the "WRITE_ACKED_BY_PEER" and
		 * "COMPLETED_OK" events came in, once we return from
		 * _drbd_send_zc_bio (drbd_send_dblock), we have to check
		 * whether it is done already, and end it. */
		_req_may_be_done_not_susp(req, m);
		break;

	case READ_RETRY_REMOTE_CANCELED:
		req->rq_state &= ~RQ_NET_QUEUED;
		/* fall through, in case we raced with drbd_disconnect */
	case CONNECTION_LOST_WHILE_PENDING:
		/* transfer log cleanup after connection loss */
		/* assert something? */
		if (req->rq_state & RQ_NET_PENDING)
			dec_ap_pending(mdev);
		req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING);
		req->rq_state |= RQ_NET_DONE;
		if (req->rq_state & RQ_NET_SENT && req->rq_state & RQ_WRITE)
			atomic_sub(req->i.size >> 9, &mdev->ap_in_flight);

		/* if it is still queued, we may not complete it here.
		 * it will be canceled soon. */
		if (!(req->rq_state & RQ_NET_QUEUED))
			_req_may_be_done(req, m); /* Allowed while state.susp */
		break;

	case WRITE_ACKED_BY_PEER_AND_SIS:
		req->rq_state |= RQ_NET_SIS;
	case DISCARD_WRITE:
		/* for discarded conflicting writes of multiple primaries,
		 * there is no need to keep anything in the tl, potential
		 * node crashes are covered by the activity log. */
		req->rq_state |= RQ_NET_DONE;
		/* fall through */
	case WRITE_ACKED_BY_PEER:
		D_ASSERT(req->rq_state & RQ_EXP_WRITE_ACK);
		/* protocol C; successfully written on peer.
		 * Nothing to do here.
		 * We want to keep the tl in place for all protocols, to cater
		 * for volatile write-back caches on lower level devices.
		 *
		 * A barrier request is expected to have forced all prior
		 * requests onto stable storage, so completion of a barrier
		 * request could set NET_DONE right here, and not wait for the
		 * P_BARRIER_ACK, but that is an unnecessary optimization. */

		goto ack_common;
		/* this makes it effectively the same as for: */
	case RECV_ACKED_BY_PEER:
		D_ASSERT(req->rq_state & RQ_EXP_RECEIVE_ACK);
		/* protocol B; pretends to be successfully written on peer.
		 * see also notes above in HANDED_OVER_TO_NETWORK about
		 * protocol != C */
	ack_common:
		req->rq_state |= RQ_NET_OK;
		D_ASSERT(req->rq_state & RQ_NET_PENDING);
		dec_ap_pending(mdev);
		atomic_sub(req->i.size >> 9, &mdev->ap_in_flight);
		req->rq_state &= ~RQ_NET_PENDING;
		_req_may_be_done_not_susp(req, m);
		break;

	case POSTPONE_WRITE:
		D_ASSERT(req->rq_state & RQ_EXP_WRITE_ACK);
		/* If this node has already detected the write conflict, the
		 * worker will be waiting on misc_wait.  Wake it up once this
		 * request has completed locally.
		 */
		D_ASSERT(req->rq_state & RQ_NET_PENDING);
		req->rq_state |= RQ_POSTPONED;
		_req_may_be_done_not_susp(req, m);
		break;

	case NEG_ACKED:
		/* assert something? */
		if (req->rq_state & RQ_NET_PENDING) {
			dec_ap_pending(mdev);
			atomic_sub(req->i.size >> 9, &mdev->ap_in_flight);
		}
		req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING);

		req->rq_state |= RQ_NET_DONE;
		_req_may_be_done_not_susp(req, m);
		/* else: done by HANDED_OVER_TO_NETWORK */
		break;

	case FAIL_FROZEN_DISK_IO:
		if (!(req->rq_state & RQ_LOCAL_COMPLETED))
			break;

		_req_may_be_done(req, m); /* Allowed while state.susp */
		break;

	case RESTART_FROZEN_DISK_IO:
		if (!(req->rq_state & RQ_LOCAL_COMPLETED))
			break;

		req->rq_state &= ~RQ_LOCAL_COMPLETED;

		rv = MR_READ;
		if (bio_data_dir(req->master_bio) == WRITE)
			rv = MR_WRITE;

		get_ldev(mdev);
		req->w.cb = w_restart_disk_io;
		drbd_queue_work(&mdev->tconn->data.work, &req->w);
		break;

	case RESEND:
		/* If RQ_NET_OK is already set, we got a P_WRITE_ACK or P_RECV_ACK
		   before the connection loss (B&C only); only P_BARRIER_ACK was missing.
		   Throwing them out of the TL here by pretending we got a BARRIER_ACK,
		   we ensure that the peer was not rebooted. */
		if (!(req->rq_state & RQ_NET_OK)) {
			if (req->w.cb) {
				drbd_queue_work(&mdev->tconn->data.work, &req->w);
				rv = req->rq_state & RQ_WRITE ? MR_WRITE : MR_READ;
			}
			break;
		}
		/* else, fall through to BARRIER_ACKED */

	case BARRIER_ACKED:
		if (!(req->rq_state & RQ_WRITE))
			break;

		if (req->rq_state & RQ_NET_PENDING) {
			/* barrier came in before all requests were acked.
			 * this is bad, because if the connection is lost now,
			 * we won't be able to clean them up... */
			dev_err(DEV, "FIXME (BARRIER_ACKED but pending)\n");
			list_move(&req->tl_requests, &mdev->tconn->out_of_sequence_requests);
		}
		if ((req->rq_state & RQ_NET_MASK) != 0) {
			req->rq_state |= RQ_NET_DONE;
			if (!(req->rq_state & (RQ_EXP_RECEIVE_ACK | RQ_EXP_WRITE_ACK)))
				atomic_sub(req->i.size>>9, &mdev->ap_in_flight);
		}
		_req_may_be_done(req, m); /* Allowed while state.susp */
		break;

	case DATA_RECEIVED:
		D_ASSERT(req->rq_state & RQ_NET_PENDING);
		dec_ap_pending(mdev);
		req->rq_state &= ~RQ_NET_PENDING;
		req->rq_state |= (RQ_NET_OK|RQ_NET_DONE);
		_req_may_be_done_not_susp(req, m);
		break;
	};

	return rv;
}

/* we may do a local read if:
 * - we are consistent (of course),
 * - or we are generally inconsistent,
 *   BUT we are still/already IN SYNC for this area.
 *   since size may be bigger than BM_BLOCK_SIZE,
 *   we may need to check several bits.
 */
static bool drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int size)
{
	unsigned long sbnr, ebnr;
	sector_t esector, nr_sectors;

	if (mdev->state.disk == D_UP_TO_DATE)
		return true;
	if (mdev->state.disk != D_INCONSISTENT)
		return false;
	esector = sector + (size >> 9) - 1;
	nr_sectors = drbd_get_capacity(mdev->this_bdev);
	D_ASSERT(sector < nr_sectors);
	D_ASSERT(esector < nr_sectors);

	sbnr = BM_SECT_TO_BIT(sector);
	ebnr = BM_SECT_TO_BIT(esector);

	return drbd_bm_count_bits(mdev, sbnr, ebnr) == 0;
}

/*
 * complete_conflicting_writes  -  wait for any conflicting write requests
 *
 * The write_requests tree contains all active write requests which we
 * currently know about.  Wait for any requests to complete which conflict with
 * the new one.
 */
static int complete_conflicting_writes(struct drbd_conf *mdev,
				       sector_t sector, int size)
{
	for(;;) {
		struct drbd_interval *i;
		int err;

		i = drbd_find_overlap(&mdev->write_requests, sector, size);
		if (!i)
			return 0;
		err = drbd_wait_misc(mdev, i);
		if (err)
			return err;
	}
}

int __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long start_time)
{
	const int rw = bio_rw(bio);
	const int size = bio->bi_size;
	const sector_t sector = bio->bi_sector;
	struct drbd_tl_epoch *b = NULL;
	struct drbd_request *req;
	struct net_conf *nc;
	int local, remote, send_oos = 0;
	int err;
	int ret = 0;

	/* allocate outside of all locks; */
	req = drbd_req_new(mdev, bio);
	if (!req) {
		dec_ap_bio(mdev);
		/* only pass the error to the upper layers.
		 * if user cannot handle io errors, that's not our business. */
		dev_err(DEV, "could not kmalloc() req\n");
		bio_endio(bio, -ENOMEM);
		return 0;
	}
	req->start_time = start_time;

	local = get_ldev(mdev);
	if (!local) {
		bio_put(req->private_bio); /* or we get a bio leak */
		req->private_bio = NULL;
	}
	if (rw == WRITE) {
		remote = 1;
	} else {
		/* READ || READA */
		if (local) {
			if (!drbd_may_do_local_read(mdev, sector, size)) {
				/* we could kick the syncer to
				 * sync this extent asap, wait for
				 * it, then continue locally.
				 * Or just issue the request remotely.
				 */
				local = 0;
				bio_put(req->private_bio);
				req->private_bio = NULL;
				put_ldev(mdev);
			}
		}
		remote = !local && mdev->state.pdsk >= D_UP_TO_DATE;
	}

	/* If we have a disk, but a READA request is mapped to remote,
	 * we are R_PRIMARY, D_INCONSISTENT, SyncTarget.
	 * Just fail that READA request right here.
	 *
	 * THINK: maybe fail all READA when not local?
	 *        or make this configurable...
	 *        if network is slow, READA won't do any good.
	 */
	if (rw == READA && mdev->state.disk >= D_INCONSISTENT && !local) {
		err = -EWOULDBLOCK;
		goto fail_and_free_req;
	}

	/* For WRITES going to the local disk, grab a reference on the target
	 * extent.  This waits for any resync activity in the corresponding
	 * resync extent to finish, and, if necessary, pulls in the target
	 * extent into the activity log, which involves further disk io because
	 * of transactional on-disk meta data updates. */
	if (rw == WRITE && local && !test_bit(AL_SUSPENDED, &mdev->flags)) {
		req->rq_state |= RQ_IN_ACT_LOG;
		drbd_al_begin_io(mdev, &req->i);
	}

	remote = remote && drbd_should_do_remote(mdev->state);
	send_oos = rw == WRITE && drbd_should_send_out_of_sync(mdev->state);
	D_ASSERT(!(remote && send_oos));

	if (!(local || remote) && !drbd_suspended(mdev)) {
		if (__ratelimit(&drbd_ratelimit_state))
			dev_err(DEV, "IO ERROR: neither local nor remote disk\n");
		err = -EIO;
		goto fail_free_complete;
	}

	/* For WRITE request, we have to make sure that we have an
	 * unused_spare_tle, in case we need to start a new epoch.
	 * I try to be smart and avoid to pre-allocate always "just in case",
	 * but there is a race between testing the bit and pointer outside the
	 * spinlock, and grabbing the spinlock.
	 * if we lost that race, we retry.  */
	if (rw == WRITE && (remote || send_oos) &&
	    mdev->tconn->unused_spare_tle == NULL &&
	    test_bit(CREATE_BARRIER, &mdev->flags)) {
allocate_barrier:
		b = kmalloc(sizeof(struct drbd_tl_epoch), GFP_NOIO);
		if (!b) {
			dev_err(DEV, "Failed to alloc barrier.\n");
			err = -ENOMEM;
			goto fail_free_complete;
		}
	}

	/* GOOD, everything prepared, grab the spin_lock */
	spin_lock_irq(&mdev->tconn->req_lock);

	if (rw == WRITE) {
		err = complete_conflicting_writes(mdev, sector, size);
		if (err) {
			if (err != -ERESTARTSYS)
				_conn_request_state(mdev->tconn,
						    NS(conn, C_TIMEOUT),
						    CS_HARD);
			spin_unlock_irq(&mdev->tconn->req_lock);
			err = -EIO;
			goto fail_free_complete;
		}
	}

	if (drbd_suspended(mdev)) {
		/* If we got suspended, use the retry mechanism of
		   generic_make_request() to restart processing of this
		   bio. In the next call to drbd_make_request
		   we sleep in inc_ap_bio() */
		ret = 1;
		spin_unlock_irq(&mdev->tconn->req_lock);
		goto fail_free_complete;
	}

	if (remote || send_oos) {
		remote = drbd_should_do_remote(mdev->state);
		send_oos = rw == WRITE && drbd_should_send_out_of_sync(mdev->state);
		D_ASSERT(!(remote && send_oos));

		if (!(remote || send_oos))
			dev_warn(DEV, "lost connection while grabbing the req_lock!\n");
		if (!(local || remote)) {
			dev_err(DEV, "IO ERROR: neither local nor remote disk\n");
			spin_unlock_irq(&mdev->tconn->req_lock);
			err = -EIO;
			goto fail_free_complete;
		}
	}

	if (b && mdev->tconn->unused_spare_tle == NULL) {
		mdev->tconn->unused_spare_tle = b;
		b = NULL;
	}
	if (rw == WRITE && (remote || send_oos) &&
	    mdev->tconn->unused_spare_tle == NULL &&
	    test_bit(CREATE_BARRIER, &mdev->flags)) {
		/* someone closed the current epoch
		 * while we were grabbing the spinlock */
		spin_unlock_irq(&mdev->tconn->req_lock);
		goto allocate_barrier;
	}

	/* Update disk stats */
	_drbd_start_io_acct(mdev, req, bio);

	/* _maybe_start_new_epoch(mdev);
	 * If we need to generate a write barrier packet, we have to add the
	 * new epoch (barrier) object, and queue the barrier packet for sending,
	 * and queue the req's data after it _within the same lock_, otherwise
	 * we have race conditions where the reorder domains could be mixed up.
	 *
	 * Even read requests may start a new epoch and queue the corresponding
	 * barrier packet.  To get the write ordering right, we only have to
	 * make sure that, if this is a write request and it triggered a
	 * barrier packet, this request is queued within the same spinlock. */
	if ((remote || send_oos) && mdev->tconn->unused_spare_tle &&
	    test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) {
		_tl_add_barrier(mdev->tconn, mdev->tconn->unused_spare_tle);
		mdev->tconn->unused_spare_tle = NULL;
	} else {
		D_ASSERT(!(remote && rw == WRITE &&
			   test_bit(CREATE_BARRIER, &mdev->flags)));
	}

	/* NOTE
	 * Actually, 'local' may be wrong here already, since we may have failed
	 * to write to the meta data, and may become wrong anytime because of
	 * local io-error for some other request, which would lead to us
	 * "detaching" the local disk.
	 *
	 * 'remote' may become wrong any time because the network could fail.
	 *
	 * This is a harmless race condition, though, since it is handled
	 * correctly at the appropriate places; so it just defers the failure
	 * of the respective operation.
	 */

	/* mark them early for readability.
	 * this just sets some state flags. */
	if (remote)
		_req_mod(req, TO_BE_SENT);
	if (local)
		_req_mod(req, TO_BE_SUBMITTED);

	list_add_tail(&req->tl_requests, &mdev->tconn->newest_tle->requests);

	/* NOTE remote first: to get the concurrent write detection right,
	 * we must register the request before start of local IO.  */
	if (remote) {
		/* either WRITE and C_CONNECTED,
		 * or READ, and no local disk,
		 * or READ, but not in sync.
		 */
		_req_mod(req, (rw == WRITE)
				? QUEUE_FOR_NET_WRITE
				: QUEUE_FOR_NET_READ);
	}
	if (send_oos && drbd_set_out_of_sync(mdev, sector, size))
		_req_mod(req, QUEUE_FOR_SEND_OOS);

	rcu_read_lock();
	nc = rcu_dereference(mdev->tconn->net_conf);
	if (remote &&
	    nc->on_congestion != OC_BLOCK && mdev->tconn->agreed_pro_version >= 96) {
		int congested = 0;

		if (nc->cong_fill &&
		    atomic_read(&mdev->ap_in_flight) >= nc->cong_fill) {
			dev_info(DEV, "Congestion-fill threshold reached\n");
			congested = 1;
		}

		if (mdev->act_log->used >= nc->cong_extents) {
			dev_info(DEV, "Congestion-extents threshold reached\n");
			congested = 1;
		}

		if (congested) {
			queue_barrier(mdev); /* last barrier, after mirrored writes */

			if (nc->on_congestion == OC_PULL_AHEAD)
				_drbd_set_state(_NS(mdev, conn, C_AHEAD), 0, NULL);
			else  /* nc->on_congestion == OC_DISCONNECT */
				_drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), 0, NULL);
		}
	}
	rcu_read_unlock();

	spin_unlock_irq(&mdev->tconn->req_lock);
	kfree(b); /* if someone else has beaten us to it... */

	if (local) {
		req->private_bio->bi_bdev = mdev->ldev->backing_bdev;

		/* State may have changed since we grabbed our reference on the
		 * mdev->ldev member. Double check, and short-circuit to endio.
		 * In case the last activity log transaction failed to get on
		 * stable storage, and this is a WRITE, we may not even submit
		 * this bio. */
		if (get_ldev(mdev)) {
			if (drbd_insert_fault(mdev,   rw == WRITE ? DRBD_FAULT_DT_WR
						    : rw == READ  ? DRBD_FAULT_DT_RD
						    :               DRBD_FAULT_DT_RA))
				bio_endio(req->private_bio, -EIO);
			else
				generic_make_request(req->private_bio);
			put_ldev(mdev);
		} else
			bio_endio(req->private_bio, -EIO);
	}

	return 0;

fail_free_complete:
	if (req->rq_state & RQ_IN_ACT_LOG)
		drbd_al_complete_io(mdev, &req->i);
fail_and_free_req:
	if (local) {
		bio_put(req->private_bio);
		req->private_bio = NULL;
		put_ldev(mdev);
	}
	if (!ret)
		bio_endio(bio, err);

	drbd_req_free(req);
	dec_ap_bio(mdev);
	kfree(b);

	return ret;
}

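/* make_request entry point registered with the block layer: account the
 * incoming application bio and hand it on to __drbd_make_request(). */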
int drbd_make_request(struct request_queue *q, struct bio *bio)
{
	struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata;
	unsigned long start_time;

	start_time = jiffies;

	/*
	 * what we "blindly" assume:
	 */
	D_ASSERT(bio->bi_size > 0);
	D_ASSERT(IS_ALIGNED(bio->bi_size, 512));

	inc_ap_bio(mdev);
	return __drbd_make_request(mdev, bio, start_time);
}

/* This is called by bio_add_page().
 *
 * q->max_hw_sectors and other global limits are already enforced there.
 *
 * We need to call down to our lower level device,
 * in case it has special restrictions.
 *
 * We also may need to enforce configured max-bio-bvecs limits.
 *
 * As long as the BIO is empty we have to allow at least one bvec,
 * regardless of size and offset, so no need to ask lower levels.
 */
int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec)
{
	struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata;
	unsigned int bio_size = bvm->bi_size;
	int limit = DRBD_MAX_BIO_SIZE;
	int backing_limit;

	if (bio_size && get_ldev(mdev)) {
		struct request_queue * const b =
			mdev->ldev->backing_bdev->bd_disk->queue;
		if (b->merge_bvec_fn) {
			backing_limit = b->merge_bvec_fn(b, bvm, bvec);
			limit = min(limit, backing_limit);
		}
		put_ldev(mdev);
	}
	return limit;
}

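/* Recurring per-device timer: check the oldest request in the transfer log
 * against the effective network timeout (ko-count * timeout) and the disk
 * timeout, and escalate to C_TIMEOUT or a local io-error if one expired. */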
1079
1080void request_timer_fn(unsigned long data)
1081{
1082 struct drbd_conf *mdev = (struct drbd_conf *) data;
8b924f1d 1083 struct drbd_tconn *tconn = mdev->tconn;
7fde2be9
PR
1084 struct drbd_request *req; /* oldest request */
1085 struct list_head *le;
44ed167d 1086 struct net_conf *nc;
3b03ad59 1087 unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */
44ed167d
PR
1088
1089 rcu_read_lock();
1090 nc = rcu_dereference(tconn->net_conf);
cdfda633
PR
1091 ent = nc ? nc->timeout * HZ/10 * nc->ko_count : 0;
1092
1093 if (get_ldev(mdev)) {
1094 dt = rcu_dereference(mdev->ldev->disk_conf)->disk_timeout * HZ / 10;
1095 put_ldev(mdev);
1096 }
44ed167d 1097 rcu_read_unlock();
7fde2be9 1098
cdfda633
PR
1099 et = min_not_zero(dt, ent);
1100
1101 if (!et || (mdev->state.conn < C_WF_REPORT_PARAMS && mdev->state.disk <= D_FAILED))
7fde2be9
PR
1102 return; /* Recurring timer stopped */
1103
8b924f1d
PR
1104 spin_lock_irq(&tconn->req_lock);
1105 le = &tconn->oldest_tle->requests;
7fde2be9 1106 if (list_empty(le)) {
8b924f1d 1107 spin_unlock_irq(&tconn->req_lock);
7fde2be9
PR
1108 mod_timer(&mdev->request_timer, jiffies + et);
1109 return;
1110 }
1111
1112 le = le->prev;
1113 req = list_entry(le, struct drbd_request, tl_requests);
cdfda633
PR
1114 if (ent && req->rq_state & RQ_NET_PENDING) {
1115 if (time_is_before_eq_jiffies(req->start_time + ent)) {
7fde2be9 1116 dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n");
cdfda633
PR
1117 _drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL);
1118 }
1119 }
1120 if (dt && req->rq_state & RQ_LOCAL_PENDING) {
1121 if (time_is_before_eq_jiffies(req->start_time + dt)) {
1122 dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n");
1123 __drbd_chk_io_error(mdev, 1);
7fde2be9 1124 }
7fde2be9 1125 }
3b03ad59 1126 nt = (time_is_before_eq_jiffies(req->start_time + et) ? jiffies : req->start_time) + et;
8b924f1d 1127 spin_unlock_irq(&tconn->req_lock);
3b03ad59 1128 mod_timer(&mdev->request_timer, nt);
7fde2be9 1129}