block: simplify empty barrier implementation
block/blk-barrier.c
/*
 * Functions related to barrier IO handling
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>

#include "blk.h"

/**
 * blk_queue_ordered - does this queue support ordered writes
 * @q: the request queue
 * @ordered: one of QUEUE_ORDERED_*
 * @prepare_flush_fn: rq setup helper for cache flush ordered writes
 *
 * Description:
 *    For journalled file systems, doing ordered writes on a commit
 *    block instead of explicitly doing wait_on_buffer (which is bad
 *    for performance) can be a big win. Block drivers supporting this
 *    feature should call this function and indicate so.
 *
 **/
int blk_queue_ordered(struct request_queue *q, unsigned ordered,
                      prepare_flush_fn *prepare_flush_fn)
{
        if (!prepare_flush_fn && (ordered & (QUEUE_ORDERED_DO_PREFLUSH |
                                             QUEUE_ORDERED_DO_POSTFLUSH))) {
                printk(KERN_ERR "%s: prepare_flush_fn required\n", __func__);
                return -EINVAL;
        }

        if (ordered != QUEUE_ORDERED_NONE &&
            ordered != QUEUE_ORDERED_DRAIN &&
            ordered != QUEUE_ORDERED_DRAIN_FLUSH &&
            ordered != QUEUE_ORDERED_DRAIN_FUA &&
            ordered != QUEUE_ORDERED_TAG &&
            ordered != QUEUE_ORDERED_TAG_FLUSH &&
            ordered != QUEUE_ORDERED_TAG_FUA) {
                printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered);
                return -EINVAL;
        }

        q->ordered = ordered;
        q->next_ordered = ordered;
        q->prepare_flush_fn = prepare_flush_fn;

        return 0;
}
EXPORT_SYMBOL(blk_queue_ordered);
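
/*
 * Example (not part of this file): how a driver with a write-back cache
 * might hook into the ordered-write machinery.  All mydrv_* names are
 * hypothetical, and the flush setup below is only a sketch, modeled loosely
 * on what SCSI-disk-style drivers of this era do in their prepare_flush_fn.
 */
static void mydrv_prepare_flush(struct request_queue *q, struct request *rq)
{
        /* turn the pre-initialized flush request into a device cache flush */
        rq->cmd_type = REQ_TYPE_BLOCK_PC;
        rq->cmd[0] = 0x35;      /* e.g. SCSI SYNCHRONIZE CACHE(10) */
        rq->cmd_len = 10;
}

static int mydrv_setup_ordered(struct request_queue *q)
{
        /* drain outstanding requests and flush the cache around barriers */
        return blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH,
                                 mydrv_prepare_flush);
}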

/*
 * Cache flushing for ordered writes handling
 */
unsigned blk_ordered_cur_seq(struct request_queue *q)
{
        if (!q->ordseq)
                return 0;
        return 1 << ffz(q->ordseq);
}
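
/*
 * q->ordseq is a bitmask of the QUEUE_ORDSEQ_* steps completed so far.
 * The step flags are assigned in increasing completion order, so ffz()
 * (find first zero bit) above yields the lowest step that has not
 * finished yet, i.e. the step the barrier sequence is currently waiting on.
 */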

unsigned blk_ordered_req_seq(struct request *rq)
{
        struct request_queue *q = rq->q;

        BUG_ON(q->ordseq == 0);

        if (rq == &q->pre_flush_rq)
                return QUEUE_ORDSEQ_PREFLUSH;
        if (rq == &q->bar_rq)
                return QUEUE_ORDSEQ_BAR;
        if (rq == &q->post_flush_rq)
                return QUEUE_ORDSEQ_POSTFLUSH;

        /*
         * !fs requests don't need to follow barrier ordering. Always
         * put them at the front. This fixes the following deadlock.
         *
         * http://thread.gmane.org/gmane.linux.kernel/537473
         */
        if (!blk_fs_request(rq))
                return QUEUE_ORDSEQ_DRAIN;

        if ((rq->cmd_flags & REQ_ORDERED_COLOR) ==
            (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR))
                return QUEUE_ORDSEQ_DRAIN;
        else
                return QUEUE_ORDSEQ_DONE;
}

bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
{
        struct request *rq;

        if (error && !q->orderr)
                q->orderr = error;

        BUG_ON(q->ordseq & seq);
        q->ordseq |= seq;

        if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE)
                return false;

        /*
         * Okay, sequence complete.
         */
        q->ordseq = 0;
        rq = q->orig_bar_rq;

        if (__blk_end_request(rq, q->orderr, blk_rq_bytes(rq)))
                BUG();

        return true;
}

static void pre_flush_end_io(struct request *rq, int error)
{
        elv_completed_request(rq->q, rq);
        blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error);
}

static void bar_end_io(struct request *rq, int error)
{
        elv_completed_request(rq->q, rq);
        blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error);
}

static void post_flush_end_io(struct request *rq, int error)
{
        elv_completed_request(rq->q, rq);
        blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error);
}

static void queue_flush(struct request_queue *q, unsigned which)
{
        struct request *rq;
        rq_end_io_fn *end_io;

        if (which == QUEUE_ORDERED_DO_PREFLUSH) {
                rq = &q->pre_flush_rq;
                end_io = pre_flush_end_io;
        } else {
                rq = &q->post_flush_rq;
                end_io = post_flush_end_io;
        }

        blk_rq_init(q, rq);
        rq->cmd_flags = REQ_HARDBARRIER;
        rq->rq_disk = q->bar_rq.rq_disk;
        rq->end_io = end_io;
        q->prepare_flush_fn(q, rq);

        elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
}

static inline bool start_ordered(struct request_queue *q, struct request **rqp)
{
        struct request *rq = *rqp;
        unsigned skip = 0;

        q->orderr = 0;
        q->ordered = q->next_ordered;
        q->ordseq |= QUEUE_ORDSEQ_STARTED;

        /*
         * For an empty barrier, there's no actual BAR request, which
         * in turn makes POSTFLUSH unnecessary. Mask them off.
         */
        if (!rq->hard_nr_sectors)
                q->ordered &= ~(QUEUE_ORDERED_DO_BAR |
                                QUEUE_ORDERED_DO_POSTFLUSH);

        /* stash away the original request */
        elv_dequeue_request(q, rq);
        q->orig_bar_rq = rq;
        rq = NULL;

        /*
         * Queue the ordered sequence.  As we stack these requests at the
         * head of the queue, they must be queued in reverse order.  Note
         * that we rely on the fact that no fs request uses
         * ELEVATOR_INSERT_FRONT, so no fs request can slip in between the
         * requests of the ordered sequence.
         */
        if (q->ordered & QUEUE_ORDERED_DO_POSTFLUSH) {
                queue_flush(q, QUEUE_ORDERED_DO_POSTFLUSH);
                rq = &q->post_flush_rq;
        } else
                skip |= QUEUE_ORDSEQ_POSTFLUSH;

        if (q->ordered & QUEUE_ORDERED_DO_BAR) {
                rq = &q->bar_rq;

                /* initialize proxy request and queue it */
                blk_rq_init(q, rq);
                if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
                        rq->cmd_flags |= REQ_RW;
                if (q->ordered & QUEUE_ORDERED_DO_FUA)
                        rq->cmd_flags |= REQ_FUA;
                init_request_from_bio(rq, q->orig_bar_rq->bio);
                rq->end_io = bar_end_io;

                elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
        } else
                skip |= QUEUE_ORDSEQ_BAR;

        if (q->ordered & QUEUE_ORDERED_DO_PREFLUSH) {
                queue_flush(q, QUEUE_ORDERED_DO_PREFLUSH);
                rq = &q->pre_flush_rq;
        } else
                skip |= QUEUE_ORDSEQ_PREFLUSH;

        if ((q->ordered & QUEUE_ORDERED_BY_DRAIN) && q->in_flight)
                rq = NULL;
        else
                skip |= QUEUE_ORDSEQ_DRAIN;

        *rqp = rq;

        /*
         * Complete skipped sequences. If whole sequence is complete,
         * return false to tell elevator that this request is gone.
         */
        return !blk_ordered_complete_seq(q, skip, 0);
}

bool blk_do_ordered(struct request_queue *q, struct request **rqp)
{
        struct request *rq = *rqp;
        const int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);

        if (!q->ordseq) {
                if (!is_barrier)
                        return true;

                if (q->next_ordered != QUEUE_ORDERED_NONE)
                        return start_ordered(q, rqp);
                else {
                        /*
                         * Queue ordering not supported. Terminate
                         * with prejudice.
                         */
                        elv_dequeue_request(q, rq);
                        if (__blk_end_request(rq, -EOPNOTSUPP,
                                              blk_rq_bytes(rq)))
                                BUG();
                        *rqp = NULL;
                        return false;
                }
        }

        /*
         * Ordered sequence in progress
         */

        /* Special requests are not subject to ordering rules. */
        if (!blk_fs_request(rq) &&
            rq != &q->pre_flush_rq && rq != &q->post_flush_rq)
                return true;

        if (q->ordered & QUEUE_ORDERED_BY_TAG) {
                /* Ordered by tag. Blocking the next barrier is enough. */
                if (is_barrier && rq != &q->bar_rq)
                        *rqp = NULL;
        } else {
                /* Ordered by draining. Wait for turn. */
                WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q));
                if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q))
                        *rqp = NULL;
        }

        return true;
}

static void bio_end_empty_barrier(struct bio *bio, int err)
{
        if (err) {
                if (err == -EOPNOTSUPP)
                        set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
                clear_bit(BIO_UPTODATE, &bio->bi_flags);
        }

        complete(bio->bi_private);
}

/**
 * blkdev_issue_flush - queue a flush
 * @bdev: blockdev to issue flush for
 * @error_sector: error sector
 *
 * Description:
 *    Issue a flush for the block device in question. Caller can supply
 *    room for storing the error offset in case of a flush error, if they
 *    wish to. The flush is issued and waited upon before this function
 *    returns.
 */
int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
{
        DECLARE_COMPLETION_ONSTACK(wait);
        struct request_queue *q;
        struct bio *bio;
        int ret;

        if (bdev->bd_disk == NULL)
                return -ENXIO;

        q = bdev_get_queue(bdev);
        if (!q)
                return -ENXIO;

        bio = bio_alloc(GFP_KERNEL, 0);
        if (!bio)
                return -ENOMEM;

        bio->bi_end_io = bio_end_empty_barrier;
        bio->bi_private = &wait;
        bio->bi_bdev = bdev;
        submit_bio(WRITE_BARRIER, bio);

        wait_for_completion(&wait);

        /*
         * The driver must store the error location in ->bi_sector, if
         * it supports it. For non-stacked drivers, this should be copied
         * from rq->sector.
         */
        if (error_sector)
                *error_sector = bio->bi_sector;

        ret = 0;
        if (bio_flagged(bio, BIO_EOPNOTSUPP))
                ret = -EOPNOTSUPP;
        else if (!bio_flagged(bio, BIO_UPTODATE))
                ret = -EIO;

        bio_put(bio);
        return ret;
}
EXPORT_SYMBOL(blkdev_issue_flush);
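
/*
 * Example (not part of this file): a hypothetical helper showing the calling
 * convention of blkdev_issue_flush().  Treating -EOPNOTSUPP as success is a
 * common caller policy (the device simply has no cache to flush), not
 * something this API mandates.
 */
static int example_flush_bdev(struct block_device *bdev)
{
        int err = blkdev_issue_flush(bdev, NULL);

        if (err == -EOPNOTSUPP)
                err = 0;
        return err;
}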

static void blkdev_discard_end_io(struct bio *bio, int err)
{
        if (err) {
                if (err == -EOPNOTSUPP)
                        set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
                clear_bit(BIO_UPTODATE, &bio->bi_flags);
        }

        bio_put(bio);
}

/**
 * blkdev_issue_discard - queue a discard
 * @bdev: blockdev to issue discard for
 * @sector: start sector
 * @nr_sects: number of sectors to discard
 * @gfp_mask: memory allocation flags (for bio_alloc)
 *
 * Description:
 *    Issue a discard request for the sectors in question. Does not wait.
 */
int blkdev_issue_discard(struct block_device *bdev,
                         sector_t sector, sector_t nr_sects, gfp_t gfp_mask)
{
        struct request_queue *q;
        struct bio *bio;
        int ret = 0;

        if (bdev->bd_disk == NULL)
                return -ENXIO;

        q = bdev_get_queue(bdev);
        if (!q)
                return -ENXIO;

        if (!q->prepare_discard_fn)
                return -EOPNOTSUPP;

        while (nr_sects && !ret) {
                bio = bio_alloc(gfp_mask, 0);
                if (!bio)
                        return -ENOMEM;

                bio->bi_end_io = blkdev_discard_end_io;
                bio->bi_bdev = bdev;

                bio->bi_sector = sector;

                if (nr_sects > q->max_hw_sectors) {
                        bio->bi_size = q->max_hw_sectors << 9;
                        nr_sects -= q->max_hw_sectors;
                        sector += q->max_hw_sectors;
                } else {
                        bio->bi_size = nr_sects << 9;
                        nr_sects = 0;
                }
                bio_get(bio);
                submit_bio(DISCARD_BARRIER, bio);

                /* Check if it failed immediately */
                if (bio_flagged(bio, BIO_EOPNOTSUPP))
                        ret = -EOPNOTSUPP;
                else if (!bio_flagged(bio, BIO_UPTODATE))
                        ret = -EIO;
                bio_put(bio);
        }
        return ret;
}
EXPORT_SYMBOL(blkdev_issue_discard);
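
/*
 * Example (not part of this file): a hypothetical helper showing the calling
 * convention of blkdev_issue_discard().  The discard bios complete
 * asynchronously, so a successful return only means the requests were issued,
 * not that the sectors have already been discarded.
 */
static int example_discard_range(struct block_device *bdev, sector_t start,
                                 sector_t nr_sects)
{
        int err = blkdev_issue_discard(bdev, start, nr_sects, GFP_KERNEL);

        /* not every device supports discard; many callers just ignore that */
        if (err == -EOPNOTSUPP)
                err = 0;
        return err;
}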