Btrfs: Fix checkpatch.pl warnings
fs/btrfs/extent_io.c
1#include <linux/bitops.h>
2#include <linux/slab.h>
3#include <linux/bio.h>
4#include <linux/mm.h>
5#include <linux/gfp.h>
6#include <linux/pagemap.h>
7#include <linux/page-flags.h>
8#include <linux/module.h>
9#include <linux/spinlock.h>
10#include <linux/blkdev.h>
11#include <linux/swap.h>
12#include <linux/version.h>
13#include <linux/writeback.h>
14#include <linux/pagevec.h>
15#include "extent_io.h"
16#include "extent_map.h"
17#include "compat.h"
18#include "ctree.h"
19#include "btrfs_inode.h"
20
21/* temporary define until extent_map moves out of btrfs */
22struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
23 unsigned long extra_flags,
24 void (*ctor)(void *, struct kmem_cache *,
25 unsigned long));
26
27static struct kmem_cache *extent_state_cache;
28static struct kmem_cache *extent_buffer_cache;
29
30static LIST_HEAD(buffers);
31static LIST_HEAD(states);
32
33#define LEAK_DEBUG 0
34#ifdef LEAK_DEBUG
35static DEFINE_SPINLOCK(leak_lock);
36#endif
37
38#define BUFFER_LRU_MAX 64
39
40struct tree_entry {
41 u64 start;
42 u64 end;
43 struct rb_node rb_node;
44};
45
46struct extent_page_data {
47 struct bio *bio;
48 struct extent_io_tree *tree;
49 get_extent_t *get_extent;
50
51 /* tells writepage not to lock the state bits for this range
52 * it still does the unlocking
53 */
54 int extent_locked;
55};
56
57int __init extent_io_init(void)
58{
59 extent_state_cache = btrfs_cache_create("extent_state",
60 sizeof(struct extent_state), 0,
61 NULL);
62 if (!extent_state_cache)
63 return -ENOMEM;
64
65 extent_buffer_cache = btrfs_cache_create("extent_buffers",
66 sizeof(struct extent_buffer), 0,
67 NULL);
68 if (!extent_buffer_cache)
69 goto free_state_cache;
70 return 0;
71
72free_state_cache:
73 kmem_cache_destroy(extent_state_cache);
74 return -ENOMEM;
75}
76
77void extent_io_exit(void)
78{
79 struct extent_state *state;
80 struct extent_buffer *eb;
81
82 while (!list_empty(&states)) {
83 state = list_entry(states.next, struct extent_state, leak_list);
84 printk(KERN_ERR "btrfs state leak: start %llu end %llu "
85 "state %lu in tree %p refs %d\n",
86 (unsigned long long)state->start,
87 (unsigned long long)state->end,
88 state->state, state->tree, atomic_read(&state->refs));
89 list_del(&state->leak_list);
90 kmem_cache_free(extent_state_cache, state);
91
92 }
93
94 while (!list_empty(&buffers)) {
95 eb = list_entry(buffers.next, struct extent_buffer, leak_list);
96 printk(KERN_ERR "btrfs buffer leak start %llu len %lu "
97 "refs %d\n", (unsigned long long)eb->start,
98 eb->len, atomic_read(&eb->refs));
99 list_del(&eb->leak_list);
100 kmem_cache_free(extent_buffer_cache, eb);
101 }
102 if (extent_state_cache)
103 kmem_cache_destroy(extent_state_cache);
104 if (extent_buffer_cache)
105 kmem_cache_destroy(extent_buffer_cache);
106}
107
108void extent_io_tree_init(struct extent_io_tree *tree,
109 struct address_space *mapping, gfp_t mask)
110{
111 tree->state.rb_node = NULL;
112 tree->buffer.rb_node = NULL;
113 tree->ops = NULL;
114 tree->dirty_bytes = 0;
115 spin_lock_init(&tree->lock);
116 spin_lock_init(&tree->buffer_lock);
117 tree->mapping = mapping;
118}
119EXPORT_SYMBOL(extent_io_tree_init);
120
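/*
 * allocate a zeroed extent_state and take an initial reference on it.
 * Under LEAK_DEBUG the new state is also added to the global leak list.
 */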
121static struct extent_state *alloc_extent_state(gfp_t mask)
122{
123 struct extent_state *state;
124#ifdef LEAK_DEBUG
125 unsigned long flags;
126#endif
127
128 state = kmem_cache_alloc(extent_state_cache, mask);
129 if (!state)
130 return state;
131 state->state = 0;
132 state->private = 0;
133 state->tree = NULL;
134#ifdef LEAK_DEBUG
135 spin_lock_irqsave(&leak_lock, flags);
136 list_add(&state->leak_list, &states);
137 spin_unlock_irqrestore(&leak_lock, flags);
138#endif
139 atomic_set(&state->refs, 1);
140 init_waitqueue_head(&state->wq);
141 return state;
142}
143EXPORT_SYMBOL(alloc_extent_state);
144
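/*
 * drop a reference on an extent_state and free it once the last
 * reference is gone. The state must already be out of the tree.
 */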
145static void free_extent_state(struct extent_state *state)
146{
147 if (!state)
148 return;
149 if (atomic_dec_and_test(&state->refs)) {
150#ifdef LEAK_DEBUG
151 unsigned long flags;
152#endif
153 WARN_ON(state->tree);
154#ifdef LEAK_DEBUG
155 spin_lock_irqsave(&leak_lock, flags);
156 list_del(&state->leak_list);
157 spin_unlock_irqrestore(&leak_lock, flags);
158#endif
159 kmem_cache_free(extent_state_cache, state);
160 }
161}
162EXPORT_SYMBOL(free_extent_state);
163
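/*
 * insert 'node' into an rb tree of tree_entry structs indexed by 'offset'.
 * Returns the existing node if one already covers this offset, or NULL
 * after linking and rebalancing the new node.
 */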
164static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
165 struct rb_node *node)
166{
167 struct rb_node **p = &root->rb_node;
168 struct rb_node *parent = NULL;
169 struct tree_entry *entry;
170
171 while (*p) {
172 parent = *p;
173 entry = rb_entry(parent, struct tree_entry, rb_node);
174
175 if (offset < entry->start)
176 p = &(*p)->rb_left;
177 else if (offset > entry->end)
178 p = &(*p)->rb_right;
179 else
180 return parent;
181 }
182
183 entry = rb_entry(node, struct tree_entry, rb_node);
184 rb_link_node(node, parent, p);
185 rb_insert_color(node, root);
186 return NULL;
187}
188
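/*
 * search the state tree for the entry containing 'offset'. If no exact
 * match exists, NULL is returned and prev_ret/next_ret (when non-NULL)
 * are filled with the neighboring entries around the hole.
 */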
189static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
190 struct rb_node **prev_ret,
191 struct rb_node **next_ret)
192{
193 struct rb_root *root = &tree->state;
194 struct rb_node *n = root->rb_node;
195 struct rb_node *prev = NULL;
196 struct rb_node *orig_prev = NULL;
197 struct tree_entry *entry;
198 struct tree_entry *prev_entry = NULL;
199
200 while (n) {
201 entry = rb_entry(n, struct tree_entry, rb_node);
202 prev = n;
203 prev_entry = entry;
204
205 if (offset < entry->start)
206 n = n->rb_left;
207 else if (offset > entry->end)
208 n = n->rb_right;
209 else
210 return n;
211 }
212
213 if (prev_ret) {
214 orig_prev = prev;
215 while (prev && offset > prev_entry->end) {
216 prev = rb_next(prev);
217 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
218 }
219 *prev_ret = prev;
220 prev = orig_prev;
221 }
222
223 if (next_ret) {
224 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
225 while (prev && offset < prev_entry->start) {
226 prev = rb_prev(prev);
227 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
228 }
229 *next_ret = prev;
230 }
231 return NULL;
232}
233
234static inline struct rb_node *tree_search(struct extent_io_tree *tree,
235 u64 offset)
236{
237 struct rb_node *prev = NULL;
238 struct rb_node *ret;
239
240 ret = __etree_search(tree, offset, &prev, NULL);
241 if (!ret)
242 return prev;
243 return ret;
244}
245
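/*
 * insert an extent_buffer into the per-tree buffer rb tree, keyed by
 * start offset. Returns the existing buffer if one is already present.
 */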
246static struct extent_buffer *buffer_tree_insert(struct extent_io_tree *tree,
247 u64 offset, struct rb_node *node)
248{
249 struct rb_root *root = &tree->buffer;
250 struct rb_node **p = &root->rb_node;
251 struct rb_node *parent = NULL;
252 struct extent_buffer *eb;
253
254 while (*p) {
255 parent = *p;
256 eb = rb_entry(parent, struct extent_buffer, rb_node);
257
258 if (offset < eb->start)
259 p = &(*p)->rb_left;
260 else if (offset > eb->start)
261 p = &(*p)->rb_right;
262 else
263 return eb;
264 }
265
266 rb_link_node(node, parent, p);
267 rb_insert_color(node, root);
268 return NULL;
269}
270
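/* find the extent_buffer that starts exactly at 'offset', or NULL */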
271static struct extent_buffer *buffer_search(struct extent_io_tree *tree,
272 u64 offset)
273{
274 struct rb_root *root = &tree->buffer;
275 struct rb_node *n = root->rb_node;
276 struct extent_buffer *eb;
277
278 while (n) {
279 eb = rb_entry(n, struct extent_buffer, rb_node);
280 if (offset < eb->start)
281 n = n->rb_left;
282 else if (offset > eb->start)
283 n = n->rb_right;
284 else
285 return eb;
286 }
287 return NULL;
288}
289
290/*
291 * utility function to look for merge candidates inside a given range.
292 * Any extents with matching state are merged together into a single
293 * extent in the tree. Extents with EXTENT_IO in their state field
294 * are not merged because the end_io handlers need to be able to do
295 * operations on them without sleeping (or doing allocations/splits).
296 *
297 * This should be called with the tree lock held.
298 */
299static int merge_state(struct extent_io_tree *tree,
300 struct extent_state *state)
301{
302 struct extent_state *other;
303 struct rb_node *other_node;
304
305 if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY))
306 return 0;
307
308 other_node = rb_prev(&state->rb_node);
309 if (other_node) {
310 other = rb_entry(other_node, struct extent_state, rb_node);
311 if (other->end == state->start - 1 &&
312 other->state == state->state) {
313 state->start = other->start;
314 other->tree = NULL;
315 rb_erase(&other->rb_node, &tree->state);
316 free_extent_state(other);
317 }
318 }
319 other_node = rb_next(&state->rb_node);
320 if (other_node) {
321 other = rb_entry(other_node, struct extent_state, rb_node);
322 if (other->start == state->end + 1 &&
323 other->state == state->state) {
324 other->start = state->start;
325 state->tree = NULL;
326 rb_erase(&state->rb_node, &tree->state);
327 free_extent_state(state);
328 }
329 }
330 return 0;
331}
332
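/*
 * set_state_cb and clear_state_cb forward bit changes to the owning
 * inode via the optional hooks in tree->ops.
 */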
333static void set_state_cb(struct extent_io_tree *tree,
334 struct extent_state *state,
335 unsigned long bits)
336{
337 if (tree->ops && tree->ops->set_bit_hook) {
338 tree->ops->set_bit_hook(tree->mapping->host, state->start,
339 state->end, state->state, bits);
340 }
341}
342
343static void clear_state_cb(struct extent_io_tree *tree,
344 struct extent_state *state,
345 unsigned long bits)
346{
347 if (tree->ops && tree->ops->clear_bit_hook) {
348 tree->ops->clear_bit_hook(tree->mapping->host, state->start,
349 state->end, state->state, bits);
350 }
351}
352
353/*
354 * insert an extent_state struct into the tree. 'bits' are set on the
355 * struct before it is inserted.
356 *
357 * This may return -EEXIST if the extent is already there, in which case the
358 * state struct is freed.
359 *
360 * The tree lock is not taken internally. This is a utility function and
361 * probably isn't what you want to call (see set/clear_extent_bit).
362 */
363static int insert_state(struct extent_io_tree *tree,
364 struct extent_state *state, u64 start, u64 end,
365 int bits)
366{
367 struct rb_node *node;
368
369 if (end < start) {
370 printk(KERN_ERR "btrfs end < start %llu %llu\n",
371 (unsigned long long)end,
372 (unsigned long long)start);
373 WARN_ON(1);
374 }
375 if (bits & EXTENT_DIRTY)
376 tree->dirty_bytes += end - start + 1;
377 set_state_cb(tree, state, bits);
378 state->state |= bits;
379 state->start = start;
380 state->end = end;
381 node = tree_insert(&tree->state, end, &state->rb_node);
382 if (node) {
383 struct extent_state *found;
384 found = rb_entry(node, struct extent_state, rb_node);
385 printk(KERN_ERR "btrfs found node %llu %llu on insert of "
386 "%llu %llu\n", (unsigned long long)found->start,
387 (unsigned long long)found->end,
388 (unsigned long long)start, (unsigned long long)end);
389 free_extent_state(state);
390 return -EEXIST;
391 }
392 state->tree = tree;
393 merge_state(tree, state);
394 return 0;
395}
396
397/*
398 * split a given extent state struct in two, inserting the preallocated
399 * struct 'prealloc' as the newly created second half. 'split' indicates an
400 * offset inside 'orig' where it should be split.
401 *
402 * Before calling,
403 * the tree has 'orig' at [orig->start, orig->end]. After calling, there
404 * are two extent state structs in the tree:
405 * prealloc: [orig->start, split - 1]
406 * orig: [ split, orig->end ]
407 *
408 * The tree locks are not taken by this function. They need to be held
409 * by the caller.
410 */
411static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
412 struct extent_state *prealloc, u64 split)
413{
414 struct rb_node *node;
415 prealloc->start = orig->start;
416 prealloc->end = split - 1;
417 prealloc->state = orig->state;
418 orig->start = split;
419
420 node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node);
421 if (node) {
422 struct extent_state *found;
423 found = rb_entry(node, struct extent_state, rb_node);
424 free_extent_state(prealloc);
425 return -EEXIST;
426 }
427 prealloc->tree = tree;
428 return 0;
429}
430
431/*
432 * utility function to clear some bits in an extent state struct.
433 * it will optionally wake up any one waiting on this state (wake == 1), or
434 * forcibly remove the state from the tree (delete == 1).
435 *
436 * If no bits are set on the state struct after clearing things, the
437 * struct is freed and removed from the tree
438 */
439static int clear_state_bit(struct extent_io_tree *tree,
440 struct extent_state *state, int bits, int wake,
441 int delete)
442{
443 int ret = state->state & bits;
444
445 if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
446 u64 range = state->end - state->start + 1;
447 WARN_ON(range > tree->dirty_bytes);
448 tree->dirty_bytes -= range;
449 }
450 clear_state_cb(tree, state, bits);
451 state->state &= ~bits;
452 if (wake)
453 wake_up(&state->wq);
454 if (delete || state->state == 0) {
455 if (state->tree) {
456 clear_state_cb(tree, state, state->state);
457 rb_erase(&state->rb_node, &tree->state);
458 state->tree = NULL;
459 free_extent_state(state);
460 } else {
461 WARN_ON(1);
462 }
463 } else {
464 merge_state(tree, state);
465 }
466 return ret;
467}
468
469/*
470 * clear some bits on a range in the tree. This may require splitting
471 * or inserting elements in the tree, so the gfp mask is used to
472 * indicate which allocations or sleeping are allowed.
473 *
474 * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove
475 * the given range from the tree regardless of state (ie for truncate).
476 *
477 * the range [start, end] is inclusive.
478 *
479 * This takes the tree lock, and returns < 0 on error, > 0 if any of the
480 * bits were already set, or zero if none of the bits were already set.
481 */
482int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
483 int bits, int wake, int delete, gfp_t mask)
484{
485 struct extent_state *state;
486 struct extent_state *prealloc = NULL;
487 struct rb_node *node;
488 int err;
489 int set = 0;
490
491again:
492 if (!prealloc && (mask & __GFP_WAIT)) {
493 prealloc = alloc_extent_state(mask);
494 if (!prealloc)
495 return -ENOMEM;
496 }
497
498 spin_lock(&tree->lock);
499 /*
500 * this search will find the extents that end after
501 * our range starts
502 */
503 node = tree_search(tree, start);
504 if (!node)
505 goto out;
506 state = rb_entry(node, struct extent_state, rb_node);
507 if (state->start > end)
508 goto out;
509 WARN_ON(state->end < start);
510
511 /*
512 * | ---- desired range ---- |
513 * | state | or
514 * | ------------- state -------------- |
515 *
516 * We need to split the extent we found, and may flip
517 * bits on second half.
518 *
519 * If the extent we found extends past our range, we
520 * just split and search again. It'll get split again
521 * the next time though.
522 *
523 * If the extent we found is inside our range, we clear
524 * the desired bit on it.
525 */
526
527 if (state->start < start) {
528 if (!prealloc)
529 prealloc = alloc_extent_state(GFP_ATOMIC);
530 err = split_state(tree, state, prealloc, start);
531 BUG_ON(err == -EEXIST);
532 prealloc = NULL;
533 if (err)
534 goto out;
535 if (state->end <= end) {
536 start = state->end + 1;
537 set |= clear_state_bit(tree, state, bits,
538 wake, delete);
539 } else {
540 start = state->start;
541 }
542 goto search_again;
543 }
544 /*
545 * | ---- desired range ---- |
546 * | state |
547 * We need to split the extent, and clear the bit
548 * on the first half
549 */
550 if (state->start <= end && state->end > end) {
551 if (!prealloc)
552 prealloc = alloc_extent_state(GFP_ATOMIC);
553 err = split_state(tree, state, prealloc, end + 1);
554 BUG_ON(err == -EEXIST);
555
556 if (wake)
557 wake_up(&state->wq);
558 set |= clear_state_bit(tree, prealloc, bits,
559 wake, delete);
560 prealloc = NULL;
561 goto out;
562 }
563
564 start = state->end + 1;
565 set |= clear_state_bit(tree, state, bits, wake, delete);
566 goto search_again;
567
568out:
569 spin_unlock(&tree->lock);
570 if (prealloc)
571 free_extent_state(prealloc);
572
573 return set;
574
575search_again:
576 if (start > end)
577 goto out;
578 spin_unlock(&tree->lock);
579 if (mask & __GFP_WAIT)
580 cond_resched();
581 goto again;
582}
583EXPORT_SYMBOL(clear_extent_bit);
584
585static int wait_on_state(struct extent_io_tree *tree,
586 struct extent_state *state)
587 __releases(tree->lock)
588 __acquires(tree->lock)
589{
590 DEFINE_WAIT(wait);
591 prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
592 spin_unlock(&tree->lock);
593 schedule();
594 spin_lock(&tree->lock);
595 finish_wait(&state->wq, &wait);
596 return 0;
597}
598
599/*
600 * waits for one or more bits to clear on a range in the state tree.
601 * The range [start, end] is inclusive.
602 * The tree lock is taken by this function
603 */
604int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits)
605{
606 struct extent_state *state;
607 struct rb_node *node;
608
609 spin_lock(&tree->lock);
610again:
611 while (1) {
612 /*
613 * this search will find all the extents that end after
614 * our range starts
615 */
616 node = tree_search(tree, start);
617 if (!node)
618 break;
619
620 state = rb_entry(node, struct extent_state, rb_node);
621
622 if (state->start > end)
623 goto out;
624
625 if (state->state & bits) {
626 start = state->start;
627 atomic_inc(&state->refs);
628 wait_on_state(tree, state);
629 free_extent_state(state);
630 goto again;
631 }
632 start = state->end + 1;
633
634 if (start > end)
635 break;
636
637 if (need_resched()) {
638 spin_unlock(&tree->lock);
639 cond_resched();
640 spin_lock(&tree->lock);
641 }
642 }
643out:
644 spin_unlock(&tree->lock);
645 return 0;
646}
647EXPORT_SYMBOL(wait_extent_bit);
648
649static void set_state_bits(struct extent_io_tree *tree,
650 struct extent_state *state,
651 int bits)
652{
653 if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
654 u64 range = state->end - state->start + 1;
655 tree->dirty_bytes += range;
656 }
657 set_state_cb(tree, state, bits);
658 state->state |= bits;
659}
660
661/*
662 * set some bits on a range in the tree. This may require allocations
663 * or sleeping, so the gfp mask is used to indicate what is allowed.
664 *
665 * If 'exclusive' == 1, this will fail with -EEXIST if some part of the
666 * range already has the desired bits set. The start of the existing
667 * range is returned in failed_start in this case.
668 *
669 * [start, end] is inclusive
670 * This takes the tree lock.
671 */
672static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
673 int bits, int exclusive, u64 *failed_start,
674 gfp_t mask)
675{
676 struct extent_state *state;
677 struct extent_state *prealloc = NULL;
678 struct rb_node *node;
679 int err = 0;
680 int set;
681 u64 last_start;
682 u64 last_end;
683again:
684 if (!prealloc && (mask & __GFP_WAIT)) {
685 prealloc = alloc_extent_state(mask);
686 if (!prealloc)
687 return -ENOMEM;
688 }
689
690 spin_lock(&tree->lock);
691 /*
692 * this search will find all the extents that end after
693 * our range starts.
694 */
695 node = tree_search(tree, start);
696 if (!node) {
697 err = insert_state(tree, prealloc, start, end, bits);
698 prealloc = NULL;
699 BUG_ON(err == -EEXIST);
700 goto out;
701 }
702
703 state = rb_entry(node, struct extent_state, rb_node);
704 last_start = state->start;
705 last_end = state->end;
706
707 /*
708 * | ---- desired range ---- |
709 * | state |
710 *
711 * Just lock what we found and keep going
712 */
713 if (state->start == start && state->end <= end) {
714 set = state->state & bits;
715 if (set && exclusive) {
716 *failed_start = state->start;
717 err = -EEXIST;
718 goto out;
719 }
720 set_state_bits(tree, state, bits);
721 start = state->end + 1;
722 merge_state(tree, state);
723 goto search_again;
724 }
725
726 /*
727 * | ---- desired range ---- |
728 * | state |
729 * or
730 * | ------------- state -------------- |
731 *
732 * We need to split the extent we found, and may flip bits on
733 * second half.
734 *
735 * If the extent we found extends past our
736 * range, we just split and search again. It'll get split
737 * again the next time though.
738 *
739 * If the extent we found is inside our range, we set the
740 * desired bit on it.
741 */
742 if (state->start < start) {
743 set = state->state & bits;
744 if (exclusive && set) {
745 *failed_start = start;
746 err = -EEXIST;
747 goto out;
748 }
749 err = split_state(tree, state, prealloc, start);
750 BUG_ON(err == -EEXIST);
751 prealloc = NULL;
752 if (err)
753 goto out;
754 if (state->end <= end) {
755 set_state_bits(tree, state, bits);
756 start = state->end + 1;
757 merge_state(tree, state);
758 } else {
759 start = state->start;
760 }
761 goto search_again;
762 }
763 /*
764 * | ---- desired range ---- |
765 * | state | or | state |
766 *
767 * There's a hole, we need to insert something in it and
768 * ignore the extent we found.
769 */
770 if (state->start > start) {
771 u64 this_end;
772 if (end < last_start)
773 this_end = end;
774 else
775 this_end = last_start - 1;
776 err = insert_state(tree, prealloc, start, this_end,
777 bits);
778 prealloc = NULL;
779 BUG_ON(err == -EEXIST);
780 if (err)
781 goto out;
782 start = this_end + 1;
783 goto search_again;
784 }
785 /*
786 * | ---- desired range ---- |
787 * | state |
788 * We need to split the extent, and set the bit
789 * on the first half
790 */
791 if (state->start <= end && state->end > end) {
792 set = state->state & bits;
793 if (exclusive && set) {
794 *failed_start = start;
795 err = -EEXIST;
796 goto out;
797 }
798 err = split_state(tree, state, prealloc, end + 1);
799 BUG_ON(err == -EEXIST);
800
801 set_state_bits(tree, prealloc, bits);
802 merge_state(tree, prealloc);
803 prealloc = NULL;
804 goto out;
805 }
806
807 goto search_again;
808
809out:
810 spin_unlock(&tree->lock);
811 if (prealloc)
812 free_extent_state(prealloc);
813
814 return err;
815
816search_again:
817 if (start > end)
818 goto out;
819 spin_unlock(&tree->lock);
820 if (mask & __GFP_WAIT)
821 cond_resched();
822 goto again;
823}
824EXPORT_SYMBOL(set_extent_bit);
825
826/* wrappers around set/clear extent bit */
827int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
828 gfp_t mask)
829{
830 return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL,
831 mask);
832}
833EXPORT_SYMBOL(set_extent_dirty);
834
835int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
836 gfp_t mask)
837{
838 return set_extent_bit(tree, start, end, EXTENT_ORDERED, 0, NULL, mask);
839}
840EXPORT_SYMBOL(set_extent_ordered);
841
842int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
843 int bits, gfp_t mask)
844{
845 return set_extent_bit(tree, start, end, bits, 0, NULL,
846 mask);
847}
848EXPORT_SYMBOL(set_extent_bits);
849
850int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
851 int bits, gfp_t mask)
852{
853 return clear_extent_bit(tree, start, end, bits, 0, 0, mask);
854}
855EXPORT_SYMBOL(clear_extent_bits);
856
857int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
858 gfp_t mask)
859{
860 return set_extent_bit(tree, start, end,
861 EXTENT_DELALLOC | EXTENT_DIRTY,
862 0, NULL, mask);
863}
864EXPORT_SYMBOL(set_extent_delalloc);
865
866int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
867 gfp_t mask)
868{
869 return clear_extent_bit(tree, start, end,
870 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask);
871}
872EXPORT_SYMBOL(clear_extent_dirty);
873
874int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
875 gfp_t mask)
876{
877 return clear_extent_bit(tree, start, end, EXTENT_ORDERED, 1, 0, mask);
878}
879EXPORT_SYMBOL(clear_extent_ordered);
880
881int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
882 gfp_t mask)
883{
884 return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL,
885 mask);
886}
887EXPORT_SYMBOL(set_extent_new);
888
889static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
890 gfp_t mask)
891{
892 return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask);
893}
894
895int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
896 gfp_t mask)
897{
898 return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
899 mask);
900}
901EXPORT_SYMBOL(set_extent_uptodate);
902
903static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
904 u64 end, gfp_t mask)
905{
906 return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask);
907}
908
909static int set_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end,
910 gfp_t mask)
911{
912 return set_extent_bit(tree, start, end, EXTENT_WRITEBACK,
913 0, NULL, mask);
914}
915
916static int clear_extent_writeback(struct extent_io_tree *tree, u64 start,
917 u64 end, gfp_t mask)
918{
919 return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask);
920}
921
922int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
923{
924 return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK);
925}
926EXPORT_SYMBOL(wait_on_extent_writeback);
927
928/*
929 * either insert or lock state struct between start and end use mask to tell
930 * us if waiting is desired.
931 */
932int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
933{
934 int err;
935 u64 failed_start;
936 while (1) {
937 err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
938 &failed_start, mask);
939 if (err == -EEXIST && (mask & __GFP_WAIT)) {
940 wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
941 start = failed_start;
942 } else {
943 break;
944 }
945 WARN_ON(start > end);
946 }
947 return err;
948}
949EXPORT_SYMBOL(lock_extent);
950
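/*
 * nonblocking version of lock_extent. Returns 1 and locks the range if
 * it was free, otherwise backs out any partial lock and returns 0.
 */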
951int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
952 gfp_t mask)
953{
954 int err;
955 u64 failed_start;
956
957 err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
958 &failed_start, mask);
959 if (err == -EEXIST) {
960 if (failed_start > start)
961 clear_extent_bit(tree, start, failed_start - 1,
962 EXTENT_LOCKED, 1, 0, mask);
963 return 0;
964 }
965 return 1;
966}
967EXPORT_SYMBOL(try_lock_extent);
968
969int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end,
970 gfp_t mask)
971{
972 return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask);
973}
974EXPORT_SYMBOL(unlock_extent);
975
976/*
977 * helper function to set pages and extents in the tree dirty
978 */
979int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end)
980{
981 unsigned long index = start >> PAGE_CACHE_SHIFT;
982 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
983 struct page *page;
984
985 while (index <= end_index) {
986 page = find_get_page(tree->mapping, index);
987 BUG_ON(!page);
988 __set_page_dirty_nobuffers(page);
989 page_cache_release(page);
990 index++;
991 }
992 set_extent_dirty(tree, start, end, GFP_NOFS);
993 return 0;
994}
995EXPORT_SYMBOL(set_range_dirty);
996
997/*
998 * helper function to set both pages and extents in the tree writeback
999 */
1000static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
1001{
1002 unsigned long index = start >> PAGE_CACHE_SHIFT;
1003 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1004 struct page *page;
1005
1006 while (index <= end_index) {
1007 page = find_get_page(tree->mapping, index);
1008 BUG_ON(!page);
1009 set_page_writeback(page);
1010 page_cache_release(page);
1011 index++;
1012 }
1013 set_extent_writeback(tree, start, end, GFP_NOFS);
1014 return 0;
1015}
1016
1017/*
1018 * find the first offset in the io tree with 'bits' set. zero is
1019 * returned if we find something, and *start_ret and *end_ret are
1020 * set to reflect the state struct that was found.
1021 *
1022 * If nothing was found, 1 is returned, < 0 on error
1023 */
1024int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
1025 u64 *start_ret, u64 *end_ret, int bits)
1026{
1027 struct rb_node *node;
1028 struct extent_state *state;
1029 int ret = 1;
1030
1031 spin_lock(&tree->lock);
1032 /*
1033 * this search will find all the extents that end after
1034 * our range starts.
1035 */
1036 node = tree_search(tree, start);
1037 if (!node)
1038 goto out;
1039
1040 while (1) {
1041 state = rb_entry(node, struct extent_state, rb_node);
1042 if (state->end >= start && (state->state & bits)) {
1043 *start_ret = state->start;
1044 *end_ret = state->end;
1045 ret = 0;
1046 break;
1047 }
1048 node = rb_next(node);
1049 if (!node)
1050 break;
1051 }
1052out:
1053 spin_unlock(&tree->lock);
1054 return ret;
1055}
1056EXPORT_SYMBOL(find_first_extent_bit);
1057
1058/* find the first state struct with 'bits' set after 'start', and
1059 * return it. tree->lock must be held. NULL will returned if
1060 * nothing was found after 'start'
1061 */
1062struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
1063 u64 start, int bits)
1064{
1065 struct rb_node *node;
1066 struct extent_state *state;
1067
1068 /*
1069 * this search will find all the extents that end after
1070 * our range starts.
1071 */
1072 node = tree_search(tree, start);
1073 if (!node)
1074 goto out;
1075
1076 while (1) {
1077 state = rb_entry(node, struct extent_state, rb_node);
1078 if (state->end >= start && (state->state & bits))
1079 return state;
1080
1081 node = rb_next(node);
1082 if (!node)
1083 break;
1084 }
1085out:
1086 return NULL;
1087}
1088EXPORT_SYMBOL(find_first_extent_bit_state);
1089
1090/*
1091 * find a contiguous range of bytes in the file marked as delalloc, not
1092 * more than 'max_bytes'. start and end are used to return the range,
1093 *
1094 * 1 is returned if we find something, 0 if nothing was in the tree
1095 */
1096static noinline u64 find_delalloc_range(struct extent_io_tree *tree,
1097 u64 *start, u64 *end, u64 max_bytes)
1098{
1099 struct rb_node *node;
1100 struct extent_state *state;
1101 u64 cur_start = *start;
1102 u64 found = 0;
1103 u64 total_bytes = 0;
1104
1105 spin_lock(&tree->lock);
1106
1107 /*
1108 * this search will find all the extents that end after
1109 * our range starts.
1110 */
1111 node = tree_search(tree, cur_start);
1112 if (!node) {
1113 if (!found)
1114 *end = (u64)-1;
1115 goto out;
1116 }
1117
1118 while (1) {
1119 state = rb_entry(node, struct extent_state, rb_node);
1120 if (found && (state->start != cur_start ||
1121 (state->state & EXTENT_BOUNDARY))) {
1122 goto out;
1123 }
1124 if (!(state->state & EXTENT_DELALLOC)) {
1125 if (!found)
1126 *end = state->end;
1127 goto out;
1128 }
1129 if (!found)
1130 *start = state->start;
1131 found++;
1132 *end = state->end;
1133 cur_start = state->end + 1;
1134 node = rb_next(node);
1135 if (!node)
1136 break;
1137 total_bytes += state->end - state->start + 1;
1138 if (total_bytes >= max_bytes)
1139 break;
1140 }
1141out:
1142 spin_unlock(&tree->lock);
1143 return found;
1144}
1145
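/*
 * unlock every page in [start, end] except locked_page, dropping the
 * references taken by find_get_pages_contig along the way.
 */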
1146static noinline int __unlock_for_delalloc(struct inode *inode,
1147 struct page *locked_page,
1148 u64 start, u64 end)
1149{
1150 int ret;
1151 struct page *pages[16];
1152 unsigned long index = start >> PAGE_CACHE_SHIFT;
1153 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1154 unsigned long nr_pages = end_index - index + 1;
1155 int i;
1156
1157 if (index == locked_page->index && end_index == index)
1158 return 0;
1159
1160 while (nr_pages > 0) {
1161 ret = find_get_pages_contig(inode->i_mapping, index,
1162 min_t(unsigned long, nr_pages,
1163 ARRAY_SIZE(pages)), pages);
1164 for (i = 0; i < ret; i++) {
1165 if (pages[i] != locked_page)
1166 unlock_page(pages[i]);
1167 page_cache_release(pages[i]);
1168 }
1169 nr_pages -= ret;
1170 index += ret;
1171 cond_resched();
1172 }
1173 return 0;
1174}
1175
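/*
 * lock the pages covering [delalloc_start, delalloc_end], skipping the
 * page the caller already holds. Returns -EAGAIN (after unlocking what
 * was taken) if a page has gone away or is no longer dirty.
 */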
1176static noinline int lock_delalloc_pages(struct inode *inode,
1177 struct page *locked_page,
1178 u64 delalloc_start,
1179 u64 delalloc_end)
1180{
1181 unsigned long index = delalloc_start >> PAGE_CACHE_SHIFT;
1182 unsigned long start_index = index;
1183 unsigned long end_index = delalloc_end >> PAGE_CACHE_SHIFT;
1184 unsigned long pages_locked = 0;
1185 struct page *pages[16];
1186 unsigned long nrpages;
1187 int ret;
1188 int i;
1189
1190 /* the caller is responsible for locking the start index */
1191 if (index == locked_page->index && index == end_index)
1192 return 0;
1193
1194 /* skip the page at the start index */
1195 nrpages = end_index - index + 1;
1196 while (nrpages > 0) {
1197 ret = find_get_pages_contig(inode->i_mapping, index,
1198 min_t(unsigned long,
1199 nrpages, ARRAY_SIZE(pages)), pages);
1200 if (ret == 0) {
1201 ret = -EAGAIN;
1202 goto done;
1203 }
1204 /* now we have an array of pages, lock them all */
1205 for (i = 0; i < ret; i++) {
1206 /*
1207 * the caller is taking responsibility for
1208 * locked_page
1209 */
1210 if (pages[i] != locked_page) {
1211 lock_page(pages[i]);
1212 if (!PageDirty(pages[i]) ||
1213 pages[i]->mapping != inode->i_mapping) {
1214 ret = -EAGAIN;
1215 unlock_page(pages[i]);
1216 page_cache_release(pages[i]);
1217 goto done;
1218 }
1219 }
1220 page_cache_release(pages[i]);
1221 pages_locked++;
1222 }
1223 nrpages -= ret;
1224 index += ret;
1225 cond_resched();
1226 }
1227 ret = 0;
1228done:
1229 if (ret && pages_locked) {
1230 __unlock_for_delalloc(inode, locked_page,
1231 delalloc_start,
1232 ((u64)(start_index + pages_locked - 1)) <<
1233 PAGE_CACHE_SHIFT);
1234 }
1235 return ret;
1236}
1237
1238/*
1239 * find a contiguous range of bytes in the file marked as delalloc, not
1240 * more than 'max_bytes'. start and end are used to return the range,
1241 *
1242 * 1 is returned if we find something, 0 if nothing was in the tree
1243 */
1244static noinline u64 find_lock_delalloc_range(struct inode *inode,
1245 struct extent_io_tree *tree,
1246 struct page *locked_page,
1247 u64 *start, u64 *end,
1248 u64 max_bytes)
1249{
1250 u64 delalloc_start;
1251 u64 delalloc_end;
1252 u64 found;
1253 int ret;
1254 int loops = 0;
1255
1256again:
1257 /* step one, find a bunch of delalloc bytes starting at start */
1258 delalloc_start = *start;
1259 delalloc_end = 0;
1260 found = find_delalloc_range(tree, &delalloc_start, &delalloc_end,
1261 max_bytes);
1262 if (!found || delalloc_end <= *start) {
1263 *start = delalloc_start;
1264 *end = delalloc_end;
1265 return found;
1266 }
1267
1268 /*
1269 * start comes from the offset of locked_page. We have to lock
1270 * pages in order, so we can't process delalloc bytes before
1271 * locked_page
1272 */
1273 if (delalloc_start < *start)
1274 delalloc_start = *start;
1275
1276 /*
1277 * make sure to limit the number of pages we try to lock down
1278 * if we're looping.
1279 */
1280 if (delalloc_end + 1 - delalloc_start > max_bytes && loops)
1281 delalloc_end = delalloc_start + PAGE_CACHE_SIZE - 1;
1282
1283 /* step two, lock all the pages after the page that has start */
1284 ret = lock_delalloc_pages(inode, locked_page,
1285 delalloc_start, delalloc_end);
1286 if (ret == -EAGAIN) {
1287 /* some of the pages are gone, lets avoid looping by
1288 * shortening the size of the delalloc range we're searching
1289 */
1290 if (!loops) {
1291 unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1);
1292 max_bytes = PAGE_CACHE_SIZE - offset;
1293 loops = 1;
1294 goto again;
1295 } else {
1296 found = 0;
1297 goto out_failed;
1298 }
1299 }
1300 BUG_ON(ret);
1301
1302 /* step three, lock the state bits for the whole range */
1303 lock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS);
1304
1305 /* then test to make sure it is all still delalloc */
1306 ret = test_range_bit(tree, delalloc_start, delalloc_end,
1307 EXTENT_DELALLOC, 1);
1308 if (!ret) {
1309 unlock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS);
1310 __unlock_for_delalloc(inode, locked_page,
1311 delalloc_start, delalloc_end);
1312 cond_resched();
1313 goto again;
1314 }
1315 *start = delalloc_start;
1316 *end = delalloc_end;
1317out_failed:
1318 return found;
1319}
1320
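/*
 * clear the given bits from the io tree for [start, end] and then walk
 * the pages in that range, optionally clearing dirty, starting or ending
 * writeback, and unlocking them. locked_page is left for the caller.
 */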
1321int extent_clear_unlock_delalloc(struct inode *inode,
1322 struct extent_io_tree *tree,
1323 u64 start, u64 end, struct page *locked_page,
1324 int unlock_pages,
1325 int clear_unlock,
1326 int clear_delalloc, int clear_dirty,
1327 int set_writeback,
1328 int end_writeback)
1329{
1330 int ret;
1331 struct page *pages[16];
1332 unsigned long index = start >> PAGE_CACHE_SHIFT;
1333 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1334 unsigned long nr_pages = end_index - index + 1;
1335 int i;
1336 int clear_bits = 0;
1337
1338 if (clear_unlock)
1339 clear_bits |= EXTENT_LOCKED;
1340 if (clear_dirty)
1341 clear_bits |= EXTENT_DIRTY;
1342
1343 if (clear_delalloc)
1344 clear_bits |= EXTENT_DELALLOC;
1345
1346 clear_extent_bit(tree, start, end, clear_bits, 1, 0, GFP_NOFS);
1347 if (!(unlock_pages || clear_dirty || set_writeback || end_writeback))
1348 return 0;
1349
1350 while (nr_pages > 0) {
1351 ret = find_get_pages_contig(inode->i_mapping, index,
1352 min_t(unsigned long,
1353 nr_pages, ARRAY_SIZE(pages)), pages);
1354 for (i = 0; i < ret; i++) {
1355 if (pages[i] == locked_page) {
1356 page_cache_release(pages[i]);
1357 continue;
1358 }
1359 if (clear_dirty)
1360 clear_page_dirty_for_io(pages[i]);
1361 if (set_writeback)
1362 set_page_writeback(pages[i]);
1363 if (end_writeback)
1364 end_page_writeback(pages[i]);
1365 if (unlock_pages)
1366 unlock_page(pages[i]);
1367 page_cache_release(pages[i]);
1368 }
1369 nr_pages -= ret;
1370 index += ret;
1371 cond_resched();
1372 }
1373 return 0;
1374}
1375EXPORT_SYMBOL(extent_clear_unlock_delalloc);
1376
1377/*
1378 * count the number of bytes in the tree that have a given bit(s)
1379 * set. This can be fairly slow, except for EXTENT_DIRTY which is
1380 * cached. The total number found is returned.
1381 */
1382u64 count_range_bits(struct extent_io_tree *tree,
1383 u64 *start, u64 search_end, u64 max_bytes,
1384 unsigned long bits)
1385{
1386 struct rb_node *node;
1387 struct extent_state *state;
1388 u64 cur_start = *start;
1389 u64 total_bytes = 0;
1390 int found = 0;
1391
1392 if (search_end <= cur_start) {
1393 WARN_ON(1);
1394 return 0;
1395 }
1396
1397 spin_lock(&tree->lock);
1398 if (cur_start == 0 && bits == EXTENT_DIRTY) {
1399 total_bytes = tree->dirty_bytes;
1400 goto out;
1401 }
1402 /*
1403 * this search will find all the extents that end after
1404 * our range starts.
1405 */
1406 node = tree_search(tree, cur_start);
1407 if (!node)
1408 goto out;
1409
1410 while (1) {
1411 state = rb_entry(node, struct extent_state, rb_node);
1412 if (state->start > search_end)
1413 break;
1414 if (state->end >= cur_start && (state->state & bits)) {
1415 total_bytes += min(search_end, state->end) + 1 -
1416 max(cur_start, state->start);
1417 if (total_bytes >= max_bytes)
1418 break;
1419 if (!found) {
1420 *start = state->start;
1421 found = 1;
1422 }
1423 }
1424 node = rb_next(node);
1425 if (!node)
1426 break;
1427 }
1428out:
1429 spin_unlock(&tree->lock);
1430 return total_bytes;
1431}
1432
1433#if 0
1434/*
1435 * helper function to lock both pages and extents in the tree.
1436 * pages must be locked first.
1437 */
1438static int lock_range(struct extent_io_tree *tree, u64 start, u64 end)
1439{
1440 unsigned long index = start >> PAGE_CACHE_SHIFT;
1441 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1442 struct page *page;
1443 int err;
1444
1445 while (index <= end_index) {
1446 page = grab_cache_page(tree->mapping, index);
1447 if (!page) {
1448 err = -ENOMEM;
1449 goto failed;
1450 }
1451 if (IS_ERR(page)) {
1452 err = PTR_ERR(page);
1453 goto failed;
1454 }
1455 index++;
1456 }
1457 lock_extent(tree, start, end, GFP_NOFS);
1458 return 0;
1459
1460failed:
1461 /*
1462 * we failed above in getting the page at 'index', so we undo here
1463 * up to but not including the page at 'index'
1464 */
1465 end_index = index;
1466 index = start >> PAGE_CACHE_SHIFT;
1467 while (index < end_index) {
1468 page = find_get_page(tree->mapping, index);
1469 unlock_page(page);
1470 page_cache_release(page);
1471 index++;
1472 }
1473 return err;
1474}
1475
1476/*
1477 * helper function to unlock both pages and extents in the tree.
1478 */
1479static int unlock_range(struct extent_io_tree *tree, u64 start, u64 end)
1480{
1481 unsigned long index = start >> PAGE_CACHE_SHIFT;
1482 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1483 struct page *page;
1484
1485 while (index <= end_index) {
1486 page = find_get_page(tree->mapping, index);
1487 unlock_page(page);
1488 page_cache_release(page);
1489 index++;
1490 }
1491 unlock_extent(tree, start, end, GFP_NOFS);
1492 return 0;
1493}
1494#endif
1495
1496/*
1497 * set the private field for a given byte offset in the tree. If there isn't
1498 * an extent_state there already, this does nothing.
1499 */
1500int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
1501{
1502 struct rb_node *node;
1503 struct extent_state *state;
1504 int ret = 0;
1505
1506 spin_lock(&tree->lock);
1507 /*
1508 * this search will find all the extents that end after
1509 * our range starts.
1510 */
1511 node = tree_search(tree, start);
1512 if (!node) {
1513 ret = -ENOENT;
1514 goto out;
1515 }
1516 state = rb_entry(node, struct extent_state, rb_node);
1517 if (state->start != start) {
1518 ret = -ENOENT;
1519 goto out;
1520 }
1521 state->private = private;
1522out:
1523 spin_unlock(&tree->lock);
1524 return ret;
1525}
1526
1527int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
1528{
1529 struct rb_node *node;
1530 struct extent_state *state;
1531 int ret = 0;
1532
1533 spin_lock(&tree->lock);
1534 /*
1535 * this search will find all the extents that end after
1536 * our range starts.
1537 */
1538 node = tree_search(tree, start);
1539 if (!node) {
1540 ret = -ENOENT;
1541 goto out;
1542 }
1543 state = rb_entry(node, struct extent_state, rb_node);
1544 if (state->start != start) {
1545 ret = -ENOENT;
1546 goto out;
1547 }
1548 *private = state->private;
1549out:
1550 spin_unlock(&tree->lock);
1551 return ret;
1552}
1553
1554/*
1555 * searches a range in the state tree for a given mask.
1556 * If 'filled' == 1, this returns 1 only if every extent in the tree
1557 * has the bits set. Otherwise, 1 is returned if any bit in the
1558 * range is found set.
1559 */
1560int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
1561 int bits, int filled)
1562{
1563 struct extent_state *state = NULL;
1564 struct rb_node *node;
1565 int bitset = 0;
1566
1567 spin_lock(&tree->lock);
1568 node = tree_search(tree, start);
1569 while (node && start <= end) {
1570 state = rb_entry(node, struct extent_state, rb_node);
1571
1572 if (filled && state->start > start) {
1573 bitset = 0;
1574 break;
1575 }
1576
1577 if (state->start > end)
1578 break;
1579
1580 if (state->state & bits) {
1581 bitset = 1;
1582 if (!filled)
1583 break;
1584 } else if (filled) {
1585 bitset = 0;
1586 break;
1587 }
1588 start = state->end + 1;
1589 if (start > end)
1590 break;
1591 node = rb_next(node);
1592 if (!node) {
1593 if (filled)
1594 bitset = 0;
1595 break;
1596 }
1597 }
1598 spin_unlock(&tree->lock);
1599 return bitset;
1600}
1601EXPORT_SYMBOL(test_range_bit);
1602
1603/*
1604 * helper function to set a given page up to date if all the
1605 * extents in the tree for that page are up to date
1606 */
1607static int check_page_uptodate(struct extent_io_tree *tree,
1608 struct page *page)
1609{
1610 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1611 u64 end = start + PAGE_CACHE_SIZE - 1;
1612 if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1))
1613 SetPageUptodate(page);
1614 return 0;
1615}
1616
1617/*
1618 * helper function to unlock a page if all the extents in the tree
1619 * for that page are unlocked
1620 */
1621static int check_page_locked(struct extent_io_tree *tree,
1622 struct page *page)
1623{
1624 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1625 u64 end = start + PAGE_CACHE_SIZE - 1;
1626 if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0))
1627 unlock_page(page);
1628 return 0;
1629}
1630
1631/*
1632 * helper function to end page writeback if all the extents
1633 * in the tree for that page are done with writeback
1634 */
1635static int check_page_writeback(struct extent_io_tree *tree,
1636 struct page *page)
1637{
1638 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1639 u64 end = start + PAGE_CACHE_SIZE - 1;
1640 if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0))
1641 end_page_writeback(page);
1642 return 0;
1643}
1644
1645/* lots and lots of room for performance fixes in the end_bio funcs */
1646
1647/*
1648 * after a writepage IO is done, we need to:
1649 * clear the uptodate bits on error
1650 * clear the writeback bits in the extent tree for this IO
1651 * end_page_writeback if the page has no more pending IO
1652 *
1653 * Scheduling is not allowed, so the extent state tree is expected
1654 * to have one and only one object corresponding to this IO.
1655 */
1656static void end_bio_extent_writepage(struct bio *bio, int err)
1657{
1658 int uptodate = err == 0;
1659 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1660 struct extent_io_tree *tree;
1661 u64 start;
1662 u64 end;
1663 int whole_page;
1664 int ret;
1665
1666 do {
1667 struct page *page = bvec->bv_page;
1668 tree = &BTRFS_I(page->mapping->host)->io_tree;
1669
1670 start = ((u64)page->index << PAGE_CACHE_SHIFT) +
1671 bvec->bv_offset;
1672 end = start + bvec->bv_len - 1;
1673
1674 if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
1675 whole_page = 1;
1676 else
1677 whole_page = 0;
1678
1679 if (--bvec >= bio->bi_io_vec)
1680 prefetchw(&bvec->bv_page->flags);
1681 if (tree->ops && tree->ops->writepage_end_io_hook) {
1682 ret = tree->ops->writepage_end_io_hook(page, start,
1683 end, NULL, uptodate);
1684 if (ret)
1685 uptodate = 0;
1686 }
1687
1688 if (!uptodate && tree->ops &&
1689 tree->ops->writepage_io_failed_hook) {
1690 ret = tree->ops->writepage_io_failed_hook(bio, page,
1691 start, end, NULL);
1692 if (ret == 0) {
1693 uptodate = (err == 0);
1694 continue;
1695 }
1696 }
1697
1698 if (!uptodate) {
1699 clear_extent_uptodate(tree, start, end, GFP_ATOMIC);
1700 ClearPageUptodate(page);
1701 SetPageError(page);
1702 }
1703
1704 clear_extent_writeback(tree, start, end, GFP_ATOMIC);
1705
1706 if (whole_page)
1707 end_page_writeback(page);
1708 else
1709 check_page_writeback(tree, page);
1710 } while (bvec >= bio->bi_io_vec);
1711
1712 bio_put(bio);
1713}
1714
1715/*
1716 * after a readpage IO is done, we need to:
1717 * clear the uptodate bits on error
1718 * set the uptodate bits if things worked
1719 * set the page up to date if all extents in the tree are uptodate
1720 * clear the lock bit in the extent tree
1721 * unlock the page if there are no other extents locked for it
1722 *
1723 * Scheduling is not allowed, so the extent state tree is expected
1724 * to have one and only one object corresponding to this IO.
1725 */
1726static void end_bio_extent_readpage(struct bio *bio, int err)
1727{
1728 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
1729 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1730 struct extent_io_tree *tree;
1731 u64 start;
1732 u64 end;
1733 int whole_page;
1734 int ret;
1735
1736 if (err)
1737 uptodate = 0;
1738
1739 do {
1740 struct page *page = bvec->bv_page;
1741 tree = &BTRFS_I(page->mapping->host)->io_tree;
1742
1743 start = ((u64)page->index << PAGE_CACHE_SHIFT) +
1744 bvec->bv_offset;
1745 end = start + bvec->bv_len - 1;
1746
1747 if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
1748 whole_page = 1;
1749 else
1750 whole_page = 0;
1751
1752 if (--bvec >= bio->bi_io_vec)
1753 prefetchw(&bvec->bv_page->flags);
1754
1755 if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
1756 ret = tree->ops->readpage_end_io_hook(page, start, end,
1757 NULL);
1758 if (ret)
1759 uptodate = 0;
1760 }
1761 if (!uptodate && tree->ops &&
1762 tree->ops->readpage_io_failed_hook) {
1763 ret = tree->ops->readpage_io_failed_hook(bio, page,
1764 start, end, NULL);
1765 if (ret == 0) {
1766 uptodate =
1767 test_bit(BIO_UPTODATE, &bio->bi_flags);
1768 if (err)
1769 uptodate = 0;
1770 continue;
1771 }
1772 }
1773
1774 if (uptodate) {
1775 set_extent_uptodate(tree, start, end,
1776 GFP_ATOMIC);
1777 }
1778 unlock_extent(tree, start, end, GFP_ATOMIC);
1779
1780 if (whole_page) {
1781 if (uptodate) {
1782 SetPageUptodate(page);
1783 } else {
1784 ClearPageUptodate(page);
1785 SetPageError(page);
1786 }
1787 unlock_page(page);
1788 } else {
1789 if (uptodate) {
1790 check_page_uptodate(tree, page);
1791 } else {
1792 ClearPageUptodate(page);
1793 SetPageError(page);
1794 }
1795 check_page_locked(tree, page);
1796 }
1797 } while (bvec >= bio->bi_io_vec);
1798
1799 bio_put(bio);
1800}
1801
1802/*
1803 * IO done from prepare_write is pretty simple, we just unlock
1804 * the structs in the extent tree when done, and set the uptodate bits
1805 * as appropriate.
1806 */
1807static void end_bio_extent_preparewrite(struct bio *bio, int err)
1808{
1809 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
1810 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1811 struct extent_io_tree *tree;
1812 u64 start;
1813 u64 end;
1814
1815 do {
1816 struct page *page = bvec->bv_page;
1817 tree = &BTRFS_I(page->mapping->host)->io_tree;
1818
1819 start = ((u64)page->index << PAGE_CACHE_SHIFT) +
1820 bvec->bv_offset;
1821 end = start + bvec->bv_len - 1;
1822
1823 if (--bvec >= bio->bi_io_vec)
1824 prefetchw(&bvec->bv_page->flags);
1825
1826 if (uptodate) {
1827 set_extent_uptodate(tree, start, end, GFP_ATOMIC);
1828 } else {
1829 ClearPageUptodate(page);
1830 SetPageError(page);
1831 }
1832
1833 unlock_extent(tree, start, end, GFP_ATOMIC);
1834
1835 } while (bvec >= bio->bi_io_vec);
1836
1837 bio_put(bio);
1838}
1839
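/*
 * allocate a bio with room for nr_vecs pages, retrying with fewer vecs
 * when memory is tight and we are already in reclaim (PF_MEMALLOC).
 */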
1840static struct bio *
1841extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
1842 gfp_t gfp_flags)
1843{
1844 struct bio *bio;
1845
1846 bio = bio_alloc(gfp_flags, nr_vecs);
1847
1848 if (bio == NULL && (current->flags & PF_MEMALLOC)) {
1849 while (!bio && (nr_vecs /= 2))
1850 bio = bio_alloc(gfp_flags, nr_vecs);
1851 }
1852
1853 if (bio) {
1854 bio->bi_size = 0;
1855 bio->bi_bdev = bdev;
1856 bio->bi_sector = first_sector;
1857 }
1858 return bio;
1859}
1860
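/*
 * hand a filled bio to the tree's submit_bio_hook if one is registered,
 * otherwise submit it directly. Returns -EOPNOTSUPP if the device
 * rejected the request.
 */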
1861static int submit_one_bio(int rw, struct bio *bio, int mirror_num,
1862 unsigned long bio_flags)
1863{
1864 int ret = 0;
1865 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1866 struct page *page = bvec->bv_page;
1867 struct extent_io_tree *tree = bio->bi_private;
1868 u64 start;
1869 u64 end;
1870
1871 start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
1872 end = start + bvec->bv_len - 1;
1873
1874 bio->bi_private = NULL;
1875
1876 bio_get(bio);
1877
1878 if (tree->ops && tree->ops->submit_bio_hook)
1879 tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
1880 mirror_num, bio_flags);
1881 else
1882 submit_bio(rw, bio);
1883 if (bio_flagged(bio, BIO_EOPNOTSUPP))
1884 ret = -EOPNOTSUPP;
1885 bio_put(bio);
1886 return ret;
1887}
1888
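/*
 * add a page to the bio being built, submitting the old bio and starting
 * a new one whenever the page is not contiguous, the bio flags change,
 * or the merge_bio_hook refuses the merge.
 */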
1889static int submit_extent_page(int rw, struct extent_io_tree *tree,
1890 struct page *page, sector_t sector,
1891 size_t size, unsigned long offset,
1892 struct block_device *bdev,
1893 struct bio **bio_ret,
1894 unsigned long max_pages,
1895 bio_end_io_t end_io_func,
1896 int mirror_num,
1897 unsigned long prev_bio_flags,
1898 unsigned long bio_flags)
1899{
1900 int ret = 0;
1901 struct bio *bio;
1902 int nr;
1903 int contig = 0;
1904 int this_compressed = bio_flags & EXTENT_BIO_COMPRESSED;
1905 int old_compressed = prev_bio_flags & EXTENT_BIO_COMPRESSED;
1906 size_t page_size = min_t(size_t, size, PAGE_CACHE_SIZE);
1907
1908 if (bio_ret && *bio_ret) {
1909 bio = *bio_ret;
1910 if (old_compressed)
1911 contig = bio->bi_sector == sector;
1912 else
1913 contig = bio->bi_sector + (bio->bi_size >> 9) ==
1914 sector;
1915
1916 if (prev_bio_flags != bio_flags || !contig ||
1917 (tree->ops && tree->ops->merge_bio_hook &&
1918 tree->ops->merge_bio_hook(page, offset, page_size, bio,
1919 bio_flags)) ||
1920 bio_add_page(bio, page, page_size, offset) < page_size) {
1921 ret = submit_one_bio(rw, bio, mirror_num,
1922 prev_bio_flags);
1923 bio = NULL;
1924 } else {
1925 return 0;
1926 }
1927 }
1928 if (this_compressed)
1929 nr = BIO_MAX_PAGES;
1930 else
1931 nr = bio_get_nr_vecs(bdev);
1932
1933 bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
1934
1935 bio_add_page(bio, page, page_size, offset);
1936 bio->bi_end_io = end_io_func;
1937 bio->bi_private = tree;
70dec807 1938
d397712b 1939 if (bio_ret)
d1310b2e 1940 *bio_ret = bio;
d397712b 1941 else
c8b97818 1942 ret = submit_one_bio(rw, bio, mirror_num, bio_flags);
1943
1944 return ret;
1945}
1946
1947void set_page_extent_mapped(struct page *page)
1948{
1949 if (!PagePrivate(page)) {
1950 SetPagePrivate(page);
d1310b2e 1951 page_cache_get(page);
6af118ce 1952 set_page_private(page, EXTENT_PAGE_PRIVATE);
1953 }
1954}
771ed689 1955EXPORT_SYMBOL(set_page_extent_mapped);
d1310b2e 1956
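/*
 * mark a page as the head page of an extent buffer: the buffer length is
 * stored in page->private, shifted left by two, together with the
 * EXTENT_PAGE_PRIVATE_FIRST_PAGE flag.
 */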
b2950863 1957static void set_page_extent_head(struct page *page, unsigned long len)
1958{
1959 set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2);
1960}
1961
1962/*
1963 * basic readpage implementation.  Locked extent state structs are inserted
1964 * into the tree and are removed when the IO is done (by the end_io
1965 * handlers).
1966 */
1967static int __extent_read_full_page(struct extent_io_tree *tree,
1968 struct page *page,
1969 get_extent_t *get_extent,
1970 struct bio **bio, int mirror_num,
1971 unsigned long *bio_flags)
1972{
1973 struct inode *inode = page->mapping->host;
1974 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1975 u64 page_end = start + PAGE_CACHE_SIZE - 1;
1976 u64 end;
1977 u64 cur = start;
1978 u64 extent_offset;
1979 u64 last_byte = i_size_read(inode);
1980 u64 block_start;
1981 u64 cur_end;
1982 sector_t sector;
1983 struct extent_map *em;
1984 struct block_device *bdev;
1985 int ret;
1986 int nr = 0;
1987 size_t page_offset = 0;
1988 size_t iosize;
c8b97818 1989 size_t disk_io_size;
d1310b2e 1990 size_t blocksize = inode->i_sb->s_blocksize;
c8b97818 1991 unsigned long this_bio_flag = 0;
1992
1993 set_page_extent_mapped(page);
1994
1995 end = page_end;
1996 lock_extent(tree, start, end, GFP_NOFS);
1997
1998 if (page->index == last_byte >> PAGE_CACHE_SHIFT) {
1999 char *userpage;
2000 size_t zero_offset = last_byte & (PAGE_CACHE_SIZE - 1);
2001
2002 if (zero_offset) {
2003 iosize = PAGE_CACHE_SIZE - zero_offset;
2004 userpage = kmap_atomic(page, KM_USER0);
2005 memset(userpage + zero_offset, 0, iosize);
2006 flush_dcache_page(page);
2007 kunmap_atomic(userpage, KM_USER0);
2008 }
2009 }
2010 while (cur <= end) {
2011 if (cur >= last_byte) {
2012 char *userpage;
2013 iosize = PAGE_CACHE_SIZE - page_offset;
2014 userpage = kmap_atomic(page, KM_USER0);
2015 memset(userpage + page_offset, 0, iosize);
2016 flush_dcache_page(page);
2017 kunmap_atomic(userpage, KM_USER0);
2018 set_extent_uptodate(tree, cur, cur + iosize - 1,
2019 GFP_NOFS);
2020 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
2021 break;
2022 }
2023 em = get_extent(inode, page, page_offset, cur,
2024 end - cur + 1, 0);
2025 if (IS_ERR(em) || !em) {
2026 SetPageError(page);
2027 unlock_extent(tree, cur, end, GFP_NOFS);
2028 break;
2029 }
2030 extent_offset = cur - em->start;
2031 BUG_ON(extent_map_end(em) <= cur);
2032 BUG_ON(end < cur);
2033
2034 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
2035 this_bio_flag = EXTENT_BIO_COMPRESSED;
2036
2037 iosize = min(extent_map_end(em) - cur, end - cur + 1);
2038 cur_end = min(extent_map_end(em) - 1, end);
2039 iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
2040 if (this_bio_flag & EXTENT_BIO_COMPRESSED) {
2041 disk_io_size = em->block_len;
2042 sector = em->block_start >> 9;
2043 } else {
2044 sector = (em->block_start + extent_offset) >> 9;
2045 disk_io_size = iosize;
2046 }
2047 bdev = em->bdev;
2048 block_start = em->block_start;
2049 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
2050 block_start = EXTENT_MAP_HOLE;
2051 free_extent_map(em);
2052 em = NULL;
2053
2054 /* we've found a hole, just zero and go on */
2055 if (block_start == EXTENT_MAP_HOLE) {
2056 char *userpage;
2057 userpage = kmap_atomic(page, KM_USER0);
2058 memset(userpage + page_offset, 0, iosize);
2059 flush_dcache_page(page);
2060 kunmap_atomic(userpage, KM_USER0);
2061
2062 set_extent_uptodate(tree, cur, cur + iosize - 1,
2063 GFP_NOFS);
2064 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
2065 cur = cur + iosize;
2066 page_offset += iosize;
2067 continue;
2068 }
2069 /* the get_extent function already copied into the page */
2070 if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) {
a1b32a59 2071 check_page_uptodate(tree, page);
2072 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
2073 cur = cur + iosize;
2074 page_offset += iosize;
2075 continue;
2076 }
2077 /* we have an inline extent but it didn't get marked up
2078 * to date. Error out
2079 */
2080 if (block_start == EXTENT_MAP_INLINE) {
2081 SetPageError(page);
2082 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
2083 cur = cur + iosize;
2084 page_offset += iosize;
2085 continue;
2086 }
2087
2088 ret = 0;
2089 if (tree->ops && tree->ops->readpage_io_hook) {
2090 ret = tree->ops->readpage_io_hook(page, cur,
2091 cur + iosize - 1);
2092 }
2093 if (!ret) {
2094 unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
2095 pnr -= page->index;
d1310b2e 2096 ret = submit_extent_page(READ, tree, page,
c8b97818 2097 sector, disk_io_size, page_offset,
89642229 2098 bdev, bio, pnr,
2099 end_bio_extent_readpage, mirror_num,
2100 *bio_flags,
2101 this_bio_flag);
89642229 2102 nr++;
c8b97818 2103 *bio_flags = this_bio_flag;
2104 }
2105 if (ret)
2106 SetPageError(page);
2107 cur = cur + iosize;
2108 page_offset += iosize;
2109 }
2110 if (!nr) {
2111 if (!PageError(page))
2112 SetPageUptodate(page);
2113 unlock_page(page);
2114 }
2115 return 0;
2116}
2117
2118int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
2119 get_extent_t *get_extent)
2120{
2121 struct bio *bio = NULL;
c8b97818 2122 unsigned long bio_flags = 0;
2123 int ret;
2124
2125 ret = __extent_read_full_page(tree, page, get_extent, &bio, 0,
2126 &bio_flags);
d1310b2e 2127 if (bio)
c8b97818 2128 submit_one_bio(READ, bio, 0, bio_flags);
2129 return ret;
2130}
2131EXPORT_SYMBOL(extent_read_full_page);
2132
2133/*
2134 * the writepage semantics are similar to regular writepage. extent
2135 * records are inserted to lock ranges in the tree, and as dirty areas
2136 * are found, they are marked writeback. Then the lock bits are removed
2137 * and the end_io handler clears the writeback ranges
2138 */
2139static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2140 void *data)
2141{
2142 struct inode *inode = page->mapping->host;
2143 struct extent_page_data *epd = data;
2144 struct extent_io_tree *tree = epd->tree;
2145 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
2146 u64 delalloc_start;
2147 u64 page_end = start + PAGE_CACHE_SIZE - 1;
2148 u64 end;
2149 u64 cur = start;
2150 u64 extent_offset;
2151 u64 last_byte = i_size_read(inode);
2152 u64 block_start;
2153 u64 iosize;
e6dcd2dc 2154 u64 unlock_start;
2155 sector_t sector;
2156 struct extent_map *em;
2157 struct block_device *bdev;
2158 int ret;
2159 int nr = 0;
7f3c74fb 2160 size_t pg_offset = 0;
2161 size_t blocksize;
2162 loff_t i_size = i_size_read(inode);
2163 unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
2164 u64 nr_delalloc;
2165 u64 delalloc_end;
2166 int page_started;
2167 int compressed;
771ed689 2168 unsigned long nr_written = 0;
2169
2170 WARN_ON(!PageLocked(page));
7f3c74fb 2171 pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
211c17f5 2172 if (page->index > end_index ||
7f3c74fb 2173 (page->index == end_index && !pg_offset)) {
39be25cd 2174 page->mapping->a_ops->invalidatepage(page, 0);
2175 unlock_page(page);
2176 return 0;
2177 }
2178
2179 if (page->index == end_index) {
2180 char *userpage;
2181
d1310b2e 2182 userpage = kmap_atomic(page, KM_USER0);
2183 memset(userpage + pg_offset, 0,
2184 PAGE_CACHE_SIZE - pg_offset);
d1310b2e 2185 kunmap_atomic(userpage, KM_USER0);
211c17f5 2186 flush_dcache_page(page);
d1310b2e 2187 }
7f3c74fb 2188 pg_offset = 0;
2189
2190 set_page_extent_mapped(page);
2191
2192 delalloc_start = start;
2193 delalloc_end = 0;
c8b97818 2194 page_started = 0;
771ed689 2195 if (!epd->extent_locked) {
d397712b 2196 while (delalloc_end < page_end) {
771ed689 2197 nr_delalloc = find_lock_delalloc_range(inode, tree,
2198 page,
2199 &delalloc_start,
2200 &delalloc_end,
2201 128 * 1024 * 1024);
2202 if (nr_delalloc == 0) {
2203 delalloc_start = delalloc_end + 1;
2204 continue;
2205 }
2206 tree->ops->fill_delalloc(inode, page, delalloc_start,
2207 delalloc_end, &page_started,
2208 &nr_written);
d1310b2e 2209 delalloc_start = delalloc_end + 1;
d1310b2e 2210 }
c8b97818 2211
2212 /* did the fill delalloc function already unlock and start
2213 * the IO?
2214 */
2215 if (page_started) {
2216 ret = 0;
2217 goto update_nr_written;
2218 }
c8b97818 2219 }
d1310b2e 2220 lock_extent(tree, start, page_end, GFP_NOFS);
771ed689 2221
e6dcd2dc 2222 unlock_start = start;
d1310b2e 2223
247e743c 2224 if (tree->ops && tree->ops->writepage_start_hook) {
2225 ret = tree->ops->writepage_start_hook(page, start,
2226 page_end);
2227 if (ret == -EAGAIN) {
2228 unlock_extent(tree, start, page_end, GFP_NOFS);
2229 redirty_page_for_writepage(wbc, page);
2230 unlock_page(page);
2231 ret = 0;
2232 goto update_nr_written;
2233 }
2234 }
2235
2236 nr_written++;
2237
d1310b2e 2238 end = page_end;
2239 if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0))
2240 printk(KERN_ERR "btrfs delalloc bits after lock_extent\n");
2241
2242 if (last_byte <= start) {
2243 clear_extent_dirty(tree, start, page_end, GFP_NOFS);
2244 unlock_extent(tree, start, page_end, GFP_NOFS);
2245 if (tree->ops && tree->ops->writepage_end_io_hook)
2246 tree->ops->writepage_end_io_hook(page, start,
2247 page_end, NULL, 1);
2248 unlock_start = page_end + 1;
2249 goto done;
2250 }
2251
2252 set_extent_uptodate(tree, start, page_end, GFP_NOFS);
2253 blocksize = inode->i_sb->s_blocksize;
2254
2255 while (cur <= end) {
2256 if (cur >= last_byte) {
2257 clear_extent_dirty(tree, cur, page_end, GFP_NOFS);
2258 unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
2259 if (tree->ops && tree->ops->writepage_end_io_hook)
2260 tree->ops->writepage_end_io_hook(page, cur,
2261 page_end, NULL, 1);
2262 unlock_start = page_end + 1;
2263 break;
2264 }
7f3c74fb 2265 em = epd->get_extent(inode, page, pg_offset, cur,
2266 end - cur + 1, 1);
2267 if (IS_ERR(em) || !em) {
2268 SetPageError(page);
2269 break;
2270 }
2271
2272 extent_offset = cur - em->start;
2273 BUG_ON(extent_map_end(em) <= cur);
2274 BUG_ON(end < cur);
2275 iosize = min(extent_map_end(em) - cur, end - cur + 1);
2276 iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
2277 sector = (em->block_start + extent_offset) >> 9;
2278 bdev = em->bdev;
2279 block_start = em->block_start;
c8b97818 2280 compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
2281 free_extent_map(em);
2282 em = NULL;
2283
2284 /*
2285 * compressed and inline extents are written through other
2286 * paths in the FS
2287 */
2288 if (compressed || block_start == EXTENT_MAP_HOLE ||
2289 block_start == EXTENT_MAP_INLINE) {
2290 clear_extent_dirty(tree, cur,
2291 cur + iosize - 1, GFP_NOFS);
e6dcd2dc 2292
d397712b 2293 unlock_extent(tree, unlock_start, cur + iosize - 1,
e6dcd2dc 2294 GFP_NOFS);
7f3c74fb 2295
2296 /*
2297 * end_io notification does not happen here for
2298 * compressed extents
2299 */
2300 if (!compressed && tree->ops &&
2301 tree->ops->writepage_end_io_hook)
2302 tree->ops->writepage_end_io_hook(page, cur,
2303 cur + iosize - 1,
2304 NULL, 1);
2305 else if (compressed) {
2306 /* we don't want to end_page_writeback on
2307 * a compressed extent. this happens
2308 * elsewhere
2309 */
2310 nr++;
2311 }
2312
2313 cur += iosize;
7f3c74fb 2314 pg_offset += iosize;
e6dcd2dc 2315 unlock_start = cur;
2316 continue;
2317 }
2318 /* leave this out until we have a page_mkwrite call */
2319 if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
2320 EXTENT_DIRTY, 0)) {
2321 cur = cur + iosize;
7f3c74fb 2322 pg_offset += iosize;
2323 continue;
2324 }
c8b97818 2325
2326 clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS);
2327 if (tree->ops && tree->ops->writepage_io_hook) {
2328 ret = tree->ops->writepage_io_hook(page, cur,
2329 cur + iosize - 1);
2330 } else {
2331 ret = 0;
2332 }
1259ab75 2333 if (ret) {
d1310b2e 2334 SetPageError(page);
1259ab75 2335 } else {
d1310b2e 2336 unsigned long max_nr = end_index + 1;
7f3c74fb 2337
2338 set_range_writeback(tree, cur, cur + iosize - 1);
2339 if (!PageWriteback(page)) {
2340 printk(KERN_ERR "btrfs warning page %lu not "
2341 "writeback, cur %llu end %llu\n",
2342 page->index, (unsigned long long)cur,
2343 (unsigned long long)end);
2344 }
2345
2346 ret = submit_extent_page(WRITE, tree, page, sector,
7f3c74fb 2347 iosize, pg_offset, bdev,
d1310b2e 2348 &epd->bio, max_nr,
2349 end_bio_extent_writepage,
2350 0, 0, 0);
2351 if (ret)
2352 SetPageError(page);
2353 }
2354 cur = cur + iosize;
7f3c74fb 2355 pg_offset += iosize;
2356 nr++;
2357 }
2358done:
2359 if (nr == 0) {
2360 /* make sure the mapping tag for page dirty gets cleared */
2361 set_page_writeback(page);
2362 end_page_writeback(page);
2363 }
2364 if (unlock_start <= page_end)
2365 unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
d1310b2e 2366 unlock_page(page);
2367
2368update_nr_written:
2369 wbc->nr_to_write -= nr_written;
2370 if (wbc->range_cyclic || (wbc->nr_to_write > 0 &&
2371 wbc->range_start == 0 && wbc->range_end == LLONG_MAX))
2372 page->mapping->writeback_index = page->index + nr_written;
2373 return 0;
2374}
2375
d1310b2e 2376/**
4bef0848 2377 * extent_write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
2378 * @mapping: address space structure to write
2379 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
2380 * @writepage: function called for each page
2381 * @data: data passed to writepage function
2382 *
2383 * If a page is already under I/O, write_cache_pages() skips it, even
2384 * if it's dirty. This is desirable behaviour for memory-cleaning writeback,
2385 * but it is INCORRECT for data-integrity system calls such as fsync(). fsync()
2386 * and msync() need to guarantee that all the data which was dirty at the time
2387 * the call was made get new I/O started against them. If wbc->sync_mode is
2388 * WB_SYNC_ALL then we were called for data integrity and we must wait for
2389 * existing IO to complete.
2390 */
b2950863 2391static int extent_write_cache_pages(struct extent_io_tree *tree,
2392 struct address_space *mapping,
2393 struct writeback_control *wbc,
2394 writepage_t writepage, void *data,
2395 void (*flush_fn)(void *))
2396{
2397 struct backing_dev_info *bdi = mapping->backing_dev_info;
2398 int ret = 0;
2399 int done = 0;
2400 struct pagevec pvec;
2401 int nr_pages;
2402 pgoff_t index;
2403 pgoff_t end; /* Inclusive */
2404 int scanned = 0;
2405 int range_whole = 0;
2406
2407 if (wbc->nonblocking && bdi_write_congested(bdi)) {
2408 wbc->encountered_congestion = 1;
2409 return 0;
2410 }
2411
2412 pagevec_init(&pvec, 0);
2413 if (wbc->range_cyclic) {
2414 index = mapping->writeback_index; /* Start from prev offset */
2415 end = -1;
2416 } else {
2417 index = wbc->range_start >> PAGE_CACHE_SHIFT;
2418 end = wbc->range_end >> PAGE_CACHE_SHIFT;
2419 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2420 range_whole = 1;
2421 scanned = 1;
2422 }
2423retry:
2424 while (!done && (index <= end) &&
2425 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
2426 PAGECACHE_TAG_DIRTY, min(end - index,
2427 (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
2428 unsigned i;
2429
2430 scanned = 1;
2431 for (i = 0; i < nr_pages; i++) {
2432 struct page *page = pvec.pages[i];
2433
2434 /*
2435 * At this point we hold neither mapping->tree_lock nor
2436 * lock on the page itself: the page may be truncated or
2437 * invalidated (changing page->mapping to NULL), or even
2438 * swizzled back from swapper_space to tmpfs file
2439 * mapping
2440 */
2441 if (tree->ops && tree->ops->write_cache_pages_lock_hook)
2442 tree->ops->write_cache_pages_lock_hook(page);
2443 else
2444 lock_page(page);
2445
2446 if (unlikely(page->mapping != mapping)) {
2447 unlock_page(page);
2448 continue;
2449 }
2450
2451 if (!wbc->range_cyclic && page->index > end) {
2452 done = 1;
2453 unlock_page(page);
2454 continue;
2455 }
2456
d2c3f4f6 2457 if (wbc->sync_mode != WB_SYNC_NONE) {
2458 if (PageWriteback(page))
2459 flush_fn(data);
d1310b2e 2460 wait_on_page_writeback(page);
d2c3f4f6 2461 }
2462
2463 if (PageWriteback(page) ||
2464 !clear_page_dirty_for_io(page)) {
2465 unlock_page(page);
2466 continue;
2467 }
2468
2469 ret = (*writepage)(page, wbc, data);
2470
2471 if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) {
2472 unlock_page(page);
2473 ret = 0;
2474 }
771ed689 2475 if (ret || wbc->nr_to_write <= 0)
2476 done = 1;
2477 if (wbc->nonblocking && bdi_write_congested(bdi)) {
2478 wbc->encountered_congestion = 1;
2479 done = 1;
2480 }
2481 }
2482 pagevec_release(&pvec);
2483 cond_resched();
2484 }
2485 if (!scanned && !done) {
2486 /*
2487 * We hit the last page and there is more work to be done: wrap
2488 * back to the start of the file
2489 */
2490 scanned = 1;
2491 index = 0;
2492 goto retry;
2493 }
2494 return ret;
2495}
d1310b2e 2496
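/*
 * flush_fn for extent_write_cache_pages: submit whatever bio has been
 * built up in the extent_page_data before blocking on writeback of a
 * page, so the IO we are about to wait for can actually be issued.
 */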
2497static noinline void flush_write_bio(void *data)
2498{
2499 struct extent_page_data *epd = data;
2500 if (epd->bio) {
2501 submit_one_bio(WRITE, epd->bio, 0, 0);
2502 epd->bio = NULL;
2503 }
2504}
2505
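/*
 * write out the page that was passed in, then opportunistically push out
 * up to 64 more dirty pages that follow it in the file using a private
 * WB_SYNC_NONE writeback_control, batching them into the same bio where
 * possible.
 */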
2506int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
2507 get_extent_t *get_extent,
2508 struct writeback_control *wbc)
2509{
2510 int ret;
2511 struct address_space *mapping = page->mapping;
2512 struct extent_page_data epd = {
2513 .bio = NULL,
2514 .tree = tree,
2515 .get_extent = get_extent,
771ed689 2516 .extent_locked = 0,
2517 };
2518 struct writeback_control wbc_writepages = {
2519 .bdi = wbc->bdi,
2520 .sync_mode = WB_SYNC_NONE,
2521 .older_than_this = NULL,
2522 .nr_to_write = 64,
2523 .range_start = page_offset(page) + PAGE_CACHE_SIZE,
2524 .range_end = (loff_t)-1,
2525 };
2526
2527
2528 ret = __extent_writepage(page, wbc, &epd);
2529
4bef0848 2530 extent_write_cache_pages(tree, mapping, &wbc_writepages,
d2c3f4f6 2531 __extent_writepage, &epd, flush_write_bio);
d397712b 2532 if (epd.bio)
c8b97818 2533 submit_one_bio(WRITE, epd.bio, 0, 0);
2534 return ret;
2535}
2536EXPORT_SYMBOL(extent_write_full_page);
2537
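/*
 * write out an already locked file range, page by page.  extent_locked
 * is set in the extent_page_data so __extent_writepage knows the caller
 * holds the extent lock; pages that are no longer dirty just get the
 * writepage_end_io_hook called on them.
 */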
2538int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
2539 u64 start, u64 end, get_extent_t *get_extent,
2540 int mode)
2541{
2542 int ret = 0;
2543 struct address_space *mapping = inode->i_mapping;
2544 struct page *page;
2545 unsigned long nr_pages = (end - start + PAGE_CACHE_SIZE) >>
2546 PAGE_CACHE_SHIFT;
2547
2548 struct extent_page_data epd = {
2549 .bio = NULL,
2550 .tree = tree,
2551 .get_extent = get_extent,
2552 .extent_locked = 1,
2553 };
2554 struct writeback_control wbc_writepages = {
2555 .bdi = inode->i_mapping->backing_dev_info,
2556 .sync_mode = mode,
2557 .older_than_this = NULL,
2558 .nr_to_write = nr_pages * 2,
2559 .range_start = start,
2560 .range_end = end + 1,
2561 };
2562
d397712b 2563 while (start <= end) {
2564 page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
2565 if (clear_page_dirty_for_io(page))
2566 ret = __extent_writepage(page, &wbc_writepages, &epd);
2567 else {
2568 if (tree->ops && tree->ops->writepage_end_io_hook)
2569 tree->ops->writepage_end_io_hook(page, start,
2570 start + PAGE_CACHE_SIZE - 1,
2571 NULL, 1);
2572 unlock_page(page);
2573 }
2574 page_cache_release(page);
2575 start += PAGE_CACHE_SIZE;
2576 }
2577
2578 if (epd.bio)
2579 submit_one_bio(WRITE, epd.bio, 0, 0);
2580 return ret;
2581}
2582EXPORT_SYMBOL(extent_write_locked_range);
2583
2584
2585int extent_writepages(struct extent_io_tree *tree,
2586 struct address_space *mapping,
2587 get_extent_t *get_extent,
2588 struct writeback_control *wbc)
2589{
2590 int ret = 0;
2591 struct extent_page_data epd = {
2592 .bio = NULL,
2593 .tree = tree,
2594 .get_extent = get_extent,
771ed689 2595 .extent_locked = 0,
2596 };
2597
4bef0848 2598 ret = extent_write_cache_pages(tree, mapping, wbc,
2599 __extent_writepage, &epd,
2600 flush_write_bio);
d397712b 2601 if (epd.bio)
c8b97818 2602 submit_one_bio(WRITE, epd.bio, 0, 0);
2603 return ret;
2604}
2605EXPORT_SYMBOL(extent_writepages);
2606
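/*
 * readahead entry point: add each page to the page cache and the LRU,
 * then read it through __extent_read_full_page so the requests can be
 * merged into as few bios as possible.
 */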
2607int extent_readpages(struct extent_io_tree *tree,
2608 struct address_space *mapping,
2609 struct list_head *pages, unsigned nr_pages,
2610 get_extent_t get_extent)
2611{
2612 struct bio *bio = NULL;
2613 unsigned page_idx;
2614 struct pagevec pvec;
c8b97818 2615 unsigned long bio_flags = 0;
2616
2617 pagevec_init(&pvec, 0);
2618 for (page_idx = 0; page_idx < nr_pages; page_idx++) {
2619 struct page *page = list_entry(pages->prev, struct page, lru);
2620
2621 prefetchw(&page->flags);
2622 list_del(&page->lru);
2623 /*
2624 * what we want to do here is call add_to_page_cache_lru,
2625 * but that isn't exported, so we reproduce it here
2626 */
2627 if (!add_to_page_cache(page, mapping,
2628 page->index, GFP_KERNEL)) {
2629
2630 /* open coding of lru_cache_add, also not exported */
2631 page_cache_get(page);
2632 if (!pagevec_add(&pvec, page))
15916de8 2633 __pagevec_lru_add_file(&pvec);
f188591e 2634 __extent_read_full_page(tree, page, get_extent,
c8b97818 2635 &bio, 0, &bio_flags);
2636 }
2637 page_cache_release(page);
2638 }
2639 if (pagevec_count(&pvec))
15916de8 2640 __pagevec_lru_add_file(&pvec);
2641 BUG_ON(!list_empty(pages));
2642 if (bio)
c8b97818 2643 submit_one_bio(READ, bio, 0, bio_flags);
2644 return 0;
2645}
2646EXPORT_SYMBOL(extent_readpages);
2647
2648/*
2649 * basic invalidatepage code: this waits on any locked or writeback
2650 * ranges corresponding to the page, and then deletes any extent state
2651 * records from the tree
2652 */
2653int extent_invalidatepage(struct extent_io_tree *tree,
2654 struct page *page, unsigned long offset)
2655{
2656 u64 start = ((u64)page->index << PAGE_CACHE_SHIFT);
2657 u64 end = start + PAGE_CACHE_SIZE - 1;
2658 size_t blocksize = page->mapping->host->i_sb->s_blocksize;
2659
d397712b 2660 start += (offset + blocksize - 1) & ~(blocksize - 1);
2661 if (start > end)
2662 return 0;
2663
2664 lock_extent(tree, start, end, GFP_NOFS);
2665 wait_on_extent_writeback(tree, start, end);
2666 clear_extent_bit(tree, start, end,
2667 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC,
2668 1, 1, GFP_NOFS);
2669 return 0;
2670}
2671EXPORT_SYMBOL(extent_invalidatepage);
2672
2673/*
2674 * simple commit_write call, set_range_dirty is used to mark both
2675 * the pages and the extent records as dirty
2676 */
2677int extent_commit_write(struct extent_io_tree *tree,
2678 struct inode *inode, struct page *page,
2679 unsigned from, unsigned to)
2680{
2681 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
2682
2683 set_page_extent_mapped(page);
2684 set_page_dirty(page);
2685
2686 if (pos > inode->i_size) {
2687 i_size_write(inode, pos);
2688 mark_inode_dirty(inode);
2689 }
2690 return 0;
2691}
2692EXPORT_SYMBOL(extent_commit_write);
2693
2694int extent_prepare_write(struct extent_io_tree *tree,
2695 struct inode *inode, struct page *page,
2696 unsigned from, unsigned to, get_extent_t *get_extent)
2697{
2698 u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2699 u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
2700 u64 block_start;
2701 u64 orig_block_start;
2702 u64 block_end;
2703 u64 cur_end;
2704 struct extent_map *em;
2705 unsigned blocksize = 1 << inode->i_blkbits;
2706 size_t page_offset = 0;
2707 size_t block_off_start;
2708 size_t block_off_end;
2709 int err = 0;
2710 int iocount = 0;
2711 int ret = 0;
2712 int isnew;
2713
2714 set_page_extent_mapped(page);
2715
2716 block_start = (page_start + from) & ~((u64)blocksize - 1);
2717 block_end = (page_start + to - 1) | (blocksize - 1);
2718 orig_block_start = block_start;
2719
2720 lock_extent(tree, page_start, page_end, GFP_NOFS);
d397712b 2721 while (block_start <= block_end) {
2722 em = get_extent(inode, page, page_offset, block_start,
2723 block_end - block_start + 1, 1);
d397712b 2724 if (IS_ERR(em) || !em)
d1310b2e 2725 goto err;
d397712b 2726
2727 cur_end = min(block_end, extent_map_end(em) - 1);
2728 block_off_start = block_start & (PAGE_CACHE_SIZE - 1);
2729 block_off_end = block_off_start + blocksize;
2730 isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS);
2731
2732 if (!PageUptodate(page) && isnew &&
2733 (block_off_end > to || block_off_start < from)) {
2734 void *kaddr;
2735
2736 kaddr = kmap_atomic(page, KM_USER0);
2737 if (block_off_end > to)
2738 memset(kaddr + to, 0, block_off_end - to);
2739 if (block_off_start < from)
2740 memset(kaddr + block_off_start, 0,
2741 from - block_off_start);
2742 flush_dcache_page(page);
2743 kunmap_atomic(kaddr, KM_USER0);
2744 }
2745 if ((em->block_start != EXTENT_MAP_HOLE &&
2746 em->block_start != EXTENT_MAP_INLINE) &&
2747 !isnew && !PageUptodate(page) &&
2748 (block_off_end > to || block_off_start < from) &&
2749 !test_range_bit(tree, block_start, cur_end,
2750 EXTENT_UPTODATE, 1)) {
2751 u64 sector;
2752 u64 extent_offset = block_start - em->start;
2753 size_t iosize;
2754 sector = (em->block_start + extent_offset) >> 9;
2755 iosize = (cur_end - block_start + blocksize) &
2756 ~((u64)blocksize - 1);
2757 /*
2758 * we've already got the extent locked, but we
2759 * need to split the state such that our end_bio
2760 * handler can clear the lock.
2761 */
2762 set_extent_bit(tree, block_start,
2763 block_start + iosize - 1,
2764 EXTENT_LOCKED, 0, NULL, GFP_NOFS);
2765 ret = submit_extent_page(READ, tree, page,
2766 sector, iosize, page_offset, em->bdev,
2767 NULL, 1,
2768 end_bio_extent_preparewrite, 0,
2769 0, 0);
2770 iocount++;
2771 block_start = block_start + iosize;
2772 } else {
2773 set_extent_uptodate(tree, block_start, cur_end,
2774 GFP_NOFS);
2775 unlock_extent(tree, block_start, cur_end, GFP_NOFS);
2776 block_start = cur_end + 1;
2777 }
2778 page_offset = block_start & (PAGE_CACHE_SIZE - 1);
2779 free_extent_map(em);
2780 }
2781 if (iocount) {
2782 wait_extent_bit(tree, orig_block_start,
2783 block_end, EXTENT_LOCKED);
2784 }
2785 check_page_uptodate(tree, page);
2786err:
2787 /* FIXME, zero out newly allocated blocks on error */
2788 return err;
2789}
2790EXPORT_SYMBOL(extent_prepare_write);
2791
2792/*
2793 * a helper for releasepage, this tests for areas of the page that
2794 * are locked or under IO and drops the related state bits if it is safe
2795 * to drop the page.
2796 */
2797int try_release_extent_state(struct extent_map_tree *map,
2798 struct extent_io_tree *tree, struct page *page,
2799 gfp_t mask)
2800{
2801 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
2802 u64 end = start + PAGE_CACHE_SIZE - 1;
2803 int ret = 1;
2804
2805 if (test_range_bit(tree, start, end,
2806 EXTENT_IOBITS | EXTENT_ORDERED, 0))
2807 ret = 0;
2808 else {
2809 if ((mask & GFP_NOFS) == GFP_NOFS)
2810 mask = GFP_NOFS;
2811 clear_extent_bit(tree, start, end, EXTENT_UPTODATE,
2812 1, 1, mask);
2813 }
2814 return ret;
2815}
2816EXPORT_SYMBOL(try_release_extent_state);
2817
2818/*
2819 * a helper for releasepage. As long as there are no locked extents
2820 * in the range corresponding to the page, both state records and extent
2821 * map records are removed
2822 */
2823int try_release_extent_mapping(struct extent_map_tree *map,
2824 struct extent_io_tree *tree, struct page *page,
2825 gfp_t mask)
2826{
2827 struct extent_map *em;
2828 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
2829 u64 end = start + PAGE_CACHE_SIZE - 1;
7b13b7b1 2830
2831 if ((mask & __GFP_WAIT) &&
2832 page->mapping->host->i_size > 16 * 1024 * 1024) {
39b5637f 2833 u64 len;
70dec807 2834 while (start <= end) {
39b5637f 2835 len = end - start + 1;
70dec807 2836 spin_lock(&map->lock);
39b5637f 2837 em = lookup_extent_mapping(map, start, len);
2838 if (!em || IS_ERR(em)) {
2839 spin_unlock(&map->lock);
2840 break;
2841 }
2842 if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
2843 em->start != start) {
2844 spin_unlock(&map->lock);
2845 free_extent_map(em);
2846 break;
2847 }
2848 if (!test_range_bit(tree, em->start,
2849 extent_map_end(em) - 1,
2850 EXTENT_LOCKED | EXTENT_WRITEBACK |
2851 EXTENT_ORDERED,
2852 0)) {
2853 remove_extent_mapping(map, em);
2854 /* once for the rb tree */
2855 free_extent_map(em);
2856 }
2857 start = extent_map_end(em);
d1310b2e 2858 spin_unlock(&map->lock);
2859
2860 /* once for us */
2861 free_extent_map(em);
2862 }
d1310b2e 2863 }
7b13b7b1 2864 return try_release_extent_state(map, tree, page, mask);
2865}
2866EXPORT_SYMBOL(try_release_extent_mapping);
2867
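/*
 * bmap implementation: look up the extent map covering the requested
 * block and translate it to a disk sector.  Holes, inline and other
 * special extents (block_start beyond EXTENT_MAP_LAST_BYTE) return 0.
 */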
2868sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
2869 get_extent_t *get_extent)
2870{
2871 struct inode *inode = mapping->host;
2872 u64 start = iblock << inode->i_blkbits;
2873 sector_t sector = 0;
d899e052 2874 size_t blksize = (1 << inode->i_blkbits);
2875 struct extent_map *em;
2876
2877 lock_extent(&BTRFS_I(inode)->io_tree, start, start + blksize - 1,
2878 GFP_NOFS);
2879 em = get_extent(inode, NULL, 0, start, blksize, 0);
2880 unlock_extent(&BTRFS_I(inode)->io_tree, start, start + blksize - 1,
2881 GFP_NOFS);
2882 if (!em || IS_ERR(em))
2883 return 0;
2884
d899e052 2885 if (em->block_start > EXTENT_MAP_LAST_BYTE)
2886 goto out;
2887
2888 sector = (em->block_start + start - em->start) >> inode->i_blkbits;
2889out:
2890 free_extent_map(em);
2891 return sector;
2892}
2893
2894static inline struct page *extent_buffer_page(struct extent_buffer *eb,
2895 unsigned long i)
2896{
2897 struct page *p;
2898 struct address_space *mapping;
2899
2900 if (i == 0)
2901 return eb->first_page;
2902 i += eb->start >> PAGE_CACHE_SHIFT;
2903 mapping = eb->first_page->mapping;
2904 if (!mapping)
2905 return NULL;
2906
2907 /*
2908 * extent_buffer_page is only called after pinning the page
2909 * by increasing the reference count. So we know the page must
2910 * be in the radix tree.
2911 */
0ee0fda0 2912 rcu_read_lock();
d1310b2e 2913 p = radix_tree_lookup(&mapping->page_tree, i);
0ee0fda0 2914 rcu_read_unlock();
2b1f55b0 2915
2916 return p;
2917}
2918
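/* number of pages spanned by the [start, start + len) range of a buffer */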
6af118ce 2919static inline unsigned long num_extent_pages(u64 start, u64 len)
728131d8 2920{
2921 return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
2922 (start >> PAGE_CACHE_SHIFT);
2923}
2924
2925static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
2926 u64 start,
2927 unsigned long len,
2928 gfp_t mask)
2929{
2930 struct extent_buffer *eb = NULL;
4bef0848 2931#ifdef LEAK_DEBUG
2d2ae547 2932 unsigned long flags;
4bef0848 2933#endif
d1310b2e 2934
d1310b2e 2935 eb = kmem_cache_zalloc(extent_buffer_cache, mask);
2936 eb->start = start;
2937 eb->len = len;
a61e6f29 2938 mutex_init(&eb->mutex);
4bef0848 2939#ifdef LEAK_DEBUG
2940 spin_lock_irqsave(&leak_lock, flags);
2941 list_add(&eb->leak_list, &buffers);
2942 spin_unlock_irqrestore(&leak_lock, flags);
4bef0848 2943#endif
2944 atomic_set(&eb->refs, 1);
2945
2946 return eb;
2947}
2948
2949static void __free_extent_buffer(struct extent_buffer *eb)
2950{
4bef0848 2951#ifdef LEAK_DEBUG
2952 unsigned long flags;
2953 spin_lock_irqsave(&leak_lock, flags);
2954 list_del(&eb->leak_list);
2955 spin_unlock_irqrestore(&leak_lock, flags);
4bef0848 2956#endif
2957 kmem_cache_free(extent_buffer_cache, eb);
2958}
2959
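/*
 * find or create the extent buffer covering [start, start + len).  The
 * buffer rbtree is checked first; otherwise the backing pages are pulled
 * in from the page cache and a new buffer is inserted.  If another thread
 * raced and inserted the same range first, the local copy is dropped and
 * the existing buffer is returned with an extra reference.
 */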
2960struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
2961 u64 start, unsigned long len,
2962 struct page *page0,
2963 gfp_t mask)
2964{
2965 unsigned long num_pages = num_extent_pages(start, len);
2966 unsigned long i;
2967 unsigned long index = start >> PAGE_CACHE_SHIFT;
2968 struct extent_buffer *eb;
6af118ce 2969 struct extent_buffer *exists = NULL;
2970 struct page *p;
2971 struct address_space *mapping = tree->mapping;
2972 int uptodate = 1;
2973
2974 spin_lock(&tree->buffer_lock);
2975 eb = buffer_search(tree, start);
2976 if (eb) {
2977 atomic_inc(&eb->refs);
2978 spin_unlock(&tree->buffer_lock);
0f9dd46c 2979 mark_page_accessed(eb->first_page);
6af118ce
CM
2980 return eb;
2981 }
2982 spin_unlock(&tree->buffer_lock);
2983
d1310b2e 2984 eb = __alloc_extent_buffer(tree, start, len, mask);
2b114d1d 2985 if (!eb)
2986 return NULL;
2987
2988 if (page0) {
2989 eb->first_page = page0;
2990 i = 1;
2991 index++;
2992 page_cache_get(page0);
2993 mark_page_accessed(page0);
2994 set_page_extent_mapped(page0);
d1310b2e 2995 set_page_extent_head(page0, len);
f188591e 2996 uptodate = PageUptodate(page0);
2997 } else {
2998 i = 0;
2999 }
3000 for (; i < num_pages; i++, index++) {
3001 p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM);
3002 if (!p) {
3003 WARN_ON(1);
6af118ce 3004 goto free_eb;
3005 }
3006 set_page_extent_mapped(p);
3007 mark_page_accessed(p);
3008 if (i == 0) {
3009 eb->first_page = p;
3010 set_page_extent_head(p, len);
3011 } else {
3012 set_page_private(p, EXTENT_PAGE_PRIVATE);
3013 }
3014 if (!PageUptodate(p))
3015 uptodate = 0;
3016 unlock_page(p);
3017 }
3018 if (uptodate)
3019 eb->flags |= EXTENT_UPTODATE;
3020 eb->flags |= EXTENT_BUFFER_FILLED;
3021
3022 spin_lock(&tree->buffer_lock);
3023 exists = buffer_tree_insert(tree, start, &eb->rb_node);
3024 if (exists) {
3025 /* add one reference for the caller */
3026 atomic_inc(&exists->refs);
3027 spin_unlock(&tree->buffer_lock);
3028 goto free_eb;
3029 }
3030 spin_unlock(&tree->buffer_lock);
3031
3032 /* add one reference for the tree */
3033 atomic_inc(&eb->refs);
3034 return eb;
3035
6af118ce 3036free_eb:
d1310b2e 3037 if (!atomic_dec_and_test(&eb->refs))
3038 return exists;
3039 for (index = 1; index < i; index++)
d1310b2e 3040 page_cache_release(extent_buffer_page(eb, index));
6af118ce 3041 page_cache_release(extent_buffer_page(eb, 0));
d1310b2e 3042 __free_extent_buffer(eb);
6af118ce 3043 return exists;
3044}
3045EXPORT_SYMBOL(alloc_extent_buffer);
3046
3047struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
3048 u64 start, unsigned long len,
3049 gfp_t mask)
3050{
d1310b2e 3051 struct extent_buffer *eb;
d1310b2e 3052
3053 spin_lock(&tree->buffer_lock);
3054 eb = buffer_search(tree, start);
3055 if (eb)
3056 atomic_inc(&eb->refs);
3057 spin_unlock(&tree->buffer_lock);
d1310b2e 3058
3059 if (eb)
3060 mark_page_accessed(eb->first_page);
3061
d1310b2e 3062 return eb;
3063}
3064EXPORT_SYMBOL(find_extent_buffer);
3065
3066void free_extent_buffer(struct extent_buffer *eb)
3067{
3068 if (!eb)
3069 return;
3070
3071 if (!atomic_dec_and_test(&eb->refs))
3072 return;
3073
6af118ce 3074 WARN_ON(1);
3075}
3076EXPORT_SYMBOL(free_extent_buffer);
3077
3078int clear_extent_buffer_dirty(struct extent_io_tree *tree,
3079 struct extent_buffer *eb)
3080{
3081 int set;
3082 unsigned long i;
3083 unsigned long num_pages;
3084 struct page *page;
3085
3086 u64 start = eb->start;
3087 u64 end = start + eb->len - 1;
3088
3089 set = clear_extent_dirty(tree, start, end, GFP_NOFS);
3090 num_pages = num_extent_pages(eb->start, eb->len);
3091
3092 for (i = 0; i < num_pages; i++) {
3093 page = extent_buffer_page(eb, i);
3094 if (!set && !PageDirty(page))
3095 continue;
3096
a61e6f29 3097 lock_page(page);
3098 if (i == 0)
3099 set_page_extent_head(page, eb->len);
3100 else
3101 set_page_private(page, EXTENT_PAGE_PRIVATE);
3102
3103 /*
3104 * if we're on the last page or the first page and the
3105 * block isn't aligned on a page boundary, do extra checks
3106 * to make sure we don't clean page that is partially dirty
3107 */
3108 if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
3109 ((i == num_pages - 1) &&
3110 ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) {
3111 start = (u64)page->index << PAGE_CACHE_SHIFT;
3112 end = start + PAGE_CACHE_SIZE - 1;
3113 if (test_range_bit(tree, start, end,
3114 EXTENT_DIRTY, 0)) {
a61e6f29 3115 unlock_page(page);
3116 continue;
3117 }
3118 }
3119 clear_page_dirty_for_io(page);
0ee0fda0 3120 spin_lock_irq(&page->mapping->tree_lock);
3121 if (!PageDirty(page)) {
3122 radix_tree_tag_clear(&page->mapping->page_tree,
3123 page_index(page),
3124 PAGECACHE_TAG_DIRTY);
3125 }
0ee0fda0 3126 spin_unlock_irq(&page->mapping->tree_lock);
a61e6f29 3127 unlock_page(page);
3128 }
3129 return 0;
3130}
3131EXPORT_SYMBOL(clear_extent_buffer_dirty);
3132
3133int wait_on_extent_buffer_writeback(struct extent_io_tree *tree,
3134 struct extent_buffer *eb)
3135{
3136 return wait_on_extent_writeback(tree, eb->start,
3137 eb->start + eb->len - 1);
3138}
3139EXPORT_SYMBOL(wait_on_extent_buffer_writeback);
3140
3141int set_extent_buffer_dirty(struct extent_io_tree *tree,
3142 struct extent_buffer *eb)
3143{
3144 unsigned long i;
3145 unsigned long num_pages;
3146
3147 num_pages = num_extent_pages(eb->start, eb->len);
3148 for (i = 0; i < num_pages; i++) {
3149 struct page *page = extent_buffer_page(eb, i);
3150 /* writepage may need to do something special for the
3151 * first page, we have to make sure page->private is
3152 * properly set. releasepage may drop page->private
3153 * on us if the page isn't already dirty.
3154 */
a1b32a59 3155 lock_page(page);
d1310b2e 3156 if (i == 0) {
3157 set_page_extent_head(page, eb->len);
3158 } else if (PagePrivate(page) &&
3159 page->private != EXTENT_PAGE_PRIVATE) {
d1310b2e 3160 set_page_extent_mapped(page);
3161 }
3162 __set_page_dirty_nobuffers(extent_buffer_page(eb, i));
a1b32a59 3163 set_extent_dirty(tree, page_offset(page),
d397712b 3164 page_offset(page) + PAGE_CACHE_SIZE - 1,
3165 GFP_NOFS);
3166 unlock_page(page);
d1310b2e 3167 }
a1b32a59 3168 return 0;
3169}
3170EXPORT_SYMBOL(set_extent_buffer_dirty);
3171
3172int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
3173 struct extent_buffer *eb)
3174{
3175 unsigned long i;
3176 struct page *page;
3177 unsigned long num_pages;
3178
3179 num_pages = num_extent_pages(eb->start, eb->len);
3180 eb->flags &= ~EXTENT_UPTODATE;
3181
3182 clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
3183 GFP_NOFS);
3184 for (i = 0; i < num_pages; i++) {
3185 page = extent_buffer_page(eb, i);
3186 if (page)
3187 ClearPageUptodate(page);
3188 }
3189 return 0;
3190}
3191
3192int set_extent_buffer_uptodate(struct extent_io_tree *tree,
3193 struct extent_buffer *eb)
3194{
3195 unsigned long i;
3196 struct page *page;
3197 unsigned long num_pages;
3198
3199 num_pages = num_extent_pages(eb->start, eb->len);
3200
3201 set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
3202 GFP_NOFS);
3203 for (i = 0; i < num_pages; i++) {
3204 page = extent_buffer_page(eb, i);
3205 if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
3206 ((i == num_pages - 1) &&
3207 ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) {
3208 check_page_uptodate(tree, page);
3209 continue;
3210 }
3211 SetPageUptodate(page);
3212 }
3213 return 0;
3214}
3215EXPORT_SYMBOL(set_extent_buffer_uptodate);
3216
3217int extent_range_uptodate(struct extent_io_tree *tree,
3218 u64 start, u64 end)
3219{
3220 struct page *page;
3221 int ret;
3222 int pg_uptodate = 1;
3223 int uptodate;
3224 unsigned long index;
3225
3226 ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1);
3227 if (ret)
3228 return 1;
d397712b 3229 while (start <= end) {
3230 index = start >> PAGE_CACHE_SHIFT;
3231 page = find_get_page(tree->mapping, index);
3232 uptodate = PageUptodate(page);
3233 page_cache_release(page);
3234 if (!uptodate) {
3235 pg_uptodate = 0;
3236 break;
3237 }
3238 start += PAGE_CACHE_SIZE;
3239 }
3240 return pg_uptodate;
3241}
3242
d1310b2e 3243int extent_buffer_uptodate(struct extent_io_tree *tree,
ce9adaa5 3244 struct extent_buffer *eb)
d1310b2e 3245{
728131d8 3246 int ret = 0;
3247 unsigned long num_pages;
3248 unsigned long i;
3249 struct page *page;
3250 int pg_uptodate = 1;
3251
d1310b2e 3252 if (eb->flags & EXTENT_UPTODATE)
4235298e 3253 return 1;
728131d8 3254
4235298e 3255 ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1,
d1310b2e 3256 EXTENT_UPTODATE, 1);
3257 if (ret)
3258 return ret;
3259
3260 num_pages = num_extent_pages(eb->start, eb->len);
3261 for (i = 0; i < num_pages; i++) {
3262 page = extent_buffer_page(eb, i);
3263 if (!PageUptodate(page)) {
3264 pg_uptodate = 0;
3265 break;
3266 }
3267 }
4235298e 3268 return pg_uptodate;
3269}
3270EXPORT_SYMBOL(extent_buffer_uptodate);
3271
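/*
 * read any pages of the extent buffer that are not yet uptodate.  With
 * wait == 0 only pages that can be trylocked have reads started and the
 * function returns without blocking; with wait set it waits for the
 * reads to finish and returns -EIO if any page failed.
 */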
3272int read_extent_buffer_pages(struct extent_io_tree *tree,
3273 struct extent_buffer *eb,
a86c12c7 3274 u64 start, int wait,
f188591e 3275 get_extent_t *get_extent, int mirror_num)
3276{
3277 unsigned long i;
3278 unsigned long start_i;
3279 struct page *page;
3280 int err;
3281 int ret = 0;
3282 int locked_pages = 0;
3283 int all_uptodate = 1;
3284 int inc_all_pages = 0;
d1310b2e 3285 unsigned long num_pages;
a86c12c7 3286 struct bio *bio = NULL;
c8b97818 3287 unsigned long bio_flags = 0;
a86c12c7 3288
3289 if (eb->flags & EXTENT_UPTODATE)
3290 return 0;
3291
ce9adaa5 3292 if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
3293 EXTENT_UPTODATE, 1)) {
3294 return 0;
3295 }
3296
3297 if (start) {
3298 WARN_ON(start < eb->start);
3299 start_i = (start >> PAGE_CACHE_SHIFT) -
3300 (eb->start >> PAGE_CACHE_SHIFT);
3301 } else {
3302 start_i = 0;
3303 }
3304
3305 num_pages = num_extent_pages(eb->start, eb->len);
3306 for (i = start_i; i < num_pages; i++) {
3307 page = extent_buffer_page(eb, i);
d1310b2e 3308 if (!wait) {
2db04966 3309 if (!trylock_page(page))
ce9adaa5 3310 goto unlock_exit;
3311 } else {
3312 lock_page(page);
3313 }
ce9adaa5 3314 locked_pages++;
d397712b 3315 if (!PageUptodate(page))
ce9adaa5 3316 all_uptodate = 0;
3317 }
3318 if (all_uptodate) {
3319 if (start_i == 0)
3320 eb->flags |= EXTENT_UPTODATE;
3321 goto unlock_exit;
3322 }
3323
3324 for (i = start_i; i < num_pages; i++) {
3325 page = extent_buffer_page(eb, i);
3326 if (inc_all_pages)
3327 page_cache_get(page);
3328 if (!PageUptodate(page)) {
3329 if (start_i == 0)
3330 inc_all_pages = 1;
f188591e 3331 ClearPageError(page);
a86c12c7 3332 err = __extent_read_full_page(tree, page,
f188591e 3333 get_extent, &bio,
c8b97818 3334 mirror_num, &bio_flags);
d397712b 3335 if (err)
d1310b2e 3336 ret = err;
3337 } else {
3338 unlock_page(page);
3339 }
3340 }
3341
a86c12c7 3342 if (bio)
c8b97818 3343 submit_one_bio(READ, bio, mirror_num, bio_flags);
a86c12c7 3344
d397712b 3345 if (ret || !wait)
d1310b2e 3346 return ret;
d397712b 3347
3348 for (i = start_i; i < num_pages; i++) {
3349 page = extent_buffer_page(eb, i);
3350 wait_on_page_locked(page);
d397712b 3351 if (!PageUptodate(page))
d1310b2e 3352 ret = -EIO;
d1310b2e 3353 }
d397712b 3354
3355 if (!ret)
3356 eb->flags |= EXTENT_UPTODATE;
3357 return ret;
3358
3359unlock_exit:
3360 i = start_i;
d397712b 3361 while (locked_pages > 0) {
3362 page = extent_buffer_page(eb, i);
3363 i++;
3364 unlock_page(page);
3365 locked_pages--;
3366 }
3367 return ret;
3368}
3369EXPORT_SYMBOL(read_extent_buffer_pages);
3370
3371void read_extent_buffer(struct extent_buffer *eb, void *dstv,
3372 unsigned long start,
3373 unsigned long len)
3374{
3375 size_t cur;
3376 size_t offset;
3377 struct page *page;
3378 char *kaddr;
3379 char *dst = (char *)dstv;
3380 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
3381 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
3382
3383 WARN_ON(start > eb->len);
3384 WARN_ON(start + len > eb->start + eb->len);
3385
3386 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
3387
d397712b 3388 while (len > 0) {
d1310b2e 3389 page = extent_buffer_page(eb, i);
3390
3391 cur = min(len, (PAGE_CACHE_SIZE - offset));
3392 kaddr = kmap_atomic(page, KM_USER1);
3393 memcpy(dst, kaddr + offset, cur);
3394 kunmap_atomic(kaddr, KM_USER1);
3395
3396 dst += cur;
3397 len -= cur;
3398 offset = 0;
3399 i++;
3400 }
3401}
3402EXPORT_SYMBOL(read_extent_buffer);
3403
3404int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
3405 unsigned long min_len, char **token, char **map,
3406 unsigned long *map_start,
3407 unsigned long *map_len, int km)
3408{
3409 size_t offset = start & (PAGE_CACHE_SIZE - 1);
3410 char *kaddr;
3411 struct page *p;
3412 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
3413 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
3414 unsigned long end_i = (start_offset + start + min_len - 1) >>
3415 PAGE_CACHE_SHIFT;
3416
3417 if (i != end_i)
3418 return -EINVAL;
3419
3420 if (i == 0) {
3421 offset = start_offset;
3422 *map_start = 0;
3423 } else {
3424 offset = 0;
3425 *map_start = ((u64)i << PAGE_CACHE_SHIFT) - start_offset;
3426 }
d397712b 3427
d1310b2e 3428 if (start + min_len > eb->len) {
3429 printk(KERN_ERR "btrfs bad mapping eb start %llu len %lu, "
3430 "wanted %lu %lu\n", (unsigned long long)eb->start,
3431 eb->len, start, min_len);
3432 WARN_ON(1);
3433 }
3434
3435 p = extent_buffer_page(eb, i);
3436 kaddr = kmap_atomic(p, km);
3437 *token = kaddr;
3438 *map = kaddr + offset;
3439 *map_len = PAGE_CACHE_SIZE - offset;
3440 return 0;
3441}
3442EXPORT_SYMBOL(map_private_extent_buffer);
3443
3444int map_extent_buffer(struct extent_buffer *eb, unsigned long start,
3445 unsigned long min_len,
3446 char **token, char **map,
3447 unsigned long *map_start,
3448 unsigned long *map_len, int km)
3449{
3450 int err;
3451 int save = 0;
3452 if (eb->map_token) {
3453 unmap_extent_buffer(eb, eb->map_token, km);
3454 eb->map_token = NULL;
3455 save = 1;
934d375b 3456 WARN_ON(!mutex_is_locked(&eb->mutex));
3457 }
3458 err = map_private_extent_buffer(eb, start, min_len, token, map,
3459 map_start, map_len, km);
3460 if (!err && save) {
3461 eb->map_token = *token;
3462 eb->kaddr = *map;
3463 eb->map_start = *map_start;
3464 eb->map_len = *map_len;
3465 }
3466 return err;
3467}
3468EXPORT_SYMBOL(map_extent_buffer);
3469
3470void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km)
3471{
3472 kunmap_atomic(token, km);
3473}
3474EXPORT_SYMBOL(unmap_extent_buffer);
3475
3476int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
3477 unsigned long start,
3478 unsigned long len)
3479{
3480 size_t cur;
3481 size_t offset;
3482 struct page *page;
3483 char *kaddr;
3484 char *ptr = (char *)ptrv;
3485 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
3486 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
3487 int ret = 0;
3488
3489 WARN_ON(start > eb->len);
3490 WARN_ON(start + len > eb->start + eb->len);
3491
3492 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
3493
d397712b 3494 while (len > 0) {
d1310b2e 3495 page = extent_buffer_page(eb, i);
3496
3497 cur = min(len, (PAGE_CACHE_SIZE - offset));
3498
3499 kaddr = kmap_atomic(page, KM_USER0);
3500 ret = memcmp(ptr, kaddr + offset, cur);
3501 kunmap_atomic(kaddr, KM_USER0);
3502 if (ret)
3503 break;
3504
3505 ptr += cur;
3506 len -= cur;
3507 offset = 0;
3508 i++;
3509 }
3510 return ret;
3511}
3512EXPORT_SYMBOL(memcmp_extent_buffer);
3513
3514void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
3515 unsigned long start, unsigned long len)
3516{
3517 size_t cur;
3518 size_t offset;
3519 struct page *page;
3520 char *kaddr;
3521 char *src = (char *)srcv;
3522 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
3523 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
3524
3525 WARN_ON(start > eb->len);
3526 WARN_ON(start + len > eb->start + eb->len);
3527
3528 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
3529
d397712b 3530 while (len > 0) {
3531 page = extent_buffer_page(eb, i);
3532 WARN_ON(!PageUptodate(page));
3533
3534 cur = min(len, PAGE_CACHE_SIZE - offset);
3535 kaddr = kmap_atomic(page, KM_USER1);
3536 memcpy(kaddr + offset, src, cur);
3537 kunmap_atomic(kaddr, KM_USER1);
3538
3539 src += cur;
3540 len -= cur;
3541 offset = 0;
3542 i++;
3543 }
3544}
3545EXPORT_SYMBOL(write_extent_buffer);
3546
3547void memset_extent_buffer(struct extent_buffer *eb, char c,
3548 unsigned long start, unsigned long len)
3549{
3550 size_t cur;
3551 size_t offset;
3552 struct page *page;
3553 char *kaddr;
3554 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
3555 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
3556
3557 WARN_ON(start > eb->len);
3558 WARN_ON(start + len > eb->start + eb->len);
3559
3560 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
3561
d397712b 3562 while (len > 0) {
3563 page = extent_buffer_page(eb, i);
3564 WARN_ON(!PageUptodate(page));
3565
3566 cur = min(len, PAGE_CACHE_SIZE - offset);
3567 kaddr = kmap_atomic(page, KM_USER0);
3568 memset(kaddr + offset, c, cur);
3569 kunmap_atomic(kaddr, KM_USER0);
3570
3571 len -= cur;
3572 offset = 0;
3573 i++;
3574 }
3575}
3576EXPORT_SYMBOL(memset_extent_buffer);
3577
3578void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
3579 unsigned long dst_offset, unsigned long src_offset,
3580 unsigned long len)
3581{
3582 u64 dst_len = dst->len;
3583 size_t cur;
3584 size_t offset;
3585 struct page *page;
3586 char *kaddr;
3587 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
3588 unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
3589
3590 WARN_ON(src->len != dst_len);
3591
3592 offset = (start_offset + dst_offset) &
3593 ((unsigned long)PAGE_CACHE_SIZE - 1);
3594
d397712b 3595 while (len > 0) {
3596 page = extent_buffer_page(dst, i);
3597 WARN_ON(!PageUptodate(page));
3598
3599 cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset));
3600
3601 kaddr = kmap_atomic(page, KM_USER0);
3602 read_extent_buffer(src, kaddr + offset, src_offset, cur);
3603 kunmap_atomic(kaddr, KM_USER0);
3604
3605 src_offset += cur;
3606 len -= cur;
3607 offset = 0;
3608 i++;
3609 }
3610}
3611EXPORT_SYMBOL(copy_extent_buffer);
3612
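/*
 * helpers for copying data between the pages backing an extent buffer.
 * move_pages uses memmove within a single page and copies backwards
 * between two different pages, so it is safe for overlapping ranges;
 * copy_pages is a plain memcpy and assumes the ranges do not overlap.
 */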
3613static void move_pages(struct page *dst_page, struct page *src_page,
3614 unsigned long dst_off, unsigned long src_off,
3615 unsigned long len)
3616{
3617 char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
3618 if (dst_page == src_page) {
3619 memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len);
3620 } else {
3621 char *src_kaddr = kmap_atomic(src_page, KM_USER1);
3622 char *p = dst_kaddr + dst_off + len;
3623 char *s = src_kaddr + src_off + len;
3624
3625 while (len--)
3626 *--p = *--s;
3627
3628 kunmap_atomic(src_kaddr, KM_USER1);
3629 }
3630 kunmap_atomic(dst_kaddr, KM_USER0);
3631}
3632
3633static void copy_pages(struct page *dst_page, struct page *src_page,
3634 unsigned long dst_off, unsigned long src_off,
3635 unsigned long len)
3636{
3637 char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
3638 char *src_kaddr;
3639
3640 if (dst_page != src_page)
3641 src_kaddr = kmap_atomic(src_page, KM_USER1);
3642 else
3643 src_kaddr = dst_kaddr;
3644
3645 memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
3646 kunmap_atomic(dst_kaddr, KM_USER0);
3647 if (dst_page != src_page)
3648 kunmap_atomic(src_kaddr, KM_USER1);
3649}
3650
3651void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
3652 unsigned long src_offset, unsigned long len)
3653{
3654 size_t cur;
3655 size_t dst_off_in_page;
3656 size_t src_off_in_page;
3657 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
3658 unsigned long dst_i;
3659 unsigned long src_i;
3660
3661 if (src_offset + len > dst->len) {
3662 printk(KERN_ERR "btrfs memmove bogus src_offset %lu move "
3663 "len %lu dst len %lu\n", src_offset, len, dst->len);
3664 BUG_ON(1);
3665 }
3666 if (dst_offset + len > dst->len) {
3667 printk(KERN_ERR "btrfs memmove bogus dst_offset %lu move "
3668 "len %lu dst len %lu\n", dst_offset, len, dst->len);
3669 BUG_ON(1);
3670 }
3671
d397712b 3672 while (len > 0) {
3673 dst_off_in_page = (start_offset + dst_offset) &
3674 ((unsigned long)PAGE_CACHE_SIZE - 1);
3675 src_off_in_page = (start_offset + src_offset) &
3676 ((unsigned long)PAGE_CACHE_SIZE - 1);
3677
3678 dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
3679 src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT;
3680
3681 cur = min(len, (unsigned long)(PAGE_CACHE_SIZE -
3682 src_off_in_page));
3683 cur = min_t(unsigned long, cur,
3684 (unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page));
3685
3686 copy_pages(extent_buffer_page(dst, dst_i),
3687 extent_buffer_page(dst, src_i),
3688 dst_off_in_page, src_off_in_page, cur);
3689
3690 src_offset += cur;
3691 dst_offset += cur;
3692 len -= cur;
3693 }
3694}
3695EXPORT_SYMBOL(memcpy_extent_buffer);
3696
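/*
 * memmove within an extent buffer: when the destination is above the
 * source, copy from the end of the range backwards, page by page, so
 * overlapping ranges are handled correctly; otherwise fall back to
 * memcpy_extent_buffer().
 */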
3697void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
3698 unsigned long src_offset, unsigned long len)
3699{
3700 size_t cur;
3701 size_t dst_off_in_page;
3702 size_t src_off_in_page;
3703 unsigned long dst_end = dst_offset + len - 1;
3704 unsigned long src_end = src_offset + len - 1;
3705 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
3706 unsigned long dst_i;
3707 unsigned long src_i;
3708
3709 if (src_offset + len > dst->len) {
3710 printk(KERN_ERR "btrfs memmove bogus src_offset %lu move "
3711 "len %lu len %lu\n", src_offset, len, dst->len);
3712 BUG_ON(1);
3713 }
3714 if (dst_offset + len > dst->len) {
3715 printk(KERN_ERR "btrfs memmove bogus dst_offset %lu move "
3716 "len %lu len %lu\n", dst_offset, len, dst->len);
3717 BUG_ON(1);
3718 }
3719 if (dst_offset < src_offset) {
3720 memcpy_extent_buffer(dst, dst_offset, src_offset, len);
3721 return;
3722 }
d397712b 3723 while (len > 0) {
3724 dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT;
3725 src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT;
3726
3727 dst_off_in_page = (start_offset + dst_end) &
3728 ((unsigned long)PAGE_CACHE_SIZE - 1);
3729 src_off_in_page = (start_offset + src_end) &
3730 ((unsigned long)PAGE_CACHE_SIZE - 1);
3731
3732 cur = min_t(unsigned long, len, src_off_in_page + 1);
3733 cur = min(cur, dst_off_in_page + 1);
3734 move_pages(extent_buffer_page(dst, dst_i),
3735 extent_buffer_page(dst, src_i),
3736 dst_off_in_page - cur + 1,
3737 src_off_in_page - cur + 1, cur);
3738
3739 dst_end -= cur;
3740 src_end -= cur;
3741 len -= cur;
3742 }
3743}
3744EXPORT_SYMBOL(memmove_extent_buffer);
3745
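/*
 * called from releasepage: drop the extent buffer that starts at this
 * page if nobody else holds a reference, releasing the backing pages and
 * removing it from the buffer rbtree.  Returns 0 when the buffer is
 * still in use and cannot be released.
 */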
3746int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
3747{
3748 u64 start = page_offset(page);
3749 struct extent_buffer *eb;
3750 int ret = 1;
3751 unsigned long i;
3752 unsigned long num_pages;
3753
3754 spin_lock(&tree->buffer_lock);
3755 eb = buffer_search(tree, start);
3756 if (!eb)
3757 goto out;
3758
3759 if (atomic_read(&eb->refs) > 1) {
3760 ret = 0;
3761 goto out;
3762 }
3763 /* at this point we can safely release the extent buffer */
3764 num_pages = num_extent_pages(eb->start, eb->len);
3765 for (i = 0; i < num_pages; i++)
3766 page_cache_release(extent_buffer_page(eb, i));
3767 rb_erase(&eb->rb_node, &tree->buffer);
3768 __free_extent_buffer(eb);
3769out:
3770 spin_unlock(&tree->buffer_lock);
3771 return ret;
3772}
3773EXPORT_SYMBOL(try_release_extent_buffer);