2 * Copyright (C) 2015 Facebook. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
19 #include <linux/kernel.h>
20 #include <linux/vmalloc.h>
24 #include "free-space-tree.h"
25 #include "transaction.h"
27 static int __add_block_group_free_space(struct btrfs_trans_handle
*trans
,
28 struct btrfs_fs_info
*fs_info
,
29 struct btrfs_block_group_cache
*block_group
,
30 struct btrfs_path
*path
);
32 void set_free_space_tree_thresholds(struct btrfs_block_group_cache
*cache
)
36 u64 num_bitmaps
, total_bitmap_size
;
39 * We convert to bitmaps when the disk space required for using extents
40 * exceeds that required for using bitmaps.
42 bitmap_range
= cache
->sectorsize
* BTRFS_FREE_SPACE_BITMAP_BITS
;
43 num_bitmaps
= div_u64(cache
->key
.offset
+ bitmap_range
- 1,
45 bitmap_size
= sizeof(struct btrfs_item
) + BTRFS_FREE_SPACE_BITMAP_SIZE
;
46 total_bitmap_size
= num_bitmaps
* bitmap_size
;
47 cache
->bitmap_high_thresh
= div_u64(total_bitmap_size
,
48 sizeof(struct btrfs_item
));
51 * We allow for a small buffer between the high threshold and low
52 * threshold to avoid thrashing back and forth between the two formats.
54 if (cache
->bitmap_high_thresh
> 100)
55 cache
->bitmap_low_thresh
= cache
->bitmap_high_thresh
- 100;
57 cache
->bitmap_low_thresh
= 0;
60 static int add_new_free_space_info(struct btrfs_trans_handle
*trans
,
61 struct btrfs_fs_info
*fs_info
,
62 struct btrfs_block_group_cache
*block_group
,
63 struct btrfs_path
*path
)
65 struct btrfs_root
*root
= fs_info
->free_space_root
;
66 struct btrfs_free_space_info
*info
;
68 struct extent_buffer
*leaf
;
71 key
.objectid
= block_group
->key
.objectid
;
72 key
.type
= BTRFS_FREE_SPACE_INFO_KEY
;
73 key
.offset
= block_group
->key
.offset
;
75 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
, sizeof(*info
));
79 leaf
= path
->nodes
[0];
80 info
= btrfs_item_ptr(leaf
, path
->slots
[0],
81 struct btrfs_free_space_info
);
82 btrfs_set_free_space_extent_count(leaf
, info
, 0);
83 btrfs_set_free_space_flags(leaf
, info
, 0);
84 btrfs_mark_buffer_dirty(leaf
);
88 btrfs_release_path(path
);
92 struct btrfs_free_space_info
*
93 search_free_space_info(struct btrfs_trans_handle
*trans
,
94 struct btrfs_fs_info
*fs_info
,
95 struct btrfs_block_group_cache
*block_group
,
96 struct btrfs_path
*path
, int cow
)
98 struct btrfs_root
*root
= fs_info
->free_space_root
;
102 key
.objectid
= block_group
->key
.objectid
;
103 key
.type
= BTRFS_FREE_SPACE_INFO_KEY
;
104 key
.offset
= block_group
->key
.offset
;
106 ret
= btrfs_search_slot(trans
, root
, &key
, path
, 0, cow
);
110 btrfs_warn(fs_info
, "missing free space info for %llu\n",
111 block_group
->key
.objectid
);
113 return ERR_PTR(-ENOENT
);
116 return btrfs_item_ptr(path
->nodes
[0], path
->slots
[0],
117 struct btrfs_free_space_info
);
121 * btrfs_search_slot() but we're looking for the greatest key less than the
124 static int btrfs_search_prev_slot(struct btrfs_trans_handle
*trans
,
125 struct btrfs_root
*root
,
126 struct btrfs_key
*key
, struct btrfs_path
*p
,
127 int ins_len
, int cow
)
131 ret
= btrfs_search_slot(trans
, root
, key
, p
, ins_len
, cow
);
140 if (p
->slots
[0] == 0) {
149 static inline u32
free_space_bitmap_size(u64 size
, u32 sectorsize
)
151 return DIV_ROUND_UP((u32
)div_u64(size
, sectorsize
), BITS_PER_BYTE
);
154 static unsigned long *alloc_bitmap(u32 bitmap_size
)
156 return __vmalloc(bitmap_size
, GFP_NOFS
| __GFP_HIGHMEM
| __GFP_ZERO
,
160 int convert_free_space_to_bitmaps(struct btrfs_trans_handle
*trans
,
161 struct btrfs_fs_info
*fs_info
,
162 struct btrfs_block_group_cache
*block_group
,
163 struct btrfs_path
*path
)
165 struct btrfs_root
*root
= fs_info
->free_space_root
;
166 struct btrfs_free_space_info
*info
;
167 struct btrfs_key key
, found_key
;
168 struct extent_buffer
*leaf
;
169 unsigned long *bitmap
;
173 u32 bitmap_size
, flags
, expected_extent_count
;
174 u32 extent_count
= 0;
178 bitmap_size
= free_space_bitmap_size(block_group
->key
.offset
,
179 block_group
->sectorsize
);
180 bitmap
= alloc_bitmap(bitmap_size
);
186 start
= block_group
->key
.objectid
;
187 end
= block_group
->key
.objectid
+ block_group
->key
.offset
;
189 key
.objectid
= end
- 1;
191 key
.offset
= (u64
)-1;
194 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, -1, 1);
198 leaf
= path
->nodes
[0];
201 while (path
->slots
[0] > 0) {
202 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0] - 1);
204 if (found_key
.type
== BTRFS_FREE_SPACE_INFO_KEY
) {
205 ASSERT(found_key
.objectid
== block_group
->key
.objectid
);
206 ASSERT(found_key
.offset
== block_group
->key
.offset
);
209 } else if (found_key
.type
== BTRFS_FREE_SPACE_EXTENT_KEY
) {
212 ASSERT(found_key
.objectid
>= start
);
213 ASSERT(found_key
.objectid
< end
);
214 ASSERT(found_key
.objectid
+ found_key
.offset
<= end
);
216 first
= div_u64(found_key
.objectid
- start
,
217 block_group
->sectorsize
);
218 last
= div_u64(found_key
.objectid
+ found_key
.offset
- start
,
219 block_group
->sectorsize
);
220 bitmap_set(bitmap
, first
, last
- first
);
230 ret
= btrfs_del_items(trans
, root
, path
, path
->slots
[0], nr
);
233 btrfs_release_path(path
);
236 info
= search_free_space_info(trans
, fs_info
, block_group
, path
, 1);
241 leaf
= path
->nodes
[0];
242 flags
= btrfs_free_space_flags(leaf
, info
);
243 flags
|= BTRFS_FREE_SPACE_USING_BITMAPS
;
244 btrfs_set_free_space_flags(leaf
, info
, flags
);
245 expected_extent_count
= btrfs_free_space_extent_count(leaf
, info
);
246 btrfs_mark_buffer_dirty(leaf
);
247 btrfs_release_path(path
);
249 if (extent_count
!= expected_extent_count
) {
250 btrfs_err(fs_info
, "incorrect extent count for %llu; counted %u, expected %u",
251 block_group
->key
.objectid
, extent_count
,
252 expected_extent_count
);
258 bitmap_cursor
= (char *)bitmap
;
259 bitmap_range
= block_group
->sectorsize
* BTRFS_FREE_SPACE_BITMAP_BITS
;
266 extent_size
= min(end
- i
, bitmap_range
);
267 data_size
= free_space_bitmap_size(extent_size
,
268 block_group
->sectorsize
);
271 key
.type
= BTRFS_FREE_SPACE_BITMAP_KEY
;
272 key
.offset
= extent_size
;
274 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
,
279 leaf
= path
->nodes
[0];
280 ptr
= btrfs_item_ptr_offset(leaf
, path
->slots
[0]);
281 write_extent_buffer(leaf
, bitmap_cursor
, ptr
,
283 btrfs_mark_buffer_dirty(leaf
);
284 btrfs_release_path(path
);
287 bitmap_cursor
+= data_size
;
294 btrfs_abort_transaction(trans
, root
, ret
);
298 int convert_free_space_to_extents(struct btrfs_trans_handle
*trans
,
299 struct btrfs_fs_info
*fs_info
,
300 struct btrfs_block_group_cache
*block_group
,
301 struct btrfs_path
*path
)
303 struct btrfs_root
*root
= fs_info
->free_space_root
;
304 struct btrfs_free_space_info
*info
;
305 struct btrfs_key key
, found_key
;
306 struct extent_buffer
*leaf
;
307 unsigned long *bitmap
;
309 /* Initialize to silence GCC. */
310 u64 extent_start
= 0;
312 u32 bitmap_size
, flags
, expected_extent_count
;
313 int prev_bit
= 0, bit
, bitnr
;
314 u32 extent_count
= 0;
318 bitmap_size
= free_space_bitmap_size(block_group
->key
.offset
,
319 block_group
->sectorsize
);
320 bitmap
= alloc_bitmap(bitmap_size
);
326 start
= block_group
->key
.objectid
;
327 end
= block_group
->key
.objectid
+ block_group
->key
.offset
;
329 key
.objectid
= end
- 1;
331 key
.offset
= (u64
)-1;
334 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, -1, 1);
338 leaf
= path
->nodes
[0];
341 while (path
->slots
[0] > 0) {
342 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0] - 1);
344 if (found_key
.type
== BTRFS_FREE_SPACE_INFO_KEY
) {
345 ASSERT(found_key
.objectid
== block_group
->key
.objectid
);
346 ASSERT(found_key
.offset
== block_group
->key
.offset
);
349 } else if (found_key
.type
== BTRFS_FREE_SPACE_BITMAP_KEY
) {
352 u32 bitmap_pos
, data_size
;
354 ASSERT(found_key
.objectid
>= start
);
355 ASSERT(found_key
.objectid
< end
);
356 ASSERT(found_key
.objectid
+ found_key
.offset
<= end
);
358 bitmap_pos
= div_u64(found_key
.objectid
- start
,
359 block_group
->sectorsize
*
361 bitmap_cursor
= ((char *)bitmap
) + bitmap_pos
;
362 data_size
= free_space_bitmap_size(found_key
.offset
,
363 block_group
->sectorsize
);
365 ptr
= btrfs_item_ptr_offset(leaf
, path
->slots
[0] - 1);
366 read_extent_buffer(leaf
, bitmap_cursor
, ptr
,
376 ret
= btrfs_del_items(trans
, root
, path
, path
->slots
[0], nr
);
379 btrfs_release_path(path
);
382 info
= search_free_space_info(trans
, fs_info
, block_group
, path
, 1);
387 leaf
= path
->nodes
[0];
388 flags
= btrfs_free_space_flags(leaf
, info
);
389 flags
&= ~BTRFS_FREE_SPACE_USING_BITMAPS
;
390 btrfs_set_free_space_flags(leaf
, info
, flags
);
391 expected_extent_count
= btrfs_free_space_extent_count(leaf
, info
);
392 btrfs_mark_buffer_dirty(leaf
);
393 btrfs_release_path(path
);
397 while (offset
< end
) {
398 bit
= !!test_bit(bitnr
, bitmap
);
399 if (prev_bit
== 0 && bit
== 1) {
400 extent_start
= offset
;
401 } else if (prev_bit
== 1 && bit
== 0) {
402 key
.objectid
= extent_start
;
403 key
.type
= BTRFS_FREE_SPACE_EXTENT_KEY
;
404 key
.offset
= offset
- extent_start
;
406 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
, 0);
409 btrfs_release_path(path
);
414 offset
+= block_group
->sectorsize
;
418 key
.objectid
= extent_start
;
419 key
.type
= BTRFS_FREE_SPACE_EXTENT_KEY
;
420 key
.offset
= end
- extent_start
;
422 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
, 0);
425 btrfs_release_path(path
);
430 if (extent_count
!= expected_extent_count
) {
431 btrfs_err(fs_info
, "incorrect extent count for %llu; counted %u, expected %u",
432 block_group
->key
.objectid
, extent_count
,
433 expected_extent_count
);
443 btrfs_abort_transaction(trans
, root
, ret
);
447 static int update_free_space_extent_count(struct btrfs_trans_handle
*trans
,
448 struct btrfs_fs_info
*fs_info
,
449 struct btrfs_block_group_cache
*block_group
,
450 struct btrfs_path
*path
,
453 struct btrfs_free_space_info
*info
;
458 if (new_extents
== 0)
461 info
= search_free_space_info(trans
, fs_info
, block_group
, path
, 1);
466 flags
= btrfs_free_space_flags(path
->nodes
[0], info
);
467 extent_count
= btrfs_free_space_extent_count(path
->nodes
[0], info
);
469 extent_count
+= new_extents
;
470 btrfs_set_free_space_extent_count(path
->nodes
[0], info
, extent_count
);
471 btrfs_mark_buffer_dirty(path
->nodes
[0]);
472 btrfs_release_path(path
);
474 if (!(flags
& BTRFS_FREE_SPACE_USING_BITMAPS
) &&
475 extent_count
> block_group
->bitmap_high_thresh
) {
476 ret
= convert_free_space_to_bitmaps(trans
, fs_info
, block_group
,
478 } else if ((flags
& BTRFS_FREE_SPACE_USING_BITMAPS
) &&
479 extent_count
< block_group
->bitmap_low_thresh
) {
480 ret
= convert_free_space_to_extents(trans
, fs_info
, block_group
,
488 int free_space_test_bit(struct btrfs_block_group_cache
*block_group
,
489 struct btrfs_path
*path
, u64 offset
)
491 struct extent_buffer
*leaf
;
492 struct btrfs_key key
;
493 u64 found_start
, found_end
;
494 unsigned long ptr
, i
;
496 leaf
= path
->nodes
[0];
497 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
498 ASSERT(key
.type
== BTRFS_FREE_SPACE_BITMAP_KEY
);
500 found_start
= key
.objectid
;
501 found_end
= key
.objectid
+ key
.offset
;
502 ASSERT(offset
>= found_start
&& offset
< found_end
);
504 ptr
= btrfs_item_ptr_offset(leaf
, path
->slots
[0]);
505 i
= div_u64(offset
- found_start
, block_group
->sectorsize
);
506 return !!extent_buffer_test_bit(leaf
, ptr
, i
);
509 static void free_space_set_bits(struct btrfs_block_group_cache
*block_group
,
510 struct btrfs_path
*path
, u64
*start
, u64
*size
,
513 struct extent_buffer
*leaf
;
514 struct btrfs_key key
;
515 u64 end
= *start
+ *size
;
516 u64 found_start
, found_end
;
517 unsigned long ptr
, first
, last
;
519 leaf
= path
->nodes
[0];
520 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
521 ASSERT(key
.type
== BTRFS_FREE_SPACE_BITMAP_KEY
);
523 found_start
= key
.objectid
;
524 found_end
= key
.objectid
+ key
.offset
;
525 ASSERT(*start
>= found_start
&& *start
< found_end
);
526 ASSERT(end
> found_start
);
531 ptr
= btrfs_item_ptr_offset(leaf
, path
->slots
[0]);
532 first
= div_u64(*start
- found_start
, block_group
->sectorsize
);
533 last
= div_u64(end
- found_start
, block_group
->sectorsize
);
535 extent_buffer_bitmap_set(leaf
, ptr
, first
, last
- first
);
537 extent_buffer_bitmap_clear(leaf
, ptr
, first
, last
- first
);
538 btrfs_mark_buffer_dirty(leaf
);
540 *size
-= end
- *start
;
545 * We can't use btrfs_next_item() in modify_free_space_bitmap() because
546 * btrfs_next_leaf() doesn't get the path for writing. We can forgo the fancy
547 * tree walking in btrfs_next_leaf() anyways because we know exactly what we're
550 static int free_space_next_bitmap(struct btrfs_trans_handle
*trans
,
551 struct btrfs_root
*root
, struct btrfs_path
*p
)
553 struct btrfs_key key
;
555 if (p
->slots
[0] + 1 < btrfs_header_nritems(p
->nodes
[0])) {
560 btrfs_item_key_to_cpu(p
->nodes
[0], &key
, p
->slots
[0]);
561 btrfs_release_path(p
);
563 key
.objectid
+= key
.offset
;
565 key
.offset
= (u64
)-1;
567 return btrfs_search_prev_slot(trans
, root
, &key
, p
, 0, 1);
571 * If remove is 1, then we are removing free space, thus clearing bits in the
572 * bitmap. If remove is 0, then we are adding free space, thus setting bits in
575 static int modify_free_space_bitmap(struct btrfs_trans_handle
*trans
,
576 struct btrfs_fs_info
*fs_info
,
577 struct btrfs_block_group_cache
*block_group
,
578 struct btrfs_path
*path
,
579 u64 start
, u64 size
, int remove
)
581 struct btrfs_root
*root
= fs_info
->free_space_root
;
582 struct btrfs_key key
;
583 u64 end
= start
+ size
;
584 u64 cur_start
, cur_size
;
585 int prev_bit
, next_bit
;
590 * Read the bit for the block immediately before the extent of space if
591 * that block is within the block group.
593 if (start
> block_group
->key
.objectid
) {
594 u64 prev_block
= start
- block_group
->sectorsize
;
596 key
.objectid
= prev_block
;
598 key
.offset
= (u64
)-1;
600 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, 0, 1);
604 prev_bit
= free_space_test_bit(block_group
, path
, prev_block
);
606 /* The previous block may have been in the previous bitmap. */
607 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
608 if (start
>= key
.objectid
+ key
.offset
) {
609 ret
= free_space_next_bitmap(trans
, root
, path
);
614 key
.objectid
= start
;
616 key
.offset
= (u64
)-1;
618 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, 0, 1);
626 * Iterate over all of the bitmaps overlapped by the extent of space,
627 * clearing/setting bits as required.
632 free_space_set_bits(block_group
, path
, &cur_start
, &cur_size
,
636 ret
= free_space_next_bitmap(trans
, root
, path
);
642 * Read the bit for the block immediately after the extent of space if
643 * that block is within the block group.
645 if (end
< block_group
->key
.objectid
+ block_group
->key
.offset
) {
646 /* The next block may be in the next bitmap. */
647 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
648 if (end
>= key
.objectid
+ key
.offset
) {
649 ret
= free_space_next_bitmap(trans
, root
, path
);
654 next_bit
= free_space_test_bit(block_group
, path
, end
);
662 /* Leftover on the left. */
666 /* Leftover on the right. */
672 /* Merging with neighbor on the left. */
676 /* Merging with neighbor on the right. */
681 btrfs_release_path(path
);
682 ret
= update_free_space_extent_count(trans
, fs_info
, block_group
, path
,
689 static int remove_free_space_extent(struct btrfs_trans_handle
*trans
,
690 struct btrfs_fs_info
*fs_info
,
691 struct btrfs_block_group_cache
*block_group
,
692 struct btrfs_path
*path
,
695 struct btrfs_root
*root
= fs_info
->free_space_root
;
696 struct btrfs_key key
;
697 u64 found_start
, found_end
;
698 u64 end
= start
+ size
;
699 int new_extents
= -1;
702 key
.objectid
= start
;
704 key
.offset
= (u64
)-1;
706 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, -1, 1);
710 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
712 ASSERT(key
.type
== BTRFS_FREE_SPACE_EXTENT_KEY
);
714 found_start
= key
.objectid
;
715 found_end
= key
.objectid
+ key
.offset
;
716 ASSERT(start
>= found_start
&& end
<= found_end
);
719 * Okay, now that we've found the free space extent which contains the
720 * free space that we are removing, there are four cases:
722 * 1. We're using the whole extent: delete the key we found and
723 * decrement the free space extent count.
724 * 2. We are using part of the extent starting at the beginning: delete
725 * the key we found and insert a new key representing the leftover at
726 * the end. There is no net change in the number of extents.
727 * 3. We are using part of the extent ending at the end: delete the key
728 * we found and insert a new key representing the leftover at the
729 * beginning. There is no net change in the number of extents.
730 * 4. We are using part of the extent in the middle: delete the key we
731 * found and insert two new keys representing the leftovers on each
732 * side. Where we used to have one extent, we now have two, so increment
733 * the extent count. We may need to convert the block group to bitmaps
737 /* Delete the existing key (cases 1-4). */
738 ret
= btrfs_del_item(trans
, root
, path
);
742 /* Add a key for leftovers at the beginning (cases 3 and 4). */
743 if (start
> found_start
) {
744 key
.objectid
= found_start
;
745 key
.type
= BTRFS_FREE_SPACE_EXTENT_KEY
;
746 key
.offset
= start
- found_start
;
748 btrfs_release_path(path
);
749 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
, 0);
755 /* Add a key for leftovers at the end (cases 2 and 4). */
756 if (end
< found_end
) {
758 key
.type
= BTRFS_FREE_SPACE_EXTENT_KEY
;
759 key
.offset
= found_end
- end
;
761 btrfs_release_path(path
);
762 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
, 0);
768 btrfs_release_path(path
);
769 ret
= update_free_space_extent_count(trans
, fs_info
, block_group
, path
,
776 int __remove_from_free_space_tree(struct btrfs_trans_handle
*trans
,
777 struct btrfs_fs_info
*fs_info
,
778 struct btrfs_block_group_cache
*block_group
,
779 struct btrfs_path
*path
, u64 start
, u64 size
)
781 struct btrfs_free_space_info
*info
;
785 if (block_group
->needs_free_space
) {
786 ret
= __add_block_group_free_space(trans
, fs_info
, block_group
,
792 info
= search_free_space_info(NULL
, fs_info
, block_group
, path
, 0);
794 return PTR_ERR(info
);
795 flags
= btrfs_free_space_flags(path
->nodes
[0], info
);
796 btrfs_release_path(path
);
798 if (flags
& BTRFS_FREE_SPACE_USING_BITMAPS
) {
799 return modify_free_space_bitmap(trans
, fs_info
, block_group
,
800 path
, start
, size
, 1);
802 return remove_free_space_extent(trans
, fs_info
, block_group
,
807 int remove_from_free_space_tree(struct btrfs_trans_handle
*trans
,
808 struct btrfs_fs_info
*fs_info
,
811 struct btrfs_block_group_cache
*block_group
;
812 struct btrfs_path
*path
;
815 if (!btrfs_fs_compat_ro(fs_info
, FREE_SPACE_TREE
))
818 path
= btrfs_alloc_path();
824 block_group
= btrfs_lookup_block_group(fs_info
, start
);
831 mutex_lock(&block_group
->free_space_lock
);
832 ret
= __remove_from_free_space_tree(trans
, fs_info
, block_group
, path
,
834 mutex_unlock(&block_group
->free_space_lock
);
836 btrfs_put_block_group(block_group
);
838 btrfs_free_path(path
);
840 btrfs_abort_transaction(trans
, fs_info
->free_space_root
, ret
);
844 static int add_free_space_extent(struct btrfs_trans_handle
*trans
,
845 struct btrfs_fs_info
*fs_info
,
846 struct btrfs_block_group_cache
*block_group
,
847 struct btrfs_path
*path
,
850 struct btrfs_root
*root
= fs_info
->free_space_root
;
851 struct btrfs_key key
, new_key
;
852 u64 found_start
, found_end
;
853 u64 end
= start
+ size
;
858 * We are adding a new extent of free space, but we need to merge
859 * extents. There are four cases here:
861 * 1. The new extent does not have any immediate neighbors to merge
862 * with: add the new key and increment the free space extent count. We
863 * may need to convert the block group to bitmaps as a result.
864 * 2. The new extent has an immediate neighbor before it: remove the
865 * previous key and insert a new key combining both of them. There is no
866 * net change in the number of extents.
867 * 3. The new extent has an immediate neighbor after it: remove the next
868 * key and insert a new key combining both of them. There is no net
869 * change in the number of extents.
870 * 4. The new extent has immediate neighbors on both sides: remove both
871 * of the keys and insert a new key combining all of them. Where we used
872 * to have two extents, we now have one, so decrement the extent count.
875 new_key
.objectid
= start
;
876 new_key
.type
= BTRFS_FREE_SPACE_EXTENT_KEY
;
877 new_key
.offset
= size
;
879 /* Search for a neighbor on the left. */
880 if (start
== block_group
->key
.objectid
)
882 key
.objectid
= start
- 1;
884 key
.offset
= (u64
)-1;
886 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, -1, 1);
890 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
892 if (key
.type
!= BTRFS_FREE_SPACE_EXTENT_KEY
) {
893 ASSERT(key
.type
== BTRFS_FREE_SPACE_INFO_KEY
);
894 btrfs_release_path(path
);
898 found_start
= key
.objectid
;
899 found_end
= key
.objectid
+ key
.offset
;
900 ASSERT(found_start
>= block_group
->key
.objectid
&&
901 found_end
> block_group
->key
.objectid
);
902 ASSERT(found_start
< start
&& found_end
<= start
);
905 * Delete the neighbor on the left and absorb it into the new key (cases
908 if (found_end
== start
) {
909 ret
= btrfs_del_item(trans
, root
, path
);
912 new_key
.objectid
= found_start
;
913 new_key
.offset
+= key
.offset
;
916 btrfs_release_path(path
);
919 /* Search for a neighbor on the right. */
920 if (end
== block_group
->key
.objectid
+ block_group
->key
.offset
)
924 key
.offset
= (u64
)-1;
926 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, -1, 1);
930 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
932 if (key
.type
!= BTRFS_FREE_SPACE_EXTENT_KEY
) {
933 ASSERT(key
.type
== BTRFS_FREE_SPACE_INFO_KEY
);
934 btrfs_release_path(path
);
938 found_start
= key
.objectid
;
939 found_end
= key
.objectid
+ key
.offset
;
940 ASSERT(found_start
>= block_group
->key
.objectid
&&
941 found_end
> block_group
->key
.objectid
);
942 ASSERT((found_start
< start
&& found_end
<= start
) ||
943 (found_start
>= end
&& found_end
> end
));
946 * Delete the neighbor on the right and absorb it into the new key
949 if (found_start
== end
) {
950 ret
= btrfs_del_item(trans
, root
, path
);
953 new_key
.offset
+= key
.offset
;
956 btrfs_release_path(path
);
959 /* Insert the new key (cases 1-4). */
960 ret
= btrfs_insert_empty_item(trans
, root
, path
, &new_key
, 0);
964 btrfs_release_path(path
);
965 ret
= update_free_space_extent_count(trans
, fs_info
, block_group
, path
,
972 int __add_to_free_space_tree(struct btrfs_trans_handle
*trans
,
973 struct btrfs_fs_info
*fs_info
,
974 struct btrfs_block_group_cache
*block_group
,
975 struct btrfs_path
*path
, u64 start
, u64 size
)
977 struct btrfs_free_space_info
*info
;
981 if (block_group
->needs_free_space
) {
982 ret
= __add_block_group_free_space(trans
, fs_info
, block_group
,
988 info
= search_free_space_info(NULL
, fs_info
, block_group
, path
, 0);
990 return PTR_ERR(info
);
991 flags
= btrfs_free_space_flags(path
->nodes
[0], info
);
992 btrfs_release_path(path
);
994 if (flags
& BTRFS_FREE_SPACE_USING_BITMAPS
) {
995 return modify_free_space_bitmap(trans
, fs_info
, block_group
,
996 path
, start
, size
, 0);
998 return add_free_space_extent(trans
, fs_info
, block_group
, path
,
1003 int add_to_free_space_tree(struct btrfs_trans_handle
*trans
,
1004 struct btrfs_fs_info
*fs_info
,
1005 u64 start
, u64 size
)
1007 struct btrfs_block_group_cache
*block_group
;
1008 struct btrfs_path
*path
;
1011 if (!btrfs_fs_compat_ro(fs_info
, FREE_SPACE_TREE
))
1014 path
= btrfs_alloc_path();
1020 block_group
= btrfs_lookup_block_group(fs_info
, start
);
1027 mutex_lock(&block_group
->free_space_lock
);
1028 ret
= __add_to_free_space_tree(trans
, fs_info
, block_group
, path
, start
,
1030 mutex_unlock(&block_group
->free_space_lock
);
1032 btrfs_put_block_group(block_group
);
1034 btrfs_free_path(path
);
1036 btrfs_abort_transaction(trans
, fs_info
->free_space_root
, ret
);
1041 * Populate the free space tree by walking the extent tree. Operations on the
1042 * extent tree that happen as a result of writes to the free space tree will go
1043 * through the normal add/remove hooks.
1045 static int populate_free_space_tree(struct btrfs_trans_handle
*trans
,
1046 struct btrfs_fs_info
*fs_info
,
1047 struct btrfs_block_group_cache
*block_group
)
1049 struct btrfs_root
*extent_root
= fs_info
->extent_root
;
1050 struct btrfs_path
*path
, *path2
;
1051 struct btrfs_key key
;
1055 path
= btrfs_alloc_path();
1060 path2
= btrfs_alloc_path();
1062 btrfs_free_path(path
);
1066 ret
= add_new_free_space_info(trans
, fs_info
, block_group
, path2
);
1071 * Iterate through all of the extent and metadata items in this block
1072 * group, adding the free space between them and the free space at the
1073 * end. Note that EXTENT_ITEM and METADATA_ITEM are less than
1074 * BLOCK_GROUP_ITEM, so an extent may precede the block group that it's
1077 key
.objectid
= block_group
->key
.objectid
;
1078 key
.type
= BTRFS_EXTENT_ITEM_KEY
;
1081 ret
= btrfs_search_slot_for_read(extent_root
, &key
, path
, 1, 0);
1086 start
= block_group
->key
.objectid
;
1087 end
= block_group
->key
.objectid
+ block_group
->key
.offset
;
1089 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
1091 if (key
.type
== BTRFS_EXTENT_ITEM_KEY
||
1092 key
.type
== BTRFS_METADATA_ITEM_KEY
) {
1093 if (key
.objectid
>= end
)
1096 if (start
< key
.objectid
) {
1097 ret
= __add_to_free_space_tree(trans
, fs_info
,
1105 start
= key
.objectid
;
1106 if (key
.type
== BTRFS_METADATA_ITEM_KEY
)
1107 start
+= fs_info
->tree_root
->nodesize
;
1109 start
+= key
.offset
;
1110 } else if (key
.type
== BTRFS_BLOCK_GROUP_ITEM_KEY
) {
1111 if (key
.objectid
!= block_group
->key
.objectid
)
1115 ret
= btrfs_next_item(extent_root
, path
);
1122 ret
= __add_to_free_space_tree(trans
, fs_info
, block_group
,
1123 path2
, start
, end
- start
);
1130 btrfs_free_path(path2
);
1131 btrfs_free_path(path
);
1135 int btrfs_create_free_space_tree(struct btrfs_fs_info
*fs_info
)
1137 struct btrfs_trans_handle
*trans
;
1138 struct btrfs_root
*tree_root
= fs_info
->tree_root
;
1139 struct btrfs_root
*free_space_root
;
1140 struct btrfs_block_group_cache
*block_group
;
1141 struct rb_node
*node
;
1144 trans
= btrfs_start_transaction(tree_root
, 0);
1146 return PTR_ERR(trans
);
1148 free_space_root
= btrfs_create_tree(trans
, fs_info
,
1149 BTRFS_FREE_SPACE_TREE_OBJECTID
);
1150 if (IS_ERR(free_space_root
)) {
1151 ret
= PTR_ERR(free_space_root
);
1154 fs_info
->free_space_root
= free_space_root
;
1156 node
= rb_first(&fs_info
->block_group_cache_tree
);
1158 block_group
= rb_entry(node
, struct btrfs_block_group_cache
,
1160 ret
= populate_free_space_tree(trans
, fs_info
, block_group
);
1163 node
= rb_next(node
);
1166 btrfs_set_fs_compat_ro(fs_info
, FREE_SPACE_TREE
);
1168 ret
= btrfs_commit_transaction(trans
, tree_root
);
1175 btrfs_abort_transaction(trans
, tree_root
, ret
);
1176 btrfs_end_transaction(trans
, tree_root
);
1180 static int clear_free_space_tree(struct btrfs_trans_handle
*trans
,
1181 struct btrfs_root
*root
)
1183 struct btrfs_path
*path
;
1184 struct btrfs_key key
;
1188 path
= btrfs_alloc_path();
1192 path
->leave_spinning
= 1;
1199 ret
= btrfs_search_slot(trans
, root
, &key
, path
, -1, 1);
1203 nr
= btrfs_header_nritems(path
->nodes
[0]);
1208 ret
= btrfs_del_items(trans
, root
, path
, 0, nr
);
1212 btrfs_release_path(path
);
1217 btrfs_free_path(path
);
1221 int btrfs_clear_free_space_tree(struct btrfs_fs_info
*fs_info
)
1223 struct btrfs_trans_handle
*trans
;
1224 struct btrfs_root
*tree_root
= fs_info
->tree_root
;
1225 struct btrfs_root
*free_space_root
= fs_info
->free_space_root
;
1228 trans
= btrfs_start_transaction(tree_root
, 0);
1230 return PTR_ERR(trans
);
1232 btrfs_clear_fs_compat_ro(fs_info
, FREE_SPACE_TREE
);
1233 fs_info
->free_space_root
= NULL
;
1235 ret
= clear_free_space_tree(trans
, free_space_root
);
1239 ret
= btrfs_del_root(trans
, tree_root
, &free_space_root
->root_key
);
1243 list_del(&free_space_root
->dirty_list
);
1245 btrfs_tree_lock(free_space_root
->node
);
1246 clean_tree_block(trans
, tree_root
->fs_info
, free_space_root
->node
);
1247 btrfs_tree_unlock(free_space_root
->node
);
1248 btrfs_free_tree_block(trans
, free_space_root
, free_space_root
->node
,
1251 free_extent_buffer(free_space_root
->node
);
1252 free_extent_buffer(free_space_root
->commit_root
);
1253 kfree(free_space_root
);
1255 ret
= btrfs_commit_transaction(trans
, tree_root
);
1262 btrfs_abort_transaction(trans
, tree_root
, ret
);
1263 btrfs_end_transaction(trans
, tree_root
);
1267 static int __add_block_group_free_space(struct btrfs_trans_handle
*trans
,
1268 struct btrfs_fs_info
*fs_info
,
1269 struct btrfs_block_group_cache
*block_group
,
1270 struct btrfs_path
*path
)
1275 start
= block_group
->key
.objectid
;
1276 end
= block_group
->key
.objectid
+ block_group
->key
.offset
;
1278 block_group
->needs_free_space
= 0;
1280 ret
= add_new_free_space_info(trans
, fs_info
, block_group
, path
);
1284 return __add_to_free_space_tree(trans
, fs_info
, block_group
, path
,
1285 block_group
->key
.objectid
,
1286 block_group
->key
.offset
);
1289 int add_block_group_free_space(struct btrfs_trans_handle
*trans
,
1290 struct btrfs_fs_info
*fs_info
,
1291 struct btrfs_block_group_cache
*block_group
)
1293 struct btrfs_path
*path
= NULL
;
1296 if (!btrfs_fs_compat_ro(fs_info
, FREE_SPACE_TREE
))
1299 mutex_lock(&block_group
->free_space_lock
);
1300 if (!block_group
->needs_free_space
)
1303 path
= btrfs_alloc_path();
1309 ret
= __add_block_group_free_space(trans
, fs_info
, block_group
, path
);
1312 btrfs_free_path(path
);
1313 mutex_unlock(&block_group
->free_space_lock
);
1315 btrfs_abort_transaction(trans
, fs_info
->free_space_root
, ret
);
1319 int remove_block_group_free_space(struct btrfs_trans_handle
*trans
,
1320 struct btrfs_fs_info
*fs_info
,
1321 struct btrfs_block_group_cache
*block_group
)
1323 struct btrfs_root
*root
= fs_info
->free_space_root
;
1324 struct btrfs_path
*path
;
1325 struct btrfs_key key
, found_key
;
1326 struct extent_buffer
*leaf
;
1331 if (!btrfs_fs_compat_ro(fs_info
, FREE_SPACE_TREE
))
1334 if (block_group
->needs_free_space
) {
1335 /* We never added this block group to the free space tree. */
1339 path
= btrfs_alloc_path();
1345 start
= block_group
->key
.objectid
;
1346 end
= block_group
->key
.objectid
+ block_group
->key
.offset
;
1348 key
.objectid
= end
- 1;
1350 key
.offset
= (u64
)-1;
1353 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, -1, 1);
1357 leaf
= path
->nodes
[0];
1360 while (path
->slots
[0] > 0) {
1361 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0] - 1);
1363 if (found_key
.type
== BTRFS_FREE_SPACE_INFO_KEY
) {
1364 ASSERT(found_key
.objectid
== block_group
->key
.objectid
);
1365 ASSERT(found_key
.offset
== block_group
->key
.offset
);
1370 } else if (found_key
.type
== BTRFS_FREE_SPACE_EXTENT_KEY
||
1371 found_key
.type
== BTRFS_FREE_SPACE_BITMAP_KEY
) {
1372 ASSERT(found_key
.objectid
>= start
);
1373 ASSERT(found_key
.objectid
< end
);
1374 ASSERT(found_key
.objectid
+ found_key
.offset
<= end
);
1382 ret
= btrfs_del_items(trans
, root
, path
, path
->slots
[0], nr
);
1385 btrfs_release_path(path
);
1390 btrfs_free_path(path
);
1392 btrfs_abort_transaction(trans
, root
, ret
);
1396 static int load_free_space_bitmaps(struct btrfs_caching_control
*caching_ctl
,
1397 struct btrfs_path
*path
,
1398 u32 expected_extent_count
)
1400 struct btrfs_block_group_cache
*block_group
;
1401 struct btrfs_fs_info
*fs_info
;
1402 struct btrfs_root
*root
;
1403 struct btrfs_key key
;
1404 int prev_bit
= 0, bit
;
1405 /* Initialize to silence GCC. */
1406 u64 extent_start
= 0;
1408 u64 total_found
= 0;
1409 u32 extent_count
= 0;
1412 block_group
= caching_ctl
->block_group
;
1413 fs_info
= block_group
->fs_info
;
1414 root
= fs_info
->free_space_root
;
1416 end
= block_group
->key
.objectid
+ block_group
->key
.offset
;
1419 ret
= btrfs_next_item(root
, path
);
1425 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
1427 if (key
.type
== BTRFS_FREE_SPACE_INFO_KEY
)
1430 ASSERT(key
.type
== BTRFS_FREE_SPACE_BITMAP_KEY
);
1431 ASSERT(key
.objectid
< end
&& key
.objectid
+ key
.offset
<= end
);
1433 caching_ctl
->progress
= key
.objectid
;
1435 offset
= key
.objectid
;
1436 while (offset
< key
.objectid
+ key
.offset
) {
1437 bit
= free_space_test_bit(block_group
, path
, offset
);
1438 if (prev_bit
== 0 && bit
== 1) {
1439 extent_start
= offset
;
1440 } else if (prev_bit
== 1 && bit
== 0) {
1441 total_found
+= add_new_free_space(block_group
,
1445 if (total_found
> CACHING_CTL_WAKE_UP
) {
1447 wake_up(&caching_ctl
->wait
);
1452 offset
+= block_group
->sectorsize
;
1455 if (prev_bit
== 1) {
1456 total_found
+= add_new_free_space(block_group
, fs_info
,
1461 if (extent_count
!= expected_extent_count
) {
1462 btrfs_err(fs_info
, "incorrect extent count for %llu; counted %u, expected %u",
1463 block_group
->key
.objectid
, extent_count
,
1464 expected_extent_count
);
1470 caching_ctl
->progress
= (u64
)-1;
1477 static int load_free_space_extents(struct btrfs_caching_control
*caching_ctl
,
1478 struct btrfs_path
*path
,
1479 u32 expected_extent_count
)
1481 struct btrfs_block_group_cache
*block_group
;
1482 struct btrfs_fs_info
*fs_info
;
1483 struct btrfs_root
*root
;
1484 struct btrfs_key key
;
1486 u64 total_found
= 0;
1487 u32 extent_count
= 0;
1490 block_group
= caching_ctl
->block_group
;
1491 fs_info
= block_group
->fs_info
;
1492 root
= fs_info
->free_space_root
;
1494 end
= block_group
->key
.objectid
+ block_group
->key
.offset
;
1497 ret
= btrfs_next_item(root
, path
);
1503 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
1505 if (key
.type
== BTRFS_FREE_SPACE_INFO_KEY
)
1508 ASSERT(key
.type
== BTRFS_FREE_SPACE_EXTENT_KEY
);
1509 ASSERT(key
.objectid
< end
&& key
.objectid
+ key
.offset
<= end
);
1511 caching_ctl
->progress
= key
.objectid
;
1513 total_found
+= add_new_free_space(block_group
, fs_info
,
1515 key
.objectid
+ key
.offset
);
1516 if (total_found
> CACHING_CTL_WAKE_UP
) {
1518 wake_up(&caching_ctl
->wait
);
1523 if (extent_count
!= expected_extent_count
) {
1524 btrfs_err(fs_info
, "incorrect extent count for %llu; counted %u, expected %u",
1525 block_group
->key
.objectid
, extent_count
,
1526 expected_extent_count
);
1532 caching_ctl
->progress
= (u64
)-1;
1539 int load_free_space_tree(struct btrfs_caching_control
*caching_ctl
)
1541 struct btrfs_block_group_cache
*block_group
;
1542 struct btrfs_fs_info
*fs_info
;
1543 struct btrfs_free_space_info
*info
;
1544 struct btrfs_path
*path
;
1545 u32 extent_count
, flags
;
1548 block_group
= caching_ctl
->block_group
;
1549 fs_info
= block_group
->fs_info
;
1551 path
= btrfs_alloc_path();
1556 * Just like caching_thread() doesn't want to deadlock on the extent
1557 * tree, we don't want to deadlock on the free space tree.
1559 path
->skip_locking
= 1;
1560 path
->search_commit_root
= 1;
1563 info
= search_free_space_info(NULL
, fs_info
, block_group
, path
, 0);
1565 ret
= PTR_ERR(info
);
1568 extent_count
= btrfs_free_space_extent_count(path
->nodes
[0], info
);
1569 flags
= btrfs_free_space_flags(path
->nodes
[0], info
);
1572 * We left path pointing to the free space info item, so now
1573 * load_free_space_foo can just iterate through the free space tree from
1576 if (flags
& BTRFS_FREE_SPACE_USING_BITMAPS
)
1577 ret
= load_free_space_bitmaps(caching_ctl
, path
, extent_count
);
1579 ret
= load_free_space_extents(caching_ctl
, path
, extent_count
);
1582 btrfs_free_path(path
);