2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
19 #include <linux/module.h>
22 #include "print-tree.h"
23 #include "transaction.h"
25 static int find_free_extent(struct btrfs_trans_handle
*trans
, struct btrfs_root
26 *orig_root
, u64 num_blocks
, u64 search_start
,
27 u64 search_end
, u64 hint_block
,
28 struct btrfs_key
*ins
, int data
);
29 static int finish_current_insert(struct btrfs_trans_handle
*trans
, struct
30 btrfs_root
*extent_root
);
31 static int del_pending_extents(struct btrfs_trans_handle
*trans
, struct
32 btrfs_root
*extent_root
);
34 static void reada_extent_leaves(struct btrfs_root
*root
,
35 struct btrfs_path
*path
, u64 limit
)
37 struct btrfs_node
*node
;
47 node
= btrfs_buffer_node(path
->nodes
[1]);
48 slot
= path
->slots
[1] + 1;
49 nritems
= btrfs_header_nritems(&node
->header
);
50 for (i
= slot
; i
< nritems
&& i
< slot
+ 8; i
++) {
51 item_objectid
= btrfs_disk_key_objectid(&node
->ptrs
[i
].key
);
52 if (item_objectid
> limit
)
54 blocknr
= btrfs_node_blockptr(node
, i
);
55 ret
= readahead_tree_block(root
, blocknr
);
61 static int cache_block_group(struct btrfs_root
*root
,
62 struct btrfs_block_group_cache
*block_group
)
64 struct btrfs_path
*path
;
67 struct btrfs_leaf
*leaf
;
68 struct radix_tree_root
*extent_radix
;
76 root
= root
->fs_info
->extent_root
;
77 extent_radix
= &root
->fs_info
->extent_map_radix
;
79 if (block_group
->cached
)
81 if (block_group
->data
)
83 path
= btrfs_alloc_path();
86 key
.objectid
= block_group
->key
.objectid
;
89 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
90 ret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
93 if (ret
&& path
->slots
[0] > 0)
95 limit
= block_group
->key
.objectid
+ block_group
->key
.offset
;
96 reada_extent_leaves(root
, path
, limit
);
98 leaf
= btrfs_buffer_leaf(path
->nodes
[0]);
99 slot
= path
->slots
[0];
100 if (slot
>= btrfs_header_nritems(&leaf
->header
)) {
101 reada_extent_leaves(root
, path
, limit
);
102 ret
= btrfs_next_leaf(root
, path
);
109 hole_size
= block_group
->key
.objectid
+
110 block_group
->key
.offset
- last
;
112 last
= block_group
->key
.objectid
;
113 hole_size
= block_group
->key
.offset
;
115 for (i
= 0; i
< hole_size
; i
++) {
116 set_radix_bit(extent_radix
,
122 btrfs_disk_key_to_cpu(&key
, &leaf
->items
[slot
].key
);
123 if (key
.objectid
>= block_group
->key
.objectid
+
124 block_group
->key
.offset
) {
126 hole_size
= block_group
->key
.objectid
+
127 block_group
->key
.offset
- last
;
129 last
= block_group
->key
.objectid
;
130 hole_size
= block_group
->key
.offset
;
132 for (i
= 0; i
< hole_size
; i
++) {
133 set_radix_bit(extent_radix
, last
+ i
);
137 if (btrfs_key_type(&key
) == BTRFS_EXTENT_ITEM_KEY
) {
139 last
= key
.objectid
+ key
.offset
;
142 hole_size
= key
.objectid
- last
;
143 for (i
= 0; i
< hole_size
; i
++) {
144 set_radix_bit(extent_radix
, last
+ i
);
146 last
= key
.objectid
+ key
.offset
;
152 block_group
->cached
= 1;
154 btrfs_free_path(path
);
158 struct btrfs_block_group_cache
*btrfs_lookup_block_group(struct
162 struct btrfs_block_group_cache
*block_group
;
165 ret
= radix_tree_gang_lookup(&info
->block_group_radix
,
166 (void **)&block_group
,
169 if (block_group
->key
.objectid
<= blocknr
&& blocknr
<=
170 block_group
->key
.objectid
+ block_group
->key
.offset
)
173 ret
= radix_tree_gang_lookup(&info
->block_group_data_radix
,
174 (void **)&block_group
,
177 if (block_group
->key
.objectid
<= blocknr
&& blocknr
<=
178 block_group
->key
.objectid
+ block_group
->key
.offset
)
184 static u64
leaf_range(struct btrfs_root
*root
)
186 u64 size
= BTRFS_LEAF_DATA_SIZE(root
);
187 do_div(size
, sizeof(struct btrfs_extent_item
) +
188 sizeof(struct btrfs_item
));
192 static u64
find_search_start(struct btrfs_root
*root
,
193 struct btrfs_block_group_cache
**cache_ret
,
194 u64 search_start
, int num
)
196 unsigned long gang
[8];
198 struct btrfs_block_group_cache
*cache
= *cache_ret
;
199 u64 last
= max(search_start
, cache
->key
.objectid
);
204 last
= max(last
, cache
->last_prealloc
);
207 ret
= cache_block_group(root
, cache
);
211 ret
= find_first_radix_bit(&root
->fs_info
->extent_map_radix
,
212 gang
, last
, ARRAY_SIZE(gang
));
215 last
= gang
[ret
-1] + 1;
217 if (ret
!= ARRAY_SIZE(gang
)) {
220 if (gang
[ret
-1] - gang
[0] > leaf_range(root
)) {
224 if (gang
[0] >= cache
->key
.objectid
+ cache
->key
.offset
) {
230 return max(cache
->last_alloc
, search_start
);
233 cache
= btrfs_lookup_block_group(root
->fs_info
,
234 last
+ cache
->key
.offset
- 1);
236 return max((*cache_ret
)->last_alloc
, search_start
);
238 cache
= btrfs_find_block_group(root
, cache
,
239 last
+ cache
->key
.offset
- 1, 0, 0);
244 static u64
div_factor(u64 num
, int factor
)
251 struct btrfs_block_group_cache
*btrfs_find_block_group(struct btrfs_root
*root
,
252 struct btrfs_block_group_cache
253 *hint
, u64 search_start
,
256 struct btrfs_block_group_cache
*cache
[8];
257 struct btrfs_block_group_cache
*found_group
= NULL
;
258 struct btrfs_fs_info
*info
= root
->fs_info
;
259 struct radix_tree_root
*radix
;
260 struct radix_tree_root
*swap_radix
;
274 radix
= &info
->block_group_data_radix
;
275 swap_radix
= &info
->block_group_radix
;
277 radix
= &info
->block_group_radix
;
278 swap_radix
= &info
->block_group_data_radix
;
282 struct btrfs_block_group_cache
*shint
;
283 shint
= btrfs_lookup_block_group(info
, search_start
);
284 if (shint
->data
== data
) {
285 used
= btrfs_block_group_used(&shint
->item
);
286 if (used
+ shint
->pinned
<
287 div_factor(shint
->key
.offset
, factor
)) {
292 if (hint
&& hint
->data
== data
) {
293 used
= btrfs_block_group_used(&hint
->item
);
294 if (used
+ hint
->pinned
<
295 div_factor(hint
->key
.offset
, factor
)) {
298 if (used
>= div_factor(hint
->key
.offset
, 8)) {
299 radix_tree_tag_clear(radix
,
301 hint
->key
.offset
- 1,
302 BTRFS_BLOCK_GROUP_AVAIL
);
304 last
= hint
->key
.offset
* 3;
305 if (hint
->key
.objectid
>= last
)
306 last
= max(search_start
+ hint
->key
.offset
- 1,
307 hint
->key
.objectid
- last
);
309 last
= hint
->key
.objectid
+ hint
->key
.offset
;
313 hint_last
= max(hint
->key
.objectid
, search_start
);
315 hint_last
= search_start
;
320 ret
= radix_tree_gang_lookup_tag(radix
, (void **)cache
,
321 last
, ARRAY_SIZE(cache
),
322 BTRFS_BLOCK_GROUP_AVAIL
);
325 for (i
= 0; i
< ret
; i
++) {
326 last
= cache
[i
]->key
.objectid
+
327 cache
[i
]->key
.offset
;
328 used
= btrfs_block_group_used(&cache
[i
]->item
);
329 if (used
+ cache
[i
]->pinned
<
330 div_factor(cache
[i
]->key
.offset
, factor
)) {
331 found_group
= cache
[i
];
334 if (used
>= div_factor(cache
[i
]->key
.offset
, 8)) {
335 radix_tree_tag_clear(radix
,
336 cache
[i
]->key
.objectid
+
337 cache
[i
]->key
.offset
- 1,
338 BTRFS_BLOCK_GROUP_AVAIL
);
346 ret
= radix_tree_gang_lookup(radix
, (void **)cache
,
347 last
, ARRAY_SIZE(cache
));
350 for (i
= 0; i
< ret
; i
++) {
351 last
= cache
[i
]->key
.objectid
+
352 cache
[i
]->key
.offset
;
353 used
= btrfs_block_group_used(&cache
[i
]->item
);
354 if (used
+ cache
[i
]->pinned
< cache
[i
]->key
.offset
) {
355 found_group
= cache
[i
];
358 if (used
>= cache
[i
]->key
.offset
) {
359 radix_tree_tag_clear(radix
,
360 cache
[i
]->key
.objectid
+
361 cache
[i
]->key
.offset
- 1,
362 BTRFS_BLOCK_GROUP_AVAIL
);
373 struct radix_tree_root
*tmp
= radix
;
381 ret
= radix_tree_gang_lookup(radix
,
382 (void **)&found_group
, 0, 1);
384 ret
= radix_tree_gang_lookup(swap_radix
,
385 (void **)&found_group
,
394 int btrfs_inc_extent_ref(struct btrfs_trans_handle
*trans
,
395 struct btrfs_root
*root
,
396 u64 blocknr
, u64 num_blocks
)
398 struct btrfs_path
*path
;
400 struct btrfs_key key
;
401 struct btrfs_leaf
*l
;
402 struct btrfs_extent_item
*item
;
403 struct btrfs_key ins
;
406 path
= btrfs_alloc_path();
409 ret
= find_free_extent(trans
, root
->fs_info
->extent_root
, 0, 0,
410 (u64
)-1, 0, &ins
, 0);
412 btrfs_free_path(path
);
415 key
.objectid
= blocknr
;
417 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
418 key
.offset
= num_blocks
;
419 ret
= btrfs_search_slot(trans
, root
->fs_info
->extent_root
, &key
, path
,
427 l
= btrfs_buffer_leaf(path
->nodes
[0]);
428 item
= btrfs_item_ptr(l
, path
->slots
[0], struct btrfs_extent_item
);
429 refs
= btrfs_extent_refs(item
);
430 btrfs_set_extent_refs(item
, refs
+ 1);
431 btrfs_mark_buffer_dirty(path
->nodes
[0]);
433 btrfs_release_path(root
->fs_info
->extent_root
, path
);
434 btrfs_free_path(path
);
435 finish_current_insert(trans
, root
->fs_info
->extent_root
);
436 del_pending_extents(trans
, root
->fs_info
->extent_root
);
440 static int lookup_extent_ref(struct btrfs_trans_handle
*trans
,
441 struct btrfs_root
*root
, u64 blocknr
,
442 u64 num_blocks
, u32
*refs
)
444 struct btrfs_path
*path
;
446 struct btrfs_key key
;
447 struct btrfs_leaf
*l
;
448 struct btrfs_extent_item
*item
;
450 path
= btrfs_alloc_path();
451 key
.objectid
= blocknr
;
452 key
.offset
= num_blocks
;
454 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
455 ret
= btrfs_search_slot(trans
, root
->fs_info
->extent_root
, &key
, path
,
461 l
= btrfs_buffer_leaf(path
->nodes
[0]);
462 item
= btrfs_item_ptr(l
, path
->slots
[0], struct btrfs_extent_item
);
463 *refs
= btrfs_extent_refs(item
);
465 btrfs_free_path(path
);
469 int btrfs_inc_root_ref(struct btrfs_trans_handle
*trans
,
470 struct btrfs_root
*root
)
472 return btrfs_inc_extent_ref(trans
, root
, bh_blocknr(root
->node
), 1);
475 int btrfs_inc_ref(struct btrfs_trans_handle
*trans
, struct btrfs_root
*root
,
476 struct buffer_head
*buf
)
479 struct btrfs_node
*buf_node
;
480 struct btrfs_leaf
*buf_leaf
;
481 struct btrfs_disk_key
*key
;
482 struct btrfs_file_extent_item
*fi
;
491 buf_node
= btrfs_buffer_node(buf
);
492 leaf
= btrfs_is_leaf(buf_node
);
493 buf_leaf
= btrfs_buffer_leaf(buf
);
494 for (i
= 0; i
< btrfs_header_nritems(&buf_node
->header
); i
++) {
497 key
= &buf_leaf
->items
[i
].key
;
498 if (btrfs_disk_key_type(key
) != BTRFS_EXTENT_DATA_KEY
)
500 fi
= btrfs_item_ptr(buf_leaf
, i
,
501 struct btrfs_file_extent_item
);
502 if (btrfs_file_extent_type(fi
) ==
503 BTRFS_FILE_EXTENT_INLINE
)
505 disk_blocknr
= btrfs_file_extent_disk_blocknr(fi
);
506 if (disk_blocknr
== 0)
508 ret
= btrfs_inc_extent_ref(trans
, root
, disk_blocknr
,
509 btrfs_file_extent_disk_num_blocks(fi
));
515 blocknr
= btrfs_node_blockptr(buf_node
, i
);
516 ret
= btrfs_inc_extent_ref(trans
, root
, blocknr
, 1);
525 for (i
=0; i
< faili
; i
++) {
528 key
= &buf_leaf
->items
[i
].key
;
529 if (btrfs_disk_key_type(key
) != BTRFS_EXTENT_DATA_KEY
)
531 fi
= btrfs_item_ptr(buf_leaf
, i
,
532 struct btrfs_file_extent_item
);
533 if (btrfs_file_extent_type(fi
) ==
534 BTRFS_FILE_EXTENT_INLINE
)
536 disk_blocknr
= btrfs_file_extent_disk_blocknr(fi
);
537 if (disk_blocknr
== 0)
539 err
= btrfs_free_extent(trans
, root
, disk_blocknr
,
540 btrfs_file_extent_disk_num_blocks(fi
), 0);
543 blocknr
= btrfs_node_blockptr(buf_node
, i
);
544 err
= btrfs_free_extent(trans
, root
, blocknr
, 1, 0);
551 static int write_one_cache_group(struct btrfs_trans_handle
*trans
,
552 struct btrfs_root
*root
,
553 struct btrfs_path
*path
,
554 struct btrfs_block_group_cache
*cache
)
558 struct btrfs_root
*extent_root
= root
->fs_info
->extent_root
;
559 struct btrfs_block_group_item
*bi
;
560 struct btrfs_key ins
;
562 ret
= find_free_extent(trans
, extent_root
, 0, 0, (u64
)-1, 0, &ins
, 0);
563 /* FIXME, set bit to recalc cache groups on next mount */
566 ret
= btrfs_search_slot(trans
, extent_root
, &cache
->key
, path
, 0, 1);
570 bi
= btrfs_item_ptr(btrfs_buffer_leaf(path
->nodes
[0]), path
->slots
[0],
571 struct btrfs_block_group_item
);
572 memcpy(bi
, &cache
->item
, sizeof(*bi
));
573 mark_buffer_dirty(path
->nodes
[0]);
574 btrfs_release_path(extent_root
, path
);
576 finish_current_insert(trans
, extent_root
);
577 pending_ret
= del_pending_extents(trans
, extent_root
);
583 cache
->last_alloc
= cache
->first_free
;
588 static int write_dirty_block_radix(struct btrfs_trans_handle
*trans
,
589 struct btrfs_root
*root
,
590 struct radix_tree_root
*radix
)
592 struct btrfs_block_group_cache
*cache
[8];
597 struct btrfs_path
*path
;
598 unsigned long off
= 0;
600 path
= btrfs_alloc_path();
605 ret
= radix_tree_gang_lookup_tag(radix
, (void **)cache
,
606 off
, ARRAY_SIZE(cache
),
607 BTRFS_BLOCK_GROUP_DIRTY
);
610 for (i
= 0; i
< ret
; i
++) {
611 err
= write_one_cache_group(trans
, root
,
614 * if we fail to write the cache group, we want
615 * to keep it marked dirty in hopes that a later
620 off
= cache
[i
]->key
.objectid
+
621 cache
[i
]->key
.offset
;
625 radix_tree_tag_clear(radix
, cache
[i
]->key
.objectid
+
626 cache
[i
]->key
.offset
- 1,
627 BTRFS_BLOCK_GROUP_DIRTY
);
630 btrfs_free_path(path
);
634 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle
*trans
,
635 struct btrfs_root
*root
)
639 ret
= write_dirty_block_radix(trans
, root
,
640 &root
->fs_info
->block_group_radix
);
641 ret2
= write_dirty_block_radix(trans
, root
,
642 &root
->fs_info
->block_group_data_radix
);
650 static int update_block_group(struct btrfs_trans_handle
*trans
,
651 struct btrfs_root
*root
,
652 u64 blocknr
, u64 num
, int alloc
, int mark_free
,
655 struct btrfs_block_group_cache
*cache
;
656 struct btrfs_fs_info
*info
= root
->fs_info
;
664 cache
= btrfs_lookup_block_group(info
, blocknr
);
668 block_in_group
= blocknr
- cache
->key
.objectid
;
669 WARN_ON(block_in_group
> cache
->key
.offset
);
670 radix_tree_tag_set(cache
->radix
, cache
->key
.objectid
+
671 cache
->key
.offset
- 1,
672 BTRFS_BLOCK_GROUP_DIRTY
);
674 old_val
= btrfs_block_group_used(&cache
->item
);
675 num
= min(total
, cache
->key
.offset
- block_in_group
);
677 if (blocknr
> cache
->last_alloc
)
678 cache
->last_alloc
= blocknr
;
680 for (i
= 0; i
< num
; i
++) {
681 clear_radix_bit(&info
->extent_map_radix
,
685 if (cache
->data
!= data
&&
686 old_val
< (cache
->key
.offset
>> 1)) {
688 radix_tree_delete(cache
->radix
,
689 cache
->key
.objectid
+
690 cache
->key
.offset
- 1);
694 &info
->block_group_data_radix
;
696 BTRFS_BLOCK_GROUP_DATA
;
698 cache
->radix
= &info
->block_group_radix
;
700 ~BTRFS_BLOCK_GROUP_DATA
;
702 ret
= radix_tree_insert(cache
->radix
,
703 cache
->key
.objectid
+
704 cache
->key
.offset
- 1,
710 if (blocknr
< cache
->first_free
)
711 cache
->first_free
= blocknr
;
712 if (!cache
->data
&& mark_free
) {
713 for (i
= 0; i
< num
; i
++) {
714 set_radix_bit(&info
->extent_map_radix
,
718 if (old_val
< (cache
->key
.offset
>> 1) &&
719 old_val
+ num
>= (cache
->key
.offset
>> 1)) {
720 radix_tree_tag_set(cache
->radix
,
721 cache
->key
.objectid
+
722 cache
->key
.offset
- 1,
723 BTRFS_BLOCK_GROUP_AVAIL
);
726 btrfs_set_block_group_used(&cache
->item
, old_val
);
733 static int try_remove_page(struct address_space
*mapping
, unsigned long index
)
736 ret
= invalidate_mapping_pages(mapping
, index
, index
);
740 int btrfs_finish_extent_commit(struct btrfs_trans_handle
*trans
, struct
743 unsigned long gang
[8];
744 struct inode
*btree_inode
= root
->fs_info
->btree_inode
;
745 struct btrfs_block_group_cache
*block_group
;
749 struct radix_tree_root
*pinned_radix
= &root
->fs_info
->pinned_radix
;
750 struct radix_tree_root
*extent_radix
= &root
->fs_info
->extent_map_radix
;
753 ret
= find_first_radix_bit(pinned_radix
, gang
, 0,
759 for (i
= 0; i
< ret
; i
++) {
760 clear_radix_bit(pinned_radix
, gang
[i
]);
761 block_group
= btrfs_lookup_block_group(root
->fs_info
,
764 WARN_ON(block_group
->pinned
== 0);
765 block_group
->pinned
--;
766 if (gang
[i
] < block_group
->last_alloc
)
767 block_group
->last_alloc
= gang
[i
];
768 if (gang
[i
] < block_group
->last_prealloc
)
769 block_group
->last_prealloc
= gang
[i
];
770 if (!block_group
->data
)
771 set_radix_bit(extent_radix
, gang
[i
]);
773 try_remove_page(btree_inode
->i_mapping
,
774 gang
[i
] << (PAGE_CACHE_SHIFT
-
775 btree_inode
->i_blkbits
));
781 static int finish_current_insert(struct btrfs_trans_handle
*trans
, struct
782 btrfs_root
*extent_root
)
784 struct btrfs_key ins
;
785 struct btrfs_extent_item extent_item
;
788 u64 super_blocks_used
;
789 struct btrfs_fs_info
*info
= extent_root
->fs_info
;
791 btrfs_set_extent_refs(&extent_item
, 1);
794 btrfs_set_key_type(&ins
, BTRFS_EXTENT_ITEM_KEY
);
795 btrfs_set_extent_owner(&extent_item
, extent_root
->root_key
.objectid
);
797 for (i
= 0; i
< extent_root
->fs_info
->extent_tree_insert_nr
; i
++) {
798 ins
.objectid
= extent_root
->fs_info
->extent_tree_insert
[i
];
799 super_blocks_used
= btrfs_super_blocks_used(&info
->super_copy
);
800 btrfs_set_super_blocks_used(&info
->super_copy
,
801 super_blocks_used
+ 1);
802 ret
= btrfs_insert_item(trans
, extent_root
, &ins
, &extent_item
,
803 sizeof(extent_item
));
806 extent_root
->fs_info
->extent_tree_insert_nr
= 0;
810 static int pin_down_block(struct btrfs_root
*root
, u64 blocknr
, int pending
)
813 struct btrfs_header
*header
;
814 struct buffer_head
*bh
;
817 bh
= btrfs_find_tree_block(root
, blocknr
);
819 if (buffer_uptodate(bh
)) {
821 root
->fs_info
->running_transaction
->transid
;
822 header
= btrfs_buffer_header(bh
);
823 if (btrfs_header_generation(header
) ==
825 btrfs_block_release(root
, bh
);
829 btrfs_block_release(root
, bh
);
831 err
= set_radix_bit(&root
->fs_info
->pinned_radix
, blocknr
);
833 struct btrfs_block_group_cache
*cache
;
834 cache
= btrfs_lookup_block_group(root
->fs_info
,
840 err
= set_radix_bit(&root
->fs_info
->pending_del_radix
, blocknr
);
847 * remove an extent from the root, returns 0 on success
849 static int __free_extent(struct btrfs_trans_handle
*trans
, struct btrfs_root
850 *root
, u64 blocknr
, u64 num_blocks
, int pin
,
853 struct btrfs_path
*path
;
854 struct btrfs_key key
;
855 struct btrfs_fs_info
*info
= root
->fs_info
;
856 struct btrfs_root
*extent_root
= info
->extent_root
;
858 struct btrfs_extent_item
*ei
;
859 struct btrfs_key ins
;
862 key
.objectid
= blocknr
;
864 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
865 key
.offset
= num_blocks
;
867 path
= btrfs_alloc_path();
871 ret
= find_free_extent(trans
, root
, 0, 0, (u64
)-1, 0, &ins
, 0);
873 btrfs_free_path(path
);
877 ret
= btrfs_search_slot(trans
, extent_root
, &key
, path
, -1, 1);
881 ei
= btrfs_item_ptr(btrfs_buffer_leaf(path
->nodes
[0]), path
->slots
[0],
882 struct btrfs_extent_item
);
883 BUG_ON(ei
->refs
== 0);
884 refs
= btrfs_extent_refs(ei
) - 1;
885 btrfs_set_extent_refs(ei
, refs
);
886 btrfs_mark_buffer_dirty(path
->nodes
[0]);
888 u64 super_blocks_used
;
891 ret
= pin_down_block(root
, blocknr
, 0);
895 super_blocks_used
= btrfs_super_blocks_used(&info
->super_copy
);
896 btrfs_set_super_blocks_used(&info
->super_copy
,
897 super_blocks_used
- num_blocks
);
898 ret
= btrfs_del_item(trans
, extent_root
, path
);
902 ret
= update_block_group(trans
, root
, blocknr
, num_blocks
, 0,
906 btrfs_free_path(path
);
907 finish_current_insert(trans
, extent_root
);
912 * find all the blocks marked as pending in the radix tree and remove
913 * them from the extent map
915 static int del_pending_extents(struct btrfs_trans_handle
*trans
, struct
916 btrfs_root
*extent_root
)
921 unsigned long gang
[4];
923 struct radix_tree_root
*pending_radix
;
924 struct radix_tree_root
*pinned_radix
;
925 struct btrfs_block_group_cache
*cache
;
927 pending_radix
= &extent_root
->fs_info
->pending_del_radix
;
928 pinned_radix
= &extent_root
->fs_info
->pinned_radix
;
931 ret
= find_first_radix_bit(pending_radix
, gang
, 0,
935 for (i
= 0; i
< ret
; i
++) {
936 wret
= set_radix_bit(pinned_radix
, gang
[i
]);
939 btrfs_lookup_block_group(extent_root
->fs_info
,
945 printk(KERN_CRIT
"set_radix_bit, err %d\n",
949 wret
= clear_radix_bit(pending_radix
, gang
[i
]);
951 wret
= __free_extent(trans
, extent_root
,
961 * remove an extent from the root, returns 0 on success
963 int btrfs_free_extent(struct btrfs_trans_handle
*trans
, struct btrfs_root
964 *root
, u64 blocknr
, u64 num_blocks
, int pin
)
966 struct btrfs_root
*extent_root
= root
->fs_info
->extent_root
;
970 if (root
== extent_root
) {
971 pin_down_block(root
, blocknr
, 1);
974 ret
= __free_extent(trans
, root
, blocknr
, num_blocks
, pin
, pin
== 0);
975 pending_ret
= del_pending_extents(trans
, root
->fs_info
->extent_root
);
976 return ret
? ret
: pending_ret
;
980 * walks the btree of allocated extents and find a hole of a given size.
981 * The key ins is changed to record the hole:
982 * ins->objectid == block start
983 * ins->flags = BTRFS_EXTENT_ITEM_KEY
984 * ins->offset == number of blocks
985 * Any available blocks before search_start are skipped.
987 static int find_free_extent(struct btrfs_trans_handle
*trans
, struct btrfs_root
988 *orig_root
, u64 num_blocks
, u64 search_start
, u64
989 search_end
, u64 hint_block
,
990 struct btrfs_key
*ins
, int data
)
992 struct btrfs_path
*path
;
993 struct btrfs_key key
;
999 u64 orig_search_start
= search_start
;
1001 struct btrfs_leaf
*l
;
1002 struct btrfs_root
* root
= orig_root
->fs_info
->extent_root
;
1003 struct btrfs_fs_info
*info
= root
->fs_info
;
1004 int total_needed
= num_blocks
;
1005 int total_found
= 0;
1006 int fill_prealloc
= 0;
1008 struct btrfs_block_group_cache
*block_group
;
1014 btrfs_set_key_type(ins
, BTRFS_EXTENT_ITEM_KEY
);
1016 level
= btrfs_header_level(btrfs_buffer_header(root
->node
));
1017 if (num_blocks
== 0) {
1020 total_needed
= (min(level
+ 1, BTRFS_MAX_LEVEL
) + 2) * 3;
1022 if (fill_prealloc
) {
1024 int nr
= info
->extent_tree_prealloc_nr
;
1025 first
= info
->extent_tree_prealloc
[nr
- 1];
1026 if (info
->extent_tree_prealloc_nr
>= total_needed
&&
1027 first
>= search_start
) {
1028 ins
->objectid
= info
->extent_tree_prealloc
[0];
1032 info
->extent_tree_prealloc_nr
= 0;
1034 if (search_end
== (u64
)-1)
1035 search_end
= btrfs_super_total_blocks(&info
->super_copy
);
1037 block_group
= btrfs_lookup_block_group(info
, hint_block
);
1038 block_group
= btrfs_find_block_group(root
, block_group
,
1039 hint_block
, data
, 1);
1041 block_group
= btrfs_find_block_group(root
,
1042 trans
->block_group
, 0,
1046 path
= btrfs_alloc_path();
1049 if (!block_group
->data
)
1050 search_start
= find_search_start(root
, &block_group
,
1051 search_start
, total_needed
);
1052 else if (!full_scan
)
1053 search_start
= max(block_group
->last_alloc
, search_start
);
1055 btrfs_init_path(path
);
1056 ins
->objectid
= search_start
;
1060 ret
= btrfs_search_slot(trans
, root
, ins
, path
, 0, 0);
1064 if (path
->slots
[0] > 0) {
1068 l
= btrfs_buffer_leaf(path
->nodes
[0]);
1069 btrfs_disk_key_to_cpu(&key
, &l
->items
[path
->slots
[0]].key
);
1071 * a rare case, go back one key if we hit a block group item
1072 * instead of an extent item
1074 if (btrfs_key_type(&key
) != BTRFS_EXTENT_ITEM_KEY
&&
1075 key
.objectid
+ key
.offset
>= search_start
) {
1076 ins
->objectid
= key
.objectid
;
1077 ins
->offset
= key
.offset
- 1;
1078 btrfs_release_path(root
, path
);
1079 ret
= btrfs_search_slot(trans
, root
, ins
, path
, 0, 0);
1083 if (path
->slots
[0] > 0) {
1089 l
= btrfs_buffer_leaf(path
->nodes
[0]);
1090 slot
= path
->slots
[0];
1091 if (slot
>= btrfs_header_nritems(&l
->header
)) {
1092 if (fill_prealloc
) {
1093 info
->extent_tree_prealloc_nr
= 0;
1097 limit
= last_block
+
1098 (block_group
->key
.offset
>> 1);
1100 limit
= search_start
+
1101 (block_group
->key
.offset
>> 1);
1102 ret
= btrfs_next_leaf(root
, path
);
1108 ins
->objectid
= search_start
;
1109 ins
->offset
= search_end
- search_start
;
1113 ins
->objectid
= last_block
> search_start
?
1114 last_block
: search_start
;
1115 ins
->offset
= search_end
- ins
->objectid
;
1119 btrfs_disk_key_to_cpu(&key
, &l
->items
[slot
].key
);
1120 if (key
.objectid
>= search_start
&& key
.objectid
> last_block
&&
1122 if (last_block
< search_start
)
1123 last_block
= search_start
;
1124 hole_size
= key
.objectid
- last_block
;
1125 if (hole_size
>= num_blocks
) {
1126 ins
->objectid
= last_block
;
1127 ins
->offset
= hole_size
;
1132 if (btrfs_key_type(&key
) != BTRFS_EXTENT_ITEM_KEY
)
1136 last_block
= key
.objectid
+ key
.offset
;
1137 if (!full_scan
&& last_block
>= block_group
->key
.objectid
+
1138 block_group
->key
.offset
) {
1139 btrfs_release_path(root
, path
);
1140 search_start
= block_group
->key
.objectid
+
1141 block_group
->key
.offset
* 2;
1149 /* we have to make sure we didn't find an extent that has already
1150 * been allocated by the map tree or the original allocation
1152 btrfs_release_path(root
, path
);
1153 BUG_ON(ins
->objectid
< search_start
);
1155 if (ins
->objectid
+ num_blocks
>= search_end
) {
1160 search_start
= orig_search_start
;
1167 for (test_block
= ins
->objectid
;
1168 test_block
< ins
->objectid
+ num_blocks
; test_block
++) {
1169 if (test_radix_bit(&info
->pinned_radix
, test_block
)) {
1170 search_start
= test_block
+ 1;
1174 if (!fill_prealloc
&& info
->extent_tree_insert_nr
) {
1176 info
->extent_tree_insert
[info
->extent_tree_insert_nr
- 1];
1177 if (ins
->objectid
+ num_blocks
>
1178 info
->extent_tree_insert
[0] &&
1179 ins
->objectid
<= last
) {
1180 search_start
= last
+ 1;
1181 WARN_ON(!full_scan
);
1185 if (!fill_prealloc
&& info
->extent_tree_prealloc_nr
) {
1187 info
->extent_tree_prealloc
[info
->extent_tree_prealloc_nr
- 1];
1188 if (ins
->objectid
+ num_blocks
> first
&&
1189 ins
->objectid
<= info
->extent_tree_prealloc
[0]) {
1190 search_start
= info
->extent_tree_prealloc
[0] + 1;
1194 if (fill_prealloc
) {
1196 test_block
= ins
->objectid
;
1197 if (test_block
- info
->extent_tree_prealloc
[total_needed
- 1] >=
1200 info
->extent_tree_prealloc_nr
= total_found
;
1202 while(test_block
< ins
->objectid
+ ins
->offset
&&
1203 total_found
< total_needed
) {
1204 nr
= total_needed
- total_found
- 1;
1206 info
->extent_tree_prealloc
[nr
] = test_block
;
1210 if (total_found
< total_needed
) {
1211 search_start
= test_block
;
1214 info
->extent_tree_prealloc_nr
= total_found
;
1217 block_group
= btrfs_lookup_block_group(info
, ins
->objectid
);
1220 block_group
->last_prealloc
=
1221 info
->extent_tree_prealloc
[total_needed
-1];
1223 trans
->block_group
= block_group
;
1226 ins
->offset
= num_blocks
;
1227 btrfs_free_path(path
);
1231 if (search_start
+ num_blocks
>= search_end
) {
1232 search_start
= orig_search_start
;
1242 block_group
= btrfs_lookup_block_group(info
, search_start
);
1245 block_group
= btrfs_find_block_group(root
, block_group
,
1246 search_start
, data
, 0);
1250 btrfs_release_path(root
, path
);
1251 btrfs_free_path(path
);
1255 * finds a free extent and does all the dirty work required for allocation
1256 * returns the key for the extent through ins, and a tree buffer for
1257 * the first block of the extent through buf.
1259 * returns 0 if everything worked, non-zero otherwise.
1261 int btrfs_alloc_extent(struct btrfs_trans_handle
*trans
,
1262 struct btrfs_root
*root
, u64 owner
,
1263 u64 num_blocks
, u64 hint_block
,
1264 u64 search_end
, struct btrfs_key
*ins
, int data
)
1268 u64 super_blocks_used
;
1269 u64 search_start
= 0;
1270 struct btrfs_fs_info
*info
= root
->fs_info
;
1271 struct btrfs_root
*extent_root
= info
->extent_root
;
1272 struct btrfs_extent_item extent_item
;
1273 struct btrfs_key prealloc_key
;
1275 btrfs_set_extent_refs(&extent_item
, 1);
1276 btrfs_set_extent_owner(&extent_item
, owner
);
1278 if (root
== extent_root
) {
1280 BUG_ON(info
->extent_tree_prealloc_nr
== 0);
1281 BUG_ON(num_blocks
!= 1);
1283 info
->extent_tree_prealloc_nr
--;
1284 nr
= info
->extent_tree_prealloc_nr
;
1285 ins
->objectid
= info
->extent_tree_prealloc
[nr
];
1286 info
->extent_tree_insert
[info
->extent_tree_insert_nr
++] =
1288 ret
= update_block_group(trans
, root
,
1289 ins
->objectid
, ins
->offset
, 1, 0, 0);
1295 * if we're doing a data allocation, preallocate room in the
1296 * extent tree first. This way the extent tree blocks end up
1297 * in the correct block group.
1300 ret
= find_free_extent(trans
, root
, 0, 0,
1301 search_end
, 0, &prealloc_key
, 0);
1305 if (prealloc_key
.objectid
+ prealloc_key
.offset
>= search_end
) {
1306 int nr
= info
->extent_tree_prealloc_nr
;
1307 search_end
= info
->extent_tree_prealloc
[nr
- 1] - 1;
1309 search_start
= info
->extent_tree_prealloc
[0] + 1;
1312 if (hint_block
< search_start
)
1313 hint_block
= search_start
;
1314 /* do the real allocation */
1315 ret
= find_free_extent(trans
, root
, num_blocks
, search_start
,
1316 search_end
, hint_block
, ins
, data
);
1318 if (search_start
== 0)
1320 search_end
= search_start
- 1;
1322 hint_block
= search_start
;
1323 ret
= find_free_extent(trans
, root
, num_blocks
, search_start
,
1324 search_end
, hint_block
, ins
, data
);
1330 * if we're doing a metadata allocation, preallocate space in the
1331 * extent tree second. This way, we don't create a tiny hole
1332 * in the allocation map between any unused preallocation blocks
1333 * and the metadata block we're actually allocating. On disk,
1335 * [block we've allocated], [used prealloc 1], [ unused prealloc ]
1336 * The unused prealloc will get reused the next time around.
1339 if (ins
->objectid
+ ins
->offset
>= search_end
)
1340 search_end
= ins
->objectid
- 1;
1342 search_start
= ins
->objectid
+ ins
->offset
;
1344 if (hint_block
< search_start
)
1345 hint_block
= search_start
;
1347 ret
= find_free_extent(trans
, root
, 0, search_start
,
1348 search_end
, hint_block
,
1351 if (search_start
== 0)
1353 search_end
= search_start
- 1;
1355 hint_block
= search_start
;
1356 ret
= find_free_extent(trans
, root
, 0, search_start
,
1357 search_end
, hint_block
,
1364 super_blocks_used
= btrfs_super_blocks_used(&info
->super_copy
);
1365 btrfs_set_super_blocks_used(&info
->super_copy
, super_blocks_used
+
1367 ret
= btrfs_insert_item(trans
, extent_root
, ins
, &extent_item
,
1368 sizeof(extent_item
));
1370 finish_current_insert(trans
, extent_root
);
1371 pending_ret
= del_pending_extents(trans
, extent_root
);
1378 ret
= update_block_group(trans
, root
, ins
->objectid
, ins
->offset
, 1, 0,
1385 * helper function to allocate a block for a given tree
1386 * returns the tree buffer or NULL.
1388 struct buffer_head
*btrfs_alloc_free_block(struct btrfs_trans_handle
*trans
,
1389 struct btrfs_root
*root
, u64 hint
)
1391 struct btrfs_key ins
;
1393 struct buffer_head
*buf
;
1395 ret
= btrfs_alloc_extent(trans
, root
, root
->root_key
.objectid
,
1396 1, hint
, (unsigned long)-1, &ins
, 0);
1399 return ERR_PTR(ret
);
1401 buf
= btrfs_find_create_tree_block(root
, ins
.objectid
);
1403 btrfs_free_extent(trans
, root
, ins
.objectid
, 1, 0);
1404 return ERR_PTR(-ENOMEM
);
1406 set_buffer_uptodate(buf
);
1407 set_buffer_checked(buf
);
1408 set_radix_bit(&trans
->transaction
->dirty_pages
, buf
->b_page
->index
);
1412 static int drop_leaf_ref(struct btrfs_trans_handle
*trans
,
1413 struct btrfs_root
*root
, struct buffer_head
*cur
)
1415 struct btrfs_disk_key
*key
;
1416 struct btrfs_leaf
*leaf
;
1417 struct btrfs_file_extent_item
*fi
;
1422 BUG_ON(!btrfs_is_leaf(btrfs_buffer_node(cur
)));
1423 leaf
= btrfs_buffer_leaf(cur
);
1424 nritems
= btrfs_header_nritems(&leaf
->header
);
1425 for (i
= 0; i
< nritems
; i
++) {
1427 key
= &leaf
->items
[i
].key
;
1428 if (btrfs_disk_key_type(key
) != BTRFS_EXTENT_DATA_KEY
)
1430 fi
= btrfs_item_ptr(leaf
, i
, struct btrfs_file_extent_item
);
1431 if (btrfs_file_extent_type(fi
) == BTRFS_FILE_EXTENT_INLINE
)
1434 * FIXME make sure to insert a trans record that
1435 * repeats the snapshot del on crash
1437 disk_blocknr
= btrfs_file_extent_disk_blocknr(fi
);
1438 if (disk_blocknr
== 0)
1440 ret
= btrfs_free_extent(trans
, root
, disk_blocknr
,
1441 btrfs_file_extent_disk_num_blocks(fi
),
1448 static void reada_walk_down(struct btrfs_root
*root
,
1449 struct btrfs_node
*node
)
1457 nritems
= btrfs_header_nritems(&node
->header
);
1458 for (i
= 0; i
< nritems
; i
++) {
1459 blocknr
= btrfs_node_blockptr(node
, i
);
1460 ret
= lookup_extent_ref(NULL
, root
, blocknr
, 1, &refs
);
1464 ret
= readahead_tree_block(root
, blocknr
);
1471 * helper function for drop_snapshot, this walks down the tree dropping ref
1472 * counts as it goes.
1474 static int walk_down_tree(struct btrfs_trans_handle
*trans
, struct btrfs_root
1475 *root
, struct btrfs_path
*path
, int *level
)
1477 struct buffer_head
*next
;
1478 struct buffer_head
*cur
;
1483 WARN_ON(*level
< 0);
1484 WARN_ON(*level
>= BTRFS_MAX_LEVEL
);
1485 ret
= lookup_extent_ref(trans
, root
, bh_blocknr(path
->nodes
[*level
]),
1492 * walk down to the last node level and free all the leaves
1494 while(*level
>= 0) {
1495 WARN_ON(*level
< 0);
1496 WARN_ON(*level
>= BTRFS_MAX_LEVEL
);
1497 cur
= path
->nodes
[*level
];
1499 if (*level
> 0 && path
->slots
[*level
] == 0)
1500 reada_walk_down(root
, btrfs_buffer_node(cur
));
1502 if (btrfs_header_level(btrfs_buffer_header(cur
)) != *level
)
1505 if (path
->slots
[*level
] >=
1506 btrfs_header_nritems(btrfs_buffer_header(cur
)))
1509 ret
= drop_leaf_ref(trans
, root
, cur
);
1513 blocknr
= btrfs_node_blockptr(btrfs_buffer_node(cur
),
1514 path
->slots
[*level
]);
1515 ret
= lookup_extent_ref(trans
, root
, blocknr
, 1, &refs
);
1518 path
->slots
[*level
]++;
1519 ret
= btrfs_free_extent(trans
, root
, blocknr
, 1, 1);
1523 next
= read_tree_block(root
, blocknr
);
1524 WARN_ON(*level
<= 0);
1525 if (path
->nodes
[*level
-1])
1526 btrfs_block_release(root
, path
->nodes
[*level
-1]);
1527 path
->nodes
[*level
-1] = next
;
1528 *level
= btrfs_header_level(btrfs_buffer_header(next
));
1529 path
->slots
[*level
] = 0;
1532 WARN_ON(*level
< 0);
1533 WARN_ON(*level
>= BTRFS_MAX_LEVEL
);
1534 ret
= btrfs_free_extent(trans
, root
,
1535 bh_blocknr(path
->nodes
[*level
]), 1, 1);
1536 btrfs_block_release(root
, path
->nodes
[*level
]);
1537 path
->nodes
[*level
] = NULL
;
1544 * helper for dropping snapshots. This walks back up the tree in the path
1545 * to find the first node higher up where we haven't yet gone through
1548 static int walk_up_tree(struct btrfs_trans_handle
*trans
, struct btrfs_root
1549 *root
, struct btrfs_path
*path
, int *level
)
1554 for(i
= *level
; i
< BTRFS_MAX_LEVEL
- 1 && path
->nodes
[i
]; i
++) {
1555 slot
= path
->slots
[i
];
1556 if (slot
< btrfs_header_nritems(
1557 btrfs_buffer_header(path
->nodes
[i
])) - 1) {
1562 ret
= btrfs_free_extent(trans
, root
,
1563 bh_blocknr(path
->nodes
[*level
]),
1566 btrfs_block_release(root
, path
->nodes
[*level
]);
1567 path
->nodes
[*level
] = NULL
;
1575 * drop the reference count on the tree rooted at 'snap'. This traverses
1576 * the tree freeing any blocks that have a ref count of zero after being
1579 int btrfs_drop_snapshot(struct btrfs_trans_handle
*trans
, struct btrfs_root
1580 *root
, struct buffer_head
*snap
)
1585 struct btrfs_path
*path
;
1589 path
= btrfs_alloc_path();
1592 level
= btrfs_header_level(btrfs_buffer_header(snap
));
1594 path
->nodes
[level
] = snap
;
1595 path
->slots
[level
] = 0;
1597 wret
= walk_down_tree(trans
, root
, path
, &level
);
1603 wret
= walk_up_tree(trans
, root
, path
, &level
);
1609 for (i
= 0; i
<= orig_level
; i
++) {
1610 if (path
->nodes
[i
]) {
1611 btrfs_block_release(root
, path
->nodes
[i
]);
1614 btrfs_free_path(path
);
1618 static int free_block_group_radix(struct radix_tree_root
*radix
)
1621 struct btrfs_block_group_cache
*cache
[8];
1625 ret
= radix_tree_gang_lookup(radix
, (void **)cache
, 0,
1629 for (i
= 0; i
< ret
; i
++) {
1630 radix_tree_delete(radix
, cache
[i
]->key
.objectid
+
1631 cache
[i
]->key
.offset
- 1);
1638 int btrfs_free_block_groups(struct btrfs_fs_info
*info
)
1642 unsigned long gang
[16];
1645 ret
= free_block_group_radix(&info
->block_group_radix
);
1646 ret2
= free_block_group_radix(&info
->block_group_data_radix
);
1653 ret
= find_first_radix_bit(&info
->extent_map_radix
,
1654 gang
, 0, ARRAY_SIZE(gang
));
1657 for (i
= 0; i
< ret
; i
++) {
1658 clear_radix_bit(&info
->extent_map_radix
, gang
[i
]);
1664 int btrfs_read_block_groups(struct btrfs_root
*root
)
1666 struct btrfs_path
*path
;
1669 struct btrfs_block_group_item
*bi
;
1670 struct btrfs_block_group_cache
*cache
;
1671 struct btrfs_fs_info
*info
= root
->fs_info
;
1672 struct radix_tree_root
*radix
;
1673 struct btrfs_key key
;
1674 struct btrfs_key found_key
;
1675 struct btrfs_leaf
*leaf
;
1676 u64 group_size_blocks
;
1679 group_size_blocks
= BTRFS_BLOCK_GROUP_SIZE
>>
1680 root
->fs_info
->sb
->s_blocksize_bits
;
1681 root
= info
->extent_root
;
1683 key
.offset
= group_size_blocks
;
1685 btrfs_set_key_type(&key
, BTRFS_BLOCK_GROUP_ITEM_KEY
);
1687 path
= btrfs_alloc_path();
1692 ret
= btrfs_search_slot(NULL
, info
->extent_root
,
1698 leaf
= btrfs_buffer_leaf(path
->nodes
[0]);
1699 btrfs_disk_key_to_cpu(&found_key
,
1700 &leaf
->items
[path
->slots
[0]].key
);
1701 cache
= kmalloc(sizeof(*cache
), GFP_NOFS
);
1707 bi
= btrfs_item_ptr(leaf
, path
->slots
[0],
1708 struct btrfs_block_group_item
);
1709 if (bi
->flags
& BTRFS_BLOCK_GROUP_DATA
) {
1710 radix
= &info
->block_group_data_radix
;
1713 radix
= &info
->block_group_radix
;
1717 memcpy(&cache
->item
, bi
, sizeof(*bi
));
1718 memcpy(&cache
->key
, &found_key
, sizeof(found_key
));
1719 cache
->last_alloc
= cache
->key
.objectid
;
1720 cache
->first_free
= cache
->key
.objectid
;
1721 cache
->last_prealloc
= cache
->key
.objectid
;
1725 cache
->radix
= radix
;
1727 key
.objectid
= found_key
.objectid
+ found_key
.offset
;
1728 btrfs_release_path(root
, path
);
1729 ret
= radix_tree_insert(radix
, found_key
.objectid
+
1730 found_key
.offset
- 1,
1733 used
= btrfs_block_group_used(bi
);
1734 if (used
< div_factor(key
.offset
, 8)) {
1735 radix_tree_tag_set(radix
, found_key
.objectid
+
1736 found_key
.offset
- 1,
1737 BTRFS_BLOCK_GROUP_AVAIL
);
1740 btrfs_super_total_blocks(&info
->super_copy
))
1744 btrfs_free_path(path
);