2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
19 #include <linux/sched.h>
22 #include "print-tree.h"
23 #include "transaction.h"
25 static int finish_current_insert(struct btrfs_trans_handle
*trans
, struct
26 btrfs_root
*extent_root
);
27 static int del_pending_extents(struct btrfs_trans_handle
*trans
, struct
28 btrfs_root
*extent_root
);
30 static int cache_block_group(struct btrfs_root
*root
,
31 struct btrfs_block_group_cache
*block_group
)
33 struct btrfs_path
*path
;
36 struct btrfs_leaf
*leaf
;
37 struct radix_tree_root
*extent_radix
;
45 root
= root
->fs_info
->extent_root
;
46 extent_radix
= &root
->fs_info
->extent_map_radix
;
48 if (block_group
->cached
)
50 if (block_group
->data
)
52 path
= btrfs_alloc_path();
57 first_free
= block_group
->key
.objectid
;
58 key
.objectid
= block_group
->key
.objectid
;
62 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
63 ret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
68 if (ret
&& path
->slots
[0] > 0)
72 leaf
= btrfs_buffer_leaf(path
->nodes
[0]);
73 slot
= path
->slots
[0];
74 if (slot
>= btrfs_header_nritems(&leaf
->header
)) {
75 ret
= btrfs_next_leaf(root
, path
);
85 btrfs_disk_key_to_cpu(&key
, &leaf
->items
[slot
].key
);
86 if (key
.objectid
< block_group
->key
.objectid
) {
87 if (key
.objectid
+ key
.offset
> first_free
)
88 first_free
= key
.objectid
+ key
.offset
;
92 if (key
.objectid
>= block_group
->key
.objectid
+
93 block_group
->key
.offset
) {
97 if (btrfs_key_type(&key
) == BTRFS_EXTENT_ITEM_KEY
) {
102 hole_size
= key
.objectid
- last
;
103 for (i
= 0; i
< hole_size
; i
++) {
104 set_radix_bit(extent_radix
, last
+ i
);
106 last
= key
.objectid
+ key
.offset
;
114 if (block_group
->key
.objectid
+
115 block_group
->key
.offset
> last
) {
116 hole_size
= block_group
->key
.objectid
+
117 block_group
->key
.offset
- last
;
118 for (i
= 0; i
< hole_size
; i
++) {
119 set_radix_bit(extent_radix
,
123 block_group
->cached
= 1;
125 btrfs_free_path(path
);
129 struct btrfs_block_group_cache
*btrfs_lookup_block_group(struct
133 struct btrfs_block_group_cache
*block_group
;
136 ret
= radix_tree_gang_lookup(&info
->block_group_radix
,
137 (void **)&block_group
,
140 if (block_group
->key
.objectid
<= blocknr
&& blocknr
<=
141 block_group
->key
.objectid
+ block_group
->key
.offset
)
144 ret
= radix_tree_gang_lookup(&info
->block_group_data_radix
,
145 (void **)&block_group
,
148 if (block_group
->key
.objectid
<= blocknr
&& blocknr
<=
149 block_group
->key
.objectid
+ block_group
->key
.offset
)
155 static u64
leaf_range(struct btrfs_root
*root
)
157 u64 size
= BTRFS_LEAF_DATA_SIZE(root
);
158 do_div(size
, sizeof(struct btrfs_extent_item
) +
159 sizeof(struct btrfs_item
));
163 static u64
find_search_start(struct btrfs_root
*root
,
164 struct btrfs_block_group_cache
**cache_ret
,
165 u64 search_start
, int num
)
167 unsigned long gang
[8];
169 struct btrfs_block_group_cache
*cache
= *cache_ret
;
170 u64 last
= max(search_start
, cache
->key
.objectid
);
175 ret
= cache_block_group(root
, cache
);
179 ret
= find_first_radix_bit(&root
->fs_info
->extent_map_radix
,
180 gang
, last
, ARRAY_SIZE(gang
));
183 last
= gang
[ret
-1] + 1;
185 if (ret
!= ARRAY_SIZE(gang
)) {
188 if (gang
[ret
-1] - gang
[0] > leaf_range(root
)) {
192 if (gang
[0] >= cache
->key
.objectid
+ cache
->key
.offset
) {
198 return max(cache
->last_alloc
, search_start
);
201 cache
= btrfs_lookup_block_group(root
->fs_info
,
202 last
+ cache
->key
.offset
- 1);
204 return max((*cache_ret
)->last_alloc
, search_start
);
206 cache
= btrfs_find_block_group(root
, cache
,
207 last
+ cache
->key
.offset
- 1, 0, 0);
212 static u64
div_factor(u64 num
, int factor
)
219 struct btrfs_block_group_cache
*btrfs_find_block_group(struct btrfs_root
*root
,
220 struct btrfs_block_group_cache
221 *hint
, u64 search_start
,
224 struct btrfs_block_group_cache
*cache
[8];
225 struct btrfs_block_group_cache
*found_group
= NULL
;
226 struct btrfs_fs_info
*info
= root
->fs_info
;
227 struct radix_tree_root
*radix
;
228 struct radix_tree_root
*swap_radix
;
242 radix
= &info
->block_group_data_radix
;
243 swap_radix
= &info
->block_group_radix
;
245 radix
= &info
->block_group_radix
;
246 swap_radix
= &info
->block_group_data_radix
;
250 struct btrfs_block_group_cache
*shint
;
251 shint
= btrfs_lookup_block_group(info
, search_start
);
252 if (shint
&& shint
->data
== data
) {
253 used
= btrfs_block_group_used(&shint
->item
);
254 if (used
+ shint
->pinned
<
255 div_factor(shint
->key
.offset
, factor
)) {
260 if (hint
&& hint
->data
== data
) {
261 used
= btrfs_block_group_used(&hint
->item
);
262 if (used
+ hint
->pinned
<
263 div_factor(hint
->key
.offset
, factor
)) {
266 if (used
>= div_factor(hint
->key
.offset
, 8)) {
267 radix_tree_tag_clear(radix
,
269 hint
->key
.offset
- 1,
270 BTRFS_BLOCK_GROUP_AVAIL
);
272 last
= hint
->key
.offset
* 3;
273 if (hint
->key
.objectid
>= last
)
274 last
= max(search_start
+ hint
->key
.offset
- 1,
275 hint
->key
.objectid
- last
);
277 last
= hint
->key
.objectid
+ hint
->key
.offset
;
281 hint_last
= max(hint
->key
.objectid
, search_start
);
283 hint_last
= search_start
;
288 ret
= radix_tree_gang_lookup_tag(radix
, (void **)cache
,
289 last
, ARRAY_SIZE(cache
),
290 BTRFS_BLOCK_GROUP_AVAIL
);
293 for (i
= 0; i
< ret
; i
++) {
294 last
= cache
[i
]->key
.objectid
+
295 cache
[i
]->key
.offset
;
296 used
= btrfs_block_group_used(&cache
[i
]->item
);
297 if (used
+ cache
[i
]->pinned
<
298 div_factor(cache
[i
]->key
.offset
, factor
)) {
299 found_group
= cache
[i
];
302 if (used
>= div_factor(cache
[i
]->key
.offset
, 8)) {
303 radix_tree_tag_clear(radix
,
304 cache
[i
]->key
.objectid
+
305 cache
[i
]->key
.offset
- 1,
306 BTRFS_BLOCK_GROUP_AVAIL
);
314 ret
= radix_tree_gang_lookup(radix
, (void **)cache
,
315 last
, ARRAY_SIZE(cache
));
318 for (i
= 0; i
< ret
; i
++) {
319 last
= cache
[i
]->key
.objectid
+
320 cache
[i
]->key
.offset
;
321 used
= btrfs_block_group_used(&cache
[i
]->item
);
322 if (used
+ cache
[i
]->pinned
< cache
[i
]->key
.offset
) {
323 found_group
= cache
[i
];
326 if (used
>= cache
[i
]->key
.offset
) {
327 radix_tree_tag_clear(radix
,
328 cache
[i
]->key
.objectid
+
329 cache
[i
]->key
.offset
- 1,
330 BTRFS_BLOCK_GROUP_AVAIL
);
341 struct radix_tree_root
*tmp
= radix
;
349 ret
= radix_tree_gang_lookup(radix
,
350 (void **)&found_group
, 0, 1);
352 ret
= radix_tree_gang_lookup(swap_radix
,
353 (void **)&found_group
,
362 int btrfs_inc_extent_ref(struct btrfs_trans_handle
*trans
,
363 struct btrfs_root
*root
,
364 u64 blocknr
, u64 num_blocks
)
366 struct btrfs_path
*path
;
368 struct btrfs_key key
;
369 struct btrfs_leaf
*l
;
370 struct btrfs_extent_item
*item
;
373 path
= btrfs_alloc_path();
377 key
.objectid
= blocknr
;
379 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
380 key
.offset
= num_blocks
;
381 ret
= btrfs_search_slot(trans
, root
->fs_info
->extent_root
, &key
, path
,
389 l
= btrfs_buffer_leaf(path
->nodes
[0]);
390 item
= btrfs_item_ptr(l
, path
->slots
[0], struct btrfs_extent_item
);
391 refs
= btrfs_extent_refs(item
);
392 btrfs_set_extent_refs(item
, refs
+ 1);
393 btrfs_mark_buffer_dirty(path
->nodes
[0]);
395 btrfs_release_path(root
->fs_info
->extent_root
, path
);
396 btrfs_free_path(path
);
397 finish_current_insert(trans
, root
->fs_info
->extent_root
);
398 del_pending_extents(trans
, root
->fs_info
->extent_root
);
402 int btrfs_extent_post_op(struct btrfs_trans_handle
*trans
,
403 struct btrfs_root
*root
)
405 finish_current_insert(trans
, root
->fs_info
->extent_root
);
406 del_pending_extents(trans
, root
->fs_info
->extent_root
);
410 static int lookup_extent_ref(struct btrfs_trans_handle
*trans
,
411 struct btrfs_root
*root
, u64 blocknr
,
412 u64 num_blocks
, u32
*refs
)
414 struct btrfs_path
*path
;
416 struct btrfs_key key
;
417 struct btrfs_leaf
*l
;
418 struct btrfs_extent_item
*item
;
420 path
= btrfs_alloc_path();
421 key
.objectid
= blocknr
;
422 key
.offset
= num_blocks
;
424 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
425 ret
= btrfs_search_slot(trans
, root
->fs_info
->extent_root
, &key
, path
,
431 l
= btrfs_buffer_leaf(path
->nodes
[0]);
432 item
= btrfs_item_ptr(l
, path
->slots
[0], struct btrfs_extent_item
);
433 *refs
= btrfs_extent_refs(item
);
435 btrfs_free_path(path
);
439 int btrfs_inc_root_ref(struct btrfs_trans_handle
*trans
,
440 struct btrfs_root
*root
)
442 return btrfs_inc_extent_ref(trans
, root
, bh_blocknr(root
->node
), 1);
445 int btrfs_inc_ref(struct btrfs_trans_handle
*trans
, struct btrfs_root
*root
,
446 struct buffer_head
*buf
)
449 struct btrfs_node
*buf_node
;
450 struct btrfs_leaf
*buf_leaf
;
451 struct btrfs_disk_key
*key
;
452 struct btrfs_file_extent_item
*fi
;
461 buf_node
= btrfs_buffer_node(buf
);
462 leaf
= btrfs_is_leaf(buf_node
);
463 buf_leaf
= btrfs_buffer_leaf(buf
);
464 for (i
= 0; i
< btrfs_header_nritems(&buf_node
->header
); i
++) {
467 key
= &buf_leaf
->items
[i
].key
;
468 if (btrfs_disk_key_type(key
) != BTRFS_EXTENT_DATA_KEY
)
470 fi
= btrfs_item_ptr(buf_leaf
, i
,
471 struct btrfs_file_extent_item
);
472 if (btrfs_file_extent_type(fi
) ==
473 BTRFS_FILE_EXTENT_INLINE
)
475 disk_blocknr
= btrfs_file_extent_disk_blocknr(fi
);
476 if (disk_blocknr
== 0)
478 ret
= btrfs_inc_extent_ref(trans
, root
, disk_blocknr
,
479 btrfs_file_extent_disk_num_blocks(fi
));
485 blocknr
= btrfs_node_blockptr(buf_node
, i
);
486 ret
= btrfs_inc_extent_ref(trans
, root
, blocknr
, 1);
496 for (i
=0; i
< faili
; i
++) {
499 key
= &buf_leaf
->items
[i
].key
;
500 if (btrfs_disk_key_type(key
) != BTRFS_EXTENT_DATA_KEY
)
502 fi
= btrfs_item_ptr(buf_leaf
, i
,
503 struct btrfs_file_extent_item
);
504 if (btrfs_file_extent_type(fi
) ==
505 BTRFS_FILE_EXTENT_INLINE
)
507 disk_blocknr
= btrfs_file_extent_disk_blocknr(fi
);
508 if (disk_blocknr
== 0)
510 err
= btrfs_free_extent(trans
, root
, disk_blocknr
,
511 btrfs_file_extent_disk_num_blocks(fi
), 0);
514 blocknr
= btrfs_node_blockptr(buf_node
, i
);
515 err
= btrfs_free_extent(trans
, root
, blocknr
, 1, 0);
522 static int write_one_cache_group(struct btrfs_trans_handle
*trans
,
523 struct btrfs_root
*root
,
524 struct btrfs_path
*path
,
525 struct btrfs_block_group_cache
*cache
)
529 struct btrfs_root
*extent_root
= root
->fs_info
->extent_root
;
530 struct btrfs_block_group_item
*bi
;
532 ret
= btrfs_search_slot(trans
, extent_root
, &cache
->key
, path
, 0, 1);
536 bi
= btrfs_item_ptr(btrfs_buffer_leaf(path
->nodes
[0]), path
->slots
[0],
537 struct btrfs_block_group_item
);
538 memcpy(bi
, &cache
->item
, sizeof(*bi
));
539 btrfs_mark_buffer_dirty(path
->nodes
[0]);
540 btrfs_release_path(extent_root
, path
);
542 finish_current_insert(trans
, extent_root
);
543 pending_ret
= del_pending_extents(trans
, extent_root
);
549 cache
->last_alloc
= cache
->first_free
;
554 static int write_dirty_block_radix(struct btrfs_trans_handle
*trans
,
555 struct btrfs_root
*root
,
556 struct radix_tree_root
*radix
)
558 struct btrfs_block_group_cache
*cache
[8];
563 struct btrfs_path
*path
;
564 unsigned long off
= 0;
566 path
= btrfs_alloc_path();
571 ret
= radix_tree_gang_lookup_tag(radix
, (void **)cache
,
572 off
, ARRAY_SIZE(cache
),
573 BTRFS_BLOCK_GROUP_DIRTY
);
576 for (i
= 0; i
< ret
; i
++) {
577 err
= write_one_cache_group(trans
, root
,
580 * if we fail to write the cache group, we want
581 * to keep it marked dirty in hopes that a later
586 off
= cache
[i
]->key
.objectid
+
587 cache
[i
]->key
.offset
;
591 radix_tree_tag_clear(radix
, cache
[i
]->key
.objectid
+
592 cache
[i
]->key
.offset
- 1,
593 BTRFS_BLOCK_GROUP_DIRTY
);
596 btrfs_free_path(path
);
600 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle
*trans
,
601 struct btrfs_root
*root
)
605 ret
= write_dirty_block_radix(trans
, root
,
606 &root
->fs_info
->block_group_radix
);
607 ret2
= write_dirty_block_radix(trans
, root
,
608 &root
->fs_info
->block_group_data_radix
);
616 static int update_block_group(struct btrfs_trans_handle
*trans
,
617 struct btrfs_root
*root
,
618 u64 blocknr
, u64 num
, int alloc
, int mark_free
,
621 struct btrfs_block_group_cache
*cache
;
622 struct btrfs_fs_info
*info
= root
->fs_info
;
630 cache
= btrfs_lookup_block_group(info
, blocknr
);
634 block_in_group
= blocknr
- cache
->key
.objectid
;
635 WARN_ON(block_in_group
> cache
->key
.offset
);
636 radix_tree_tag_set(cache
->radix
, cache
->key
.objectid
+
637 cache
->key
.offset
- 1,
638 BTRFS_BLOCK_GROUP_DIRTY
);
640 old_val
= btrfs_block_group_used(&cache
->item
);
641 num
= min(total
, cache
->key
.offset
- block_in_group
);
643 if (blocknr
> cache
->last_alloc
)
644 cache
->last_alloc
= blocknr
;
646 for (i
= 0; i
< num
; i
++) {
647 clear_radix_bit(&info
->extent_map_radix
,
651 if (cache
->data
!= data
&&
652 old_val
< (cache
->key
.offset
>> 1)) {
654 radix_tree_delete(cache
->radix
,
655 cache
->key
.objectid
+
656 cache
->key
.offset
- 1);
660 &info
->block_group_data_radix
;
662 BTRFS_BLOCK_GROUP_DATA
;
664 cache
->radix
= &info
->block_group_radix
;
666 ~BTRFS_BLOCK_GROUP_DATA
;
668 ret
= radix_tree_insert(cache
->radix
,
669 cache
->key
.objectid
+
670 cache
->key
.offset
- 1,
676 if (blocknr
< cache
->first_free
)
677 cache
->first_free
= blocknr
;
678 if (!cache
->data
&& mark_free
) {
679 for (i
= 0; i
< num
; i
++) {
680 set_radix_bit(&info
->extent_map_radix
,
684 if (old_val
< (cache
->key
.offset
>> 1) &&
685 old_val
+ num
>= (cache
->key
.offset
>> 1)) {
686 radix_tree_tag_set(cache
->radix
,
687 cache
->key
.objectid
+
688 cache
->key
.offset
- 1,
689 BTRFS_BLOCK_GROUP_AVAIL
);
692 btrfs_set_block_group_used(&cache
->item
, old_val
);
699 int btrfs_copy_pinned(struct btrfs_root
*root
, struct radix_tree_root
*copy
)
701 unsigned long gang
[8];
703 struct radix_tree_root
*pinned_radix
= &root
->fs_info
->pinned_radix
;
708 ret
= find_first_radix_bit(pinned_radix
, gang
, last
,
712 for (i
= 0 ; i
< ret
; i
++) {
713 set_radix_bit(copy
, gang
[i
]);
717 ret
= find_first_radix_bit(&root
->fs_info
->extent_ins_radix
, gang
, 0,
723 int btrfs_finish_extent_commit(struct btrfs_trans_handle
*trans
,
724 struct btrfs_root
*root
,
725 struct radix_tree_root
*unpin_radix
)
727 unsigned long gang
[8];
728 struct btrfs_block_group_cache
*block_group
;
732 struct radix_tree_root
*pinned_radix
= &root
->fs_info
->pinned_radix
;
733 struct radix_tree_root
*extent_radix
= &root
->fs_info
->extent_map_radix
;
736 ret
= find_first_radix_bit(unpin_radix
, gang
, 0,
742 for (i
= 0; i
< ret
; i
++) {
743 clear_radix_bit(pinned_radix
, gang
[i
]);
744 clear_radix_bit(unpin_radix
, gang
[i
]);
745 block_group
= btrfs_lookup_block_group(root
->fs_info
,
748 WARN_ON(block_group
->pinned
== 0);
749 block_group
->pinned
--;
750 if (gang
[i
] < block_group
->last_alloc
)
751 block_group
->last_alloc
= gang
[i
];
752 if (!block_group
->data
)
753 set_radix_bit(extent_radix
, gang
[i
]);
760 static int finish_current_insert(struct btrfs_trans_handle
*trans
, struct
761 btrfs_root
*extent_root
)
763 struct btrfs_key ins
;
764 struct btrfs_extent_item extent_item
;
768 unsigned long gang
[8];
769 struct btrfs_fs_info
*info
= extent_root
->fs_info
;
771 btrfs_set_extent_refs(&extent_item
, 1);
774 btrfs_set_key_type(&ins
, BTRFS_EXTENT_ITEM_KEY
);
775 btrfs_set_extent_owner(&extent_item
, extent_root
->root_key
.objectid
);
778 ret
= find_first_radix_bit(&info
->extent_ins_radix
, gang
, 0,
783 for (i
= 0; i
< ret
; i
++) {
784 ins
.objectid
= gang
[i
];
785 err
= btrfs_insert_item(trans
, extent_root
, &ins
,
787 sizeof(extent_item
));
788 clear_radix_bit(&info
->extent_ins_radix
, gang
[i
]);
795 static int pin_down_block(struct btrfs_root
*root
, u64 blocknr
, int pending
)
798 struct btrfs_header
*header
;
799 struct buffer_head
*bh
;
802 bh
= btrfs_find_tree_block(root
, blocknr
);
804 if (buffer_uptodate(bh
)) {
806 root
->fs_info
->running_transaction
->transid
;
807 header
= btrfs_buffer_header(bh
);
808 if (btrfs_header_generation(header
) ==
810 btrfs_block_release(root
, bh
);
814 btrfs_block_release(root
, bh
);
816 err
= set_radix_bit(&root
->fs_info
->pinned_radix
, blocknr
);
818 struct btrfs_block_group_cache
*cache
;
819 cache
= btrfs_lookup_block_group(root
->fs_info
,
825 err
= set_radix_bit(&root
->fs_info
->pending_del_radix
, blocknr
);
832 * remove an extent from the root, returns 0 on success
834 static int __free_extent(struct btrfs_trans_handle
*trans
, struct btrfs_root
835 *root
, u64 blocknr
, u64 num_blocks
, int pin
,
838 struct btrfs_path
*path
;
839 struct btrfs_key key
;
840 struct btrfs_fs_info
*info
= root
->fs_info
;
841 struct btrfs_root
*extent_root
= info
->extent_root
;
843 struct btrfs_extent_item
*ei
;
846 key
.objectid
= blocknr
;
848 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
849 key
.offset
= num_blocks
;
851 path
= btrfs_alloc_path();
855 ret
= btrfs_search_slot(trans
, extent_root
, &key
, path
, -1, 1);
859 ei
= btrfs_item_ptr(btrfs_buffer_leaf(path
->nodes
[0]), path
->slots
[0],
860 struct btrfs_extent_item
);
861 BUG_ON(ei
->refs
== 0);
862 refs
= btrfs_extent_refs(ei
) - 1;
863 btrfs_set_extent_refs(ei
, refs
);
864 btrfs_mark_buffer_dirty(path
->nodes
[0]);
866 u64 super_blocks_used
, root_blocks_used
;
869 ret
= pin_down_block(root
, blocknr
, 0);
873 /* block accounting for super block */
874 super_blocks_used
= btrfs_super_blocks_used(&info
->super_copy
);
875 btrfs_set_super_blocks_used(&info
->super_copy
,
876 super_blocks_used
- num_blocks
);
878 /* block accounting for root item */
879 root_blocks_used
= btrfs_root_blocks_used(&root
->root_item
);
880 btrfs_set_root_blocks_used(&root
->root_item
,
881 root_blocks_used
- num_blocks
);
883 ret
= btrfs_del_item(trans
, extent_root
, path
);
887 ret
= update_block_group(trans
, root
, blocknr
, num_blocks
, 0,
891 btrfs_free_path(path
);
892 finish_current_insert(trans
, extent_root
);
897 * find all the blocks marked as pending in the radix tree and remove
898 * them from the extent map
900 static int del_pending_extents(struct btrfs_trans_handle
*trans
, struct
901 btrfs_root
*extent_root
)
906 unsigned long gang
[4];
908 struct radix_tree_root
*pending_radix
;
909 struct radix_tree_root
*pinned_radix
;
910 struct btrfs_block_group_cache
*cache
;
912 pending_radix
= &extent_root
->fs_info
->pending_del_radix
;
913 pinned_radix
= &extent_root
->fs_info
->pinned_radix
;
916 ret
= find_first_radix_bit(pending_radix
, gang
, 0,
920 for (i
= 0; i
< ret
; i
++) {
921 wret
= set_radix_bit(pinned_radix
, gang
[i
]);
924 btrfs_lookup_block_group(extent_root
->fs_info
,
930 printk(KERN_CRIT
"set_radix_bit, err %d\n",
934 wret
= clear_radix_bit(pending_radix
, gang
[i
]);
936 wret
= __free_extent(trans
, extent_root
,
946 * remove an extent from the root, returns 0 on success
948 int btrfs_free_extent(struct btrfs_trans_handle
*trans
, struct btrfs_root
949 *root
, u64 blocknr
, u64 num_blocks
, int pin
)
951 struct btrfs_root
*extent_root
= root
->fs_info
->extent_root
;
955 if (root
== extent_root
) {
956 pin_down_block(root
, blocknr
, 1);
959 ret
= __free_extent(trans
, root
, blocknr
, num_blocks
, pin
, pin
== 0);
960 pending_ret
= del_pending_extents(trans
, root
->fs_info
->extent_root
);
961 return ret
? ret
: pending_ret
;
965 * walks the btree of allocated extents and find a hole of a given size.
966 * The key ins is changed to record the hole:
967 * ins->objectid == block start
968 * ins->flags = BTRFS_EXTENT_ITEM_KEY
969 * ins->offset == number of blocks
970 * Any available blocks before search_start are skipped.
972 static int find_free_extent(struct btrfs_trans_handle
*trans
, struct btrfs_root
973 *orig_root
, u64 num_blocks
, u64 empty_size
,
974 u64 search_start
, u64 search_end
, u64 hint_block
,
975 struct btrfs_key
*ins
, u64 exclude_start
,
976 u64 exclude_nr
, int data
)
978 struct btrfs_path
*path
;
979 struct btrfs_key key
;
985 u64 orig_search_start
= search_start
;
987 struct btrfs_leaf
*l
;
988 struct btrfs_root
* root
= orig_root
->fs_info
->extent_root
;
989 struct btrfs_fs_info
*info
= root
->fs_info
;
990 int total_needed
= num_blocks
;
992 struct btrfs_block_group_cache
*block_group
;
996 WARN_ON(num_blocks
< 1);
998 btrfs_set_key_type(ins
, BTRFS_EXTENT_ITEM_KEY
);
1000 level
= btrfs_header_level(btrfs_buffer_header(root
->node
));
1001 if (search_end
== (u64
)-1)
1002 search_end
= btrfs_super_total_blocks(&info
->super_copy
);
1004 block_group
= btrfs_lookup_block_group(info
, hint_block
);
1005 block_group
= btrfs_find_block_group(root
, block_group
,
1006 hint_block
, data
, 1);
1008 block_group
= btrfs_find_block_group(root
,
1009 trans
->block_group
, 0,
1013 total_needed
+= empty_size
;
1014 path
= btrfs_alloc_path();
1017 if (!block_group
->data
)
1018 search_start
= find_search_start(root
, &block_group
,
1019 search_start
, total_needed
);
1020 else if (!full_scan
)
1021 search_start
= max(block_group
->last_alloc
, search_start
);
1023 btrfs_init_path(path
);
1024 ins
->objectid
= search_start
;
1029 ret
= btrfs_search_slot(trans
, root
, ins
, path
, 0, 0);
1033 if (path
->slots
[0] > 0) {
1037 l
= btrfs_buffer_leaf(path
->nodes
[0]);
1038 btrfs_disk_key_to_cpu(&key
, &l
->items
[path
->slots
[0]].key
);
1040 * a rare case, go back one key if we hit a block group item
1041 * instead of an extent item
1043 if (btrfs_key_type(&key
) != BTRFS_EXTENT_ITEM_KEY
&&
1044 key
.objectid
+ key
.offset
>= search_start
) {
1045 ins
->objectid
= key
.objectid
;
1046 ins
->offset
= key
.offset
- 1;
1047 btrfs_release_path(root
, path
);
1048 ret
= btrfs_search_slot(trans
, root
, ins
, path
, 0, 0);
1052 if (path
->slots
[0] > 0) {
1058 l
= btrfs_buffer_leaf(path
->nodes
[0]);
1059 slot
= path
->slots
[0];
1060 if (slot
>= btrfs_header_nritems(&l
->header
)) {
1061 ret
= btrfs_next_leaf(root
, path
);
1067 ins
->objectid
= search_start
;
1068 ins
->offset
= search_end
- search_start
;
1072 ins
->objectid
= last_block
> search_start
?
1073 last_block
: search_start
;
1074 ins
->offset
= search_end
- ins
->objectid
;
1078 btrfs_disk_key_to_cpu(&key
, &l
->items
[slot
].key
);
1079 if (key
.objectid
>= search_start
&& key
.objectid
> last_block
&&
1081 if (last_block
< search_start
)
1082 last_block
= search_start
;
1083 hole_size
= key
.objectid
- last_block
;
1084 if (hole_size
>= num_blocks
) {
1085 ins
->objectid
= last_block
;
1086 ins
->offset
= hole_size
;
1091 if (btrfs_key_type(&key
) != BTRFS_EXTENT_ITEM_KEY
)
1095 last_block
= key
.objectid
+ key
.offset
;
1096 if (!full_scan
&& last_block
>= block_group
->key
.objectid
+
1097 block_group
->key
.offset
) {
1098 btrfs_release_path(root
, path
);
1099 search_start
= block_group
->key
.objectid
+
1100 block_group
->key
.offset
* 2;
1108 /* we have to make sure we didn't find an extent that has already
1109 * been allocated by the map tree or the original allocation
1111 btrfs_release_path(root
, path
);
1112 BUG_ON(ins
->objectid
< search_start
);
1114 if (ins
->objectid
+ num_blocks
>= search_end
) {
1119 search_start
= orig_search_start
;
1122 total_needed
-= empty_size
;
1128 for (test_block
= ins
->objectid
;
1129 test_block
< ins
->objectid
+ num_blocks
; test_block
++) {
1130 if (test_radix_bit(&info
->pinned_radix
, test_block
) ||
1131 test_radix_bit(&info
->extent_ins_radix
, test_block
)) {
1132 search_start
= test_block
+ 1;
1136 if (exclude_nr
> 0 && (ins
->objectid
+ num_blocks
> exclude_start
&&
1137 ins
->objectid
< exclude_start
+ exclude_nr
)) {
1138 search_start
= exclude_start
+ exclude_nr
;
1142 block_group
= btrfs_lookup_block_group(info
, ins
->objectid
);
1144 trans
->block_group
= block_group
;
1146 ins
->offset
= num_blocks
;
1147 btrfs_free_path(path
);
1151 if (search_start
+ num_blocks
>= search_end
) {
1152 search_start
= orig_search_start
;
1159 total_needed
-= empty_size
;
1164 block_group
= btrfs_lookup_block_group(info
, search_start
);
1167 block_group
= btrfs_find_block_group(root
, block_group
,
1168 search_start
, data
, 0);
1172 btrfs_release_path(root
, path
);
1173 btrfs_free_path(path
);
1177 * finds a free extent and does all the dirty work required for allocation
1178 * returns the key for the extent through ins, and a tree buffer for
1179 * the first block of the extent through buf.
1181 * returns 0 if everything worked, non-zero otherwise.
1183 int btrfs_alloc_extent(struct btrfs_trans_handle
*trans
,
1184 struct btrfs_root
*root
, u64 owner
,
1185 u64 num_blocks
, u64 empty_size
, u64 hint_block
,
1186 u64 search_end
, struct btrfs_key
*ins
, int data
)
1190 u64 super_blocks_used
, root_blocks_used
;
1191 u64 search_start
= 0;
1192 struct btrfs_fs_info
*info
= root
->fs_info
;
1193 struct btrfs_root
*extent_root
= info
->extent_root
;
1194 struct btrfs_extent_item extent_item
;
1196 btrfs_set_extent_refs(&extent_item
, 1);
1197 btrfs_set_extent_owner(&extent_item
, owner
);
1199 WARN_ON(num_blocks
< 1);
1200 ret
= find_free_extent(trans
, root
, num_blocks
, empty_size
,
1201 search_start
, search_end
, hint_block
, ins
,
1202 trans
->alloc_exclude_start
,
1203 trans
->alloc_exclude_nr
, data
);
1208 /* block accounting for super block */
1209 super_blocks_used
= btrfs_super_blocks_used(&info
->super_copy
);
1210 btrfs_set_super_blocks_used(&info
->super_copy
, super_blocks_used
+
1213 /* block accounting for root item */
1214 root_blocks_used
= btrfs_root_blocks_used(&root
->root_item
);
1215 btrfs_set_root_blocks_used(&root
->root_item
, root_blocks_used
+
1218 if (root
== extent_root
) {
1219 BUG_ON(num_blocks
!= 1);
1220 set_radix_bit(&root
->fs_info
->extent_ins_radix
, ins
->objectid
);
1224 WARN_ON(trans
->alloc_exclude_nr
);
1225 trans
->alloc_exclude_start
= ins
->objectid
;
1226 trans
->alloc_exclude_nr
= ins
->offset
;
1227 ret
= btrfs_insert_item(trans
, extent_root
, ins
, &extent_item
,
1228 sizeof(extent_item
));
1230 trans
->alloc_exclude_start
= 0;
1231 trans
->alloc_exclude_nr
= 0;
1234 finish_current_insert(trans
, extent_root
);
1235 pending_ret
= del_pending_extents(trans
, extent_root
);
1244 ret
= update_block_group(trans
, root
, ins
->objectid
, ins
->offset
, 1, 0,
1251 * helper function to allocate a block for a given tree
1252 * returns the tree buffer or NULL.
1254 struct buffer_head
*btrfs_alloc_free_block(struct btrfs_trans_handle
*trans
,
1255 struct btrfs_root
*root
, u64 hint
,
1258 struct btrfs_key ins
;
1260 struct buffer_head
*buf
;
1262 ret
= btrfs_alloc_extent(trans
, root
, root
->root_key
.objectid
,
1263 1, empty_size
, hint
, (u64
)-1, &ins
, 0);
1266 return ERR_PTR(ret
);
1268 buf
= btrfs_find_create_tree_block(root
, ins
.objectid
);
1270 btrfs_free_extent(trans
, root
, ins
.objectid
, 1, 0);
1271 return ERR_PTR(-ENOMEM
);
1273 WARN_ON(buffer_dirty(buf
));
1274 set_buffer_uptodate(buf
);
1275 set_buffer_checked(buf
);
1276 set_buffer_defrag(buf
);
1277 set_radix_bit(&trans
->transaction
->dirty_pages
, buf
->b_page
->index
);
1281 static int drop_leaf_ref(struct btrfs_trans_handle
*trans
,
1282 struct btrfs_root
*root
, struct buffer_head
*cur
)
1284 struct btrfs_disk_key
*key
;
1285 struct btrfs_leaf
*leaf
;
1286 struct btrfs_file_extent_item
*fi
;
1291 BUG_ON(!btrfs_is_leaf(btrfs_buffer_node(cur
)));
1292 leaf
= btrfs_buffer_leaf(cur
);
1293 nritems
= btrfs_header_nritems(&leaf
->header
);
1294 for (i
= 0; i
< nritems
; i
++) {
1296 key
= &leaf
->items
[i
].key
;
1297 if (btrfs_disk_key_type(key
) != BTRFS_EXTENT_DATA_KEY
)
1299 fi
= btrfs_item_ptr(leaf
, i
, struct btrfs_file_extent_item
);
1300 if (btrfs_file_extent_type(fi
) == BTRFS_FILE_EXTENT_INLINE
)
1303 * FIXME make sure to insert a trans record that
1304 * repeats the snapshot del on crash
1306 disk_blocknr
= btrfs_file_extent_disk_blocknr(fi
);
1307 if (disk_blocknr
== 0)
1309 ret
= btrfs_free_extent(trans
, root
, disk_blocknr
,
1310 btrfs_file_extent_disk_num_blocks(fi
),
1317 static void reada_walk_down(struct btrfs_root
*root
,
1318 struct btrfs_node
*node
)
1326 nritems
= btrfs_header_nritems(&node
->header
);
1327 for (i
= 0; i
< nritems
; i
++) {
1328 blocknr
= btrfs_node_blockptr(node
, i
);
1329 ret
= lookup_extent_ref(NULL
, root
, blocknr
, 1, &refs
);
1333 mutex_unlock(&root
->fs_info
->fs_mutex
);
1334 ret
= readahead_tree_block(root
, blocknr
);
1336 mutex_lock(&root
->fs_info
->fs_mutex
);
1343 * helper function for drop_snapshot, this walks down the tree dropping ref
1344 * counts as it goes.
1346 static int walk_down_tree(struct btrfs_trans_handle
*trans
, struct btrfs_root
1347 *root
, struct btrfs_path
*path
, int *level
)
1349 struct buffer_head
*next
;
1350 struct buffer_head
*cur
;
1355 WARN_ON(*level
< 0);
1356 WARN_ON(*level
>= BTRFS_MAX_LEVEL
);
1357 ret
= lookup_extent_ref(trans
, root
, bh_blocknr(path
->nodes
[*level
]),
1364 * walk down to the last node level and free all the leaves
1366 while(*level
>= 0) {
1367 WARN_ON(*level
< 0);
1368 WARN_ON(*level
>= BTRFS_MAX_LEVEL
);
1369 cur
= path
->nodes
[*level
];
1371 if (*level
> 0 && path
->slots
[*level
] == 0)
1372 reada_walk_down(root
, btrfs_buffer_node(cur
));
1374 if (btrfs_header_level(btrfs_buffer_header(cur
)) != *level
)
1377 if (path
->slots
[*level
] >=
1378 btrfs_header_nritems(btrfs_buffer_header(cur
)))
1381 ret
= drop_leaf_ref(trans
, root
, cur
);
1385 blocknr
= btrfs_node_blockptr(btrfs_buffer_node(cur
),
1386 path
->slots
[*level
]);
1387 ret
= lookup_extent_ref(trans
, root
, blocknr
, 1, &refs
);
1390 path
->slots
[*level
]++;
1391 ret
= btrfs_free_extent(trans
, root
, blocknr
, 1, 1);
1395 next
= btrfs_find_tree_block(root
, blocknr
);
1396 if (!next
|| !buffer_uptodate(next
)) {
1398 mutex_unlock(&root
->fs_info
->fs_mutex
);
1399 next
= read_tree_block(root
, blocknr
);
1400 mutex_lock(&root
->fs_info
->fs_mutex
);
1402 /* we dropped the lock, check one more time */
1403 ret
= lookup_extent_ref(trans
, root
, blocknr
, 1, &refs
);
1406 path
->slots
[*level
]++;
1408 ret
= btrfs_free_extent(trans
, root
,
1414 WARN_ON(*level
<= 0);
1415 if (path
->nodes
[*level
-1])
1416 btrfs_block_release(root
, path
->nodes
[*level
-1]);
1417 path
->nodes
[*level
-1] = next
;
1418 *level
= btrfs_header_level(btrfs_buffer_header(next
));
1419 path
->slots
[*level
] = 0;
1422 WARN_ON(*level
< 0);
1423 WARN_ON(*level
>= BTRFS_MAX_LEVEL
);
1424 ret
= btrfs_free_extent(trans
, root
,
1425 bh_blocknr(path
->nodes
[*level
]), 1, 1);
1426 btrfs_block_release(root
, path
->nodes
[*level
]);
1427 path
->nodes
[*level
] = NULL
;
1434 * helper for dropping snapshots. This walks back up the tree in the path
1435 * to find the first node higher up where we haven't yet gone through
1438 static int walk_up_tree(struct btrfs_trans_handle
*trans
, struct btrfs_root
1439 *root
, struct btrfs_path
*path
, int *level
)
1444 struct btrfs_root_item
*root_item
= &root
->root_item
;
1446 for(i
= *level
; i
< BTRFS_MAX_LEVEL
- 1 && path
->nodes
[i
]; i
++) {
1447 slot
= path
->slots
[i
];
1448 if (slot
< btrfs_header_nritems(
1449 btrfs_buffer_header(path
->nodes
[i
])) - 1) {
1450 struct btrfs_node
*node
;
1451 node
= btrfs_buffer_node(path
->nodes
[i
]);
1454 WARN_ON(*level
== 0);
1455 memcpy(&root_item
->drop_progress
,
1456 &node
->ptrs
[path
->slots
[i
]].key
,
1457 sizeof(root_item
->drop_progress
));
1458 root_item
->drop_level
= i
;
1461 ret
= btrfs_free_extent(trans
, root
,
1462 bh_blocknr(path
->nodes
[*level
]),
1465 btrfs_block_release(root
, path
->nodes
[*level
]);
1466 path
->nodes
[*level
] = NULL
;
1474 * drop the reference count on the tree rooted at 'snap'. This traverses
1475 * the tree freeing any blocks that have a ref count of zero after being
1478 int btrfs_drop_snapshot(struct btrfs_trans_handle
*trans
, struct btrfs_root
1484 struct btrfs_path
*path
;
1487 struct btrfs_root_item
*root_item
= &root
->root_item
;
1489 path
= btrfs_alloc_path();
1492 level
= btrfs_header_level(btrfs_buffer_header(root
->node
));
1494 if (btrfs_disk_key_objectid(&root_item
->drop_progress
) == 0) {
1495 path
->nodes
[level
] = root
->node
;
1496 path
->slots
[level
] = 0;
1498 struct btrfs_key key
;
1499 struct btrfs_disk_key
*found_key
;
1500 struct btrfs_node
*node
;
1502 btrfs_disk_key_to_cpu(&key
, &root_item
->drop_progress
);
1503 level
= root_item
->drop_level
;
1504 path
->lowest_level
= level
;
1505 wret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
1510 node
= btrfs_buffer_node(path
->nodes
[level
]);
1511 found_key
= &node
->ptrs
[path
->slots
[level
]].key
;
1512 WARN_ON(memcmp(found_key
, &root_item
->drop_progress
,
1513 sizeof(*found_key
)));
1516 wret
= walk_down_tree(trans
, root
, path
, &level
);
1522 wret
= walk_up_tree(trans
, root
, path
, &level
);
1531 for (i
= 0; i
<= orig_level
; i
++) {
1532 if (path
->nodes
[i
]) {
1533 btrfs_block_release(root
, path
->nodes
[i
]);
1538 btrfs_free_path(path
);
1542 static int free_block_group_radix(struct radix_tree_root
*radix
)
1545 struct btrfs_block_group_cache
*cache
[8];
1549 ret
= radix_tree_gang_lookup(radix
, (void **)cache
, 0,
1553 for (i
= 0; i
< ret
; i
++) {
1554 radix_tree_delete(radix
, cache
[i
]->key
.objectid
+
1555 cache
[i
]->key
.offset
- 1);
1562 int btrfs_free_block_groups(struct btrfs_fs_info
*info
)
1566 unsigned long gang
[16];
1569 ret
= free_block_group_radix(&info
->block_group_radix
);
1570 ret2
= free_block_group_radix(&info
->block_group_data_radix
);
1577 ret
= find_first_radix_bit(&info
->extent_map_radix
,
1578 gang
, 0, ARRAY_SIZE(gang
));
1581 for (i
= 0; i
< ret
; i
++) {
1582 clear_radix_bit(&info
->extent_map_radix
, gang
[i
]);
1588 int btrfs_read_block_groups(struct btrfs_root
*root
)
1590 struct btrfs_path
*path
;
1593 struct btrfs_block_group_item
*bi
;
1594 struct btrfs_block_group_cache
*cache
;
1595 struct btrfs_fs_info
*info
= root
->fs_info
;
1596 struct radix_tree_root
*radix
;
1597 struct btrfs_key key
;
1598 struct btrfs_key found_key
;
1599 struct btrfs_leaf
*leaf
;
1600 u64 group_size_blocks
;
1603 group_size_blocks
= BTRFS_BLOCK_GROUP_SIZE
>>
1604 root
->fs_info
->sb
->s_blocksize_bits
;
1605 root
= info
->extent_root
;
1607 key
.offset
= group_size_blocks
;
1609 btrfs_set_key_type(&key
, BTRFS_BLOCK_GROUP_ITEM_KEY
);
1611 path
= btrfs_alloc_path();
1616 ret
= btrfs_search_slot(NULL
, info
->extent_root
,
1622 leaf
= btrfs_buffer_leaf(path
->nodes
[0]);
1623 btrfs_disk_key_to_cpu(&found_key
,
1624 &leaf
->items
[path
->slots
[0]].key
);
1625 cache
= kmalloc(sizeof(*cache
), GFP_NOFS
);
1631 bi
= btrfs_item_ptr(leaf
, path
->slots
[0],
1632 struct btrfs_block_group_item
);
1633 if (bi
->flags
& BTRFS_BLOCK_GROUP_DATA
) {
1634 radix
= &info
->block_group_data_radix
;
1637 radix
= &info
->block_group_radix
;
1641 memcpy(&cache
->item
, bi
, sizeof(*bi
));
1642 memcpy(&cache
->key
, &found_key
, sizeof(found_key
));
1643 cache
->last_alloc
= cache
->key
.objectid
;
1644 cache
->first_free
= cache
->key
.objectid
;
1648 cache
->radix
= radix
;
1650 key
.objectid
= found_key
.objectid
+ found_key
.offset
;
1651 btrfs_release_path(root
, path
);
1652 ret
= radix_tree_insert(radix
, found_key
.objectid
+
1653 found_key
.offset
- 1,
1656 used
= btrfs_block_group_used(bi
);
1657 if (used
< div_factor(key
.offset
, 8)) {
1658 radix_tree_tag_set(radix
, found_key
.objectid
+
1659 found_key
.offset
- 1,
1660 BTRFS_BLOCK_GROUP_AVAIL
);
1663 btrfs_super_total_blocks(&info
->super_copy
))
1667 btrfs_free_path(path
);