2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
18 #include <linux/sched.h>
19 #include <linux/pagemap.h>
20 #include <linux/writeback.h>
21 #include <linux/blkdev.h>
22 #include <linux/sort.h>
23 #include <linux/rcupdate.h>
29 #include "print-tree.h"
30 #include "transaction.h"
33 #include "ref-cache.h"
34 #include "free-space-cache.h"
36 #define PENDING_EXTENT_INSERT 0
37 #define PENDING_EXTENT_DELETE 1
38 #define PENDING_BACKREF_UPDATE 2
40 struct pending_extent_op
{
49 struct list_head list
;
53 static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle
*trans
,
54 struct btrfs_root
*root
, u64 parent
,
55 u64 root_objectid
, u64 ref_generation
,
56 u64 owner
, struct btrfs_key
*ins
,
58 static int update_reserved_extents(struct btrfs_root
*root
,
59 u64 bytenr
, u64 num
, int reserve
);
60 static int update_block_group(struct btrfs_trans_handle
*trans
,
61 struct btrfs_root
*root
,
62 u64 bytenr
, u64 num_bytes
, int alloc
,
64 static noinline
int __btrfs_free_extent(struct btrfs_trans_handle
*trans
,
65 struct btrfs_root
*root
,
66 u64 bytenr
, u64 num_bytes
, u64 parent
,
67 u64 root_objectid
, u64 ref_generation
,
68 u64 owner_objectid
, int pin
,
71 static int do_chunk_alloc(struct btrfs_trans_handle
*trans
,
72 struct btrfs_root
*extent_root
, u64 alloc_bytes
,
73 u64 flags
, int force
);
75 static int block_group_bits(struct btrfs_block_group_cache
*cache
, u64 bits
)
77 return (cache
->flags
& bits
) == bits
;
81 * this adds the block group to the fs_info rb tree for the block group
84 static int btrfs_add_block_group_cache(struct btrfs_fs_info
*info
,
85 struct btrfs_block_group_cache
*block_group
)
88 struct rb_node
*parent
= NULL
;
89 struct btrfs_block_group_cache
*cache
;
91 spin_lock(&info
->block_group_cache_lock
);
92 p
= &info
->block_group_cache_tree
.rb_node
;
96 cache
= rb_entry(parent
, struct btrfs_block_group_cache
,
98 if (block_group
->key
.objectid
< cache
->key
.objectid
) {
100 } else if (block_group
->key
.objectid
> cache
->key
.objectid
) {
103 spin_unlock(&info
->block_group_cache_lock
);
108 rb_link_node(&block_group
->cache_node
, parent
, p
);
109 rb_insert_color(&block_group
->cache_node
,
110 &info
->block_group_cache_tree
);
111 spin_unlock(&info
->block_group_cache_lock
);
117 * This will return the block group at or after bytenr if contains is 0, else
118 * it will return the block group that contains the bytenr
120 static struct btrfs_block_group_cache
*
121 block_group_cache_tree_search(struct btrfs_fs_info
*info
, u64 bytenr
,
124 struct btrfs_block_group_cache
*cache
, *ret
= NULL
;
128 spin_lock(&info
->block_group_cache_lock
);
129 n
= info
->block_group_cache_tree
.rb_node
;
132 cache
= rb_entry(n
, struct btrfs_block_group_cache
,
134 end
= cache
->key
.objectid
+ cache
->key
.offset
- 1;
135 start
= cache
->key
.objectid
;
137 if (bytenr
< start
) {
138 if (!contains
&& (!ret
|| start
< ret
->key
.objectid
))
141 } else if (bytenr
> start
) {
142 if (contains
&& bytenr
<= end
) {
153 atomic_inc(&ret
->count
);
154 spin_unlock(&info
->block_group_cache_lock
);
160 * this is only called by cache_block_group, since we could have freed extents
161 * we need to check the pinned_extents for any extents that can't be used yet
162 * since their free space will be released as soon as the transaction commits.
164 static int add_new_free_space(struct btrfs_block_group_cache
*block_group
,
165 struct btrfs_fs_info
*info
, u64 start
, u64 end
)
167 u64 extent_start
, extent_end
, size
;
170 while (start
< end
) {
171 ret
= find_first_extent_bit(&info
->pinned_extents
, start
,
172 &extent_start
, &extent_end
,
177 if (extent_start
== start
) {
178 start
= extent_end
+ 1;
179 } else if (extent_start
> start
&& extent_start
< end
) {
180 size
= extent_start
- start
;
181 ret
= btrfs_add_free_space(block_group
, start
,
184 start
= extent_end
+ 1;
192 ret
= btrfs_add_free_space(block_group
, start
, size
);
199 static int remove_sb_from_cache(struct btrfs_root
*root
,
200 struct btrfs_block_group_cache
*cache
)
207 for (i
= 0; i
< BTRFS_SUPER_MIRROR_MAX
; i
++) {
208 bytenr
= btrfs_sb_offset(i
);
209 ret
= btrfs_rmap_block(&root
->fs_info
->mapping_tree
,
210 cache
->key
.objectid
, bytenr
, 0,
211 &logical
, &nr
, &stripe_len
);
214 btrfs_remove_free_space(cache
, logical
[nr
],
222 static int cache_block_group(struct btrfs_root
*root
,
223 struct btrfs_block_group_cache
*block_group
)
225 struct btrfs_path
*path
;
227 struct btrfs_key key
;
228 struct extent_buffer
*leaf
;
235 root
= root
->fs_info
->extent_root
;
237 if (block_group
->cached
)
240 path
= btrfs_alloc_path();
246 * we get into deadlocks with paths held by callers of this function.
247 * since the alloc_mutex is protecting things right now, just
248 * skip the locking here
250 path
->skip_locking
= 1;
251 last
= max_t(u64
, block_group
->key
.objectid
, BTRFS_SUPER_INFO_OFFSET
);
254 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
255 ret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
260 leaf
= path
->nodes
[0];
261 slot
= path
->slots
[0];
262 if (slot
>= btrfs_header_nritems(leaf
)) {
263 ret
= btrfs_next_leaf(root
, path
);
271 btrfs_item_key_to_cpu(leaf
, &key
, slot
);
272 if (key
.objectid
< block_group
->key
.objectid
)
275 if (key
.objectid
>= block_group
->key
.objectid
+
276 block_group
->key
.offset
)
279 if (btrfs_key_type(&key
) == BTRFS_EXTENT_ITEM_KEY
) {
280 add_new_free_space(block_group
, root
->fs_info
, last
,
283 last
= key
.objectid
+ key
.offset
;
289 add_new_free_space(block_group
, root
->fs_info
, last
,
290 block_group
->key
.objectid
+
291 block_group
->key
.offset
);
293 block_group
->cached
= 1;
294 remove_sb_from_cache(root
, block_group
);
297 btrfs_free_path(path
);
302 * return the block group that starts at or after bytenr
304 static struct btrfs_block_group_cache
*
305 btrfs_lookup_first_block_group(struct btrfs_fs_info
*info
, u64 bytenr
)
307 struct btrfs_block_group_cache
*cache
;
309 cache
= block_group_cache_tree_search(info
, bytenr
, 0);
315 * return the block group that contains teh given bytenr
317 struct btrfs_block_group_cache
*btrfs_lookup_block_group(
318 struct btrfs_fs_info
*info
,
321 struct btrfs_block_group_cache
*cache
;
323 cache
= block_group_cache_tree_search(info
, bytenr
, 1);
328 void btrfs_put_block_group(struct btrfs_block_group_cache
*cache
)
330 if (atomic_dec_and_test(&cache
->count
))
334 static struct btrfs_space_info
*__find_space_info(struct btrfs_fs_info
*info
,
337 struct list_head
*head
= &info
->space_info
;
338 struct btrfs_space_info
*found
;
341 list_for_each_entry_rcu(found
, head
, list
) {
342 if (found
->flags
== flags
) {
352 * after adding space to the filesystem, we need to clear the full flags
353 * on all the space infos.
355 void btrfs_clear_space_info_full(struct btrfs_fs_info
*info
)
357 struct list_head
*head
= &info
->space_info
;
358 struct btrfs_space_info
*found
;
361 list_for_each_entry_rcu(found
, head
, list
)
366 static u64
div_factor(u64 num
, int factor
)
375 u64
btrfs_find_block_group(struct btrfs_root
*root
,
376 u64 search_start
, u64 search_hint
, int owner
)
378 struct btrfs_block_group_cache
*cache
;
380 u64 last
= max(search_hint
, search_start
);
387 cache
= btrfs_lookup_first_block_group(root
->fs_info
, last
);
391 spin_lock(&cache
->lock
);
392 last
= cache
->key
.objectid
+ cache
->key
.offset
;
393 used
= btrfs_block_group_used(&cache
->item
);
395 if ((full_search
|| !cache
->ro
) &&
396 block_group_bits(cache
, BTRFS_BLOCK_GROUP_METADATA
)) {
397 if (used
+ cache
->pinned
+ cache
->reserved
<
398 div_factor(cache
->key
.offset
, factor
)) {
399 group_start
= cache
->key
.objectid
;
400 spin_unlock(&cache
->lock
);
401 btrfs_put_block_group(cache
);
405 spin_unlock(&cache
->lock
);
406 btrfs_put_block_group(cache
);
414 if (!full_search
&& factor
< 10) {
424 /* simple helper to search for an existing extent at a given offset */
425 int btrfs_lookup_extent(struct btrfs_root
*root
, u64 start
, u64 len
)
428 struct btrfs_key key
;
429 struct btrfs_path
*path
;
431 path
= btrfs_alloc_path();
433 key
.objectid
= start
;
435 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
436 ret
= btrfs_search_slot(NULL
, root
->fs_info
->extent_root
, &key
, path
,
438 btrfs_free_path(path
);
443 * Back reference rules. Back refs have three main goals:
445 * 1) differentiate between all holders of references to an extent so that
446 * when a reference is dropped we can make sure it was a valid reference
447 * before freeing the extent.
449 * 2) Provide enough information to quickly find the holders of an extent
450 * if we notice a given block is corrupted or bad.
452 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
453 * maintenance. This is actually the same as #2, but with a slightly
454 * different use case.
456 * File extents can be referenced by:
458 * - multiple snapshots, subvolumes, or different generations in one subvol
459 * - different files inside a single subvolume
460 * - different offsets inside a file (bookend extents in file.c)
462 * The extent ref structure has fields for:
464 * - Objectid of the subvolume root
465 * - Generation number of the tree holding the reference
466 * - objectid of the file holding the reference
467 * - number of references holding by parent node (alway 1 for tree blocks)
469 * Btree leaf may hold multiple references to a file extent. In most cases,
470 * these references are from same file and the corresponding offsets inside
471 * the file are close together.
473 * When a file extent is allocated the fields are filled in:
474 * (root_key.objectid, trans->transid, inode objectid, 1)
476 * When a leaf is cow'd new references are added for every file extent found
477 * in the leaf. It looks similar to the create case, but trans->transid will
478 * be different when the block is cow'd.
480 * (root_key.objectid, trans->transid, inode objectid,
481 * number of references in the leaf)
483 * When a file extent is removed either during snapshot deletion or
484 * file truncation, we find the corresponding back reference and check
485 * the following fields:
487 * (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
490 * Btree extents can be referenced by:
492 * - Different subvolumes
493 * - Different generations of the same subvolume
495 * When a tree block is created, back references are inserted:
497 * (root->root_key.objectid, trans->transid, level, 1)
499 * When a tree block is cow'd, new back references are added for all the
500 * blocks it points to. If the tree block isn't in reference counted root,
501 * the old back references are removed. These new back references are of
502 * the form (trans->transid will have increased since creation):
504 * (root->root_key.objectid, trans->transid, level, 1)
506 * When a backref is in deleting, the following fields are checked:
508 * if backref was for a tree root:
509 * (btrfs_header_owner(itself), btrfs_header_generation(itself), level)
511 * (btrfs_header_owner(parent), btrfs_header_generation(parent), level)
513 * Back Reference Key composing:
515 * The key objectid corresponds to the first byte in the extent, the key
516 * type is set to BTRFS_EXTENT_REF_KEY, and the key offset is the first
517 * byte of parent extent. If a extent is tree root, the key offset is set
518 * to the key objectid.
521 static noinline
int lookup_extent_backref(struct btrfs_trans_handle
*trans
,
522 struct btrfs_root
*root
,
523 struct btrfs_path
*path
,
524 u64 bytenr
, u64 parent
,
525 u64 ref_root
, u64 ref_generation
,
526 u64 owner_objectid
, int del
)
528 struct btrfs_key key
;
529 struct btrfs_extent_ref
*ref
;
530 struct extent_buffer
*leaf
;
534 key
.objectid
= bytenr
;
535 key
.type
= BTRFS_EXTENT_REF_KEY
;
538 ret
= btrfs_search_slot(trans
, root
, &key
, path
, del
? -1 : 0, 1);
546 leaf
= path
->nodes
[0];
547 ref
= btrfs_item_ptr(leaf
, path
->slots
[0], struct btrfs_extent_ref
);
548 ref_objectid
= btrfs_ref_objectid(leaf
, ref
);
549 if (btrfs_ref_root(leaf
, ref
) != ref_root
||
550 btrfs_ref_generation(leaf
, ref
) != ref_generation
||
551 (ref_objectid
!= owner_objectid
&&
552 ref_objectid
!= BTRFS_MULTIPLE_OBJECTIDS
)) {
562 static noinline
int insert_extent_backref(struct btrfs_trans_handle
*trans
,
563 struct btrfs_root
*root
,
564 struct btrfs_path
*path
,
565 u64 bytenr
, u64 parent
,
566 u64 ref_root
, u64 ref_generation
,
570 struct btrfs_key key
;
571 struct extent_buffer
*leaf
;
572 struct btrfs_extent_ref
*ref
;
576 key
.objectid
= bytenr
;
577 key
.type
= BTRFS_EXTENT_REF_KEY
;
580 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
, sizeof(*ref
));
582 leaf
= path
->nodes
[0];
583 ref
= btrfs_item_ptr(leaf
, path
->slots
[0],
584 struct btrfs_extent_ref
);
585 btrfs_set_ref_root(leaf
, ref
, ref_root
);
586 btrfs_set_ref_generation(leaf
, ref
, ref_generation
);
587 btrfs_set_ref_objectid(leaf
, ref
, owner_objectid
);
588 btrfs_set_ref_num_refs(leaf
, ref
, refs_to_add
);
589 } else if (ret
== -EEXIST
) {
592 BUG_ON(owner_objectid
< BTRFS_FIRST_FREE_OBJECTID
);
593 leaf
= path
->nodes
[0];
594 ref
= btrfs_item_ptr(leaf
, path
->slots
[0],
595 struct btrfs_extent_ref
);
596 if (btrfs_ref_root(leaf
, ref
) != ref_root
||
597 btrfs_ref_generation(leaf
, ref
) != ref_generation
) {
603 num_refs
= btrfs_ref_num_refs(leaf
, ref
);
604 BUG_ON(num_refs
== 0);
605 btrfs_set_ref_num_refs(leaf
, ref
, num_refs
+ refs_to_add
);
607 existing_owner
= btrfs_ref_objectid(leaf
, ref
);
608 if (existing_owner
!= owner_objectid
&&
609 existing_owner
!= BTRFS_MULTIPLE_OBJECTIDS
) {
610 btrfs_set_ref_objectid(leaf
, ref
,
611 BTRFS_MULTIPLE_OBJECTIDS
);
617 btrfs_unlock_up_safe(path
, 1);
618 btrfs_mark_buffer_dirty(path
->nodes
[0]);
620 btrfs_release_path(root
, path
);
624 static noinline
int remove_extent_backref(struct btrfs_trans_handle
*trans
,
625 struct btrfs_root
*root
,
626 struct btrfs_path
*path
,
629 struct extent_buffer
*leaf
;
630 struct btrfs_extent_ref
*ref
;
634 leaf
= path
->nodes
[0];
635 ref
= btrfs_item_ptr(leaf
, path
->slots
[0], struct btrfs_extent_ref
);
636 num_refs
= btrfs_ref_num_refs(leaf
, ref
);
637 BUG_ON(num_refs
< refs_to_drop
);
638 num_refs
-= refs_to_drop
;
640 ret
= btrfs_del_item(trans
, root
, path
);
642 btrfs_set_ref_num_refs(leaf
, ref
, num_refs
);
643 btrfs_mark_buffer_dirty(leaf
);
645 btrfs_release_path(root
, path
);
649 #ifdef BIO_RW_DISCARD
650 static void btrfs_issue_discard(struct block_device
*bdev
,
653 blkdev_issue_discard(bdev
, start
>> 9, len
>> 9, GFP_KERNEL
);
657 static int btrfs_discard_extent(struct btrfs_root
*root
, u64 bytenr
,
660 #ifdef BIO_RW_DISCARD
662 u64 map_length
= num_bytes
;
663 struct btrfs_multi_bio
*multi
= NULL
;
665 /* Tell the block device(s) that the sectors can be discarded */
666 ret
= btrfs_map_block(&root
->fs_info
->mapping_tree
, READ
,
667 bytenr
, &map_length
, &multi
, 0);
669 struct btrfs_bio_stripe
*stripe
= multi
->stripes
;
672 if (map_length
> num_bytes
)
673 map_length
= num_bytes
;
675 for (i
= 0; i
< multi
->num_stripes
; i
++, stripe
++) {
676 btrfs_issue_discard(stripe
->dev
->bdev
,
689 static int __btrfs_update_extent_ref(struct btrfs_trans_handle
*trans
,
690 struct btrfs_root
*root
, u64 bytenr
,
692 u64 orig_parent
, u64 parent
,
693 u64 orig_root
, u64 ref_root
,
694 u64 orig_generation
, u64 ref_generation
,
698 int pin
= owner_objectid
< BTRFS_FIRST_FREE_OBJECTID
;
700 ret
= btrfs_update_delayed_ref(trans
, bytenr
, num_bytes
,
701 orig_parent
, parent
, orig_root
,
702 ref_root
, orig_generation
,
703 ref_generation
, owner_objectid
, pin
);
708 int btrfs_update_extent_ref(struct btrfs_trans_handle
*trans
,
709 struct btrfs_root
*root
, u64 bytenr
,
710 u64 num_bytes
, u64 orig_parent
, u64 parent
,
711 u64 ref_root
, u64 ref_generation
,
715 if (ref_root
== BTRFS_TREE_LOG_OBJECTID
&&
716 owner_objectid
< BTRFS_FIRST_FREE_OBJECTID
)
719 ret
= __btrfs_update_extent_ref(trans
, root
, bytenr
, num_bytes
,
720 orig_parent
, parent
, ref_root
,
721 ref_root
, ref_generation
,
722 ref_generation
, owner_objectid
);
725 static int __btrfs_inc_extent_ref(struct btrfs_trans_handle
*trans
,
726 struct btrfs_root
*root
, u64 bytenr
,
728 u64 orig_parent
, u64 parent
,
729 u64 orig_root
, u64 ref_root
,
730 u64 orig_generation
, u64 ref_generation
,
735 ret
= btrfs_add_delayed_ref(trans
, bytenr
, num_bytes
, parent
, ref_root
,
736 ref_generation
, owner_objectid
,
737 BTRFS_ADD_DELAYED_REF
, 0);
742 static noinline_for_stack
int add_extent_ref(struct btrfs_trans_handle
*trans
,
743 struct btrfs_root
*root
, u64 bytenr
,
744 u64 num_bytes
, u64 parent
, u64 ref_root
,
745 u64 ref_generation
, u64 owner_objectid
,
748 struct btrfs_path
*path
;
750 struct btrfs_key key
;
751 struct extent_buffer
*l
;
752 struct btrfs_extent_item
*item
;
755 path
= btrfs_alloc_path();
760 path
->leave_spinning
= 1;
761 key
.objectid
= bytenr
;
762 key
.type
= BTRFS_EXTENT_ITEM_KEY
;
763 key
.offset
= num_bytes
;
765 /* first find the extent item and update its reference count */
766 ret
= btrfs_search_slot(trans
, root
->fs_info
->extent_root
, &key
,
769 btrfs_set_path_blocking(path
);
775 btrfs_free_path(path
);
780 btrfs_item_key_to_cpu(l
, &key
, path
->slots
[0]);
781 if (key
.objectid
!= bytenr
) {
782 btrfs_print_leaf(root
->fs_info
->extent_root
, path
->nodes
[0]);
783 printk(KERN_ERR
"btrfs wanted %llu found %llu\n",
784 (unsigned long long)bytenr
,
785 (unsigned long long)key
.objectid
);
788 BUG_ON(key
.type
!= BTRFS_EXTENT_ITEM_KEY
);
790 item
= btrfs_item_ptr(l
, path
->slots
[0], struct btrfs_extent_item
);
792 refs
= btrfs_extent_refs(l
, item
);
793 btrfs_set_extent_refs(l
, item
, refs
+ refs_to_add
);
794 btrfs_unlock_up_safe(path
, 1);
796 btrfs_mark_buffer_dirty(path
->nodes
[0]);
798 btrfs_release_path(root
->fs_info
->extent_root
, path
);
801 path
->leave_spinning
= 1;
803 /* now insert the actual backref */
804 ret
= insert_extent_backref(trans
, root
->fs_info
->extent_root
,
805 path
, bytenr
, parent
,
806 ref_root
, ref_generation
,
807 owner_objectid
, refs_to_add
);
809 btrfs_free_path(path
);
813 int btrfs_inc_extent_ref(struct btrfs_trans_handle
*trans
,
814 struct btrfs_root
*root
,
815 u64 bytenr
, u64 num_bytes
, u64 parent
,
816 u64 ref_root
, u64 ref_generation
,
820 if (ref_root
== BTRFS_TREE_LOG_OBJECTID
&&
821 owner_objectid
< BTRFS_FIRST_FREE_OBJECTID
)
824 ret
= __btrfs_inc_extent_ref(trans
, root
, bytenr
, num_bytes
, 0, parent
,
825 0, ref_root
, 0, ref_generation
,
830 static int drop_delayed_ref(struct btrfs_trans_handle
*trans
,
831 struct btrfs_root
*root
,
832 struct btrfs_delayed_ref_node
*node
)
835 struct btrfs_delayed_ref
*ref
= btrfs_delayed_node_to_ref(node
);
837 BUG_ON(node
->ref_mod
== 0);
838 ret
= __btrfs_free_extent(trans
, root
, node
->bytenr
, node
->num_bytes
,
839 node
->parent
, ref
->root
, ref
->generation
,
840 ref
->owner_objectid
, ref
->pin
, node
->ref_mod
);
845 /* helper function to actually process a single delayed ref entry */
846 static noinline
int run_one_delayed_ref(struct btrfs_trans_handle
*trans
,
847 struct btrfs_root
*root
,
848 struct btrfs_delayed_ref_node
*node
,
852 struct btrfs_delayed_ref
*ref
;
854 if (node
->parent
== (u64
)-1) {
855 struct btrfs_delayed_ref_head
*head
;
857 * we've hit the end of the chain and we were supposed
858 * to insert this extent into the tree. But, it got
859 * deleted before we ever needed to insert it, so all
860 * we have to do is clean up the accounting
862 if (insert_reserved
) {
863 update_reserved_extents(root
, node
->bytenr
,
866 head
= btrfs_delayed_node_to_head(node
);
867 mutex_unlock(&head
->mutex
);
871 ref
= btrfs_delayed_node_to_ref(node
);
872 if (ref
->action
== BTRFS_ADD_DELAYED_REF
) {
873 if (insert_reserved
) {
874 struct btrfs_key ins
;
876 ins
.objectid
= node
->bytenr
;
877 ins
.offset
= node
->num_bytes
;
878 ins
.type
= BTRFS_EXTENT_ITEM_KEY
;
880 /* record the full extent allocation */
881 ret
= __btrfs_alloc_reserved_extent(trans
, root
,
882 node
->parent
, ref
->root
,
883 ref
->generation
, ref
->owner_objectid
,
884 &ins
, node
->ref_mod
);
885 update_reserved_extents(root
, node
->bytenr
,
888 /* just add one backref */
889 ret
= add_extent_ref(trans
, root
, node
->bytenr
,
891 node
->parent
, ref
->root
, ref
->generation
,
892 ref
->owner_objectid
, node
->ref_mod
);
895 } else if (ref
->action
== BTRFS_DROP_DELAYED_REF
) {
896 WARN_ON(insert_reserved
);
897 ret
= drop_delayed_ref(trans
, root
, node
);
902 static noinline
struct btrfs_delayed_ref_node
*
903 select_delayed_ref(struct btrfs_delayed_ref_head
*head
)
905 struct rb_node
*node
;
906 struct btrfs_delayed_ref_node
*ref
;
907 int action
= BTRFS_ADD_DELAYED_REF
;
910 * select delayed ref of type BTRFS_ADD_DELAYED_REF first.
911 * this prevents ref count from going down to zero when
912 * there still are pending delayed ref.
914 node
= rb_prev(&head
->node
.rb_node
);
918 ref
= rb_entry(node
, struct btrfs_delayed_ref_node
,
920 if (ref
->bytenr
!= head
->node
.bytenr
)
922 if (btrfs_delayed_node_to_ref(ref
)->action
== action
)
924 node
= rb_prev(node
);
926 if (action
== BTRFS_ADD_DELAYED_REF
) {
927 action
= BTRFS_DROP_DELAYED_REF
;
933 static noinline
int run_clustered_refs(struct btrfs_trans_handle
*trans
,
934 struct btrfs_root
*root
,
935 struct list_head
*cluster
)
937 struct btrfs_delayed_ref_root
*delayed_refs
;
938 struct btrfs_delayed_ref_node
*ref
;
939 struct btrfs_delayed_ref_head
*locked_ref
= NULL
;
942 int must_insert_reserved
= 0;
944 delayed_refs
= &trans
->transaction
->delayed_refs
;
947 /* pick a new head ref from the cluster list */
948 if (list_empty(cluster
))
951 locked_ref
= list_entry(cluster
->next
,
952 struct btrfs_delayed_ref_head
, cluster
);
954 /* grab the lock that says we are going to process
955 * all the refs for this head */
956 ret
= btrfs_delayed_ref_lock(trans
, locked_ref
);
959 * we may have dropped the spin lock to get the head
960 * mutex lock, and that might have given someone else
961 * time to free the head. If that's true, it has been
962 * removed from our list and we can move on.
964 if (ret
== -EAGAIN
) {
972 * record the must insert reserved flag before we
973 * drop the spin lock.
975 must_insert_reserved
= locked_ref
->must_insert_reserved
;
976 locked_ref
->must_insert_reserved
= 0;
979 * locked_ref is the head node, so we have to go one
980 * node back for any delayed ref updates
982 ref
= select_delayed_ref(locked_ref
);
984 /* All delayed refs have been processed, Go ahead
985 * and send the head node to run_one_delayed_ref,
986 * so that any accounting fixes can happen
988 ref
= &locked_ref
->node
;
989 list_del_init(&locked_ref
->cluster
);
994 rb_erase(&ref
->rb_node
, &delayed_refs
->root
);
995 delayed_refs
->num_entries
--;
996 spin_unlock(&delayed_refs
->lock
);
998 ret
= run_one_delayed_ref(trans
, root
, ref
,
999 must_insert_reserved
);
1001 btrfs_put_delayed_ref(ref
);
1005 spin_lock(&delayed_refs
->lock
);
1011 * this starts processing the delayed reference count updates and
1012 * extent insertions we have queued up so far. count can be
1013 * 0, which means to process everything in the tree at the start
1014 * of the run (but not newly added entries), or it can be some target
1015 * number you'd like to process.
1017 int btrfs_run_delayed_refs(struct btrfs_trans_handle
*trans
,
1018 struct btrfs_root
*root
, unsigned long count
)
1020 struct rb_node
*node
;
1021 struct btrfs_delayed_ref_root
*delayed_refs
;
1022 struct btrfs_delayed_ref_node
*ref
;
1023 struct list_head cluster
;
1025 int run_all
= count
== (unsigned long)-1;
1028 if (root
== root
->fs_info
->extent_root
)
1029 root
= root
->fs_info
->tree_root
;
1031 delayed_refs
= &trans
->transaction
->delayed_refs
;
1032 INIT_LIST_HEAD(&cluster
);
1034 spin_lock(&delayed_refs
->lock
);
1036 count
= delayed_refs
->num_entries
* 2;
1040 if (!(run_all
|| run_most
) &&
1041 delayed_refs
->num_heads_ready
< 64)
1045 * go find something we can process in the rbtree. We start at
1046 * the beginning of the tree, and then build a cluster
1047 * of refs to process starting at the first one we are able to
1050 ret
= btrfs_find_ref_cluster(trans
, &cluster
,
1051 delayed_refs
->run_delayed_start
);
1055 ret
= run_clustered_refs(trans
, root
, &cluster
);
1058 count
-= min_t(unsigned long, ret
, count
);
1065 node
= rb_first(&delayed_refs
->root
);
1068 count
= (unsigned long)-1;
1071 ref
= rb_entry(node
, struct btrfs_delayed_ref_node
,
1073 if (btrfs_delayed_ref_is_head(ref
)) {
1074 struct btrfs_delayed_ref_head
*head
;
1076 head
= btrfs_delayed_node_to_head(ref
);
1077 atomic_inc(&ref
->refs
);
1079 spin_unlock(&delayed_refs
->lock
);
1080 mutex_lock(&head
->mutex
);
1081 mutex_unlock(&head
->mutex
);
1083 btrfs_put_delayed_ref(ref
);
1087 node
= rb_next(node
);
1089 spin_unlock(&delayed_refs
->lock
);
1090 schedule_timeout(1);
1094 spin_unlock(&delayed_refs
->lock
);
1098 int btrfs_cross_ref_exist(struct btrfs_trans_handle
*trans
,
1099 struct btrfs_root
*root
, u64 objectid
, u64 bytenr
)
1101 struct btrfs_root
*extent_root
= root
->fs_info
->extent_root
;
1102 struct btrfs_path
*path
;
1103 struct extent_buffer
*leaf
;
1104 struct btrfs_extent_ref
*ref_item
;
1105 struct btrfs_key key
;
1106 struct btrfs_key found_key
;
1112 key
.objectid
= bytenr
;
1113 key
.offset
= (u64
)-1;
1114 key
.type
= BTRFS_EXTENT_ITEM_KEY
;
1116 path
= btrfs_alloc_path();
1117 ret
= btrfs_search_slot(NULL
, extent_root
, &key
, path
, 0, 0);
1123 if (path
->slots
[0] == 0)
1127 leaf
= path
->nodes
[0];
1128 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0]);
1130 if (found_key
.objectid
!= bytenr
||
1131 found_key
.type
!= BTRFS_EXTENT_ITEM_KEY
)
1134 last_snapshot
= btrfs_root_last_snapshot(&root
->root_item
);
1136 leaf
= path
->nodes
[0];
1137 nritems
= btrfs_header_nritems(leaf
);
1138 if (path
->slots
[0] >= nritems
) {
1139 ret
= btrfs_next_leaf(extent_root
, path
);
1146 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0]);
1147 if (found_key
.objectid
!= bytenr
)
1150 if (found_key
.type
!= BTRFS_EXTENT_REF_KEY
) {
1155 ref_item
= btrfs_item_ptr(leaf
, path
->slots
[0],
1156 struct btrfs_extent_ref
);
1157 ref_root
= btrfs_ref_root(leaf
, ref_item
);
1158 if ((ref_root
!= root
->root_key
.objectid
&&
1159 ref_root
!= BTRFS_TREE_LOG_OBJECTID
) ||
1160 objectid
!= btrfs_ref_objectid(leaf
, ref_item
)) {
1164 if (btrfs_ref_generation(leaf
, ref_item
) <= last_snapshot
) {
1173 btrfs_free_path(path
);
1177 int btrfs_cache_ref(struct btrfs_trans_handle
*trans
, struct btrfs_root
*root
,
1178 struct extent_buffer
*buf
, u32 nr_extents
)
1180 struct btrfs_key key
;
1181 struct btrfs_file_extent_item
*fi
;
1189 if (!root
->ref_cows
)
1192 if (root
->root_key
.objectid
!= BTRFS_TREE_RELOC_OBJECTID
) {
1194 root_gen
= root
->root_key
.offset
;
1197 root_gen
= trans
->transid
- 1;
1200 level
= btrfs_header_level(buf
);
1201 nritems
= btrfs_header_nritems(buf
);
1204 struct btrfs_leaf_ref
*ref
;
1205 struct btrfs_extent_info
*info
;
1207 ref
= btrfs_alloc_leaf_ref(root
, nr_extents
);
1213 ref
->root_gen
= root_gen
;
1214 ref
->bytenr
= buf
->start
;
1215 ref
->owner
= btrfs_header_owner(buf
);
1216 ref
->generation
= btrfs_header_generation(buf
);
1217 ref
->nritems
= nr_extents
;
1218 info
= ref
->extents
;
1220 for (i
= 0; nr_extents
> 0 && i
< nritems
; i
++) {
1222 btrfs_item_key_to_cpu(buf
, &key
, i
);
1223 if (btrfs_key_type(&key
) != BTRFS_EXTENT_DATA_KEY
)
1225 fi
= btrfs_item_ptr(buf
, i
,
1226 struct btrfs_file_extent_item
);
1227 if (btrfs_file_extent_type(buf
, fi
) ==
1228 BTRFS_FILE_EXTENT_INLINE
)
1230 disk_bytenr
= btrfs_file_extent_disk_bytenr(buf
, fi
);
1231 if (disk_bytenr
== 0)
1234 info
->bytenr
= disk_bytenr
;
1236 btrfs_file_extent_disk_num_bytes(buf
, fi
);
1237 info
->objectid
= key
.objectid
;
1238 info
->offset
= key
.offset
;
1242 ret
= btrfs_add_leaf_ref(root
, ref
, shared
);
1243 if (ret
== -EEXIST
&& shared
) {
1244 struct btrfs_leaf_ref
*old
;
1245 old
= btrfs_lookup_leaf_ref(root
, ref
->bytenr
);
1247 btrfs_remove_leaf_ref(root
, old
);
1248 btrfs_free_leaf_ref(root
, old
);
1249 ret
= btrfs_add_leaf_ref(root
, ref
, shared
);
1252 btrfs_free_leaf_ref(root
, ref
);
1258 /* when a block goes through cow, we update the reference counts of
1259 * everything that block points to. The internal pointers of the block
1260 * can be in just about any order, and it is likely to have clusters of
1261 * things that are close together and clusters of things that are not.
1263 * To help reduce the seeks that come with updating all of these reference
1264 * counts, sort them by byte number before actual updates are done.
1266 * struct refsort is used to match byte number to slot in the btree block.
1267 * we sort based on the byte number and then use the slot to actually
1270 * struct refsort is smaller than strcut btrfs_item and smaller than
1271 * struct btrfs_key_ptr. Since we're currently limited to the page size
1272 * for a btree block, there's no way for a kmalloc of refsorts for a
1273 * single node to be bigger than a page.
1281 * for passing into sort()
1283 static int refsort_cmp(const void *a_void
, const void *b_void
)
1285 const struct refsort
*a
= a_void
;
1286 const struct refsort
*b
= b_void
;
1288 if (a
->bytenr
< b
->bytenr
)
1290 if (a
->bytenr
> b
->bytenr
)
1296 noinline
int btrfs_inc_ref(struct btrfs_trans_handle
*trans
,
1297 struct btrfs_root
*root
,
1298 struct extent_buffer
*orig_buf
,
1299 struct extent_buffer
*buf
, u32
*nr_extents
)
1305 u64 orig_generation
;
1306 struct refsort
*sorted
;
1308 u32 nr_file_extents
= 0;
1309 struct btrfs_key key
;
1310 struct btrfs_file_extent_item
*fi
;
1317 int (*process_func
)(struct btrfs_trans_handle
*, struct btrfs_root
*,
1318 u64
, u64
, u64
, u64
, u64
, u64
, u64
, u64
, u64
);
1320 ref_root
= btrfs_header_owner(buf
);
1321 ref_generation
= btrfs_header_generation(buf
);
1322 orig_root
= btrfs_header_owner(orig_buf
);
1323 orig_generation
= btrfs_header_generation(orig_buf
);
1325 nritems
= btrfs_header_nritems(buf
);
1326 level
= btrfs_header_level(buf
);
1328 sorted
= kmalloc(sizeof(struct refsort
) * nritems
, GFP_NOFS
);
1331 if (root
->ref_cows
) {
1332 process_func
= __btrfs_inc_extent_ref
;
1335 root
->root_key
.objectid
!= BTRFS_TREE_LOG_OBJECTID
)
1338 root
->root_key
.objectid
== BTRFS_TREE_LOG_OBJECTID
)
1340 process_func
= __btrfs_update_extent_ref
;
1344 * we make two passes through the items. In the first pass we
1345 * only record the byte number and slot. Then we sort based on
1346 * byte number and do the actual work based on the sorted results
1348 for (i
= 0; i
< nritems
; i
++) {
1351 btrfs_item_key_to_cpu(buf
, &key
, i
);
1352 if (btrfs_key_type(&key
) != BTRFS_EXTENT_DATA_KEY
)
1354 fi
= btrfs_item_ptr(buf
, i
,
1355 struct btrfs_file_extent_item
);
1356 if (btrfs_file_extent_type(buf
, fi
) ==
1357 BTRFS_FILE_EXTENT_INLINE
)
1359 bytenr
= btrfs_file_extent_disk_bytenr(buf
, fi
);
1364 sorted
[refi
].bytenr
= bytenr
;
1365 sorted
[refi
].slot
= i
;
1368 bytenr
= btrfs_node_blockptr(buf
, i
);
1369 sorted
[refi
].bytenr
= bytenr
;
1370 sorted
[refi
].slot
= i
;
1375 * if refi == 0, we didn't actually put anything into the sorted
1376 * array and we're done
1381 sort(sorted
, refi
, sizeof(struct refsort
), refsort_cmp
, NULL
);
1383 for (i
= 0; i
< refi
; i
++) {
1385 slot
= sorted
[i
].slot
;
1386 bytenr
= sorted
[i
].bytenr
;
1389 btrfs_item_key_to_cpu(buf
, &key
, slot
);
1390 fi
= btrfs_item_ptr(buf
, slot
,
1391 struct btrfs_file_extent_item
);
1393 bytenr
= btrfs_file_extent_disk_bytenr(buf
, fi
);
1397 ret
= process_func(trans
, root
, bytenr
,
1398 btrfs_file_extent_disk_num_bytes(buf
, fi
),
1399 orig_buf
->start
, buf
->start
,
1400 orig_root
, ref_root
,
1401 orig_generation
, ref_generation
,
1410 ret
= process_func(trans
, root
, bytenr
, buf
->len
,
1411 orig_buf
->start
, buf
->start
,
1412 orig_root
, ref_root
,
1413 orig_generation
, ref_generation
,
1426 *nr_extents
= nr_file_extents
;
1428 *nr_extents
= nritems
;
1437 int btrfs_update_ref(struct btrfs_trans_handle
*trans
,
1438 struct btrfs_root
*root
, struct extent_buffer
*orig_buf
,
1439 struct extent_buffer
*buf
, int start_slot
, int nr
)
1446 u64 orig_generation
;
1447 struct btrfs_key key
;
1448 struct btrfs_file_extent_item
*fi
;
1454 BUG_ON(start_slot
< 0);
1455 BUG_ON(start_slot
+ nr
> btrfs_header_nritems(buf
));
1457 ref_root
= btrfs_header_owner(buf
);
1458 ref_generation
= btrfs_header_generation(buf
);
1459 orig_root
= btrfs_header_owner(orig_buf
);
1460 orig_generation
= btrfs_header_generation(orig_buf
);
1461 level
= btrfs_header_level(buf
);
1463 if (!root
->ref_cows
) {
1465 root
->root_key
.objectid
!= BTRFS_TREE_LOG_OBJECTID
)
1468 root
->root_key
.objectid
== BTRFS_TREE_LOG_OBJECTID
)
1472 for (i
= 0, slot
= start_slot
; i
< nr
; i
++, slot
++) {
1475 btrfs_item_key_to_cpu(buf
, &key
, slot
);
1476 if (btrfs_key_type(&key
) != BTRFS_EXTENT_DATA_KEY
)
1478 fi
= btrfs_item_ptr(buf
, slot
,
1479 struct btrfs_file_extent_item
);
1480 if (btrfs_file_extent_type(buf
, fi
) ==
1481 BTRFS_FILE_EXTENT_INLINE
)
1483 bytenr
= btrfs_file_extent_disk_bytenr(buf
, fi
);
1486 ret
= __btrfs_update_extent_ref(trans
, root
, bytenr
,
1487 btrfs_file_extent_disk_num_bytes(buf
, fi
),
1488 orig_buf
->start
, buf
->start
,
1489 orig_root
, ref_root
, orig_generation
,
1490 ref_generation
, key
.objectid
);
1494 bytenr
= btrfs_node_blockptr(buf
, slot
);
1495 ret
= __btrfs_update_extent_ref(trans
, root
, bytenr
,
1496 buf
->len
, orig_buf
->start
,
1497 buf
->start
, orig_root
, ref_root
,
1498 orig_generation
, ref_generation
,
1510 static int write_one_cache_group(struct btrfs_trans_handle
*trans
,
1511 struct btrfs_root
*root
,
1512 struct btrfs_path
*path
,
1513 struct btrfs_block_group_cache
*cache
)
1516 struct btrfs_root
*extent_root
= root
->fs_info
->extent_root
;
1518 struct extent_buffer
*leaf
;
1520 ret
= btrfs_search_slot(trans
, extent_root
, &cache
->key
, path
, 0, 1);
1525 leaf
= path
->nodes
[0];
1526 bi
= btrfs_item_ptr_offset(leaf
, path
->slots
[0]);
1527 write_extent_buffer(leaf
, &cache
->item
, bi
, sizeof(cache
->item
));
1528 btrfs_mark_buffer_dirty(leaf
);
1529 btrfs_release_path(extent_root
, path
);
1537 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle
*trans
,
1538 struct btrfs_root
*root
)
1540 struct btrfs_block_group_cache
*cache
, *entry
;
1544 struct btrfs_path
*path
;
1547 path
= btrfs_alloc_path();
1553 spin_lock(&root
->fs_info
->block_group_cache_lock
);
1554 for (n
= rb_first(&root
->fs_info
->block_group_cache_tree
);
1555 n
; n
= rb_next(n
)) {
1556 entry
= rb_entry(n
, struct btrfs_block_group_cache
,
1563 spin_unlock(&root
->fs_info
->block_group_cache_lock
);
1569 last
+= cache
->key
.offset
;
1571 err
= write_one_cache_group(trans
, root
,
1574 * if we fail to write the cache group, we want
1575 * to keep it marked dirty in hopes that a later
1583 btrfs_free_path(path
);
1587 int btrfs_extent_readonly(struct btrfs_root
*root
, u64 bytenr
)
1589 struct btrfs_block_group_cache
*block_group
;
1592 block_group
= btrfs_lookup_block_group(root
->fs_info
, bytenr
);
1593 if (!block_group
|| block_group
->ro
)
1596 btrfs_put_block_group(block_group
);
1600 static int update_space_info(struct btrfs_fs_info
*info
, u64 flags
,
1601 u64 total_bytes
, u64 bytes_used
,
1602 struct btrfs_space_info
**space_info
)
1604 struct btrfs_space_info
*found
;
1606 found
= __find_space_info(info
, flags
);
1608 spin_lock(&found
->lock
);
1609 found
->total_bytes
+= total_bytes
;
1610 found
->bytes_used
+= bytes_used
;
1612 spin_unlock(&found
->lock
);
1613 *space_info
= found
;
1616 found
= kzalloc(sizeof(*found
), GFP_NOFS
);
1620 INIT_LIST_HEAD(&found
->block_groups
);
1621 init_rwsem(&found
->groups_sem
);
1622 spin_lock_init(&found
->lock
);
1623 found
->flags
= flags
;
1624 found
->total_bytes
= total_bytes
;
1625 found
->bytes_used
= bytes_used
;
1626 found
->bytes_pinned
= 0;
1627 found
->bytes_reserved
= 0;
1628 found
->bytes_readonly
= 0;
1629 found
->bytes_delalloc
= 0;
1631 found
->force_alloc
= 0;
1632 *space_info
= found
;
1633 list_add_rcu(&found
->list
, &info
->space_info
);
1637 static void set_avail_alloc_bits(struct btrfs_fs_info
*fs_info
, u64 flags
)
1639 u64 extra_flags
= flags
& (BTRFS_BLOCK_GROUP_RAID0
|
1640 BTRFS_BLOCK_GROUP_RAID1
|
1641 BTRFS_BLOCK_GROUP_RAID10
|
1642 BTRFS_BLOCK_GROUP_DUP
);
1644 if (flags
& BTRFS_BLOCK_GROUP_DATA
)
1645 fs_info
->avail_data_alloc_bits
|= extra_flags
;
1646 if (flags
& BTRFS_BLOCK_GROUP_METADATA
)
1647 fs_info
->avail_metadata_alloc_bits
|= extra_flags
;
1648 if (flags
& BTRFS_BLOCK_GROUP_SYSTEM
)
1649 fs_info
->avail_system_alloc_bits
|= extra_flags
;
1653 static void set_block_group_readonly(struct btrfs_block_group_cache
*cache
)
1655 spin_lock(&cache
->space_info
->lock
);
1656 spin_lock(&cache
->lock
);
1658 cache
->space_info
->bytes_readonly
+= cache
->key
.offset
-
1659 btrfs_block_group_used(&cache
->item
);
1662 spin_unlock(&cache
->lock
);
1663 spin_unlock(&cache
->space_info
->lock
);
1666 u64
btrfs_reduce_alloc_profile(struct btrfs_root
*root
, u64 flags
)
1668 u64 num_devices
= root
->fs_info
->fs_devices
->rw_devices
;
1670 if (num_devices
== 1)
1671 flags
&= ~(BTRFS_BLOCK_GROUP_RAID1
| BTRFS_BLOCK_GROUP_RAID0
);
1672 if (num_devices
< 4)
1673 flags
&= ~BTRFS_BLOCK_GROUP_RAID10
;
1675 if ((flags
& BTRFS_BLOCK_GROUP_DUP
) &&
1676 (flags
& (BTRFS_BLOCK_GROUP_RAID1
|
1677 BTRFS_BLOCK_GROUP_RAID10
))) {
1678 flags
&= ~BTRFS_BLOCK_GROUP_DUP
;
1681 if ((flags
& BTRFS_BLOCK_GROUP_RAID1
) &&
1682 (flags
& BTRFS_BLOCK_GROUP_RAID10
)) {
1683 flags
&= ~BTRFS_BLOCK_GROUP_RAID1
;
1686 if ((flags
& BTRFS_BLOCK_GROUP_RAID0
) &&
1687 ((flags
& BTRFS_BLOCK_GROUP_RAID1
) |
1688 (flags
& BTRFS_BLOCK_GROUP_RAID10
) |
1689 (flags
& BTRFS_BLOCK_GROUP_DUP
)))
1690 flags
&= ~BTRFS_BLOCK_GROUP_RAID0
;
1694 static u64
btrfs_get_alloc_profile(struct btrfs_root
*root
, u64 data
)
1696 struct btrfs_fs_info
*info
= root
->fs_info
;
1700 alloc_profile
= info
->avail_data_alloc_bits
&
1701 info
->data_alloc_profile
;
1702 data
= BTRFS_BLOCK_GROUP_DATA
| alloc_profile
;
1703 } else if (root
== root
->fs_info
->chunk_root
) {
1704 alloc_profile
= info
->avail_system_alloc_bits
&
1705 info
->system_alloc_profile
;
1706 data
= BTRFS_BLOCK_GROUP_SYSTEM
| alloc_profile
;
1708 alloc_profile
= info
->avail_metadata_alloc_bits
&
1709 info
->metadata_alloc_profile
;
1710 data
= BTRFS_BLOCK_GROUP_METADATA
| alloc_profile
;
1713 return btrfs_reduce_alloc_profile(root
, data
);
1716 void btrfs_set_inode_space_info(struct btrfs_root
*root
, struct inode
*inode
)
1720 alloc_target
= btrfs_get_alloc_profile(root
, 1);
1721 BTRFS_I(inode
)->space_info
= __find_space_info(root
->fs_info
,
1726 * for now this just makes sure we have at least 5% of our metadata space free
1729 int btrfs_check_metadata_free_space(struct btrfs_root
*root
)
1731 struct btrfs_fs_info
*info
= root
->fs_info
;
1732 struct btrfs_space_info
*meta_sinfo
;
1733 u64 alloc_target
, thresh
;
1734 int committed
= 0, ret
;
1736 /* get the space info for where the metadata will live */
1737 alloc_target
= btrfs_get_alloc_profile(root
, 0);
1738 meta_sinfo
= __find_space_info(info
, alloc_target
);
1741 spin_lock(&meta_sinfo
->lock
);
1742 if (!meta_sinfo
->full
)
1743 thresh
= meta_sinfo
->total_bytes
* 80;
1745 thresh
= meta_sinfo
->total_bytes
* 95;
1747 do_div(thresh
, 100);
1749 if (meta_sinfo
->bytes_used
+ meta_sinfo
->bytes_reserved
+
1750 meta_sinfo
->bytes_pinned
+ meta_sinfo
->bytes_readonly
> thresh
) {
1751 struct btrfs_trans_handle
*trans
;
1752 if (!meta_sinfo
->full
) {
1753 meta_sinfo
->force_alloc
= 1;
1754 spin_unlock(&meta_sinfo
->lock
);
1756 trans
= btrfs_start_transaction(root
, 1);
1760 ret
= do_chunk_alloc(trans
, root
->fs_info
->extent_root
,
1761 2 * 1024 * 1024, alloc_target
, 0);
1762 btrfs_end_transaction(trans
, root
);
1765 spin_unlock(&meta_sinfo
->lock
);
1769 trans
= btrfs_join_transaction(root
, 1);
1772 ret
= btrfs_commit_transaction(trans
, root
);
1779 spin_unlock(&meta_sinfo
->lock
);
1785 * This will check the space that the inode allocates from to make sure we have
1786 * enough space for bytes.
1788 int btrfs_check_data_free_space(struct btrfs_root
*root
, struct inode
*inode
,
1791 struct btrfs_space_info
*data_sinfo
;
1792 int ret
= 0, committed
= 0;
1794 /* make sure bytes are sectorsize aligned */
1795 bytes
= (bytes
+ root
->sectorsize
- 1) & ~((u64
)root
->sectorsize
- 1);
1797 data_sinfo
= BTRFS_I(inode
)->space_info
;
1799 /* make sure we have enough space to handle the data first */
1800 spin_lock(&data_sinfo
->lock
);
1801 if (data_sinfo
->total_bytes
- data_sinfo
->bytes_used
-
1802 data_sinfo
->bytes_delalloc
- data_sinfo
->bytes_reserved
-
1803 data_sinfo
->bytes_pinned
- data_sinfo
->bytes_readonly
-
1804 data_sinfo
->bytes_may_use
< bytes
) {
1805 struct btrfs_trans_handle
*trans
;
1808 * if we don't have enough free bytes in this space then we need
1809 * to alloc a new chunk.
1811 if (!data_sinfo
->full
) {
1814 data_sinfo
->force_alloc
= 1;
1815 spin_unlock(&data_sinfo
->lock
);
1817 alloc_target
= btrfs_get_alloc_profile(root
, 1);
1818 trans
= btrfs_start_transaction(root
, 1);
1822 ret
= do_chunk_alloc(trans
, root
->fs_info
->extent_root
,
1823 bytes
+ 2 * 1024 * 1024,
1825 btrfs_end_transaction(trans
, root
);
1830 spin_unlock(&data_sinfo
->lock
);
1832 /* commit the current transaction and try again */
1835 trans
= btrfs_join_transaction(root
, 1);
1838 ret
= btrfs_commit_transaction(trans
, root
);
1844 printk(KERN_ERR
"no space left, need %llu, %llu delalloc bytes"
1845 ", %llu bytes_used, %llu bytes_reserved, "
1846 "%llu bytes_pinned, %llu bytes_readonly, %llu may use"
1847 "%llu total\n", bytes
, data_sinfo
->bytes_delalloc
,
1848 data_sinfo
->bytes_used
, data_sinfo
->bytes_reserved
,
1849 data_sinfo
->bytes_pinned
, data_sinfo
->bytes_readonly
,
1850 data_sinfo
->bytes_may_use
, data_sinfo
->total_bytes
);
1853 data_sinfo
->bytes_may_use
+= bytes
;
1854 BTRFS_I(inode
)->reserved_bytes
+= bytes
;
1855 spin_unlock(&data_sinfo
->lock
);
1857 return btrfs_check_metadata_free_space(root
);
1861 * if there was an error for whatever reason after calling
1862 * btrfs_check_data_free_space, call this so we can cleanup the counters.
1864 void btrfs_free_reserved_data_space(struct btrfs_root
*root
,
1865 struct inode
*inode
, u64 bytes
)
1867 struct btrfs_space_info
*data_sinfo
;
1869 /* make sure bytes are sectorsize aligned */
1870 bytes
= (bytes
+ root
->sectorsize
- 1) & ~((u64
)root
->sectorsize
- 1);
1872 data_sinfo
= BTRFS_I(inode
)->space_info
;
1873 spin_lock(&data_sinfo
->lock
);
1874 data_sinfo
->bytes_may_use
-= bytes
;
1875 BTRFS_I(inode
)->reserved_bytes
-= bytes
;
1876 spin_unlock(&data_sinfo
->lock
);
1879 /* called when we are adding a delalloc extent to the inode's io_tree */
1880 void btrfs_delalloc_reserve_space(struct btrfs_root
*root
, struct inode
*inode
,
1883 struct btrfs_space_info
*data_sinfo
;
1885 /* get the space info for where this inode will be storing its data */
1886 data_sinfo
= BTRFS_I(inode
)->space_info
;
1888 /* make sure we have enough space to handle the data first */
1889 spin_lock(&data_sinfo
->lock
);
1890 data_sinfo
->bytes_delalloc
+= bytes
;
1893 * we are adding a delalloc extent without calling
1894 * btrfs_check_data_free_space first. This happens on a weird
1895 * writepage condition, but shouldn't hurt our accounting
1897 if (unlikely(bytes
> BTRFS_I(inode
)->reserved_bytes
)) {
1898 data_sinfo
->bytes_may_use
-= BTRFS_I(inode
)->reserved_bytes
;
1899 BTRFS_I(inode
)->reserved_bytes
= 0;
1901 data_sinfo
->bytes_may_use
-= bytes
;
1902 BTRFS_I(inode
)->reserved_bytes
-= bytes
;
1905 spin_unlock(&data_sinfo
->lock
);
1908 /* called when we are clearing an delalloc extent from the inode's io_tree */
1909 void btrfs_delalloc_free_space(struct btrfs_root
*root
, struct inode
*inode
,
1912 struct btrfs_space_info
*info
;
1914 info
= BTRFS_I(inode
)->space_info
;
1916 spin_lock(&info
->lock
);
1917 info
->bytes_delalloc
-= bytes
;
1918 spin_unlock(&info
->lock
);
1921 static int do_chunk_alloc(struct btrfs_trans_handle
*trans
,
1922 struct btrfs_root
*extent_root
, u64 alloc_bytes
,
1923 u64 flags
, int force
)
1925 struct btrfs_space_info
*space_info
;
1929 mutex_lock(&extent_root
->fs_info
->chunk_mutex
);
1931 flags
= btrfs_reduce_alloc_profile(extent_root
, flags
);
1933 space_info
= __find_space_info(extent_root
->fs_info
, flags
);
1935 ret
= update_space_info(extent_root
->fs_info
, flags
,
1939 BUG_ON(!space_info
);
1941 spin_lock(&space_info
->lock
);
1942 if (space_info
->force_alloc
) {
1944 space_info
->force_alloc
= 0;
1946 if (space_info
->full
) {
1947 spin_unlock(&space_info
->lock
);
1951 thresh
= space_info
->total_bytes
- space_info
->bytes_readonly
;
1952 thresh
= div_factor(thresh
, 6);
1954 (space_info
->bytes_used
+ space_info
->bytes_pinned
+
1955 space_info
->bytes_reserved
+ alloc_bytes
) < thresh
) {
1956 spin_unlock(&space_info
->lock
);
1959 spin_unlock(&space_info
->lock
);
1961 ret
= btrfs_alloc_chunk(trans
, extent_root
, flags
);
1963 space_info
->full
= 1;
1965 mutex_unlock(&extent_root
->fs_info
->chunk_mutex
);
1969 static int update_block_group(struct btrfs_trans_handle
*trans
,
1970 struct btrfs_root
*root
,
1971 u64 bytenr
, u64 num_bytes
, int alloc
,
1974 struct btrfs_block_group_cache
*cache
;
1975 struct btrfs_fs_info
*info
= root
->fs_info
;
1976 u64 total
= num_bytes
;
1981 cache
= btrfs_lookup_block_group(info
, bytenr
);
1984 byte_in_group
= bytenr
- cache
->key
.objectid
;
1985 WARN_ON(byte_in_group
> cache
->key
.offset
);
1987 spin_lock(&cache
->space_info
->lock
);
1988 spin_lock(&cache
->lock
);
1990 old_val
= btrfs_block_group_used(&cache
->item
);
1991 num_bytes
= min(total
, cache
->key
.offset
- byte_in_group
);
1993 old_val
+= num_bytes
;
1994 cache
->space_info
->bytes_used
+= num_bytes
;
1996 cache
->space_info
->bytes_readonly
-= num_bytes
;
1997 btrfs_set_block_group_used(&cache
->item
, old_val
);
1998 spin_unlock(&cache
->lock
);
1999 spin_unlock(&cache
->space_info
->lock
);
2001 old_val
-= num_bytes
;
2002 cache
->space_info
->bytes_used
-= num_bytes
;
2004 cache
->space_info
->bytes_readonly
+= num_bytes
;
2005 btrfs_set_block_group_used(&cache
->item
, old_val
);
2006 spin_unlock(&cache
->lock
);
2007 spin_unlock(&cache
->space_info
->lock
);
2011 ret
= btrfs_discard_extent(root
, bytenr
,
2015 ret
= btrfs_add_free_space(cache
, bytenr
,
2020 btrfs_put_block_group(cache
);
2022 bytenr
+= num_bytes
;
2027 static u64
first_logical_byte(struct btrfs_root
*root
, u64 search_start
)
2029 struct btrfs_block_group_cache
*cache
;
2032 cache
= btrfs_lookup_first_block_group(root
->fs_info
, search_start
);
2036 bytenr
= cache
->key
.objectid
;
2037 btrfs_put_block_group(cache
);
2042 int btrfs_update_pinned_extents(struct btrfs_root
*root
,
2043 u64 bytenr
, u64 num
, int pin
)
2046 struct btrfs_block_group_cache
*cache
;
2047 struct btrfs_fs_info
*fs_info
= root
->fs_info
;
2050 set_extent_dirty(&fs_info
->pinned_extents
,
2051 bytenr
, bytenr
+ num
- 1, GFP_NOFS
);
2053 clear_extent_dirty(&fs_info
->pinned_extents
,
2054 bytenr
, bytenr
+ num
- 1, GFP_NOFS
);
2058 cache
= btrfs_lookup_block_group(fs_info
, bytenr
);
2060 len
= min(num
, cache
->key
.offset
-
2061 (bytenr
- cache
->key
.objectid
));
2063 spin_lock(&cache
->space_info
->lock
);
2064 spin_lock(&cache
->lock
);
2065 cache
->pinned
+= len
;
2066 cache
->space_info
->bytes_pinned
+= len
;
2067 spin_unlock(&cache
->lock
);
2068 spin_unlock(&cache
->space_info
->lock
);
2069 fs_info
->total_pinned
+= len
;
2071 spin_lock(&cache
->space_info
->lock
);
2072 spin_lock(&cache
->lock
);
2073 cache
->pinned
-= len
;
2074 cache
->space_info
->bytes_pinned
-= len
;
2075 spin_unlock(&cache
->lock
);
2076 spin_unlock(&cache
->space_info
->lock
);
2077 fs_info
->total_pinned
-= len
;
2079 btrfs_add_free_space(cache
, bytenr
, len
);
2081 btrfs_put_block_group(cache
);
2088 static int update_reserved_extents(struct btrfs_root
*root
,
2089 u64 bytenr
, u64 num
, int reserve
)
2092 struct btrfs_block_group_cache
*cache
;
2093 struct btrfs_fs_info
*fs_info
= root
->fs_info
;
2096 cache
= btrfs_lookup_block_group(fs_info
, bytenr
);
2098 len
= min(num
, cache
->key
.offset
-
2099 (bytenr
- cache
->key
.objectid
));
2101 spin_lock(&cache
->space_info
->lock
);
2102 spin_lock(&cache
->lock
);
2104 cache
->reserved
+= len
;
2105 cache
->space_info
->bytes_reserved
+= len
;
2107 cache
->reserved
-= len
;
2108 cache
->space_info
->bytes_reserved
-= len
;
2110 spin_unlock(&cache
->lock
);
2111 spin_unlock(&cache
->space_info
->lock
);
2112 btrfs_put_block_group(cache
);
2119 int btrfs_copy_pinned(struct btrfs_root
*root
, struct extent_io_tree
*copy
)
2124 struct extent_io_tree
*pinned_extents
= &root
->fs_info
->pinned_extents
;
2128 ret
= find_first_extent_bit(pinned_extents
, last
,
2129 &start
, &end
, EXTENT_DIRTY
);
2132 set_extent_dirty(copy
, start
, end
, GFP_NOFS
);
2138 int btrfs_finish_extent_commit(struct btrfs_trans_handle
*trans
,
2139 struct btrfs_root
*root
,
2140 struct extent_io_tree
*unpin
)
2147 ret
= find_first_extent_bit(unpin
, 0, &start
, &end
,
2152 ret
= btrfs_discard_extent(root
, start
, end
+ 1 - start
);
2154 /* unlocks the pinned mutex */
2155 btrfs_update_pinned_extents(root
, start
, end
+ 1 - start
, 0);
2156 clear_extent_dirty(unpin
, start
, end
, GFP_NOFS
);
2163 static int pin_down_bytes(struct btrfs_trans_handle
*trans
,
2164 struct btrfs_root
*root
,
2165 struct btrfs_path
*path
,
2166 u64 bytenr
, u64 num_bytes
, int is_data
,
2167 struct extent_buffer
**must_clean
)
2170 struct extent_buffer
*buf
;
2175 buf
= btrfs_find_tree_block(root
, bytenr
, num_bytes
);
2179 /* we can reuse a block if it hasn't been written
2180 * and it is from this transaction. We can't
2181 * reuse anything from the tree log root because
2182 * it has tiny sub-transactions.
2184 if (btrfs_buffer_uptodate(buf
, 0) &&
2185 btrfs_try_tree_lock(buf
)) {
2186 u64 header_owner
= btrfs_header_owner(buf
);
2187 u64 header_transid
= btrfs_header_generation(buf
);
2188 if (header_owner
!= BTRFS_TREE_LOG_OBJECTID
&&
2189 header_owner
!= BTRFS_TREE_RELOC_OBJECTID
&&
2190 header_owner
!= BTRFS_DATA_RELOC_TREE_OBJECTID
&&
2191 header_transid
== trans
->transid
&&
2192 !btrfs_header_flag(buf
, BTRFS_HEADER_FLAG_WRITTEN
)) {
2196 btrfs_tree_unlock(buf
);
2198 free_extent_buffer(buf
);
2200 btrfs_set_path_blocking(path
);
2201 /* unlocks the pinned mutex */
2202 btrfs_update_pinned_extents(root
, bytenr
, num_bytes
, 1);
2209 * remove an extent from the root, returns 0 on success
2211 static int __free_extent(struct btrfs_trans_handle
*trans
,
2212 struct btrfs_root
*root
,
2213 u64 bytenr
, u64 num_bytes
, u64 parent
,
2214 u64 root_objectid
, u64 ref_generation
,
2215 u64 owner_objectid
, int pin
, int mark_free
,
2218 struct btrfs_path
*path
;
2219 struct btrfs_key key
;
2220 struct btrfs_fs_info
*info
= root
->fs_info
;
2221 struct btrfs_root
*extent_root
= info
->extent_root
;
2222 struct extent_buffer
*leaf
;
2224 int extent_slot
= 0;
2225 int found_extent
= 0;
2227 struct btrfs_extent_item
*ei
;
2230 key
.objectid
= bytenr
;
2231 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
2232 key
.offset
= num_bytes
;
2233 path
= btrfs_alloc_path();
2238 path
->leave_spinning
= 1;
2239 ret
= lookup_extent_backref(trans
, extent_root
, path
,
2240 bytenr
, parent
, root_objectid
,
2241 ref_generation
, owner_objectid
, 1);
2243 struct btrfs_key found_key
;
2244 extent_slot
= path
->slots
[0];
2245 while (extent_slot
> 0) {
2247 btrfs_item_key_to_cpu(path
->nodes
[0], &found_key
,
2249 if (found_key
.objectid
!= bytenr
)
2251 if (found_key
.type
== BTRFS_EXTENT_ITEM_KEY
&&
2252 found_key
.offset
== num_bytes
) {
2256 if (path
->slots
[0] - extent_slot
> 5)
2259 if (!found_extent
) {
2260 ret
= remove_extent_backref(trans
, extent_root
, path
,
2263 btrfs_release_path(extent_root
, path
);
2264 path
->leave_spinning
= 1;
2265 ret
= btrfs_search_slot(trans
, extent_root
,
2268 printk(KERN_ERR
"umm, got %d back from search"
2269 ", was looking for %llu\n", ret
,
2270 (unsigned long long)bytenr
);
2271 btrfs_print_leaf(extent_root
, path
->nodes
[0]);
2274 extent_slot
= path
->slots
[0];
2277 btrfs_print_leaf(extent_root
, path
->nodes
[0]);
2279 printk(KERN_ERR
"btrfs unable to find ref byte nr %llu "
2280 "parent %llu root %llu gen %llu owner %llu\n",
2281 (unsigned long long)bytenr
,
2282 (unsigned long long)parent
,
2283 (unsigned long long)root_objectid
,
2284 (unsigned long long)ref_generation
,
2285 (unsigned long long)owner_objectid
);
2288 leaf
= path
->nodes
[0];
2289 ei
= btrfs_item_ptr(leaf
, extent_slot
,
2290 struct btrfs_extent_item
);
2291 refs
= btrfs_extent_refs(leaf
, ei
);
2294 * we're not allowed to delete the extent item if there
2295 * are other delayed ref updates pending
2298 BUG_ON(refs
< refs_to_drop
);
2299 refs
-= refs_to_drop
;
2300 btrfs_set_extent_refs(leaf
, ei
, refs
);
2301 btrfs_mark_buffer_dirty(leaf
);
2303 if (refs
== 0 && found_extent
&&
2304 path
->slots
[0] == extent_slot
+ 1) {
2305 struct btrfs_extent_ref
*ref
;
2306 ref
= btrfs_item_ptr(leaf
, path
->slots
[0],
2307 struct btrfs_extent_ref
);
2308 BUG_ON(btrfs_ref_num_refs(leaf
, ref
) != refs_to_drop
);
2309 /* if the back ref and the extent are next to each other
2310 * they get deleted below in one shot
2312 path
->slots
[0] = extent_slot
;
2314 } else if (found_extent
) {
2315 /* otherwise delete the extent back ref */
2316 ret
= remove_extent_backref(trans
, extent_root
, path
,
2319 /* if refs are 0, we need to setup the path for deletion */
2321 btrfs_release_path(extent_root
, path
);
2322 path
->leave_spinning
= 1;
2323 ret
= btrfs_search_slot(trans
, extent_root
, &key
, path
,
2332 struct extent_buffer
*must_clean
= NULL
;
2335 ret
= pin_down_bytes(trans
, root
, path
,
2337 owner_objectid
>= BTRFS_FIRST_FREE_OBJECTID
,
2344 /* block accounting for super block */
2345 spin_lock(&info
->delalloc_lock
);
2346 super_used
= btrfs_super_bytes_used(&info
->super_copy
);
2347 btrfs_set_super_bytes_used(&info
->super_copy
,
2348 super_used
- num_bytes
);
2350 /* block accounting for root item */
2351 root_used
= btrfs_root_used(&root
->root_item
);
2352 btrfs_set_root_used(&root
->root_item
,
2353 root_used
- num_bytes
);
2354 spin_unlock(&info
->delalloc_lock
);
2357 * it is going to be very rare for someone to be waiting
2358 * on the block we're freeing. del_items might need to
2359 * schedule, so rather than get fancy, just force it
2363 btrfs_set_lock_blocking(must_clean
);
2365 ret
= btrfs_del_items(trans
, extent_root
, path
, path
->slots
[0],
2368 btrfs_release_path(extent_root
, path
);
2371 clean_tree_block(NULL
, root
, must_clean
);
2372 btrfs_tree_unlock(must_clean
);
2373 free_extent_buffer(must_clean
);
2376 if (owner_objectid
>= BTRFS_FIRST_FREE_OBJECTID
) {
2377 ret
= btrfs_del_csums(trans
, root
, bytenr
, num_bytes
);
2380 invalidate_mapping_pages(info
->btree_inode
->i_mapping
,
2381 bytenr
>> PAGE_CACHE_SHIFT
,
2382 (bytenr
+ num_bytes
- 1) >> PAGE_CACHE_SHIFT
);
2385 ret
= update_block_group(trans
, root
, bytenr
, num_bytes
, 0,
2389 btrfs_free_path(path
);
2394 * remove an extent from the root, returns 0 on success
2396 static int __btrfs_free_extent(struct btrfs_trans_handle
*trans
,
2397 struct btrfs_root
*root
,
2398 u64 bytenr
, u64 num_bytes
, u64 parent
,
2399 u64 root_objectid
, u64 ref_generation
,
2400 u64 owner_objectid
, int pin
,
2403 WARN_ON(num_bytes
< root
->sectorsize
);
2406 * if metadata always pin
2407 * if data pin when any transaction has committed this
2409 if (owner_objectid
< BTRFS_FIRST_FREE_OBJECTID
||
2410 ref_generation
!= trans
->transid
)
2413 if (ref_generation
!= trans
->transid
)
2416 return __free_extent(trans
, root
, bytenr
, num_bytes
, parent
,
2417 root_objectid
, ref_generation
,
2418 owner_objectid
, pin
, pin
== 0, refs_to_drop
);
2422 * when we free an extent, it is possible (and likely) that we free the last
2423 * delayed ref for that extent as well. This searches the delayed ref tree for
2424 * a given extent, and if there are no other delayed refs to be processed, it
2425 * removes it from the tree.
2427 static noinline
int check_ref_cleanup(struct btrfs_trans_handle
*trans
,
2428 struct btrfs_root
*root
, u64 bytenr
)
2430 struct btrfs_delayed_ref_head
*head
;
2431 struct btrfs_delayed_ref_root
*delayed_refs
;
2432 struct btrfs_delayed_ref_node
*ref
;
2433 struct rb_node
*node
;
2436 delayed_refs
= &trans
->transaction
->delayed_refs
;
2437 spin_lock(&delayed_refs
->lock
);
2438 head
= btrfs_find_delayed_ref_head(trans
, bytenr
);
2442 node
= rb_prev(&head
->node
.rb_node
);
2446 ref
= rb_entry(node
, struct btrfs_delayed_ref_node
, rb_node
);
2448 /* there are still entries for this ref, we can't drop it */
2449 if (ref
->bytenr
== bytenr
)
2453 * waiting for the lock here would deadlock. If someone else has it
2454 * locked they are already in the process of dropping it anyway
2456 if (!mutex_trylock(&head
->mutex
))
2460 * at this point we have a head with no other entries. Go
2461 * ahead and process it.
2463 head
->node
.in_tree
= 0;
2464 rb_erase(&head
->node
.rb_node
, &delayed_refs
->root
);
2466 delayed_refs
->num_entries
--;
2469 * we don't take a ref on the node because we're removing it from the
2470 * tree, so we just steal the ref the tree was holding.
2472 delayed_refs
->num_heads
--;
2473 if (list_empty(&head
->cluster
))
2474 delayed_refs
->num_heads_ready
--;
2476 list_del_init(&head
->cluster
);
2477 spin_unlock(&delayed_refs
->lock
);
2479 ret
= run_one_delayed_ref(trans
, root
->fs_info
->tree_root
,
2480 &head
->node
, head
->must_insert_reserved
);
2482 btrfs_put_delayed_ref(&head
->node
);
2485 spin_unlock(&delayed_refs
->lock
);
2489 int btrfs_free_extent(struct btrfs_trans_handle
*trans
,
2490 struct btrfs_root
*root
,
2491 u64 bytenr
, u64 num_bytes
, u64 parent
,
2492 u64 root_objectid
, u64 ref_generation
,
2493 u64 owner_objectid
, int pin
)
2498 * tree log blocks never actually go into the extent allocation
2499 * tree, just update pinning info and exit early.
2501 * data extents referenced by the tree log do need to have
2502 * their reference counts bumped.
2504 if (root
->root_key
.objectid
== BTRFS_TREE_LOG_OBJECTID
&&
2505 owner_objectid
< BTRFS_FIRST_FREE_OBJECTID
) {
2506 /* unlocks the pinned mutex */
2507 btrfs_update_pinned_extents(root
, bytenr
, num_bytes
, 1);
2508 update_reserved_extents(root
, bytenr
, num_bytes
, 0);
2511 ret
= btrfs_add_delayed_ref(trans
, bytenr
, num_bytes
, parent
,
2512 root_objectid
, ref_generation
,
2514 BTRFS_DROP_DELAYED_REF
, 1);
2516 ret
= check_ref_cleanup(trans
, root
, bytenr
);
2522 static u64
stripe_align(struct btrfs_root
*root
, u64 val
)
2524 u64 mask
= ((u64
)root
->stripesize
- 1);
2525 u64 ret
= (val
+ mask
) & ~mask
;
2530 * walks the btree of allocated extents and find a hole of a given size.
2531 * The key ins is changed to record the hole:
2532 * ins->objectid == block start
2533 * ins->flags = BTRFS_EXTENT_ITEM_KEY
2534 * ins->offset == number of blocks
2535 * Any available blocks before search_start are skipped.
2537 static noinline
int find_free_extent(struct btrfs_trans_handle
*trans
,
2538 struct btrfs_root
*orig_root
,
2539 u64 num_bytes
, u64 empty_size
,
2540 u64 search_start
, u64 search_end
,
2541 u64 hint_byte
, struct btrfs_key
*ins
,
2542 u64 exclude_start
, u64 exclude_nr
,
2546 struct btrfs_root
*root
= orig_root
->fs_info
->extent_root
;
2547 struct btrfs_free_cluster
*last_ptr
= NULL
;
2548 struct btrfs_block_group_cache
*block_group
= NULL
;
2549 int empty_cluster
= 2 * 1024 * 1024;
2550 int allowed_chunk_alloc
= 0;
2551 struct btrfs_space_info
*space_info
;
2552 int last_ptr_loop
= 0;
2555 WARN_ON(num_bytes
< root
->sectorsize
);
2556 btrfs_set_key_type(ins
, BTRFS_EXTENT_ITEM_KEY
);
2560 space_info
= __find_space_info(root
->fs_info
, data
);
2562 if (orig_root
->ref_cows
|| empty_size
)
2563 allowed_chunk_alloc
= 1;
2565 if (data
& BTRFS_BLOCK_GROUP_METADATA
) {
2566 last_ptr
= &root
->fs_info
->meta_alloc_cluster
;
2567 if (!btrfs_test_opt(root
, SSD
))
2568 empty_cluster
= 64 * 1024;
2571 if ((data
& BTRFS_BLOCK_GROUP_DATA
) && btrfs_test_opt(root
, SSD
)) {
2572 last_ptr
= &root
->fs_info
->data_alloc_cluster
;
2576 spin_lock(&last_ptr
->lock
);
2577 if (last_ptr
->block_group
)
2578 hint_byte
= last_ptr
->window_start
;
2579 spin_unlock(&last_ptr
->lock
);
2582 search_start
= max(search_start
, first_logical_byte(root
, 0));
2583 search_start
= max(search_start
, hint_byte
);
2590 if (search_start
== hint_byte
) {
2591 block_group
= btrfs_lookup_block_group(root
->fs_info
,
2593 if (block_group
&& block_group_bits(block_group
, data
)) {
2594 down_read(&space_info
->groups_sem
);
2595 goto have_block_group
;
2596 } else if (block_group
) {
2597 btrfs_put_block_group(block_group
);
2602 down_read(&space_info
->groups_sem
);
2603 list_for_each_entry(block_group
, &space_info
->block_groups
, list
) {
2606 atomic_inc(&block_group
->count
);
2607 search_start
= block_group
->key
.objectid
;
2610 if (unlikely(!block_group
->cached
)) {
2611 mutex_lock(&block_group
->cache_mutex
);
2612 ret
= cache_block_group(root
, block_group
);
2613 mutex_unlock(&block_group
->cache_mutex
);
2615 btrfs_put_block_group(block_group
);
2620 if (unlikely(block_group
->ro
))
2625 * the refill lock keeps out other
2626 * people trying to start a new cluster
2628 spin_lock(&last_ptr
->refill_lock
);
2629 offset
= btrfs_alloc_from_cluster(block_group
, last_ptr
,
2630 num_bytes
, search_start
);
2632 /* we have a block, we're done */
2633 spin_unlock(&last_ptr
->refill_lock
);
2637 spin_lock(&last_ptr
->lock
);
2639 * whoops, this cluster doesn't actually point to
2640 * this block group. Get a ref on the block
2641 * group is does point to and try again
2643 if (!last_ptr_loop
&& last_ptr
->block_group
&&
2644 last_ptr
->block_group
!= block_group
) {
2646 btrfs_put_block_group(block_group
);
2647 block_group
= last_ptr
->block_group
;
2648 atomic_inc(&block_group
->count
);
2649 spin_unlock(&last_ptr
->lock
);
2650 spin_unlock(&last_ptr
->refill_lock
);
2653 search_start
= block_group
->key
.objectid
;
2654 goto have_block_group
;
2656 spin_unlock(&last_ptr
->lock
);
2659 * this cluster didn't work out, free it and
2662 btrfs_return_cluster_to_free_space(NULL
, last_ptr
);
2666 /* allocate a cluster in this block group */
2667 ret
= btrfs_find_space_cluster(trans
,
2668 block_group
, last_ptr
,
2670 empty_cluster
+ empty_size
);
2673 * now pull our allocation out of this
2676 offset
= btrfs_alloc_from_cluster(block_group
,
2677 last_ptr
, num_bytes
,
2680 /* we found one, proceed */
2681 spin_unlock(&last_ptr
->refill_lock
);
2686 * at this point we either didn't find a cluster
2687 * or we weren't able to allocate a block from our
2688 * cluster. Free the cluster we've been trying
2689 * to use, and go to the next block group
2692 btrfs_return_cluster_to_free_space(NULL
,
2694 spin_unlock(&last_ptr
->refill_lock
);
2697 spin_unlock(&last_ptr
->refill_lock
);
2700 offset
= btrfs_find_space_for_alloc(block_group
, search_start
,
2701 num_bytes
, empty_size
);
2705 search_start
= stripe_align(root
, offset
);
2707 /* move on to the next group */
2708 if (search_start
+ num_bytes
>= search_end
) {
2709 btrfs_add_free_space(block_group
, offset
, num_bytes
);
2713 /* move on to the next group */
2714 if (search_start
+ num_bytes
>
2715 block_group
->key
.objectid
+ block_group
->key
.offset
) {
2716 btrfs_add_free_space(block_group
, offset
, num_bytes
);
2720 if (exclude_nr
> 0 &&
2721 (search_start
+ num_bytes
> exclude_start
&&
2722 search_start
< exclude_start
+ exclude_nr
)) {
2723 search_start
= exclude_start
+ exclude_nr
;
2725 btrfs_add_free_space(block_group
, offset
, num_bytes
);
2727 * if search_start is still in this block group
2728 * then we just re-search this block group
2730 if (search_start
>= block_group
->key
.objectid
&&
2731 search_start
< (block_group
->key
.objectid
+
2732 block_group
->key
.offset
))
2733 goto have_block_group
;
2737 ins
->objectid
= search_start
;
2738 ins
->offset
= num_bytes
;
2740 if (offset
< search_start
)
2741 btrfs_add_free_space(block_group
, offset
,
2742 search_start
- offset
);
2743 BUG_ON(offset
> search_start
);
2745 /* we are all good, lets return */
2748 btrfs_put_block_group(block_group
);
2750 up_read(&space_info
->groups_sem
);
2752 /* loop == 0, try to find a clustered alloc in every block group
2753 * loop == 1, try again after forcing a chunk allocation
2754 * loop == 2, set empty_size and empty_cluster to 0 and try again
2756 if (!ins
->objectid
&& loop
< 3 &&
2757 (empty_size
|| empty_cluster
|| allowed_chunk_alloc
)) {
2763 if (allowed_chunk_alloc
) {
2764 ret
= do_chunk_alloc(trans
, root
, num_bytes
+
2765 2 * 1024 * 1024, data
, 1);
2766 allowed_chunk_alloc
= 0;
2768 space_info
->force_alloc
= 1;
2776 } else if (!ins
->objectid
) {
2780 /* we found what we needed */
2781 if (ins
->objectid
) {
2782 if (!(data
& BTRFS_BLOCK_GROUP_DATA
))
2783 trans
->block_group
= block_group
->key
.objectid
;
2785 btrfs_put_block_group(block_group
);
2792 static void dump_space_info(struct btrfs_space_info
*info
, u64 bytes
)
2794 struct btrfs_block_group_cache
*cache
;
2796 printk(KERN_INFO
"space_info has %llu free, is %sfull\n",
2797 (unsigned long long)(info
->total_bytes
- info
->bytes_used
-
2798 info
->bytes_pinned
- info
->bytes_reserved
),
2799 (info
->full
) ? "" : "not ");
2800 printk(KERN_INFO
"space_info total=%llu, pinned=%llu, delalloc=%llu,"
2801 " may_use=%llu, used=%llu\n", info
->total_bytes
,
2802 info
->bytes_pinned
, info
->bytes_delalloc
, info
->bytes_may_use
,
2805 down_read(&info
->groups_sem
);
2806 list_for_each_entry(cache
, &info
->block_groups
, list
) {
2807 spin_lock(&cache
->lock
);
2808 printk(KERN_INFO
"block group %llu has %llu bytes, %llu used "
2809 "%llu pinned %llu reserved\n",
2810 (unsigned long long)cache
->key
.objectid
,
2811 (unsigned long long)cache
->key
.offset
,
2812 (unsigned long long)btrfs_block_group_used(&cache
->item
),
2813 (unsigned long long)cache
->pinned
,
2814 (unsigned long long)cache
->reserved
);
2815 btrfs_dump_free_space(cache
, bytes
);
2816 spin_unlock(&cache
->lock
);
2818 up_read(&info
->groups_sem
);
2821 static int __btrfs_reserve_extent(struct btrfs_trans_handle
*trans
,
2822 struct btrfs_root
*root
,
2823 u64 num_bytes
, u64 min_alloc_size
,
2824 u64 empty_size
, u64 hint_byte
,
2825 u64 search_end
, struct btrfs_key
*ins
,
2829 u64 search_start
= 0;
2830 struct btrfs_fs_info
*info
= root
->fs_info
;
2832 data
= btrfs_get_alloc_profile(root
, data
);
2835 * the only place that sets empty_size is btrfs_realloc_node, which
2836 * is not called recursively on allocations
2838 if (empty_size
|| root
->ref_cows
) {
2839 if (!(data
& BTRFS_BLOCK_GROUP_METADATA
)) {
2840 ret
= do_chunk_alloc(trans
, root
->fs_info
->extent_root
,
2842 BTRFS_BLOCK_GROUP_METADATA
|
2843 (info
->metadata_alloc_profile
&
2844 info
->avail_metadata_alloc_bits
), 0);
2846 ret
= do_chunk_alloc(trans
, root
->fs_info
->extent_root
,
2847 num_bytes
+ 2 * 1024 * 1024, data
, 0);
2850 WARN_ON(num_bytes
< root
->sectorsize
);
2851 ret
= find_free_extent(trans
, root
, num_bytes
, empty_size
,
2852 search_start
, search_end
, hint_byte
, ins
,
2853 trans
->alloc_exclude_start
,
2854 trans
->alloc_exclude_nr
, data
);
2856 if (ret
== -ENOSPC
&& num_bytes
> min_alloc_size
) {
2857 num_bytes
= num_bytes
>> 1;
2858 num_bytes
= num_bytes
& ~(root
->sectorsize
- 1);
2859 num_bytes
= max(num_bytes
, min_alloc_size
);
2860 do_chunk_alloc(trans
, root
->fs_info
->extent_root
,
2861 num_bytes
, data
, 1);
2865 struct btrfs_space_info
*sinfo
;
2867 sinfo
= __find_space_info(root
->fs_info
, data
);
2868 printk(KERN_ERR
"btrfs allocation failed flags %llu, "
2869 "wanted %llu\n", (unsigned long long)data
,
2870 (unsigned long long)num_bytes
);
2871 dump_space_info(sinfo
, num_bytes
);
2878 int btrfs_free_reserved_extent(struct btrfs_root
*root
, u64 start
, u64 len
)
2880 struct btrfs_block_group_cache
*cache
;
2883 cache
= btrfs_lookup_block_group(root
->fs_info
, start
);
2885 printk(KERN_ERR
"Unable to find block group for %llu\n",
2886 (unsigned long long)start
);
2890 ret
= btrfs_discard_extent(root
, start
, len
);
2892 btrfs_add_free_space(cache
, start
, len
);
2893 btrfs_put_block_group(cache
);
2894 update_reserved_extents(root
, start
, len
, 0);
2899 int btrfs_reserve_extent(struct btrfs_trans_handle
*trans
,
2900 struct btrfs_root
*root
,
2901 u64 num_bytes
, u64 min_alloc_size
,
2902 u64 empty_size
, u64 hint_byte
,
2903 u64 search_end
, struct btrfs_key
*ins
,
2907 ret
= __btrfs_reserve_extent(trans
, root
, num_bytes
, min_alloc_size
,
2908 empty_size
, hint_byte
, search_end
, ins
,
2910 update_reserved_extents(root
, ins
->objectid
, ins
->offset
, 1);
2914 static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle
*trans
,
2915 struct btrfs_root
*root
, u64 parent
,
2916 u64 root_objectid
, u64 ref_generation
,
2917 u64 owner
, struct btrfs_key
*ins
,
2923 u64 num_bytes
= ins
->offset
;
2925 struct btrfs_fs_info
*info
= root
->fs_info
;
2926 struct btrfs_root
*extent_root
= info
->extent_root
;
2927 struct btrfs_extent_item
*extent_item
;
2928 struct btrfs_extent_ref
*ref
;
2929 struct btrfs_path
*path
;
2930 struct btrfs_key keys
[2];
2933 parent
= ins
->objectid
;
2935 /* block accounting for super block */
2936 spin_lock(&info
->delalloc_lock
);
2937 super_used
= btrfs_super_bytes_used(&info
->super_copy
);
2938 btrfs_set_super_bytes_used(&info
->super_copy
, super_used
+ num_bytes
);
2940 /* block accounting for root item */
2941 root_used
= btrfs_root_used(&root
->root_item
);
2942 btrfs_set_root_used(&root
->root_item
, root_used
+ num_bytes
);
2943 spin_unlock(&info
->delalloc_lock
);
2945 memcpy(&keys
[0], ins
, sizeof(*ins
));
2946 keys
[1].objectid
= ins
->objectid
;
2947 keys
[1].type
= BTRFS_EXTENT_REF_KEY
;
2948 keys
[1].offset
= parent
;
2949 sizes
[0] = sizeof(*extent_item
);
2950 sizes
[1] = sizeof(*ref
);
2952 path
= btrfs_alloc_path();
2955 path
->leave_spinning
= 1;
2956 ret
= btrfs_insert_empty_items(trans
, extent_root
, path
, keys
,
2960 extent_item
= btrfs_item_ptr(path
->nodes
[0], path
->slots
[0],
2961 struct btrfs_extent_item
);
2962 btrfs_set_extent_refs(path
->nodes
[0], extent_item
, ref_mod
);
2963 ref
= btrfs_item_ptr(path
->nodes
[0], path
->slots
[0] + 1,
2964 struct btrfs_extent_ref
);
2966 btrfs_set_ref_root(path
->nodes
[0], ref
, root_objectid
);
2967 btrfs_set_ref_generation(path
->nodes
[0], ref
, ref_generation
);
2968 btrfs_set_ref_objectid(path
->nodes
[0], ref
, owner
);
2969 btrfs_set_ref_num_refs(path
->nodes
[0], ref
, ref_mod
);
2971 btrfs_mark_buffer_dirty(path
->nodes
[0]);
2973 trans
->alloc_exclude_start
= 0;
2974 trans
->alloc_exclude_nr
= 0;
2975 btrfs_free_path(path
);
2980 ret
= update_block_group(trans
, root
, ins
->objectid
,
2983 printk(KERN_ERR
"btrfs update block group failed for %llu "
2984 "%llu\n", (unsigned long long)ins
->objectid
,
2985 (unsigned long long)ins
->offset
);
2992 int btrfs_alloc_reserved_extent(struct btrfs_trans_handle
*trans
,
2993 struct btrfs_root
*root
, u64 parent
,
2994 u64 root_objectid
, u64 ref_generation
,
2995 u64 owner
, struct btrfs_key
*ins
)
2999 if (root_objectid
== BTRFS_TREE_LOG_OBJECTID
)
3002 ret
= btrfs_add_delayed_ref(trans
, ins
->objectid
,
3003 ins
->offset
, parent
, root_objectid
,
3004 ref_generation
, owner
,
3005 BTRFS_ADD_DELAYED_EXTENT
, 0);
3011 * this is used by the tree logging recovery code. It records that
3012 * an extent has been allocated and makes sure to clear the free
3013 * space cache bits as well
3015 int btrfs_alloc_logged_extent(struct btrfs_trans_handle
*trans
,
3016 struct btrfs_root
*root
, u64 parent
,
3017 u64 root_objectid
, u64 ref_generation
,
3018 u64 owner
, struct btrfs_key
*ins
)
3021 struct btrfs_block_group_cache
*block_group
;
3023 block_group
= btrfs_lookup_block_group(root
->fs_info
, ins
->objectid
);
3024 mutex_lock(&block_group
->cache_mutex
);
3025 cache_block_group(root
, block_group
);
3026 mutex_unlock(&block_group
->cache_mutex
);
3028 ret
= btrfs_remove_free_space(block_group
, ins
->objectid
,
3031 btrfs_put_block_group(block_group
);
3032 ret
= __btrfs_alloc_reserved_extent(trans
, root
, parent
, root_objectid
,
3033 ref_generation
, owner
, ins
, 1);
3038 * finds a free extent and does all the dirty work required for allocation
3039 * returns the key for the extent through ins, and a tree buffer for
3040 * the first block of the extent through buf.
3042 * returns 0 if everything worked, non-zero otherwise.
3044 int btrfs_alloc_extent(struct btrfs_trans_handle
*trans
,
3045 struct btrfs_root
*root
,
3046 u64 num_bytes
, u64 parent
, u64 min_alloc_size
,
3047 u64 root_objectid
, u64 ref_generation
,
3048 u64 owner_objectid
, u64 empty_size
, u64 hint_byte
,
3049 u64 search_end
, struct btrfs_key
*ins
, u64 data
)
3052 ret
= __btrfs_reserve_extent(trans
, root
, num_bytes
,
3053 min_alloc_size
, empty_size
, hint_byte
,
3054 search_end
, ins
, data
);
3056 if (root_objectid
!= BTRFS_TREE_LOG_OBJECTID
) {
3057 ret
= btrfs_add_delayed_ref(trans
, ins
->objectid
,
3058 ins
->offset
, parent
, root_objectid
,
3059 ref_generation
, owner_objectid
,
3060 BTRFS_ADD_DELAYED_EXTENT
, 0);
3063 update_reserved_extents(root
, ins
->objectid
, ins
->offset
, 1);
3067 struct extent_buffer
*btrfs_init_new_buffer(struct btrfs_trans_handle
*trans
,
3068 struct btrfs_root
*root
,
3069 u64 bytenr
, u32 blocksize
,
3072 struct extent_buffer
*buf
;
3074 buf
= btrfs_find_create_tree_block(root
, bytenr
, blocksize
);
3076 return ERR_PTR(-ENOMEM
);
3077 btrfs_set_header_generation(buf
, trans
->transid
);
3078 btrfs_set_buffer_lockdep_class(buf
, level
);
3079 btrfs_tree_lock(buf
);
3080 clean_tree_block(trans
, root
, buf
);
3082 btrfs_set_lock_blocking(buf
);
3083 btrfs_set_buffer_uptodate(buf
);
3085 if (root
->root_key
.objectid
== BTRFS_TREE_LOG_OBJECTID
) {
3086 set_extent_dirty(&root
->dirty_log_pages
, buf
->start
,
3087 buf
->start
+ buf
->len
- 1, GFP_NOFS
);
3089 set_extent_dirty(&trans
->transaction
->dirty_pages
, buf
->start
,
3090 buf
->start
+ buf
->len
- 1, GFP_NOFS
);
3092 trans
->blocks_used
++;
3093 /* this returns a buffer locked for blocking */
3098 * helper function to allocate a block for a given tree
3099 * returns the tree buffer or NULL.
3101 struct extent_buffer
*btrfs_alloc_free_block(struct btrfs_trans_handle
*trans
,
3102 struct btrfs_root
*root
,
3103 u32 blocksize
, u64 parent
,
3110 struct btrfs_key ins
;
3112 struct extent_buffer
*buf
;
3114 ret
= btrfs_alloc_extent(trans
, root
, blocksize
, parent
, blocksize
,
3115 root_objectid
, ref_generation
, level
,
3116 empty_size
, hint
, (u64
)-1, &ins
, 0);
3119 return ERR_PTR(ret
);
3122 buf
= btrfs_init_new_buffer(trans
, root
, ins
.objectid
,
3127 int btrfs_drop_leaf_ref(struct btrfs_trans_handle
*trans
,
3128 struct btrfs_root
*root
, struct extent_buffer
*leaf
)
3131 u64 leaf_generation
;
3132 struct refsort
*sorted
;
3133 struct btrfs_key key
;
3134 struct btrfs_file_extent_item
*fi
;
3141 BUG_ON(!btrfs_is_leaf(leaf
));
3142 nritems
= btrfs_header_nritems(leaf
);
3143 leaf_owner
= btrfs_header_owner(leaf
);
3144 leaf_generation
= btrfs_header_generation(leaf
);
3146 sorted
= kmalloc(sizeof(*sorted
) * nritems
, GFP_NOFS
);
3147 /* we do this loop twice. The first time we build a list
3148 * of the extents we have a reference on, then we sort the list
3149 * by bytenr. The second time around we actually do the
3152 for (i
= 0; i
< nritems
; i
++) {
3156 btrfs_item_key_to_cpu(leaf
, &key
, i
);
3158 /* only extents have references, skip everything else */
3159 if (btrfs_key_type(&key
) != BTRFS_EXTENT_DATA_KEY
)
3162 fi
= btrfs_item_ptr(leaf
, i
, struct btrfs_file_extent_item
);
3164 /* inline extents live in the btree, they don't have refs */
3165 if (btrfs_file_extent_type(leaf
, fi
) ==
3166 BTRFS_FILE_EXTENT_INLINE
)
3169 disk_bytenr
= btrfs_file_extent_disk_bytenr(leaf
, fi
);
3171 /* holes don't have refs */
3172 if (disk_bytenr
== 0)
3175 sorted
[refi
].bytenr
= disk_bytenr
;
3176 sorted
[refi
].slot
= i
;
3183 sort(sorted
, refi
, sizeof(struct refsort
), refsort_cmp
, NULL
);
3185 for (i
= 0; i
< refi
; i
++) {
3188 disk_bytenr
= sorted
[i
].bytenr
;
3189 slot
= sorted
[i
].slot
;
3193 btrfs_item_key_to_cpu(leaf
, &key
, slot
);
3194 if (btrfs_key_type(&key
) != BTRFS_EXTENT_DATA_KEY
)
3197 fi
= btrfs_item_ptr(leaf
, slot
, struct btrfs_file_extent_item
);
3199 ret
= btrfs_free_extent(trans
, root
, disk_bytenr
,
3200 btrfs_file_extent_disk_num_bytes(leaf
, fi
),
3201 leaf
->start
, leaf_owner
, leaf_generation
,
3205 atomic_inc(&root
->fs_info
->throttle_gen
);
3206 wake_up(&root
->fs_info
->transaction_throttle
);
3214 static noinline
int cache_drop_leaf_ref(struct btrfs_trans_handle
*trans
,
3215 struct btrfs_root
*root
,
3216 struct btrfs_leaf_ref
*ref
)
3220 struct btrfs_extent_info
*info
;
3221 struct refsort
*sorted
;
3223 if (ref
->nritems
== 0)
3226 sorted
= kmalloc(sizeof(*sorted
) * ref
->nritems
, GFP_NOFS
);
3227 for (i
= 0; i
< ref
->nritems
; i
++) {
3228 sorted
[i
].bytenr
= ref
->extents
[i
].bytenr
;
3231 sort(sorted
, ref
->nritems
, sizeof(struct refsort
), refsort_cmp
, NULL
);
3234 * the items in the ref were sorted when the ref was inserted
3235 * into the ref cache, so this is already in order
3237 for (i
= 0; i
< ref
->nritems
; i
++) {
3238 info
= ref
->extents
+ sorted
[i
].slot
;
3239 ret
= btrfs_free_extent(trans
, root
, info
->bytenr
,
3240 info
->num_bytes
, ref
->bytenr
,
3241 ref
->owner
, ref
->generation
,
3244 atomic_inc(&root
->fs_info
->throttle_gen
);
3245 wake_up(&root
->fs_info
->transaction_throttle
);
3256 static int drop_snap_lookup_refcount(struct btrfs_trans_handle
*trans
,
3257 struct btrfs_root
*root
, u64 start
,
3262 ret
= btrfs_lookup_extent_ref(trans
, root
, start
, len
, refs
);
3265 #if 0 /* some debugging code in case we see problems here */
3266 /* if the refs count is one, it won't get increased again. But
3267 * if the ref count is > 1, someone may be decreasing it at
3268 * the same time we are.
3271 struct extent_buffer
*eb
= NULL
;
3272 eb
= btrfs_find_create_tree_block(root
, start
, len
);
3274 btrfs_tree_lock(eb
);
3276 mutex_lock(&root
->fs_info
->alloc_mutex
);
3277 ret
= lookup_extent_ref(NULL
, root
, start
, len
, refs
);
3279 mutex_unlock(&root
->fs_info
->alloc_mutex
);
3282 btrfs_tree_unlock(eb
);
3283 free_extent_buffer(eb
);
3286 printk(KERN_ERR
"btrfs block %llu went down to one "
3287 "during drop_snap\n", (unsigned long long)start
);
3298 * this is used while deleting old snapshots, and it drops the refs
3299 * on a whole subtree starting from a level 1 node.
3301 * The idea is to sort all the leaf pointers, and then drop the
3302 * ref on all the leaves in order. Most of the time the leaves
3303 * will have ref cache entries, so no leaf IOs will be required to
3304 * find the extents they have references on.
3306 * For each leaf, any references it has are also dropped in order
3308 * This ends up dropping the references in something close to optimal
3309 * order for reading and modifying the extent allocation tree.
3311 static noinline
int drop_level_one_refs(struct btrfs_trans_handle
*trans
,
3312 struct btrfs_root
*root
,
3313 struct btrfs_path
*path
)
3318 struct extent_buffer
*eb
= path
->nodes
[1];
3319 struct extent_buffer
*leaf
;
3320 struct btrfs_leaf_ref
*ref
;
3321 struct refsort
*sorted
= NULL
;
3322 int nritems
= btrfs_header_nritems(eb
);
3326 int slot
= path
->slots
[1];
3327 u32 blocksize
= btrfs_level_size(root
, 0);
3333 root_owner
= btrfs_header_owner(eb
);
3334 root_gen
= btrfs_header_generation(eb
);
3335 sorted
= kmalloc(sizeof(*sorted
) * nritems
, GFP_NOFS
);
3338 * step one, sort all the leaf pointers so we don't scribble
3339 * randomly into the extent allocation tree
3341 for (i
= slot
; i
< nritems
; i
++) {
3342 sorted
[refi
].bytenr
= btrfs_node_blockptr(eb
, i
);
3343 sorted
[refi
].slot
= i
;
3348 * nritems won't be zero, but if we're picking up drop_snapshot
3349 * after a crash, slot might be > 0, so double check things
3355 sort(sorted
, refi
, sizeof(struct refsort
), refsort_cmp
, NULL
);
3358 * the first loop frees everything the leaves point to
3360 for (i
= 0; i
< refi
; i
++) {
3363 bytenr
= sorted
[i
].bytenr
;
3366 * check the reference count on this leaf. If it is > 1
3367 * we just decrement it below and don't update any
3368 * of the refs the leaf points to.
3370 ret
= drop_snap_lookup_refcount(trans
, root
, bytenr
,
3376 ptr_gen
= btrfs_node_ptr_generation(eb
, sorted
[i
].slot
);
3379 * the leaf only had one reference, which means the
3380 * only thing pointing to this leaf is the snapshot
3381 * we're deleting. It isn't possible for the reference
3382 * count to increase again later
3384 * The reference cache is checked for the leaf,
3385 * and if found we'll be able to drop any refs held by
3386 * the leaf without needing to read it in.
3388 ref
= btrfs_lookup_leaf_ref(root
, bytenr
);
3389 if (ref
&& ref
->generation
!= ptr_gen
) {
3390 btrfs_free_leaf_ref(root
, ref
);
3394 ret
= cache_drop_leaf_ref(trans
, root
, ref
);
3396 btrfs_remove_leaf_ref(root
, ref
);
3397 btrfs_free_leaf_ref(root
, ref
);
3400 * the leaf wasn't in the reference cache, so
3401 * we have to read it.
3403 leaf
= read_tree_block(root
, bytenr
, blocksize
,
3405 ret
= btrfs_drop_leaf_ref(trans
, root
, leaf
);
3407 free_extent_buffer(leaf
);
3409 atomic_inc(&root
->fs_info
->throttle_gen
);
3410 wake_up(&root
->fs_info
->transaction_throttle
);
3415 * run through the loop again to free the refs on the leaves.
3416 * This is faster than doing it in the loop above because
3417 * the leaves are likely to be clustered together. We end up
3418 * working in nice chunks on the extent allocation tree.
3420 for (i
= 0; i
< refi
; i
++) {
3421 bytenr
= sorted
[i
].bytenr
;
3422 ret
= btrfs_free_extent(trans
, root
, bytenr
,
3423 blocksize
, eb
->start
,
3424 root_owner
, root_gen
, 0, 1);
3427 atomic_inc(&root
->fs_info
->throttle_gen
);
3428 wake_up(&root
->fs_info
->transaction_throttle
);
3435 * update the path to show we've processed the entire level 1
3436 * node. This will get saved into the root's drop_snapshot_progress
3437 * field so these drops are not repeated again if this transaction
3440 path
->slots
[1] = nritems
;
3445 * helper function for drop_snapshot, this walks down the tree dropping ref
3446 * counts as it goes.
3448 static noinline
int walk_down_tree(struct btrfs_trans_handle
*trans
,
3449 struct btrfs_root
*root
,
3450 struct btrfs_path
*path
, int *level
)
3456 struct extent_buffer
*next
;
3457 struct extent_buffer
*cur
;
3458 struct extent_buffer
*parent
;
3463 WARN_ON(*level
< 0);
3464 WARN_ON(*level
>= BTRFS_MAX_LEVEL
);
3465 ret
= drop_snap_lookup_refcount(trans
, root
, path
->nodes
[*level
]->start
,
3466 path
->nodes
[*level
]->len
, &refs
);
3472 * walk down to the last node level and free all the leaves
3474 while (*level
>= 0) {
3475 WARN_ON(*level
< 0);
3476 WARN_ON(*level
>= BTRFS_MAX_LEVEL
);
3477 cur
= path
->nodes
[*level
];
3479 if (btrfs_header_level(cur
) != *level
)
3482 if (path
->slots
[*level
] >=
3483 btrfs_header_nritems(cur
))
3486 /* the new code goes down to level 1 and does all the
3487 * leaves pointed to that node in bulk. So, this check
3488 * for level 0 will always be false.
3490 * But, the disk format allows the drop_snapshot_progress
3491 * field in the root to leave things in a state where
3492 * a leaf will need cleaning up here. If someone crashes
3493 * with the old code and then boots with the new code,
3494 * we might find a leaf here.
3497 ret
= btrfs_drop_leaf_ref(trans
, root
, cur
);
3503 * once we get to level one, process the whole node
3504 * at once, including everything below it.
3507 ret
= drop_level_one_refs(trans
, root
, path
);
3512 bytenr
= btrfs_node_blockptr(cur
, path
->slots
[*level
]);
3513 ptr_gen
= btrfs_node_ptr_generation(cur
, path
->slots
[*level
]);
3514 blocksize
= btrfs_level_size(root
, *level
- 1);
3516 ret
= drop_snap_lookup_refcount(trans
, root
, bytenr
,
3521 * if there is more than one reference, we don't need
3522 * to read that node to drop any references it has. We
3523 * just drop the ref we hold on that node and move on to the
3524 * next slot in this level.
3527 parent
= path
->nodes
[*level
];
3528 root_owner
= btrfs_header_owner(parent
);
3529 root_gen
= btrfs_header_generation(parent
);
3530 path
->slots
[*level
]++;
3532 ret
= btrfs_free_extent(trans
, root
, bytenr
,
3533 blocksize
, parent
->start
,
3534 root_owner
, root_gen
,
3538 atomic_inc(&root
->fs_info
->throttle_gen
);
3539 wake_up(&root
->fs_info
->transaction_throttle
);
3546 * we need to keep freeing things in the next level down.
3547 * read the block and loop around to process it
3549 next
= read_tree_block(root
, bytenr
, blocksize
, ptr_gen
);
3550 WARN_ON(*level
<= 0);
3551 if (path
->nodes
[*level
-1])
3552 free_extent_buffer(path
->nodes
[*level
-1]);
3553 path
->nodes
[*level
-1] = next
;
3554 *level
= btrfs_header_level(next
);
3555 path
->slots
[*level
] = 0;
3559 WARN_ON(*level
< 0);
3560 WARN_ON(*level
>= BTRFS_MAX_LEVEL
);
3562 if (path
->nodes
[*level
] == root
->node
) {
3563 parent
= path
->nodes
[*level
];
3564 bytenr
= path
->nodes
[*level
]->start
;
3566 parent
= path
->nodes
[*level
+ 1];
3567 bytenr
= btrfs_node_blockptr(parent
, path
->slots
[*level
+ 1]);
3570 blocksize
= btrfs_level_size(root
, *level
);
3571 root_owner
= btrfs_header_owner(parent
);
3572 root_gen
= btrfs_header_generation(parent
);
3575 * cleanup and free the reference on the last node
3578 ret
= btrfs_free_extent(trans
, root
, bytenr
, blocksize
,
3579 parent
->start
, root_owner
, root_gen
,
3581 free_extent_buffer(path
->nodes
[*level
]);
3582 path
->nodes
[*level
] = NULL
;
3592 * helper function for drop_subtree, this function is similar to
3593 * walk_down_tree. The main difference is that it checks reference
3594 * counts while tree blocks are locked.
3596 static noinline
int walk_down_subtree(struct btrfs_trans_handle
*trans
,
3597 struct btrfs_root
*root
,
3598 struct btrfs_path
*path
, int *level
)
3600 struct extent_buffer
*next
;
3601 struct extent_buffer
*cur
;
3602 struct extent_buffer
*parent
;
3609 cur
= path
->nodes
[*level
];
3610 ret
= btrfs_lookup_extent_ref(trans
, root
, cur
->start
, cur
->len
,
3616 while (*level
>= 0) {
3617 cur
= path
->nodes
[*level
];
3619 ret
= btrfs_drop_leaf_ref(trans
, root
, cur
);
3621 clean_tree_block(trans
, root
, cur
);
3624 if (path
->slots
[*level
] >= btrfs_header_nritems(cur
)) {
3625 clean_tree_block(trans
, root
, cur
);
3629 bytenr
= btrfs_node_blockptr(cur
, path
->slots
[*level
]);
3630 blocksize
= btrfs_level_size(root
, *level
- 1);
3631 ptr_gen
= btrfs_node_ptr_generation(cur
, path
->slots
[*level
]);
3633 next
= read_tree_block(root
, bytenr
, blocksize
, ptr_gen
);
3634 btrfs_tree_lock(next
);
3635 btrfs_set_lock_blocking(next
);
3637 ret
= btrfs_lookup_extent_ref(trans
, root
, bytenr
, blocksize
,
3641 parent
= path
->nodes
[*level
];
3642 ret
= btrfs_free_extent(trans
, root
, bytenr
,
3643 blocksize
, parent
->start
,
3644 btrfs_header_owner(parent
),
3645 btrfs_header_generation(parent
),
3648 path
->slots
[*level
]++;
3649 btrfs_tree_unlock(next
);
3650 free_extent_buffer(next
);
3654 *level
= btrfs_header_level(next
);
3655 path
->nodes
[*level
] = next
;
3656 path
->slots
[*level
] = 0;
3657 path
->locks
[*level
] = 1;
3661 parent
= path
->nodes
[*level
+ 1];
3662 bytenr
= path
->nodes
[*level
]->start
;
3663 blocksize
= path
->nodes
[*level
]->len
;
3665 ret
= btrfs_free_extent(trans
, root
, bytenr
, blocksize
,
3666 parent
->start
, btrfs_header_owner(parent
),
3667 btrfs_header_generation(parent
), *level
, 1);
3670 if (path
->locks
[*level
]) {
3671 btrfs_tree_unlock(path
->nodes
[*level
]);
3672 path
->locks
[*level
] = 0;
3674 free_extent_buffer(path
->nodes
[*level
]);
3675 path
->nodes
[*level
] = NULL
;
3682 * helper for dropping snapshots. This walks back up the tree in the path
3683 * to find the first node higher up where we haven't yet gone through
3686 static noinline
int walk_up_tree(struct btrfs_trans_handle
*trans
,
3687 struct btrfs_root
*root
,
3688 struct btrfs_path
*path
,
3689 int *level
, int max_level
)
3693 struct btrfs_root_item
*root_item
= &root
->root_item
;
3698 for (i
= *level
; i
< max_level
&& path
->nodes
[i
]; i
++) {
3699 slot
= path
->slots
[i
];
3700 if (slot
< btrfs_header_nritems(path
->nodes
[i
]) - 1) {
3701 struct extent_buffer
*node
;
3702 struct btrfs_disk_key disk_key
;
3705 * there is more work to do in this level.
3706 * Update the drop_progress marker to reflect
3707 * the work we've done so far, and then bump
3710 node
= path
->nodes
[i
];
3713 WARN_ON(*level
== 0);
3714 btrfs_node_key(node
, &disk_key
, path
->slots
[i
]);
3715 memcpy(&root_item
->drop_progress
,
3716 &disk_key
, sizeof(disk_key
));
3717 root_item
->drop_level
= i
;
3720 struct extent_buffer
*parent
;
3723 * this whole node is done, free our reference
3724 * on it and go up one level
3726 if (path
->nodes
[*level
] == root
->node
)
3727 parent
= path
->nodes
[*level
];
3729 parent
= path
->nodes
[*level
+ 1];
3731 root_owner
= btrfs_header_owner(parent
);
3732 root_gen
= btrfs_header_generation(parent
);
3734 clean_tree_block(trans
, root
, path
->nodes
[*level
]);
3735 ret
= btrfs_free_extent(trans
, root
,
3736 path
->nodes
[*level
]->start
,
3737 path
->nodes
[*level
]->len
,
3738 parent
->start
, root_owner
,
3739 root_gen
, *level
, 1);
3741 if (path
->locks
[*level
]) {
3742 btrfs_tree_unlock(path
->nodes
[*level
]);
3743 path
->locks
[*level
] = 0;
3745 free_extent_buffer(path
->nodes
[*level
]);
3746 path
->nodes
[*level
] = NULL
;
3754 * drop the reference count on the tree rooted at 'snap'. This traverses
3755 * the tree freeing any blocks that have a ref count of zero after being
3758 int btrfs_drop_snapshot(struct btrfs_trans_handle
*trans
, struct btrfs_root
3764 struct btrfs_path
*path
;
3768 struct btrfs_root_item
*root_item
= &root
->root_item
;
3770 WARN_ON(!mutex_is_locked(&root
->fs_info
->drop_mutex
));
3771 path
= btrfs_alloc_path();
3774 level
= btrfs_header_level(root
->node
);
3776 if (btrfs_disk_key_objectid(&root_item
->drop_progress
) == 0) {
3777 path
->nodes
[level
] = root
->node
;
3778 extent_buffer_get(root
->node
);
3779 path
->slots
[level
] = 0;
3781 struct btrfs_key key
;
3782 struct btrfs_disk_key found_key
;
3783 struct extent_buffer
*node
;
3785 btrfs_disk_key_to_cpu(&key
, &root_item
->drop_progress
);
3786 level
= root_item
->drop_level
;
3787 path
->lowest_level
= level
;
3788 wret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
3793 node
= path
->nodes
[level
];
3794 btrfs_node_key(node
, &found_key
, path
->slots
[level
]);
3795 WARN_ON(memcmp(&found_key
, &root_item
->drop_progress
,
3796 sizeof(found_key
)));
3798 * unlock our path, this is safe because only this
3799 * function is allowed to delete this snapshot
3801 for (i
= 0; i
< BTRFS_MAX_LEVEL
; i
++) {
3802 if (path
->nodes
[i
] && path
->locks
[i
]) {
3804 btrfs_tree_unlock(path
->nodes
[i
]);
3809 unsigned long update
;
3810 wret
= walk_down_tree(trans
, root
, path
, &level
);
3816 wret
= walk_up_tree(trans
, root
, path
, &level
,
3822 if (trans
->transaction
->in_commit
||
3823 trans
->transaction
->delayed_refs
.flushing
) {
3827 atomic_inc(&root
->fs_info
->throttle_gen
);
3828 wake_up(&root
->fs_info
->transaction_throttle
);
3829 for (update_count
= 0; update_count
< 16; update_count
++) {
3830 update
= trans
->delayed_ref_updates
;
3831 trans
->delayed_ref_updates
= 0;
3833 btrfs_run_delayed_refs(trans
, root
, update
);
3838 for (i
= 0; i
<= orig_level
; i
++) {
3839 if (path
->nodes
[i
]) {
3840 free_extent_buffer(path
->nodes
[i
]);
3841 path
->nodes
[i
] = NULL
;
3845 btrfs_free_path(path
);
3849 int btrfs_drop_subtree(struct btrfs_trans_handle
*trans
,
3850 struct btrfs_root
*root
,
3851 struct extent_buffer
*node
,
3852 struct extent_buffer
*parent
)
3854 struct btrfs_path
*path
;
3860 path
= btrfs_alloc_path();
3863 btrfs_assert_tree_locked(parent
);
3864 parent_level
= btrfs_header_level(parent
);
3865 extent_buffer_get(parent
);
3866 path
->nodes
[parent_level
] = parent
;
3867 path
->slots
[parent_level
] = btrfs_header_nritems(parent
);
3869 btrfs_assert_tree_locked(node
);
3870 level
= btrfs_header_level(node
);
3871 extent_buffer_get(node
);
3872 path
->nodes
[level
] = node
;
3873 path
->slots
[level
] = 0;
3876 wret
= walk_down_subtree(trans
, root
, path
, &level
);
3882 wret
= walk_up_tree(trans
, root
, path
, &level
, parent_level
);
3889 btrfs_free_path(path
);
3893 static unsigned long calc_ra(unsigned long start
, unsigned long last
,
3896 return min(last
, start
+ nr
- 1);
3899 static noinline
int relocate_inode_pages(struct inode
*inode
, u64 start
,
3904 unsigned long first_index
;
3905 unsigned long last_index
;
3908 struct extent_io_tree
*io_tree
= &BTRFS_I(inode
)->io_tree
;
3909 struct file_ra_state
*ra
;
3910 struct btrfs_ordered_extent
*ordered
;
3911 unsigned int total_read
= 0;
3912 unsigned int total_dirty
= 0;
3915 ra
= kzalloc(sizeof(*ra
), GFP_NOFS
);
3917 mutex_lock(&inode
->i_mutex
);
3918 first_index
= start
>> PAGE_CACHE_SHIFT
;
3919 last_index
= (start
+ len
- 1) >> PAGE_CACHE_SHIFT
;
3921 /* make sure the dirty trick played by the caller work */
3922 ret
= invalidate_inode_pages2_range(inode
->i_mapping
,
3923 first_index
, last_index
);
3927 file_ra_state_init(ra
, inode
->i_mapping
);
3929 for (i
= first_index
; i
<= last_index
; i
++) {
3930 if (total_read
% ra
->ra_pages
== 0) {
3931 btrfs_force_ra(inode
->i_mapping
, ra
, NULL
, i
,
3932 calc_ra(i
, last_index
, ra
->ra_pages
));
3936 if (((u64
)i
<< PAGE_CACHE_SHIFT
) > i_size_read(inode
))
3938 page
= grab_cache_page(inode
->i_mapping
, i
);
3943 if (!PageUptodate(page
)) {
3944 btrfs_readpage(NULL
, page
);
3946 if (!PageUptodate(page
)) {
3948 page_cache_release(page
);
3953 wait_on_page_writeback(page
);
3955 page_start
= (u64
)page
->index
<< PAGE_CACHE_SHIFT
;
3956 page_end
= page_start
+ PAGE_CACHE_SIZE
- 1;
3957 lock_extent(io_tree
, page_start
, page_end
, GFP_NOFS
);
3959 ordered
= btrfs_lookup_ordered_extent(inode
, page_start
);
3961 unlock_extent(io_tree
, page_start
, page_end
, GFP_NOFS
);
3963 page_cache_release(page
);
3964 btrfs_start_ordered_extent(inode
, ordered
, 1);
3965 btrfs_put_ordered_extent(ordered
);
3968 set_page_extent_mapped(page
);
3970 if (i
== first_index
)
3971 set_extent_bits(io_tree
, page_start
, page_end
,
3972 EXTENT_BOUNDARY
, GFP_NOFS
);
3973 btrfs_set_extent_delalloc(inode
, page_start
, page_end
);
3975 set_page_dirty(page
);
3978 unlock_extent(io_tree
, page_start
, page_end
, GFP_NOFS
);
3980 page_cache_release(page
);
3985 mutex_unlock(&inode
->i_mutex
);
3986 balance_dirty_pages_ratelimited_nr(inode
->i_mapping
, total_dirty
);
3990 static noinline
int relocate_data_extent(struct inode
*reloc_inode
,
3991 struct btrfs_key
*extent_key
,
3994 struct btrfs_root
*root
= BTRFS_I(reloc_inode
)->root
;
3995 struct extent_map_tree
*em_tree
= &BTRFS_I(reloc_inode
)->extent_tree
;
3996 struct extent_map
*em
;
3997 u64 start
= extent_key
->objectid
- offset
;
3998 u64 end
= start
+ extent_key
->offset
- 1;
4000 em
= alloc_extent_map(GFP_NOFS
);
4001 BUG_ON(!em
|| IS_ERR(em
));
4004 em
->len
= extent_key
->offset
;
4005 em
->block_len
= extent_key
->offset
;
4006 em
->block_start
= extent_key
->objectid
;
4007 em
->bdev
= root
->fs_info
->fs_devices
->latest_bdev
;
4008 set_bit(EXTENT_FLAG_PINNED
, &em
->flags
);
4010 /* setup extent map to cheat btrfs_readpage */
4011 lock_extent(&BTRFS_I(reloc_inode
)->io_tree
, start
, end
, GFP_NOFS
);
4014 spin_lock(&em_tree
->lock
);
4015 ret
= add_extent_mapping(em_tree
, em
);
4016 spin_unlock(&em_tree
->lock
);
4017 if (ret
!= -EEXIST
) {
4018 free_extent_map(em
);
4021 btrfs_drop_extent_cache(reloc_inode
, start
, end
, 0);
4023 unlock_extent(&BTRFS_I(reloc_inode
)->io_tree
, start
, end
, GFP_NOFS
);
4025 return relocate_inode_pages(reloc_inode
, start
, extent_key
->offset
);
4028 struct btrfs_ref_path
{
4030 u64 nodes
[BTRFS_MAX_LEVEL
];
4032 u64 root_generation
;
4039 struct btrfs_key node_keys
[BTRFS_MAX_LEVEL
];
4040 u64 new_nodes
[BTRFS_MAX_LEVEL
];
4043 struct disk_extent
{
4054 static int is_cowonly_root(u64 root_objectid
)
4056 if (root_objectid
== BTRFS_ROOT_TREE_OBJECTID
||
4057 root_objectid
== BTRFS_EXTENT_TREE_OBJECTID
||
4058 root_objectid
== BTRFS_CHUNK_TREE_OBJECTID
||
4059 root_objectid
== BTRFS_DEV_TREE_OBJECTID
||
4060 root_objectid
== BTRFS_TREE_LOG_OBJECTID
||
4061 root_objectid
== BTRFS_CSUM_TREE_OBJECTID
)
4066 static noinline
int __next_ref_path(struct btrfs_trans_handle
*trans
,
4067 struct btrfs_root
*extent_root
,
4068 struct btrfs_ref_path
*ref_path
,
4071 struct extent_buffer
*leaf
;
4072 struct btrfs_path
*path
;
4073 struct btrfs_extent_ref
*ref
;
4074 struct btrfs_key key
;
4075 struct btrfs_key found_key
;
4081 path
= btrfs_alloc_path();
4086 ref_path
->lowest_level
= -1;
4087 ref_path
->current_level
= -1;
4088 ref_path
->shared_level
= -1;
4092 level
= ref_path
->current_level
- 1;
4093 while (level
>= -1) {
4095 if (level
< ref_path
->lowest_level
)
4099 bytenr
= ref_path
->nodes
[level
];
4101 bytenr
= ref_path
->extent_start
;
4102 BUG_ON(bytenr
== 0);
4104 parent
= ref_path
->nodes
[level
+ 1];
4105 ref_path
->nodes
[level
+ 1] = 0;
4106 ref_path
->current_level
= level
;
4107 BUG_ON(parent
== 0);
4109 key
.objectid
= bytenr
;
4110 key
.offset
= parent
+ 1;
4111 key
.type
= BTRFS_EXTENT_REF_KEY
;
4113 ret
= btrfs_search_slot(trans
, extent_root
, &key
, path
, 0, 0);
4118 leaf
= path
->nodes
[0];
4119 nritems
= btrfs_header_nritems(leaf
);
4120 if (path
->slots
[0] >= nritems
) {
4121 ret
= btrfs_next_leaf(extent_root
, path
);
4126 leaf
= path
->nodes
[0];
4129 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0]);
4130 if (found_key
.objectid
== bytenr
&&
4131 found_key
.type
== BTRFS_EXTENT_REF_KEY
) {
4132 if (level
< ref_path
->shared_level
)
4133 ref_path
->shared_level
= level
;
4138 btrfs_release_path(extent_root
, path
);
4141 /* reached lowest level */
4145 level
= ref_path
->current_level
;
4146 while (level
< BTRFS_MAX_LEVEL
- 1) {
4150 bytenr
= ref_path
->nodes
[level
];
4152 bytenr
= ref_path
->extent_start
;
4154 BUG_ON(bytenr
== 0);
4156 key
.objectid
= bytenr
;
4158 key
.type
= BTRFS_EXTENT_REF_KEY
;
4160 ret
= btrfs_search_slot(trans
, extent_root
, &key
, path
, 0, 0);
4164 leaf
= path
->nodes
[0];
4165 nritems
= btrfs_header_nritems(leaf
);
4166 if (path
->slots
[0] >= nritems
) {
4167 ret
= btrfs_next_leaf(extent_root
, path
);
4171 /* the extent was freed by someone */
4172 if (ref_path
->lowest_level
== level
)
4174 btrfs_release_path(extent_root
, path
);
4177 leaf
= path
->nodes
[0];
4180 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0]);
4181 if (found_key
.objectid
!= bytenr
||
4182 found_key
.type
!= BTRFS_EXTENT_REF_KEY
) {
4183 /* the extent was freed by someone */
4184 if (ref_path
->lowest_level
== level
) {
4188 btrfs_release_path(extent_root
, path
);
4192 ref
= btrfs_item_ptr(leaf
, path
->slots
[0],
4193 struct btrfs_extent_ref
);
4194 ref_objectid
= btrfs_ref_objectid(leaf
, ref
);
4195 if (ref_objectid
< BTRFS_FIRST_FREE_OBJECTID
) {
4197 level
= (int)ref_objectid
;
4198 BUG_ON(level
>= BTRFS_MAX_LEVEL
);
4199 ref_path
->lowest_level
= level
;
4200 ref_path
->current_level
= level
;
4201 ref_path
->nodes
[level
] = bytenr
;
4203 WARN_ON(ref_objectid
!= level
);
4206 WARN_ON(level
!= -1);
4210 if (ref_path
->lowest_level
== level
) {
4211 ref_path
->owner_objectid
= ref_objectid
;
4212 ref_path
->num_refs
= btrfs_ref_num_refs(leaf
, ref
);
4216 * the block is tree root or the block isn't in reference
4219 if (found_key
.objectid
== found_key
.offset
||
4220 is_cowonly_root(btrfs_ref_root(leaf
, ref
))) {
4221 ref_path
->root_objectid
= btrfs_ref_root(leaf
, ref
);
4222 ref_path
->root_generation
=
4223 btrfs_ref_generation(leaf
, ref
);
4225 /* special reference from the tree log */
4226 ref_path
->nodes
[0] = found_key
.offset
;
4227 ref_path
->current_level
= 0;
4234 BUG_ON(ref_path
->nodes
[level
] != 0);
4235 ref_path
->nodes
[level
] = found_key
.offset
;
4236 ref_path
->current_level
= level
;
4239 * the reference was created in the running transaction,
4240 * no need to continue walking up.
4242 if (btrfs_ref_generation(leaf
, ref
) == trans
->transid
) {
4243 ref_path
->root_objectid
= btrfs_ref_root(leaf
, ref
);
4244 ref_path
->root_generation
=
4245 btrfs_ref_generation(leaf
, ref
);
4250 btrfs_release_path(extent_root
, path
);
4253 /* reached max tree level, but no tree root found. */
4256 btrfs_free_path(path
);
4260 static int btrfs_first_ref_path(struct btrfs_trans_handle
*trans
,
4261 struct btrfs_root
*extent_root
,
4262 struct btrfs_ref_path
*ref_path
,
4265 memset(ref_path
, 0, sizeof(*ref_path
));
4266 ref_path
->extent_start
= extent_start
;
4268 return __next_ref_path(trans
, extent_root
, ref_path
, 1);
4271 static int btrfs_next_ref_path(struct btrfs_trans_handle
*trans
,
4272 struct btrfs_root
*extent_root
,
4273 struct btrfs_ref_path
*ref_path
)
4275 return __next_ref_path(trans
, extent_root
, ref_path
, 0);
4278 static noinline
int get_new_locations(struct inode
*reloc_inode
,
4279 struct btrfs_key
*extent_key
,
4280 u64 offset
, int no_fragment
,
4281 struct disk_extent
**extents
,
4284 struct btrfs_root
*root
= BTRFS_I(reloc_inode
)->root
;
4285 struct btrfs_path
*path
;
4286 struct btrfs_file_extent_item
*fi
;
4287 struct extent_buffer
*leaf
;
4288 struct disk_extent
*exts
= *extents
;
4289 struct btrfs_key found_key
;
4294 int max
= *nr_extents
;
4297 WARN_ON(!no_fragment
&& *extents
);
4300 exts
= kmalloc(sizeof(*exts
) * max
, GFP_NOFS
);
4305 path
= btrfs_alloc_path();
4308 cur_pos
= extent_key
->objectid
- offset
;
4309 last_byte
= extent_key
->objectid
+ extent_key
->offset
;
4310 ret
= btrfs_lookup_file_extent(NULL
, root
, path
, reloc_inode
->i_ino
,
4320 leaf
= path
->nodes
[0];
4321 nritems
= btrfs_header_nritems(leaf
);
4322 if (path
->slots
[0] >= nritems
) {
4323 ret
= btrfs_next_leaf(root
, path
);
4328 leaf
= path
->nodes
[0];
4331 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0]);
4332 if (found_key
.offset
!= cur_pos
||
4333 found_key
.type
!= BTRFS_EXTENT_DATA_KEY
||
4334 found_key
.objectid
!= reloc_inode
->i_ino
)
4337 fi
= btrfs_item_ptr(leaf
, path
->slots
[0],
4338 struct btrfs_file_extent_item
);
4339 if (btrfs_file_extent_type(leaf
, fi
) !=
4340 BTRFS_FILE_EXTENT_REG
||
4341 btrfs_file_extent_disk_bytenr(leaf
, fi
) == 0)
4345 struct disk_extent
*old
= exts
;
4347 exts
= kzalloc(sizeof(*exts
) * max
, GFP_NOFS
);
4348 memcpy(exts
, old
, sizeof(*exts
) * nr
);
4349 if (old
!= *extents
)
4353 exts
[nr
].disk_bytenr
=
4354 btrfs_file_extent_disk_bytenr(leaf
, fi
);
4355 exts
[nr
].disk_num_bytes
=
4356 btrfs_file_extent_disk_num_bytes(leaf
, fi
);
4357 exts
[nr
].offset
= btrfs_file_extent_offset(leaf
, fi
);
4358 exts
[nr
].num_bytes
= btrfs_file_extent_num_bytes(leaf
, fi
);
4359 exts
[nr
].ram_bytes
= btrfs_file_extent_ram_bytes(leaf
, fi
);
4360 exts
[nr
].compression
= btrfs_file_extent_compression(leaf
, fi
);
4361 exts
[nr
].encryption
= btrfs_file_extent_encryption(leaf
, fi
);
4362 exts
[nr
].other_encoding
= btrfs_file_extent_other_encoding(leaf
,
4364 BUG_ON(exts
[nr
].offset
> 0);
4365 BUG_ON(exts
[nr
].compression
|| exts
[nr
].encryption
);
4366 BUG_ON(exts
[nr
].num_bytes
!= exts
[nr
].disk_num_bytes
);
4368 cur_pos
+= exts
[nr
].num_bytes
;
4371 if (cur_pos
+ offset
>= last_byte
)
4381 BUG_ON(cur_pos
+ offset
> last_byte
);
4382 if (cur_pos
+ offset
< last_byte
) {
4388 btrfs_free_path(path
);
4390 if (exts
!= *extents
)
4399 static noinline
int replace_one_extent(struct btrfs_trans_handle
*trans
,
4400 struct btrfs_root
*root
,
4401 struct btrfs_path
*path
,
4402 struct btrfs_key
*extent_key
,
4403 struct btrfs_key
*leaf_key
,
4404 struct btrfs_ref_path
*ref_path
,
4405 struct disk_extent
*new_extents
,
4408 struct extent_buffer
*leaf
;
4409 struct btrfs_file_extent_item
*fi
;
4410 struct inode
*inode
= NULL
;
4411 struct btrfs_key key
;
4416 u64 search_end
= (u64
)-1;
4419 int extent_locked
= 0;
4423 memcpy(&key
, leaf_key
, sizeof(key
));
4424 if (ref_path
->owner_objectid
!= BTRFS_MULTIPLE_OBJECTIDS
) {
4425 if (key
.objectid
< ref_path
->owner_objectid
||
4426 (key
.objectid
== ref_path
->owner_objectid
&&
4427 key
.type
< BTRFS_EXTENT_DATA_KEY
)) {
4428 key
.objectid
= ref_path
->owner_objectid
;
4429 key
.type
= BTRFS_EXTENT_DATA_KEY
;
4435 ret
= btrfs_search_slot(trans
, root
, &key
, path
, 0, 1);
4439 leaf
= path
->nodes
[0];
4440 nritems
= btrfs_header_nritems(leaf
);
4442 if (extent_locked
&& ret
> 0) {
4444 * the file extent item was modified by someone
4445 * before the extent got locked.
4447 unlock_extent(&BTRFS_I(inode
)->io_tree
, lock_start
,
4448 lock_end
, GFP_NOFS
);
4452 if (path
->slots
[0] >= nritems
) {
4453 if (++nr_scaned
> 2)
4456 BUG_ON(extent_locked
);
4457 ret
= btrfs_next_leaf(root
, path
);
4462 leaf
= path
->nodes
[0];
4463 nritems
= btrfs_header_nritems(leaf
);
4466 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
4468 if (ref_path
->owner_objectid
!= BTRFS_MULTIPLE_OBJECTIDS
) {
4469 if ((key
.objectid
> ref_path
->owner_objectid
) ||
4470 (key
.objectid
== ref_path
->owner_objectid
&&
4471 key
.type
> BTRFS_EXTENT_DATA_KEY
) ||
4472 key
.offset
>= search_end
)
4476 if (inode
&& key
.objectid
!= inode
->i_ino
) {
4477 BUG_ON(extent_locked
);
4478 btrfs_release_path(root
, path
);
4479 mutex_unlock(&inode
->i_mutex
);
4485 if (key
.type
!= BTRFS_EXTENT_DATA_KEY
) {
4490 fi
= btrfs_item_ptr(leaf
, path
->slots
[0],
4491 struct btrfs_file_extent_item
);
4492 extent_type
= btrfs_file_extent_type(leaf
, fi
);
4493 if ((extent_type
!= BTRFS_FILE_EXTENT_REG
&&
4494 extent_type
!= BTRFS_FILE_EXTENT_PREALLOC
) ||
4495 (btrfs_file_extent_disk_bytenr(leaf
, fi
) !=
4496 extent_key
->objectid
)) {
4502 num_bytes
= btrfs_file_extent_num_bytes(leaf
, fi
);
4503 ext_offset
= btrfs_file_extent_offset(leaf
, fi
);
4505 if (search_end
== (u64
)-1) {
4506 search_end
= key
.offset
- ext_offset
+
4507 btrfs_file_extent_ram_bytes(leaf
, fi
);
4510 if (!extent_locked
) {
4511 lock_start
= key
.offset
;
4512 lock_end
= lock_start
+ num_bytes
- 1;
4514 if (lock_start
> key
.offset
||
4515 lock_end
+ 1 < key
.offset
+ num_bytes
) {
4516 unlock_extent(&BTRFS_I(inode
)->io_tree
,
4517 lock_start
, lock_end
, GFP_NOFS
);
4523 btrfs_release_path(root
, path
);
4525 inode
= btrfs_iget_locked(root
->fs_info
->sb
,
4526 key
.objectid
, root
);
4527 if (inode
->i_state
& I_NEW
) {
4528 BTRFS_I(inode
)->root
= root
;
4529 BTRFS_I(inode
)->location
.objectid
=
4531 BTRFS_I(inode
)->location
.type
=
4532 BTRFS_INODE_ITEM_KEY
;
4533 BTRFS_I(inode
)->location
.offset
= 0;
4534 btrfs_read_locked_inode(inode
);
4535 unlock_new_inode(inode
);
4538 * some code call btrfs_commit_transaction while
4539 * holding the i_mutex, so we can't use mutex_lock
4542 if (is_bad_inode(inode
) ||
4543 !mutex_trylock(&inode
->i_mutex
)) {
4546 key
.offset
= (u64
)-1;
4551 if (!extent_locked
) {
4552 struct btrfs_ordered_extent
*ordered
;
4554 btrfs_release_path(root
, path
);
4556 lock_extent(&BTRFS_I(inode
)->io_tree
, lock_start
,
4557 lock_end
, GFP_NOFS
);
4558 ordered
= btrfs_lookup_first_ordered_extent(inode
,
4561 ordered
->file_offset
<= lock_end
&&
4562 ordered
->file_offset
+ ordered
->len
> lock_start
) {
4563 unlock_extent(&BTRFS_I(inode
)->io_tree
,
4564 lock_start
, lock_end
, GFP_NOFS
);
4565 btrfs_start_ordered_extent(inode
, ordered
, 1);
4566 btrfs_put_ordered_extent(ordered
);
4567 key
.offset
+= num_bytes
;
4571 btrfs_put_ordered_extent(ordered
);
4577 if (nr_extents
== 1) {
4578 /* update extent pointer in place */
4579 btrfs_set_file_extent_disk_bytenr(leaf
, fi
,
4580 new_extents
[0].disk_bytenr
);
4581 btrfs_set_file_extent_disk_num_bytes(leaf
, fi
,
4582 new_extents
[0].disk_num_bytes
);
4583 btrfs_mark_buffer_dirty(leaf
);
4585 btrfs_drop_extent_cache(inode
, key
.offset
,
4586 key
.offset
+ num_bytes
- 1, 0);
4588 ret
= btrfs_inc_extent_ref(trans
, root
,
4589 new_extents
[0].disk_bytenr
,
4590 new_extents
[0].disk_num_bytes
,
4592 root
->root_key
.objectid
,
4597 ret
= btrfs_free_extent(trans
, root
,
4598 extent_key
->objectid
,
4601 btrfs_header_owner(leaf
),
4602 btrfs_header_generation(leaf
),
4606 btrfs_release_path(root
, path
);
4607 key
.offset
+= num_bytes
;
4615 * drop old extent pointer at first, then insert the
4616 * new pointers one bye one
4618 btrfs_release_path(root
, path
);
4619 ret
= btrfs_drop_extents(trans
, root
, inode
, key
.offset
,
4620 key
.offset
+ num_bytes
,
4621 key
.offset
, &alloc_hint
);
4624 for (i
= 0; i
< nr_extents
; i
++) {
4625 if (ext_offset
>= new_extents
[i
].num_bytes
) {
4626 ext_offset
-= new_extents
[i
].num_bytes
;
4629 extent_len
= min(new_extents
[i
].num_bytes
-
4630 ext_offset
, num_bytes
);
4632 ret
= btrfs_insert_empty_item(trans
, root
,
4637 leaf
= path
->nodes
[0];
4638 fi
= btrfs_item_ptr(leaf
, path
->slots
[0],
4639 struct btrfs_file_extent_item
);
4640 btrfs_set_file_extent_generation(leaf
, fi
,
4642 btrfs_set_file_extent_type(leaf
, fi
,
4643 BTRFS_FILE_EXTENT_REG
);
4644 btrfs_set_file_extent_disk_bytenr(leaf
, fi
,
4645 new_extents
[i
].disk_bytenr
);
4646 btrfs_set_file_extent_disk_num_bytes(leaf
, fi
,
4647 new_extents
[i
].disk_num_bytes
);
4648 btrfs_set_file_extent_ram_bytes(leaf
, fi
,
4649 new_extents
[i
].ram_bytes
);
4651 btrfs_set_file_extent_compression(leaf
, fi
,
4652 new_extents
[i
].compression
);
4653 btrfs_set_file_extent_encryption(leaf
, fi
,
4654 new_extents
[i
].encryption
);
4655 btrfs_set_file_extent_other_encoding(leaf
, fi
,
4656 new_extents
[i
].other_encoding
);
4658 btrfs_set_file_extent_num_bytes(leaf
, fi
,
4660 ext_offset
+= new_extents
[i
].offset
;
4661 btrfs_set_file_extent_offset(leaf
, fi
,
4663 btrfs_mark_buffer_dirty(leaf
);
4665 btrfs_drop_extent_cache(inode
, key
.offset
,
4666 key
.offset
+ extent_len
- 1, 0);
4668 ret
= btrfs_inc_extent_ref(trans
, root
,
4669 new_extents
[i
].disk_bytenr
,
4670 new_extents
[i
].disk_num_bytes
,
4672 root
->root_key
.objectid
,
4673 trans
->transid
, key
.objectid
);
4675 btrfs_release_path(root
, path
);
4677 inode_add_bytes(inode
, extent_len
);
4680 num_bytes
-= extent_len
;
4681 key
.offset
+= extent_len
;
4686 BUG_ON(i
>= nr_extents
);
4690 if (extent_locked
) {
4691 unlock_extent(&BTRFS_I(inode
)->io_tree
, lock_start
,
4692 lock_end
, GFP_NOFS
);
4696 if (ref_path
->owner_objectid
!= BTRFS_MULTIPLE_OBJECTIDS
&&
4697 key
.offset
>= search_end
)
4704 btrfs_release_path(root
, path
);
4706 mutex_unlock(&inode
->i_mutex
);
4707 if (extent_locked
) {
4708 unlock_extent(&BTRFS_I(inode
)->io_tree
, lock_start
,
4709 lock_end
, GFP_NOFS
);
4716 int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle
*trans
,
4717 struct btrfs_root
*root
,
4718 struct extent_buffer
*buf
, u64 orig_start
)
4723 BUG_ON(btrfs_header_generation(buf
) != trans
->transid
);
4724 BUG_ON(root
->root_key
.objectid
!= BTRFS_TREE_RELOC_OBJECTID
);
4726 level
= btrfs_header_level(buf
);
4728 struct btrfs_leaf_ref
*ref
;
4729 struct btrfs_leaf_ref
*orig_ref
;
4731 orig_ref
= btrfs_lookup_leaf_ref(root
, orig_start
);
4735 ref
= btrfs_alloc_leaf_ref(root
, orig_ref
->nritems
);
4737 btrfs_free_leaf_ref(root
, orig_ref
);
4741 ref
->nritems
= orig_ref
->nritems
;
4742 memcpy(ref
->extents
, orig_ref
->extents
,
4743 sizeof(ref
->extents
[0]) * ref
->nritems
);
4745 btrfs_free_leaf_ref(root
, orig_ref
);
4747 ref
->root_gen
= trans
->transid
;
4748 ref
->bytenr
= buf
->start
;
4749 ref
->owner
= btrfs_header_owner(buf
);
4750 ref
->generation
= btrfs_header_generation(buf
);
4752 ret
= btrfs_add_leaf_ref(root
, ref
, 0);
4754 btrfs_free_leaf_ref(root
, ref
);
4759 static noinline
int invalidate_extent_cache(struct btrfs_root
*root
,
4760 struct extent_buffer
*leaf
,
4761 struct btrfs_block_group_cache
*group
,
4762 struct btrfs_root
*target_root
)
4764 struct btrfs_key key
;
4765 struct inode
*inode
= NULL
;
4766 struct btrfs_file_extent_item
*fi
;
4768 u64 skip_objectid
= 0;
4772 nritems
= btrfs_header_nritems(leaf
);
4773 for (i
= 0; i
< nritems
; i
++) {
4774 btrfs_item_key_to_cpu(leaf
, &key
, i
);
4775 if (key
.objectid
== skip_objectid
||
4776 key
.type
!= BTRFS_EXTENT_DATA_KEY
)
4778 fi
= btrfs_item_ptr(leaf
, i
, struct btrfs_file_extent_item
);
4779 if (btrfs_file_extent_type(leaf
, fi
) ==
4780 BTRFS_FILE_EXTENT_INLINE
)
4782 if (btrfs_file_extent_disk_bytenr(leaf
, fi
) == 0)
4784 if (!inode
|| inode
->i_ino
!= key
.objectid
) {
4786 inode
= btrfs_ilookup(target_root
->fs_info
->sb
,
4787 key
.objectid
, target_root
, 1);
4790 skip_objectid
= key
.objectid
;
4793 num_bytes
= btrfs_file_extent_num_bytes(leaf
, fi
);
4795 lock_extent(&BTRFS_I(inode
)->io_tree
, key
.offset
,
4796 key
.offset
+ num_bytes
- 1, GFP_NOFS
);
4797 btrfs_drop_extent_cache(inode
, key
.offset
,
4798 key
.offset
+ num_bytes
- 1, 1);
4799 unlock_extent(&BTRFS_I(inode
)->io_tree
, key
.offset
,
4800 key
.offset
+ num_bytes
- 1, GFP_NOFS
);
4807 static noinline
int replace_extents_in_leaf(struct btrfs_trans_handle
*trans
,
4808 struct btrfs_root
*root
,
4809 struct extent_buffer
*leaf
,
4810 struct btrfs_block_group_cache
*group
,
4811 struct inode
*reloc_inode
)
4813 struct btrfs_key key
;
4814 struct btrfs_key extent_key
;
4815 struct btrfs_file_extent_item
*fi
;
4816 struct btrfs_leaf_ref
*ref
;
4817 struct disk_extent
*new_extent
;
4826 new_extent
= kmalloc(sizeof(*new_extent
), GFP_NOFS
);
4827 BUG_ON(!new_extent
);
4829 ref
= btrfs_lookup_leaf_ref(root
, leaf
->start
);
4833 nritems
= btrfs_header_nritems(leaf
);
4834 for (i
= 0; i
< nritems
; i
++) {
4835 btrfs_item_key_to_cpu(leaf
, &key
, i
);
4836 if (btrfs_key_type(&key
) != BTRFS_EXTENT_DATA_KEY
)
4838 fi
= btrfs_item_ptr(leaf
, i
, struct btrfs_file_extent_item
);
4839 if (btrfs_file_extent_type(leaf
, fi
) ==
4840 BTRFS_FILE_EXTENT_INLINE
)
4842 bytenr
= btrfs_file_extent_disk_bytenr(leaf
, fi
);
4843 num_bytes
= btrfs_file_extent_disk_num_bytes(leaf
, fi
);
4848 if (bytenr
>= group
->key
.objectid
+ group
->key
.offset
||
4849 bytenr
+ num_bytes
<= group
->key
.objectid
)
4852 extent_key
.objectid
= bytenr
;
4853 extent_key
.offset
= num_bytes
;
4854 extent_key
.type
= BTRFS_EXTENT_ITEM_KEY
;
4856 ret
= get_new_locations(reloc_inode
, &extent_key
,
4857 group
->key
.objectid
, 1,
4858 &new_extent
, &nr_extent
);
4863 BUG_ON(ref
->extents
[ext_index
].bytenr
!= bytenr
);
4864 BUG_ON(ref
->extents
[ext_index
].num_bytes
!= num_bytes
);
4865 ref
->extents
[ext_index
].bytenr
= new_extent
->disk_bytenr
;
4866 ref
->extents
[ext_index
].num_bytes
= new_extent
->disk_num_bytes
;
4868 btrfs_set_file_extent_disk_bytenr(leaf
, fi
,
4869 new_extent
->disk_bytenr
);
4870 btrfs_set_file_extent_disk_num_bytes(leaf
, fi
,
4871 new_extent
->disk_num_bytes
);
4872 btrfs_mark_buffer_dirty(leaf
);
4874 ret
= btrfs_inc_extent_ref(trans
, root
,
4875 new_extent
->disk_bytenr
,
4876 new_extent
->disk_num_bytes
,
4878 root
->root_key
.objectid
,
4879 trans
->transid
, key
.objectid
);
4882 ret
= btrfs_free_extent(trans
, root
,
4883 bytenr
, num_bytes
, leaf
->start
,
4884 btrfs_header_owner(leaf
),
4885 btrfs_header_generation(leaf
),
4891 BUG_ON(ext_index
+ 1 != ref
->nritems
);
4892 btrfs_free_leaf_ref(root
, ref
);
4896 int btrfs_free_reloc_root(struct btrfs_trans_handle
*trans
,
4897 struct btrfs_root
*root
)
4899 struct btrfs_root
*reloc_root
;
4902 if (root
->reloc_root
) {
4903 reloc_root
= root
->reloc_root
;
4904 root
->reloc_root
= NULL
;
4905 list_add(&reloc_root
->dead_list
,
4906 &root
->fs_info
->dead_reloc_roots
);
4908 btrfs_set_root_bytenr(&reloc_root
->root_item
,
4909 reloc_root
->node
->start
);
4910 btrfs_set_root_level(&root
->root_item
,
4911 btrfs_header_level(reloc_root
->node
));
4912 memset(&reloc_root
->root_item
.drop_progress
, 0,
4913 sizeof(struct btrfs_disk_key
));
4914 reloc_root
->root_item
.drop_level
= 0;
4916 ret
= btrfs_update_root(trans
, root
->fs_info
->tree_root
,
4917 &reloc_root
->root_key
,
4918 &reloc_root
->root_item
);
4924 int btrfs_drop_dead_reloc_roots(struct btrfs_root
*root
)
4926 struct btrfs_trans_handle
*trans
;
4927 struct btrfs_root
*reloc_root
;
4928 struct btrfs_root
*prev_root
= NULL
;
4929 struct list_head dead_roots
;
4933 INIT_LIST_HEAD(&dead_roots
);
4934 list_splice_init(&root
->fs_info
->dead_reloc_roots
, &dead_roots
);
4936 while (!list_empty(&dead_roots
)) {
4937 reloc_root
= list_entry(dead_roots
.prev
,
4938 struct btrfs_root
, dead_list
);
4939 list_del_init(&reloc_root
->dead_list
);
4941 BUG_ON(reloc_root
->commit_root
!= NULL
);
4943 trans
= btrfs_join_transaction(root
, 1);
4946 mutex_lock(&root
->fs_info
->drop_mutex
);
4947 ret
= btrfs_drop_snapshot(trans
, reloc_root
);
4950 mutex_unlock(&root
->fs_info
->drop_mutex
);
4952 nr
= trans
->blocks_used
;
4953 ret
= btrfs_end_transaction(trans
, root
);
4955 btrfs_btree_balance_dirty(root
, nr
);
4958 free_extent_buffer(reloc_root
->node
);
4960 ret
= btrfs_del_root(trans
, root
->fs_info
->tree_root
,
4961 &reloc_root
->root_key
);
4963 mutex_unlock(&root
->fs_info
->drop_mutex
);
4965 nr
= trans
->blocks_used
;
4966 ret
= btrfs_end_transaction(trans
, root
);
4968 btrfs_btree_balance_dirty(root
, nr
);
4971 prev_root
= reloc_root
;
4974 btrfs_remove_leaf_refs(prev_root
, (u64
)-1, 0);
4980 int btrfs_add_dead_reloc_root(struct btrfs_root
*root
)
4982 list_add(&root
->dead_list
, &root
->fs_info
->dead_reloc_roots
);
4986 int btrfs_cleanup_reloc_trees(struct btrfs_root
*root
)
4988 struct btrfs_root
*reloc_root
;
4989 struct btrfs_trans_handle
*trans
;
4990 struct btrfs_key location
;
4994 mutex_lock(&root
->fs_info
->tree_reloc_mutex
);
4995 ret
= btrfs_find_dead_roots(root
, BTRFS_TREE_RELOC_OBJECTID
, NULL
);
4997 found
= !list_empty(&root
->fs_info
->dead_reloc_roots
);
4998 mutex_unlock(&root
->fs_info
->tree_reloc_mutex
);
5001 trans
= btrfs_start_transaction(root
, 1);
5003 ret
= btrfs_commit_transaction(trans
, root
);
5007 location
.objectid
= BTRFS_DATA_RELOC_TREE_OBJECTID
;
5008 location
.offset
= (u64
)-1;
5009 location
.type
= BTRFS_ROOT_ITEM_KEY
;
5011 reloc_root
= btrfs_read_fs_root_no_name(root
->fs_info
, &location
);
5012 BUG_ON(!reloc_root
);
5013 btrfs_orphan_cleanup(reloc_root
);
5017 static noinline
int init_reloc_tree(struct btrfs_trans_handle
*trans
,
5018 struct btrfs_root
*root
)
5020 struct btrfs_root
*reloc_root
;
5021 struct extent_buffer
*eb
;
5022 struct btrfs_root_item
*root_item
;
5023 struct btrfs_key root_key
;
5026 BUG_ON(!root
->ref_cows
);
5027 if (root
->reloc_root
)
5030 root_item
= kmalloc(sizeof(*root_item
), GFP_NOFS
);
5033 ret
= btrfs_copy_root(trans
, root
, root
->commit_root
,
5034 &eb
, BTRFS_TREE_RELOC_OBJECTID
);
5037 root_key
.objectid
= BTRFS_TREE_RELOC_OBJECTID
;
5038 root_key
.offset
= root
->root_key
.objectid
;
5039 root_key
.type
= BTRFS_ROOT_ITEM_KEY
;
5041 memcpy(root_item
, &root
->root_item
, sizeof(root_item
));
5042 btrfs_set_root_refs(root_item
, 0);
5043 btrfs_set_root_bytenr(root_item
, eb
->start
);
5044 btrfs_set_root_level(root_item
, btrfs_header_level(eb
));
5045 btrfs_set_root_generation(root_item
, trans
->transid
);
5047 btrfs_tree_unlock(eb
);
5048 free_extent_buffer(eb
);
5050 ret
= btrfs_insert_root(trans
, root
->fs_info
->tree_root
,
5051 &root_key
, root_item
);
5055 reloc_root
= btrfs_read_fs_root_no_radix(root
->fs_info
->tree_root
,
5057 BUG_ON(!reloc_root
);
5058 reloc_root
->last_trans
= trans
->transid
;
5059 reloc_root
->commit_root
= NULL
;
5060 reloc_root
->ref_tree
= &root
->fs_info
->reloc_ref_tree
;
5062 root
->reloc_root
= reloc_root
;
5067 * Core function of space balance.
5069 * The idea is using reloc trees to relocate tree blocks in reference
5070 * counted roots. There is one reloc tree for each subvol, and all
5071 * reloc trees share same root key objectid. Reloc trees are snapshots
5072 * of the latest committed roots of subvols (root->commit_root).
5074 * To relocate a tree block referenced by a subvol, there are two steps.
5075 * COW the block through subvol's reloc tree, then update block pointer
5076 * in the subvol to point to the new block. Since all reloc trees share
5077 * same root key objectid, doing special handing for tree blocks owned
5078 * by them is easy. Once a tree block has been COWed in one reloc tree,
5079 * we can use the resulting new block directly when the same block is
5080 * required to COW again through other reloc trees. By this way, relocated
5081 * tree blocks are shared between reloc trees, so they are also shared
5084 static noinline
int relocate_one_path(struct btrfs_trans_handle
*trans
,
5085 struct btrfs_root
*root
,
5086 struct btrfs_path
*path
,
5087 struct btrfs_key
*first_key
,
5088 struct btrfs_ref_path
*ref_path
,
5089 struct btrfs_block_group_cache
*group
,
5090 struct inode
*reloc_inode
)
5092 struct btrfs_root
*reloc_root
;
5093 struct extent_buffer
*eb
= NULL
;
5094 struct btrfs_key
*keys
;
5098 int lowest_level
= 0;
5101 if (ref_path
->owner_objectid
< BTRFS_FIRST_FREE_OBJECTID
)
5102 lowest_level
= ref_path
->owner_objectid
;
5104 if (!root
->ref_cows
) {
5105 path
->lowest_level
= lowest_level
;
5106 ret
= btrfs_search_slot(trans
, root
, first_key
, path
, 0, 1);
5108 path
->lowest_level
= 0;
5109 btrfs_release_path(root
, path
);
5113 mutex_lock(&root
->fs_info
->tree_reloc_mutex
);
5114 ret
= init_reloc_tree(trans
, root
);
5116 reloc_root
= root
->reloc_root
;
5118 shared_level
= ref_path
->shared_level
;
5119 ref_path
->shared_level
= BTRFS_MAX_LEVEL
- 1;
5121 keys
= ref_path
->node_keys
;
5122 nodes
= ref_path
->new_nodes
;
5123 memset(&keys
[shared_level
+ 1], 0,
5124 sizeof(*keys
) * (BTRFS_MAX_LEVEL
- shared_level
- 1));
5125 memset(&nodes
[shared_level
+ 1], 0,
5126 sizeof(*nodes
) * (BTRFS_MAX_LEVEL
- shared_level
- 1));
5128 if (nodes
[lowest_level
] == 0) {
5129 path
->lowest_level
= lowest_level
;
5130 ret
= btrfs_search_slot(trans
, reloc_root
, first_key
, path
,
5133 for (level
= lowest_level
; level
< BTRFS_MAX_LEVEL
; level
++) {
5134 eb
= path
->nodes
[level
];
5135 if (!eb
|| eb
== reloc_root
->node
)
5137 nodes
[level
] = eb
->start
;
5139 btrfs_item_key_to_cpu(eb
, &keys
[level
], 0);
5141 btrfs_node_key_to_cpu(eb
, &keys
[level
], 0);
5144 ref_path
->owner_objectid
>= BTRFS_FIRST_FREE_OBJECTID
) {
5145 eb
= path
->nodes
[0];
5146 ret
= replace_extents_in_leaf(trans
, reloc_root
, eb
,
5147 group
, reloc_inode
);
5150 btrfs_release_path(reloc_root
, path
);
5152 ret
= btrfs_merge_path(trans
, reloc_root
, keys
, nodes
,
5158 * replace tree blocks in the fs tree with tree blocks in
5161 ret
= btrfs_merge_path(trans
, root
, keys
, nodes
, lowest_level
);
5164 if (ref_path
->owner_objectid
>= BTRFS_FIRST_FREE_OBJECTID
) {
5165 ret
= btrfs_search_slot(trans
, reloc_root
, first_key
, path
,
5168 extent_buffer_get(path
->nodes
[0]);
5169 eb
= path
->nodes
[0];
5170 btrfs_release_path(reloc_root
, path
);
5171 ret
= invalidate_extent_cache(reloc_root
, eb
, group
, root
);
5173 free_extent_buffer(eb
);
5176 mutex_unlock(&root
->fs_info
->tree_reloc_mutex
);
5177 path
->lowest_level
= 0;
5181 static noinline
int relocate_tree_block(struct btrfs_trans_handle
*trans
,
5182 struct btrfs_root
*root
,
5183 struct btrfs_path
*path
,
5184 struct btrfs_key
*first_key
,
5185 struct btrfs_ref_path
*ref_path
)
5189 ret
= relocate_one_path(trans
, root
, path
, first_key
,
5190 ref_path
, NULL
, NULL
);
5196 static noinline
int del_extent_zero(struct btrfs_trans_handle
*trans
,
5197 struct btrfs_root
*extent_root
,
5198 struct btrfs_path
*path
,
5199 struct btrfs_key
*extent_key
)
5203 ret
= btrfs_search_slot(trans
, extent_root
, extent_key
, path
, -1, 1);
5206 ret
= btrfs_del_item(trans
, extent_root
, path
);
5208 btrfs_release_path(extent_root
, path
);
5212 static noinline
struct btrfs_root
*read_ref_root(struct btrfs_fs_info
*fs_info
,
5213 struct btrfs_ref_path
*ref_path
)
5215 struct btrfs_key root_key
;
5217 root_key
.objectid
= ref_path
->root_objectid
;
5218 root_key
.type
= BTRFS_ROOT_ITEM_KEY
;
5219 if (is_cowonly_root(ref_path
->root_objectid
))
5220 root_key
.offset
= 0;
5222 root_key
.offset
= (u64
)-1;
5224 return btrfs_read_fs_root_no_name(fs_info
, &root_key
);
5227 static noinline
int relocate_one_extent(struct btrfs_root
*extent_root
,
5228 struct btrfs_path
*path
,
5229 struct btrfs_key
*extent_key
,
5230 struct btrfs_block_group_cache
*group
,
5231 struct inode
*reloc_inode
, int pass
)
5233 struct btrfs_trans_handle
*trans
;
5234 struct btrfs_root
*found_root
;
5235 struct btrfs_ref_path
*ref_path
= NULL
;
5236 struct disk_extent
*new_extents
= NULL
;
5241 struct btrfs_key first_key
;
5245 trans
= btrfs_start_transaction(extent_root
, 1);
5248 if (extent_key
->objectid
== 0) {
5249 ret
= del_extent_zero(trans
, extent_root
, path
, extent_key
);
5253 ref_path
= kmalloc(sizeof(*ref_path
), GFP_NOFS
);
5259 for (loops
= 0; ; loops
++) {
5261 ret
= btrfs_first_ref_path(trans
, extent_root
, ref_path
,
5262 extent_key
->objectid
);
5264 ret
= btrfs_next_ref_path(trans
, extent_root
, ref_path
);
5271 if (ref_path
->root_objectid
== BTRFS_TREE_LOG_OBJECTID
||
5272 ref_path
->root_objectid
== BTRFS_TREE_RELOC_OBJECTID
)
5275 found_root
= read_ref_root(extent_root
->fs_info
, ref_path
);
5276 BUG_ON(!found_root
);
5278 * for reference counted tree, only process reference paths
5279 * rooted at the latest committed root.
5281 if (found_root
->ref_cows
&&
5282 ref_path
->root_generation
!= found_root
->root_key
.offset
)
5285 if (ref_path
->owner_objectid
>= BTRFS_FIRST_FREE_OBJECTID
) {
5288 * copy data extents to new locations
5290 u64 group_start
= group
->key
.objectid
;
5291 ret
= relocate_data_extent(reloc_inode
,
5300 level
= ref_path
->owner_objectid
;
5303 if (prev_block
!= ref_path
->nodes
[level
]) {
5304 struct extent_buffer
*eb
;
5305 u64 block_start
= ref_path
->nodes
[level
];
5306 u64 block_size
= btrfs_level_size(found_root
, level
);
5308 eb
= read_tree_block(found_root
, block_start
,
5310 btrfs_tree_lock(eb
);
5311 BUG_ON(level
!= btrfs_header_level(eb
));
5314 btrfs_item_key_to_cpu(eb
, &first_key
, 0);
5316 btrfs_node_key_to_cpu(eb
, &first_key
, 0);
5318 btrfs_tree_unlock(eb
);
5319 free_extent_buffer(eb
);
5320 prev_block
= block_start
;
5323 mutex_lock(&extent_root
->fs_info
->trans_mutex
);
5324 btrfs_record_root_in_trans(found_root
);
5325 mutex_unlock(&extent_root
->fs_info
->trans_mutex
);
5326 if (ref_path
->owner_objectid
>= BTRFS_FIRST_FREE_OBJECTID
) {
5328 * try to update data extent references while
5329 * keeping metadata shared between snapshots.
5332 ret
= relocate_one_path(trans
, found_root
,
5333 path
, &first_key
, ref_path
,
5334 group
, reloc_inode
);
5340 * use fallback method to process the remaining
5344 u64 group_start
= group
->key
.objectid
;
5345 new_extents
= kmalloc(sizeof(*new_extents
),
5348 ret
= get_new_locations(reloc_inode
,
5356 ret
= replace_one_extent(trans
, found_root
,
5358 &first_key
, ref_path
,
5359 new_extents
, nr_extents
);
5361 ret
= relocate_tree_block(trans
, found_root
, path
,
5362 &first_key
, ref_path
);
5369 btrfs_end_transaction(trans
, extent_root
);
5375 static u64
update_block_group_flags(struct btrfs_root
*root
, u64 flags
)
5378 u64 stripped
= BTRFS_BLOCK_GROUP_RAID0
|
5379 BTRFS_BLOCK_GROUP_RAID1
| BTRFS_BLOCK_GROUP_RAID10
;
5381 num_devices
= root
->fs_info
->fs_devices
->rw_devices
;
5382 if (num_devices
== 1) {
5383 stripped
|= BTRFS_BLOCK_GROUP_DUP
;
5384 stripped
= flags
& ~stripped
;
5386 /* turn raid0 into single device chunks */
5387 if (flags
& BTRFS_BLOCK_GROUP_RAID0
)
5390 /* turn mirroring into duplication */
5391 if (flags
& (BTRFS_BLOCK_GROUP_RAID1
|
5392 BTRFS_BLOCK_GROUP_RAID10
))
5393 return stripped
| BTRFS_BLOCK_GROUP_DUP
;
5396 /* they already had raid on here, just return */
5397 if (flags
& stripped
)
5400 stripped
|= BTRFS_BLOCK_GROUP_DUP
;
5401 stripped
= flags
& ~stripped
;
5403 /* switch duplicated blocks with raid1 */
5404 if (flags
& BTRFS_BLOCK_GROUP_DUP
)
5405 return stripped
| BTRFS_BLOCK_GROUP_RAID1
;
5407 /* turn single device chunks into raid0 */
5408 return stripped
| BTRFS_BLOCK_GROUP_RAID0
;
5413 static int __alloc_chunk_for_shrink(struct btrfs_root
*root
,
5414 struct btrfs_block_group_cache
*shrink_block_group
,
5417 struct btrfs_trans_handle
*trans
;
5418 u64 new_alloc_flags
;
5421 spin_lock(&shrink_block_group
->lock
);
5422 if (btrfs_block_group_used(&shrink_block_group
->item
) > 0) {
5423 spin_unlock(&shrink_block_group
->lock
);
5425 trans
= btrfs_start_transaction(root
, 1);
5426 spin_lock(&shrink_block_group
->lock
);
5428 new_alloc_flags
= update_block_group_flags(root
,
5429 shrink_block_group
->flags
);
5430 if (new_alloc_flags
!= shrink_block_group
->flags
) {
5432 btrfs_block_group_used(&shrink_block_group
->item
);
5434 calc
= shrink_block_group
->key
.offset
;
5436 spin_unlock(&shrink_block_group
->lock
);
5438 do_chunk_alloc(trans
, root
->fs_info
->extent_root
,
5439 calc
+ 2 * 1024 * 1024, new_alloc_flags
, force
);
5441 btrfs_end_transaction(trans
, root
);
5443 spin_unlock(&shrink_block_group
->lock
);
5447 static int __insert_orphan_inode(struct btrfs_trans_handle
*trans
,
5448 struct btrfs_root
*root
,
5449 u64 objectid
, u64 size
)
5451 struct btrfs_path
*path
;
5452 struct btrfs_inode_item
*item
;
5453 struct extent_buffer
*leaf
;
5456 path
= btrfs_alloc_path();
5460 path
->leave_spinning
= 1;
5461 ret
= btrfs_insert_empty_inode(trans
, root
, path
, objectid
);
5465 leaf
= path
->nodes
[0];
5466 item
= btrfs_item_ptr(leaf
, path
->slots
[0], struct btrfs_inode_item
);
5467 memset_extent_buffer(leaf
, 0, (unsigned long)item
, sizeof(*item
));
5468 btrfs_set_inode_generation(leaf
, item
, 1);
5469 btrfs_set_inode_size(leaf
, item
, size
);
5470 btrfs_set_inode_mode(leaf
, item
, S_IFREG
| 0600);
5471 btrfs_set_inode_flags(leaf
, item
, BTRFS_INODE_NOCOMPRESS
);
5472 btrfs_mark_buffer_dirty(leaf
);
5473 btrfs_release_path(root
, path
);
5475 btrfs_free_path(path
);
5479 static noinline
struct inode
*create_reloc_inode(struct btrfs_fs_info
*fs_info
,
5480 struct btrfs_block_group_cache
*group
)
5482 struct inode
*inode
= NULL
;
5483 struct btrfs_trans_handle
*trans
;
5484 struct btrfs_root
*root
;
5485 struct btrfs_key root_key
;
5486 u64 objectid
= BTRFS_FIRST_FREE_OBJECTID
;
5489 root_key
.objectid
= BTRFS_DATA_RELOC_TREE_OBJECTID
;
5490 root_key
.type
= BTRFS_ROOT_ITEM_KEY
;
5491 root_key
.offset
= (u64
)-1;
5492 root
= btrfs_read_fs_root_no_name(fs_info
, &root_key
);
5494 return ERR_CAST(root
);
5496 trans
= btrfs_start_transaction(root
, 1);
5499 err
= btrfs_find_free_objectid(trans
, root
, objectid
, &objectid
);
5503 err
= __insert_orphan_inode(trans
, root
, objectid
, group
->key
.offset
);
5506 err
= btrfs_insert_file_extent(trans
, root
, objectid
, 0, 0, 0,
5507 group
->key
.offset
, 0, group
->key
.offset
,
5511 inode
= btrfs_iget_locked(root
->fs_info
->sb
, objectid
, root
);
5512 if (inode
->i_state
& I_NEW
) {
5513 BTRFS_I(inode
)->root
= root
;
5514 BTRFS_I(inode
)->location
.objectid
= objectid
;
5515 BTRFS_I(inode
)->location
.type
= BTRFS_INODE_ITEM_KEY
;
5516 BTRFS_I(inode
)->location
.offset
= 0;
5517 btrfs_read_locked_inode(inode
);
5518 unlock_new_inode(inode
);
5519 BUG_ON(is_bad_inode(inode
));
5523 BTRFS_I(inode
)->index_cnt
= group
->key
.objectid
;
5525 err
= btrfs_orphan_add(trans
, inode
);
5527 btrfs_end_transaction(trans
, root
);
5531 inode
= ERR_PTR(err
);
5536 int btrfs_reloc_clone_csums(struct inode
*inode
, u64 file_pos
, u64 len
)
5539 struct btrfs_ordered_sum
*sums
;
5540 struct btrfs_sector_sum
*sector_sum
;
5541 struct btrfs_ordered_extent
*ordered
;
5542 struct btrfs_root
*root
= BTRFS_I(inode
)->root
;
5543 struct list_head list
;
5548 INIT_LIST_HEAD(&list
);
5550 ordered
= btrfs_lookup_ordered_extent(inode
, file_pos
);
5551 BUG_ON(ordered
->file_offset
!= file_pos
|| ordered
->len
!= len
);
5553 disk_bytenr
= file_pos
+ BTRFS_I(inode
)->index_cnt
;
5554 ret
= btrfs_lookup_csums_range(root
->fs_info
->csum_root
, disk_bytenr
,
5555 disk_bytenr
+ len
- 1, &list
);
5557 while (!list_empty(&list
)) {
5558 sums
= list_entry(list
.next
, struct btrfs_ordered_sum
, list
);
5559 list_del_init(&sums
->list
);
5561 sector_sum
= sums
->sums
;
5562 sums
->bytenr
= ordered
->start
;
5565 while (offset
< sums
->len
) {
5566 sector_sum
->bytenr
+= ordered
->start
- disk_bytenr
;
5568 offset
+= root
->sectorsize
;
5571 btrfs_add_ordered_sum(inode
, ordered
, sums
);
5573 btrfs_put_ordered_extent(ordered
);
5577 int btrfs_relocate_block_group(struct btrfs_root
*root
, u64 group_start
)
5579 struct btrfs_trans_handle
*trans
;
5580 struct btrfs_path
*path
;
5581 struct btrfs_fs_info
*info
= root
->fs_info
;
5582 struct extent_buffer
*leaf
;
5583 struct inode
*reloc_inode
;
5584 struct btrfs_block_group_cache
*block_group
;
5585 struct btrfs_key key
;
5594 root
= root
->fs_info
->extent_root
;
5596 block_group
= btrfs_lookup_block_group(info
, group_start
);
5597 BUG_ON(!block_group
);
5599 printk(KERN_INFO
"btrfs relocating block group %llu flags %llu\n",
5600 (unsigned long long)block_group
->key
.objectid
,
5601 (unsigned long long)block_group
->flags
);
5603 path
= btrfs_alloc_path();
5606 reloc_inode
= create_reloc_inode(info
, block_group
);
5607 BUG_ON(IS_ERR(reloc_inode
));
5609 __alloc_chunk_for_shrink(root
, block_group
, 1);
5610 set_block_group_readonly(block_group
);
5612 btrfs_start_delalloc_inodes(info
->tree_root
);
5613 btrfs_wait_ordered_extents(info
->tree_root
, 0);
5618 key
.objectid
= block_group
->key
.objectid
;
5621 cur_byte
= key
.objectid
;
5623 trans
= btrfs_start_transaction(info
->tree_root
, 1);
5624 btrfs_commit_transaction(trans
, info
->tree_root
);
5626 mutex_lock(&root
->fs_info
->cleaner_mutex
);
5627 btrfs_clean_old_snapshots(info
->tree_root
);
5628 btrfs_remove_leaf_refs(info
->tree_root
, (u64
)-1, 1);
5629 mutex_unlock(&root
->fs_info
->cleaner_mutex
);
5631 trans
= btrfs_start_transaction(info
->tree_root
, 1);
5632 btrfs_commit_transaction(trans
, info
->tree_root
);
5635 ret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
5639 leaf
= path
->nodes
[0];
5640 nritems
= btrfs_header_nritems(leaf
);
5641 if (path
->slots
[0] >= nritems
) {
5642 ret
= btrfs_next_leaf(root
, path
);
5649 leaf
= path
->nodes
[0];
5650 nritems
= btrfs_header_nritems(leaf
);
5653 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
5655 if (key
.objectid
>= block_group
->key
.objectid
+
5656 block_group
->key
.offset
)
5659 if (progress
&& need_resched()) {
5660 btrfs_release_path(root
, path
);
5667 if (btrfs_key_type(&key
) != BTRFS_EXTENT_ITEM_KEY
||
5668 key
.objectid
+ key
.offset
<= cur_byte
) {
5674 cur_byte
= key
.objectid
+ key
.offset
;
5675 btrfs_release_path(root
, path
);
5677 __alloc_chunk_for_shrink(root
, block_group
, 0);
5678 ret
= relocate_one_extent(root
, path
, &key
, block_group
,
5684 key
.objectid
= cur_byte
;
5689 btrfs_release_path(root
, path
);
5692 btrfs_wait_ordered_range(reloc_inode
, 0, (u64
)-1);
5693 invalidate_mapping_pages(reloc_inode
->i_mapping
, 0, -1);
5696 if (total_found
> 0) {
5697 printk(KERN_INFO
"btrfs found %llu extents in pass %d\n",
5698 (unsigned long long)total_found
, pass
);
5700 if (total_found
== skipped
&& pass
> 2) {
5702 reloc_inode
= create_reloc_inode(info
, block_group
);
5708 /* delete reloc_inode */
5711 /* unpin extents in this range */
5712 trans
= btrfs_start_transaction(info
->tree_root
, 1);
5713 btrfs_commit_transaction(trans
, info
->tree_root
);
5715 spin_lock(&block_group
->lock
);
5716 WARN_ON(block_group
->pinned
> 0);
5717 WARN_ON(block_group
->reserved
> 0);
5718 WARN_ON(btrfs_block_group_used(&block_group
->item
) > 0);
5719 spin_unlock(&block_group
->lock
);
5720 btrfs_put_block_group(block_group
);
5723 btrfs_free_path(path
);
5727 static int find_first_block_group(struct btrfs_root
*root
,
5728 struct btrfs_path
*path
, struct btrfs_key
*key
)
5731 struct btrfs_key found_key
;
5732 struct extent_buffer
*leaf
;
5735 ret
= btrfs_search_slot(NULL
, root
, key
, path
, 0, 0);
5740 slot
= path
->slots
[0];
5741 leaf
= path
->nodes
[0];
5742 if (slot
>= btrfs_header_nritems(leaf
)) {
5743 ret
= btrfs_next_leaf(root
, path
);
5750 btrfs_item_key_to_cpu(leaf
, &found_key
, slot
);
5752 if (found_key
.objectid
>= key
->objectid
&&
5753 found_key
.type
== BTRFS_BLOCK_GROUP_ITEM_KEY
) {
5764 int btrfs_free_block_groups(struct btrfs_fs_info
*info
)
5766 struct btrfs_block_group_cache
*block_group
;
5767 struct btrfs_space_info
*space_info
;
5770 spin_lock(&info
->block_group_cache_lock
);
5771 while ((n
= rb_last(&info
->block_group_cache_tree
)) != NULL
) {
5772 block_group
= rb_entry(n
, struct btrfs_block_group_cache
,
5774 rb_erase(&block_group
->cache_node
,
5775 &info
->block_group_cache_tree
);
5776 spin_unlock(&info
->block_group_cache_lock
);
5778 btrfs_remove_free_space_cache(block_group
);
5779 down_write(&block_group
->space_info
->groups_sem
);
5780 list_del(&block_group
->list
);
5781 up_write(&block_group
->space_info
->groups_sem
);
5783 WARN_ON(atomic_read(&block_group
->count
) != 1);
5786 spin_lock(&info
->block_group_cache_lock
);
5788 spin_unlock(&info
->block_group_cache_lock
);
5790 /* now that all the block groups are freed, go through and
5791 * free all the space_info structs. This is only called during
5792 * the final stages of unmount, and so we know nobody is
5793 * using them. We call synchronize_rcu() once before we start,
5794 * just to be on the safe side.
5798 while(!list_empty(&info
->space_info
)) {
5799 space_info
= list_entry(info
->space_info
.next
,
5800 struct btrfs_space_info
,
5803 list_del(&space_info
->list
);
5809 int btrfs_read_block_groups(struct btrfs_root
*root
)
5811 struct btrfs_path
*path
;
5813 struct btrfs_block_group_cache
*cache
;
5814 struct btrfs_fs_info
*info
= root
->fs_info
;
5815 struct btrfs_space_info
*space_info
;
5816 struct btrfs_key key
;
5817 struct btrfs_key found_key
;
5818 struct extent_buffer
*leaf
;
5820 root
= info
->extent_root
;
5823 btrfs_set_key_type(&key
, BTRFS_BLOCK_GROUP_ITEM_KEY
);
5824 path
= btrfs_alloc_path();
5829 ret
= find_first_block_group(root
, path
, &key
);
5837 leaf
= path
->nodes
[0];
5838 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0]);
5839 cache
= kzalloc(sizeof(*cache
), GFP_NOFS
);
5845 atomic_set(&cache
->count
, 1);
5846 spin_lock_init(&cache
->lock
);
5847 spin_lock_init(&cache
->tree_lock
);
5848 mutex_init(&cache
->cache_mutex
);
5849 INIT_LIST_HEAD(&cache
->list
);
5850 INIT_LIST_HEAD(&cache
->cluster_list
);
5851 read_extent_buffer(leaf
, &cache
->item
,
5852 btrfs_item_ptr_offset(leaf
, path
->slots
[0]),
5853 sizeof(cache
->item
));
5854 memcpy(&cache
->key
, &found_key
, sizeof(found_key
));
5856 key
.objectid
= found_key
.objectid
+ found_key
.offset
;
5857 btrfs_release_path(root
, path
);
5858 cache
->flags
= btrfs_block_group_flags(&cache
->item
);
5860 ret
= update_space_info(info
, cache
->flags
, found_key
.offset
,
5861 btrfs_block_group_used(&cache
->item
),
5864 cache
->space_info
= space_info
;
5865 down_write(&space_info
->groups_sem
);
5866 list_add_tail(&cache
->list
, &space_info
->block_groups
);
5867 up_write(&space_info
->groups_sem
);
5869 ret
= btrfs_add_block_group_cache(root
->fs_info
, cache
);
5872 set_avail_alloc_bits(root
->fs_info
, cache
->flags
);
5873 if (btrfs_chunk_readonly(root
, cache
->key
.objectid
))
5874 set_block_group_readonly(cache
);
5878 btrfs_free_path(path
);
5882 int btrfs_make_block_group(struct btrfs_trans_handle
*trans
,
5883 struct btrfs_root
*root
, u64 bytes_used
,
5884 u64 type
, u64 chunk_objectid
, u64 chunk_offset
,
5888 struct btrfs_root
*extent_root
;
5889 struct btrfs_block_group_cache
*cache
;
5891 extent_root
= root
->fs_info
->extent_root
;
5893 root
->fs_info
->last_trans_log_full_commit
= trans
->transid
;
5895 cache
= kzalloc(sizeof(*cache
), GFP_NOFS
);
5899 cache
->key
.objectid
= chunk_offset
;
5900 cache
->key
.offset
= size
;
5901 cache
->key
.type
= BTRFS_BLOCK_GROUP_ITEM_KEY
;
5902 atomic_set(&cache
->count
, 1);
5903 spin_lock_init(&cache
->lock
);
5904 spin_lock_init(&cache
->tree_lock
);
5905 mutex_init(&cache
->cache_mutex
);
5906 INIT_LIST_HEAD(&cache
->list
);
5907 INIT_LIST_HEAD(&cache
->cluster_list
);
5909 btrfs_set_block_group_used(&cache
->item
, bytes_used
);
5910 btrfs_set_block_group_chunk_objectid(&cache
->item
, chunk_objectid
);
5911 cache
->flags
= type
;
5912 btrfs_set_block_group_flags(&cache
->item
, type
);
5914 ret
= update_space_info(root
->fs_info
, cache
->flags
, size
, bytes_used
,
5915 &cache
->space_info
);
5917 down_write(&cache
->space_info
->groups_sem
);
5918 list_add_tail(&cache
->list
, &cache
->space_info
->block_groups
);
5919 up_write(&cache
->space_info
->groups_sem
);
5921 ret
= btrfs_add_block_group_cache(root
->fs_info
, cache
);
5924 ret
= btrfs_insert_item(trans
, extent_root
, &cache
->key
, &cache
->item
,
5925 sizeof(cache
->item
));
5928 set_avail_alloc_bits(extent_root
->fs_info
, type
);
5933 int btrfs_remove_block_group(struct btrfs_trans_handle
*trans
,
5934 struct btrfs_root
*root
, u64 group_start
)
5936 struct btrfs_path
*path
;
5937 struct btrfs_block_group_cache
*block_group
;
5938 struct btrfs_key key
;
5941 root
= root
->fs_info
->extent_root
;
5943 block_group
= btrfs_lookup_block_group(root
->fs_info
, group_start
);
5944 BUG_ON(!block_group
);
5945 BUG_ON(!block_group
->ro
);
5947 memcpy(&key
, &block_group
->key
, sizeof(key
));
5949 path
= btrfs_alloc_path();
5952 spin_lock(&root
->fs_info
->block_group_cache_lock
);
5953 rb_erase(&block_group
->cache_node
,
5954 &root
->fs_info
->block_group_cache_tree
);
5955 spin_unlock(&root
->fs_info
->block_group_cache_lock
);
5956 btrfs_remove_free_space_cache(block_group
);
5957 down_write(&block_group
->space_info
->groups_sem
);
5958 list_del(&block_group
->list
);
5959 up_write(&block_group
->space_info
->groups_sem
);
5961 spin_lock(&block_group
->space_info
->lock
);
5962 block_group
->space_info
->total_bytes
-= block_group
->key
.offset
;
5963 block_group
->space_info
->bytes_readonly
-= block_group
->key
.offset
;
5964 spin_unlock(&block_group
->space_info
->lock
);
5965 block_group
->space_info
->full
= 0;
5967 btrfs_put_block_group(block_group
);
5968 btrfs_put_block_group(block_group
);
5970 ret
= btrfs_search_slot(trans
, root
, &key
, path
, -1, 1);
5976 ret
= btrfs_del_item(trans
, root
, path
);
5978 btrfs_free_path(path
);