X-Git-Url: http://drtracing.org/?a=blobdiff_plain;f=fs%2Fbtrfs%2Fextent-tree.c;h=41a63462d3eb71671d81ffce3707747510f5e512;hb=df5b5520b21d8a2554ede65c09b288833c77144d;hp=c226656f29b746dd0a585709299edbd19b5bd895;hpb=593060d756e0c2382d59cf00d5f3b9e3a336c408;p=deliverable%2Flinux.git diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c226656f29b7..41a63462d3eb 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -15,11 +15,11 @@ * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 021110-1307, USA. */ - #include -#include #include +#include #include "hash.h" +#include "crc32c.h" #include "ctree.h" #include "disk-io.h" #include "print-tree.h" @@ -36,10 +36,6 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); static int del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); -int btrfs_make_block_group(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 bytes_used, - u64 type, u64 chunk_tree, u64 chunk_objectid, - u64 size); static int cache_block_group(struct btrfs_root *root, @@ -140,6 +136,35 @@ err: return 0; } +struct btrfs_block_group_cache *btrfs_lookup_first_block_group(struct + btrfs_fs_info *info, + u64 bytenr) +{ + struct extent_io_tree *block_group_cache; + struct btrfs_block_group_cache *block_group = NULL; + u64 ptr; + u64 start; + u64 end; + int ret; + + bytenr = max_t(u64, bytenr, + BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE); + block_group_cache = &info->block_group_cache; + ret = find_first_extent_bit(block_group_cache, + bytenr, &start, &end, + BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA | + BLOCK_GROUP_SYSTEM); + if (ret) { + return NULL; + } + ret = get_state_private(block_group_cache, start, &ptr); + if (ret) + return NULL; + + block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr; + return block_group; +} + struct btrfs_block_group_cache *btrfs_lookup_block_group(struct btrfs_fs_info *info, u64 bytenr) @@ -151,6 +176,8 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct u64 end; int ret; + bytenr = max_t(u64, bytenr, + BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE); block_group_cache = &info->block_group_cache; ret = find_first_extent_bit(block_group_cache, bytenr, &start, &end, @@ -177,7 +204,7 @@ static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) static int noinline find_search_start(struct btrfs_root *root, struct btrfs_block_group_cache **cache_ret, - u64 *start_ret, int num, int data) + u64 *start_ret, u64 num, int data) { int ret; struct btrfs_block_group_cache *cache = *cache_ret; @@ -190,20 +217,21 @@ static int noinline find_search_start(struct btrfs_root *root, u64 search_start = *start_ret; int wrapped = 0; - if (!cache) - goto out; total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); free_space_cache = &root->fs_info->free_space_cache; + if (!cache) + goto out; + again: ret = cache_block_group(root, cache); - if (ret) + if (ret) { goto out; + } last = max(search_start, cache->key.objectid); - if (!block_group_bits(cache, data)) { + if (!block_group_bits(cache, data) || cache->ro) goto new_group; - } spin_lock_irq(&free_space_cache->lock); state = find_first_extent_bit_state(free_space_cache, last, EXTENT_DIRTY); @@ -218,21 +246,24 @@ again: start = max(last, state->start); last = state->end + 1; if (last - start < num) { - if (last == cache->key.objectid + cache->key.offset) - cache_miss = start; do { state = extent_state_next(state); } while(state && !(state->state & EXTENT_DIRTY)); continue; } spin_unlock_irq(&free_space_cache->lock); - if (start + num > cache->key.objectid + cache->key.offset) + if (cache->ro) { goto new_group; - if (start + num > total_fs_bytes) + } + if (start + num > cache->key.objectid + cache->key.offset) goto new_group; + if (!block_group_bits(cache, data)) { + printk("block group bits don't match %Lu %d\n", cache->flags, data); + } *start_ret = start; return 0; - } out: + } +out: cache = btrfs_lookup_block_group(root->fs_info, search_start); if (!cache) { printk("Unable to find block group for %Lu\n", search_start); @@ -243,7 +274,7 @@ again: new_group: last = cache->key.objectid + cache->key.offset; wrapped: - cache = btrfs_lookup_block_group(root->fs_info, last); + cache = btrfs_lookup_first_block_group(root->fs_info, last); if (!cache || cache->key.objectid >= total_fs_bytes) { no_cache: if (!wrapped) { @@ -256,13 +287,13 @@ no_cache: if (cache_miss && !cache->cached) { cache_block_group(root, cache); last = cache_miss; - cache = btrfs_lookup_block_group(root->fs_info, last); + cache = btrfs_lookup_first_block_group(root->fs_info, last); } + cache_miss = 0; cache = btrfs_find_block_group(root, cache, last, data, 0); if (!cache) goto no_cache; *cache_ret = cache; - cache_miss = 0; goto again; } @@ -298,29 +329,27 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, struct btrfs_fs_info *info = root->fs_info; u64 used; u64 last = 0; - u64 hint_last; u64 start; u64 end; u64 free_check; u64 ptr; - u64 total_fs_bytes; int bit; int ret; int full_search = 0; - int factor = 8; + int factor = 10; + int wrapped = 0; block_group_cache = &info->block_group_cache; - total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); - if (!owner) - factor = 8; + if (data & BTRFS_BLOCK_GROUP_METADATA) + factor = 9; bit = block_group_state_bits(data); - if (search_start && search_start < total_fs_bytes) { + if (search_start) { struct btrfs_block_group_cache *shint; - shint = btrfs_lookup_block_group(info, search_start); - if (shint && block_group_bits(shint, data)) { + shint = btrfs_lookup_first_block_group(info, search_start); + if (shint && block_group_bits(shint, data) && !shint->ro) { used = btrfs_block_group_used(&shint->item); if (used + shint->pinned < div_factor(shint->key.offset, factor)) { @@ -328,24 +357,18 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, } } } - if (hint && block_group_bits(hint, data) && - hint->key.objectid < total_fs_bytes) { + if (hint && !hint->ro && block_group_bits(hint, data)) { used = btrfs_block_group_used(&hint->item); if (used + hint->pinned < div_factor(hint->key.offset, factor)) { return hint; } last = hint->key.objectid + hint->key.offset; - hint_last = last; } else { if (hint) - hint_last = max(hint->key.objectid, search_start); + last = max(hint->key.objectid, search_start); else - hint_last = search_start; - - if (hint_last >= total_fs_bytes) - hint_last = search_start; - last = hint_last; + last = search_start; } again: while(1) { @@ -355,30 +378,33 @@ again: break; ret = get_state_private(block_group_cache, start, &ptr); - if (ret) - break; + if (ret) { + last = end + 1; + continue; + } cache = (struct btrfs_block_group_cache *)(unsigned long)ptr; last = cache->key.objectid + cache->key.offset; used = btrfs_block_group_used(&cache->item); - if (cache->key.objectid > total_fs_bytes) - break; - - if (full_search) - free_check = cache->key.offset; - else + if (!cache->ro && block_group_bits(cache, data)) { free_check = div_factor(cache->key.offset, factor); - - if (used + cache->pinned < free_check) { - found_group = cache; - goto found; + if (used + cache->pinned < free_check) { + found_group = cache; + goto found; + } } cond_resched(); } - if (!full_search) { + if (!wrapped) { + last = search_start; + wrapped = 1; + goto again; + } + if (!full_search && factor < 10) { last = search_start; full_search = 1; + factor = 10; goto again; } found: @@ -391,16 +417,15 @@ static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation, u32 high_crc = ~(u32)0; u32 low_crc = ~(u32)0; __le64 lenum; - lenum = cpu_to_le64(root_objectid); - high_crc = crc32c(high_crc, &lenum, sizeof(lenum)); + high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum)); lenum = cpu_to_le64(ref_generation); - low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); + low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum)); if (owner >= BTRFS_FIRST_FREE_OBJECTID) { lenum = cpu_to_le64(owner); - low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); + low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum)); lenum = cpu_to_le64(owner_offset); - low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); + low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum)); } return ((u64)high_crc << 32) | (u64)low_crc; } @@ -639,7 +664,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, if (!path) return -ENOMEM; - path->reada = 0; + path->reada = 1; key.objectid = bytenr; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); key.offset = num_bytes; @@ -659,7 +684,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, btrfs_release_path(root->fs_info->extent_root, path); - path->reada = 0; + path->reada = 1; ret = btrfs_insert_extent_backref(trans, root->fs_info->extent_root, path, bytenr, root_objectid, ref_generation, owner, owner_offset); @@ -691,7 +716,7 @@ static int lookup_extent_ref(struct btrfs_trans_handle *trans, WARN_ON(num_bytes < root->sectorsize); path = btrfs_alloc_path(); - path->reada = 0; + path->reada = 1; key.objectid = bytenr; key.offset = num_bytes; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); @@ -714,14 +739,17 @@ out: u32 btrfs_count_snapshots_in_path(struct btrfs_root *root, struct btrfs_path *count_path, + u64 expected_owner, u64 first_extent) { struct btrfs_root *extent_root = root->fs_info->extent_root; struct btrfs_path *path; u64 bytenr; u64 found_objectid; + u64 found_owner; u64 root_objectid = root->root_key.objectid; u32 total_count = 0; + u32 extent_refs; u32 cur_count; u32 nritems; int ret; @@ -758,6 +786,7 @@ again: } item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); + extent_refs = btrfs_extent_refs(l, item); while (1) { l = path->nodes[0]; nritems = btrfs_header_nritems(l); @@ -785,9 +814,34 @@ again: total_count = 2; goto out; } + if (level == -1) { + found_owner = btrfs_ref_objectid(l, ref_item); + if (found_owner != expected_owner) { + total_count = 2; + goto out; + } + /* + * nasty. we don't count a reference held by + * the running transaction. This allows nodatacow + * to avoid cow most of the time + */ + if (found_owner >= BTRFS_FIRST_FREE_OBJECTID && + btrfs_ref_generation(l, ref_item) == + root->fs_info->generation) { + extent_refs--; + } + } total_count = 1; path->slots[0]++; } + /* + * if there is more than one reference against a data extent, + * we have to assume the other ref is another snapshot + */ + if (level == -1 && extent_refs > 1) { + total_count = 2; + goto out; + } if (cur_count == 0) { total_count = 0; goto out; @@ -975,7 +1029,6 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, ret = get_state_private(block_group_cache, start, &ptr); if (ret) break; - cache = (struct btrfs_block_group_cache *)(unsigned long)ptr; err = write_one_cache_group(trans, root, path, cache); @@ -1020,6 +1073,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, if (found) { found->total_bytes += total_bytes; found->bytes_used += bytes_used; + found->full = 0; WARN_ON(found->total_bytes < found->bytes_used); *space_info = found; return 0; @@ -1034,14 +1088,58 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, found->bytes_used = bytes_used; found->bytes_pinned = 0; found->full = 0; + found->force_alloc = 0; *space_info = found; return 0; } +static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) +{ + u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 | + BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_RAID10 | + BTRFS_BLOCK_GROUP_DUP); + if (extra_flags) { + if (flags & BTRFS_BLOCK_GROUP_DATA) + fs_info->avail_data_alloc_bits |= extra_flags; + if (flags & BTRFS_BLOCK_GROUP_METADATA) + fs_info->avail_metadata_alloc_bits |= extra_flags; + if (flags & BTRFS_BLOCK_GROUP_SYSTEM) + fs_info->avail_system_alloc_bits |= extra_flags; + } +} + +static u64 reduce_alloc_profile(struct btrfs_root *root, u64 flags) +{ + u64 num_devices = root->fs_info->fs_devices->num_devices; + + if (num_devices == 1) + flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0); + if (num_devices < 4) + flags &= ~BTRFS_BLOCK_GROUP_RAID10; + + if ((flags & BTRFS_BLOCK_GROUP_DUP) && + (flags & (BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_RAID10))) { + flags &= ~BTRFS_BLOCK_GROUP_DUP; + } + + if ((flags & BTRFS_BLOCK_GROUP_RAID1) && + (flags & BTRFS_BLOCK_GROUP_RAID10)) { + flags &= ~BTRFS_BLOCK_GROUP_RAID1; + } + + if ((flags & BTRFS_BLOCK_GROUP_RAID0) && + ((flags & BTRFS_BLOCK_GROUP_RAID1) | + (flags & BTRFS_BLOCK_GROUP_RAID10) | + (flags & BTRFS_BLOCK_GROUP_DUP))) + flags &= ~BTRFS_BLOCK_GROUP_RAID0; + return flags; +} static int do_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, u64 alloc_bytes, - u64 flags) + u64 flags, int force) { struct btrfs_space_info *space_info; u64 thresh; @@ -1049,6 +1147,8 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 num_bytes; int ret; + flags = reduce_alloc_profile(extent_root, flags); + space_info = __find_space_info(extent_root->fs_info, flags); if (!space_info) { ret = update_space_info(extent_root->fs_info, flags, @@ -1057,11 +1157,16 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, } BUG_ON(!space_info); + if (space_info->force_alloc) { + force = 1; + space_info->force_alloc = 0; + } if (space_info->full) return 0; - thresh = div_factor(space_info->total_bytes, 7); - if ((space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) < + thresh = div_factor(space_info->total_bytes, 6); + if (!force && + (space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) < thresh) return 0; @@ -1071,24 +1176,12 @@ printk("space info full %Lu\n", flags); space_info->full = 1; return 0; } - BUG_ON(ret); ret = btrfs_make_block_group(trans, extent_root, 0, flags, - extent_root->fs_info->chunk_root->root_key.objectid, - start, num_bytes); + BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes); BUG_ON(ret); - if (flags & BTRFS_BLOCK_GROUP_RAID0) { - if (flags & BTRFS_BLOCK_GROUP_DATA) { - extent_root->fs_info->extra_data_alloc_bits = - BTRFS_BLOCK_GROUP_RAID0; - } - if (flags & BTRFS_BLOCK_GROUP_METADATA) { - extent_root->fs_info->extra_alloc_bits = - BTRFS_BLOCK_GROUP_RAID0; - } - } return 0; } @@ -1138,6 +1231,21 @@ static int update_block_group(struct btrfs_trans_handle *trans, return 0; } +static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) +{ + u64 start; + u64 end; + int ret; + ret = find_first_extent_bit(&root->fs_info->block_group_cache, + search_start, &start, &end, + BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA | + BLOCK_GROUP_SYSTEM); + if (ret) + return 0; + return start; +} + + static int update_pinned_extents(struct btrfs_root *root, u64 bytenr, u64 num, int pin) { @@ -1154,16 +1262,25 @@ static int update_pinned_extents(struct btrfs_root *root, } while (num > 0) { cache = btrfs_lookup_block_group(fs_info, bytenr); - WARN_ON(!cache); - len = min(num, cache->key.offset - - (bytenr - cache->key.objectid)); + if (!cache) { + u64 first = first_logical_byte(root, bytenr); + WARN_ON(first < bytenr); + len = min(first - bytenr, num); + } else { + len = min(num, cache->key.offset - + (bytenr - cache->key.objectid)); + } if (pin) { - cache->pinned += len; - cache->space_info->bytes_pinned += len; + if (cache) { + cache->pinned += len; + cache->space_info->bytes_pinned += len; + } fs_info->total_pinned += len; } else { - cache->pinned -= len; - cache->space_info->bytes_pinned -= len; + if (cache) { + cache->pinned -= len; + cache->space_info->bytes_pinned -= len; + } fs_info->total_pinned -= len; } bytenr += len; @@ -1244,7 +1361,8 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, &extent_item, sizeof(extent_item)); clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED, GFP_NOFS); - eb = read_tree_block(extent_root, ins.objectid, ins.offset); + eb = read_tree_block(extent_root, ins.objectid, ins.offset, + trans->transid); level = btrfs_header_level(eb); if (level == 0) { btrfs_item_key(eb, &first, 0); @@ -1271,12 +1389,14 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, if (!pending) { buf = btrfs_find_tree_block(root, bytenr, num_bytes); if (buf) { - if (btrfs_buffer_uptodate(buf)) { + if (btrfs_buffer_uptodate(buf, 0)) { u64 transid = root->fs_info->running_transaction->transid; u64 header_transid = btrfs_header_generation(buf); - if (header_transid == transid) { + if (header_transid == transid && + !btrfs_header_flag(buf, + BTRFS_HEADER_FLAG_WRITTEN)) { clean_tree_block(NULL, root, buf); free_extent_buffer(buf); return 1; @@ -1322,7 +1442,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (!path) return -ENOMEM; - path->reada = 0; + path->reada = 1; ret = lookup_extent_backref(trans, extent_root, path, bytenr, root_objectid, ref_generation, @@ -1514,7 +1634,7 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, int data) { int ret; - u64 orig_search_start = search_start; + u64 orig_search_start; struct btrfs_root * root = orig_root->fs_info->extent_root; struct btrfs_fs_info *info = root->fs_info; u64 total_needed = num_bytes; @@ -1522,13 +1642,19 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_block_group_cache *block_group; int full_scan = 0; int wrapped = 0; + int chunk_alloc_done = 0; int empty_cluster = 2 * 1024 * 1024; + int allowed_chunk_alloc = 0; WARN_ON(num_bytes < root->sectorsize); btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); + if (orig_root->ref_cows || empty_size) + allowed_chunk_alloc = 1; + if (data & BTRFS_BLOCK_GROUP_METADATA) { last_ptr = &root->fs_info->last_alloc; + empty_cluster = 256 * 1024; } if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) { @@ -1543,11 +1669,14 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, } } + search_start = max(search_start, first_logical_byte(root, 0)); + orig_search_start = search_start; + if (search_end == (u64)-1) search_end = btrfs_super_total_bytes(&info->super_copy); if (hint_byte) { - block_group = btrfs_lookup_block_group(info, hint_byte); + block_group = btrfs_lookup_first_block_group(info, hint_byte); if (!block_group) hint_byte = search_start; block_group = btrfs_find_block_group(root, block_group, @@ -1565,17 +1694,28 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, check_failed: if (!block_group) { - block_group = btrfs_lookup_block_group(info, search_start); + block_group = btrfs_lookup_first_block_group(info, + search_start); if (!block_group) - block_group = btrfs_lookup_block_group(info, + block_group = btrfs_lookup_first_block_group(info, orig_search_start); } + if (full_scan && !chunk_alloc_done) { + if (allowed_chunk_alloc) { + do_chunk_alloc(trans, root, + num_bytes + 2 * 1024 * 1024, data, 1); + allowed_chunk_alloc = 0; + } else if (block_group && block_group_bits(block_group, data)) { + block_group->space_info->force_alloc = 1; + } + chunk_alloc_done = 1; + } ret = find_search_start(root, &block_group, &search_start, total_needed, data); if (ret == -ENOSPC && last_ptr && *last_ptr) { *last_ptr = 0; - block_group = btrfs_lookup_block_group(info, - orig_search_start); + block_group = btrfs_lookup_first_block_group(info, + orig_search_start); search_start = orig_search_start; ret = find_search_start(root, &block_group, &search_start, total_needed, data); @@ -1591,7 +1731,7 @@ check_failed: empty_size += empty_cluster; total_needed += empty_size; } - block_group = btrfs_lookup_block_group(info, + block_group = btrfs_lookup_first_block_group(info, orig_search_start); search_start = orig_search_start; ret = find_search_start(root, &block_group, @@ -1664,7 +1804,7 @@ enospc: } else wrapped = 1; } - block_group = btrfs_lookup_block_group(info, search_start); + block_group = btrfs_lookup_first_block_group(info, search_start); cond_resched(); block_group = btrfs_find_block_group(root, block_group, search_start, data, 0); @@ -1673,6 +1813,7 @@ enospc: error: return ret; } + /* * finds a free extent and does all the dirty work required for allocation * returns the key for the extent through ins, and a tree buffer for @@ -1682,17 +1823,18 @@ error: */ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 num_bytes, u64 root_objectid, u64 ref_generation, + u64 num_bytes, u64 min_alloc_size, + u64 root_objectid, u64 ref_generation, u64 owner, u64 owner_offset, u64 empty_size, u64 hint_byte, - u64 search_end, struct btrfs_key *ins, int data) + u64 search_end, struct btrfs_key *ins, u64 data) { int ret; int pending_ret; u64 super_used; u64 root_used; u64 search_start = 0; - u64 new_hint; + u64 alloc_profile; u32 sizes[2]; struct btrfs_fs_info *info = root->fs_info; struct btrfs_root *extent_root = info->extent_root; @@ -1700,43 +1842,56 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_extent_ref *ref; struct btrfs_path *path; struct btrfs_key keys[2]; - int extra_chunk_alloc_bits = 0; if (data) { - data = BTRFS_BLOCK_GROUP_DATA | info->extra_data_alloc_bits; + alloc_profile = info->avail_data_alloc_bits & + info->data_alloc_profile; + data = BTRFS_BLOCK_GROUP_DATA | alloc_profile; } else if (root == root->fs_info->chunk_root) { - data = BTRFS_BLOCK_GROUP_SYSTEM; + alloc_profile = info->avail_system_alloc_bits & + info->system_alloc_profile; + data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile; } else { - data = BTRFS_BLOCK_GROUP_METADATA | info->extra_alloc_bits; + alloc_profile = info->avail_metadata_alloc_bits & + info->metadata_alloc_profile; + data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; } - if (btrfs_super_num_devices(&info->super_copy) > 1 && - !(data & BTRFS_BLOCK_GROUP_SYSTEM)) - extra_chunk_alloc_bits = BTRFS_BLOCK_GROUP_RAID0; - - if (root->ref_cows) { +again: + data = reduce_alloc_profile(root, data); + /* + * the only place that sets empty_size is btrfs_realloc_node, which + * is not called recursively on allocations + */ + if (empty_size || root->ref_cows) { if (!(data & BTRFS_BLOCK_GROUP_METADATA)) { ret = do_chunk_alloc(trans, root->fs_info->extent_root, - 2 * 1024 * 1024, - BTRFS_BLOCK_GROUP_METADATA | - info->extra_alloc_bits | - extra_chunk_alloc_bits); + 2 * 1024 * 1024, + BTRFS_BLOCK_GROUP_METADATA | + (info->metadata_alloc_profile & + info->avail_metadata_alloc_bits), 0); BUG_ON(ret); } ret = do_chunk_alloc(trans, root->fs_info->extent_root, - num_bytes + 2 * 1024 * 1024, data | - extra_chunk_alloc_bits); + num_bytes + 2 * 1024 * 1024, data, 0); BUG_ON(ret); } - new_hint = max(hint_byte, root->fs_info->alloc_start); - if (new_hint < btrfs_super_total_bytes(&info->super_copy)) - hint_byte = new_hint; - WARN_ON(num_bytes < root->sectorsize); ret = find_free_extent(trans, root, num_bytes, empty_size, search_start, search_end, hint_byte, ins, trans->alloc_exclude_start, trans->alloc_exclude_nr, data); + + if (ret == -ENOSPC && num_bytes > min_alloc_size) { + num_bytes = num_bytes >> 1; + num_bytes = max(num_bytes, min_alloc_size); + do_chunk_alloc(trans, root->fs_info->extent_root, + num_bytes, data, 1); + goto again; + } + if (ret) { + printk("allocation failed flags %Lu\n", data); + } BUG_ON(ret); if (ret) return ret; @@ -1855,7 +2010,7 @@ struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans, int ret; struct extent_buffer *buf; - ret = btrfs_alloc_extent(trans, root, blocksize, + ret = btrfs_alloc_extent(trans, root, blocksize, blocksize, root_objectid, ref_generation, level, first_objectid, empty_size, hint, (u64)-1, &ins, 0); @@ -1872,7 +2027,6 @@ struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans, } btrfs_set_header_generation(buf, trans->transid); clean_tree_block(trans, root, buf); - wait_on_tree_block_writeback(root, buf); btrfs_set_buffer_uptodate(buf); if (PageDirty(buf->first_page)) { @@ -1882,10 +2036,6 @@ struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans, set_extent_dirty(&trans->transaction->dirty_pages, buf->start, buf->start + buf->len - 1, GFP_NOFS); - set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->io_tree, - buf->start, buf->start + buf->len - 1, - EXTENT_CSUM, GFP_NOFS); - buf->flags |= EXTENT_CSUM; if (!btrfs_test_opt(root, SSD)) btrfs_set_buffer_defrag(buf); trans->blocks_used++; @@ -1972,7 +2122,8 @@ static void noinline reada_walk_down(struct btrfs_root *root, } } mutex_unlock(&root->fs_info->fs_mutex); - ret = readahead_tree_block(root, bytenr, blocksize); + ret = readahead_tree_block(root, bytenr, blocksize, + btrfs_node_ptr_generation(node, i)); last = bytenr + blocksize; cond_resched(); mutex_lock(&root->fs_info->fs_mutex); @@ -1992,6 +2143,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, u64 root_owner; u64 root_gen; u64 bytenr; + u64 ptr_gen; struct extent_buffer *next; struct extent_buffer *cur; struct extent_buffer *parent; @@ -2028,6 +2180,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, break; } bytenr = btrfs_node_blockptr(cur, path->slots[*level]); + ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); blocksize = btrfs_level_size(root, *level - 1); ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs); BUG_ON(ret); @@ -2043,14 +2196,16 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, continue; } next = btrfs_find_tree_block(root, bytenr, blocksize); - if (!next || !btrfs_buffer_uptodate(next)) { + if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) { free_extent_buffer(next); reada_walk_down(root, cur, path->slots[*level]); + mutex_unlock(&root->fs_info->fs_mutex); - next = read_tree_block(root, bytenr, blocksize); + next = read_tree_block(root, bytenr, blocksize, + ptr_gen); mutex_lock(&root->fs_info->fs_mutex); - /* we dropped the lock, check one more time */ + /* we've dropped the lock, double check */ ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs); BUG_ON(ret); @@ -2250,18 +2405,25 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) return 0; } +static unsigned long calc_ra(unsigned long start, unsigned long last, + unsigned long nr) +{ + return min(last, start + nr - 1); +} + static int noinline relocate_inode_pages(struct inode *inode, u64 start, u64 len) { u64 page_start; u64 page_end; - u64 delalloc_start; - u64 existing_delalloc; unsigned long last_index; unsigned long i; struct page *page; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct file_ra_state *ra; + unsigned long total_read = 0; + unsigned long ra_pages; + struct btrfs_trans_handle *trans; ra = kzalloc(sizeof(*ra), GFP_NOFS); @@ -2269,14 +2431,23 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, i = start >> PAGE_CACHE_SHIFT; last_index = (start + len - 1) >> PAGE_CACHE_SHIFT; + ra_pages = BTRFS_I(inode)->root->fs_info->bdi.ra_pages; + file_ra_state_init(ra, inode->i_mapping); - btrfs_force_ra(inode->i_mapping, ra, NULL, i, last_index); - kfree(ra); for (; i <= last_index; i++) { + if (total_read % ra_pages == 0) { + btrfs_force_ra(inode->i_mapping, ra, NULL, i, + calc_ra(i, last_index, ra_pages)); + } + total_read++; + if (((u64)i << PAGE_CACHE_SHIFT) > inode->i_size) + goto truncate_racing; + page = grab_cache_page(inode->i_mapping, i); - if (!page) + if (!page) { goto out_unlock; + } if (!PageUptodate(page)) { btrfs_readpage(NULL, page); lock_page(page); @@ -2286,28 +2457,124 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, goto out_unlock; } } +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + ClearPageDirty(page); +#else + cancel_dirty_page(page, PAGE_CACHE_SIZE); +#endif + wait_on_page_writeback(page); + set_page_extent_mapped(page); page_start = (u64)page->index << PAGE_CACHE_SHIFT; page_end = page_start + PAGE_CACHE_SIZE - 1; lock_extent(io_tree, page_start, page_end, GFP_NOFS); - delalloc_start = page_start; - existing_delalloc = count_range_bits(io_tree, - &delalloc_start, page_end, - PAGE_CACHE_SIZE, EXTENT_DELALLOC); - set_extent_delalloc(io_tree, page_start, page_end, GFP_NOFS); + set_page_dirty(page); unlock_extent(io_tree, page_start, page_end, GFP_NOFS); - set_page_dirty(page); unlock_page(page); page_cache_release(page); } + balance_dirty_pages_ratelimited_nr(inode->i_mapping, + total_read); out_unlock: + kfree(ra); + trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1); + if (trans) { + btrfs_add_ordered_inode(inode); + btrfs_end_transaction(trans, BTRFS_I(inode)->root); + mark_inode_dirty(inode); + } mutex_unlock(&inode->i_mutex); return 0; + +truncate_racing: + vmtruncate(inode, inode->i_size); + balance_dirty_pages_ratelimited_nr(inode->i_mapping, + total_read); + goto out_unlock; +} + +/* + * The back references tell us which tree holds a ref on a block, + * but it is possible for the tree root field in the reference to + * reflect the original root before a snapshot was made. In this + * case we should search through all the children of a given root + * to find potential holders of references on a block. + * + * Instead, we do something a little less fancy and just search + * all the roots for a given key/block combination. + */ +static int find_root_for_ref(struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *key0, + int level, + int file_key, + struct btrfs_root **found_root, + u64 bytenr) +{ + struct btrfs_key root_location; + struct btrfs_root *cur_root = *found_root; + struct btrfs_file_extent_item *file_extent; + u64 root_search_start = BTRFS_FS_TREE_OBJECTID; + u64 found_bytenr; + int ret; + int i; + + root_location.offset = (u64)-1; + root_location.type = BTRFS_ROOT_ITEM_KEY; + path->lowest_level = level; + path->reada = 0; + while(1) { + ret = btrfs_search_slot(NULL, cur_root, key0, path, 0, 0); + found_bytenr = 0; + if (ret == 0 && file_key) { + struct extent_buffer *leaf = path->nodes[0]; + file_extent = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + if (btrfs_file_extent_type(leaf, file_extent) == + BTRFS_FILE_EXTENT_REG) { + found_bytenr = + btrfs_file_extent_disk_bytenr(leaf, + file_extent); + } + } else if (!file_key) { + if (path->nodes[level]) + found_bytenr = path->nodes[level]->start; + } + + for (i = level; i < BTRFS_MAX_LEVEL; i++) { + if (!path->nodes[i]) + break; + free_extent_buffer(path->nodes[i]); + path->nodes[i] = NULL; + } + btrfs_release_path(cur_root, path); + + if (found_bytenr == bytenr) { + *found_root = cur_root; + ret = 0; + goto out; + } + ret = btrfs_search_root(root->fs_info->tree_root, + root_search_start, &root_search_start); + if (ret) + break; + + root_location.objectid = root_search_start; + cur_root = btrfs_read_fs_root_no_name(root->fs_info, + &root_location); + if (!cur_root) { + ret = 1; + break; + } + } +out: + path->lowest_level = 0; + return ret; } /* @@ -2315,17 +2582,23 @@ out_unlock: */ static int noinline relocate_one_reference(struct btrfs_root *extent_root, struct btrfs_path *path, - struct btrfs_key *extent_key) + struct btrfs_key *extent_key, + u64 *last_file_objectid, + u64 *last_file_offset, + u64 *last_file_root, + u64 last_extent) { struct inode *inode; struct btrfs_root *found_root; - struct btrfs_key *root_location; + struct btrfs_key root_location; + struct btrfs_key found_key; struct btrfs_extent_ref *ref; u64 ref_root; u64 ref_gen; u64 ref_objectid; u64 ref_offset; int ret; + int level; ref = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_extent_ref); @@ -2335,20 +2608,42 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, ref_offset = btrfs_ref_offset(path->nodes[0], ref); btrfs_release_path(extent_root, path); - root_location = kmalloc(sizeof(*root_location), GFP_NOFS); - root_location->objectid = ref_root; + root_location.objectid = ref_root; if (ref_gen == 0) - root_location->offset = 0; + root_location.offset = 0; else - root_location->offset = (u64)-1; - root_location->type = BTRFS_ROOT_ITEM_KEY; + root_location.offset = (u64)-1; + root_location.type = BTRFS_ROOT_ITEM_KEY; found_root = btrfs_read_fs_root_no_name(extent_root->fs_info, - root_location); + &root_location); BUG_ON(!found_root); - kfree(root_location); if (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID) { + found_key.objectid = ref_objectid; + found_key.type = BTRFS_EXTENT_DATA_KEY; + found_key.offset = ref_offset; + level = 0; + + if (last_extent == extent_key->objectid && + *last_file_objectid == ref_objectid && + *last_file_offset == ref_offset && + *last_file_root == ref_root) + goto out; + + ret = find_root_for_ref(extent_root, path, &found_key, + level, 1, &found_root, + extent_key->objectid); + + if (ret) + goto out; + + if (last_extent == extent_key->objectid && + *last_file_objectid == ref_objectid && + *last_file_offset == ref_offset && + *last_file_root == ref_root) + goto out; + mutex_unlock(&extent_root->fs_info->fs_mutex); inode = btrfs_iget_locked(extent_root->fs_info->sb, ref_objectid, found_root); @@ -2369,21 +2664,20 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, mutex_lock(&extent_root->fs_info->fs_mutex); goto out; } + *last_file_objectid = inode->i_ino; + *last_file_root = found_root->root_key.objectid; + *last_file_offset = ref_offset; + relocate_inode_pages(inode, ref_offset, extent_key->offset); - /* FIXME, data=ordered will help get rid of this */ - filemap_fdatawrite(inode->i_mapping); iput(inode); mutex_lock(&extent_root->fs_info->fs_mutex); } else { struct btrfs_trans_handle *trans; - struct btrfs_key found_key; struct extent_buffer *eb; - int level; int i; - trans = btrfs_start_transaction(found_root, 1); eb = read_tree_block(found_root, extent_key->objectid, - extent_key->offset); + extent_key->offset, 0); level = btrfs_header_level(eb); if (level == 0) @@ -2393,6 +2687,15 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, free_extent_buffer(eb); + ret = find_root_for_ref(extent_root, path, &found_key, + level, 0, &found_root, + extent_key->objectid); + + if (ret) + goto out; + + trans = btrfs_start_transaction(found_root, 1); + path->lowest_level = level; path->reada = 2; ret = btrfs_search_slot(trans, found_root, &found_key, path, @@ -2405,6 +2708,8 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, path->nodes[i] = NULL; } btrfs_release_path(found_root, path); + if (found_root == found_root->fs_info->extent_root) + btrfs_extent_post_op(trans, found_root); btrfs_end_transaction(trans, found_root); } @@ -2412,6 +2717,27 @@ out: return 0; } +static int noinline del_extent_zero(struct btrfs_root *extent_root, + struct btrfs_path *path, + struct btrfs_key *extent_key) +{ + int ret; + struct btrfs_trans_handle *trans; + + trans = btrfs_start_transaction(extent_root, 1); + ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1); + if (ret > 0) { + ret = -EIO; + goto out; + } + if (ret < 0) + goto out; + ret = btrfs_del_item(trans, extent_root, path); +out: + btrfs_end_transaction(trans, extent_root); + return ret; +} + static int noinline relocate_one_extent(struct btrfs_root *extent_root, struct btrfs_path *path, struct btrfs_key *extent_key) @@ -2419,10 +2745,18 @@ static int noinline relocate_one_extent(struct btrfs_root *extent_root, struct btrfs_key key; struct btrfs_key found_key; struct extent_buffer *leaf; + u64 last_file_objectid = 0; + u64 last_file_root = 0; + u64 last_file_offset = (u64)-1; + u64 last_extent = 0; u32 nritems; u32 item_size; int ret = 0; + if (extent_key->objectid == 0) { + ret = del_extent_zero(extent_root, path, extent_key); + goto out; + } key.objectid = extent_key->objectid; key.type = BTRFS_EXTENT_REF_KEY; key.offset = 0; @@ -2436,22 +2770,36 @@ static int noinline relocate_one_extent(struct btrfs_root *extent_root, ret = 0; leaf = path->nodes[0]; nritems = btrfs_header_nritems(leaf); - if (path->slots[0] == nritems) - goto out; + if (path->slots[0] == nritems) { + ret = btrfs_next_leaf(extent_root, path); + if (ret > 0) { + ret = 0; + goto out; + } + if (ret < 0) + goto out; + leaf = path->nodes[0]; + } btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - if (found_key.objectid != extent_key->objectid) + if (found_key.objectid != extent_key->objectid) { break; + } - if (found_key.type != BTRFS_EXTENT_REF_KEY) + if (found_key.type != BTRFS_EXTENT_REF_KEY) { break; + } key.offset = found_key.offset + 1; item_size = btrfs_item_size_nr(leaf, path->slots[0]); - ret = relocate_one_reference(extent_root, path, extent_key); + ret = relocate_one_reference(extent_root, path, extent_key, + &last_file_objectid, + &last_file_offset, + &last_file_root, last_extent); if (ret) goto out; + last_extent = extent_key->objectid; } ret = 0; out: @@ -2459,33 +2807,113 @@ out: return ret; } -int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size) +static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) +{ + u64 num_devices; + u64 stripped = BTRFS_BLOCK_GROUP_RAID0 | + BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; + + num_devices = root->fs_info->fs_devices->num_devices; + if (num_devices == 1) { + stripped |= BTRFS_BLOCK_GROUP_DUP; + stripped = flags & ~stripped; + + /* turn raid0 into single device chunks */ + if (flags & BTRFS_BLOCK_GROUP_RAID0) + return stripped; + + /* turn mirroring into duplication */ + if (flags & (BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_RAID10)) + return stripped | BTRFS_BLOCK_GROUP_DUP; + return flags; + } else { + /* they already had raid on here, just return */ + if (flags & stripped) + return flags; + + stripped |= BTRFS_BLOCK_GROUP_DUP; + stripped = flags & ~stripped; + + /* switch duplicated blocks with raid1 */ + if (flags & BTRFS_BLOCK_GROUP_DUP) + return stripped | BTRFS_BLOCK_GROUP_RAID1; + + /* turn single device chunks into raid0 */ + return stripped | BTRFS_BLOCK_GROUP_RAID0; + } + return flags; +} + +int __alloc_chunk_for_shrink(struct btrfs_root *root, + struct btrfs_block_group_cache *shrink_block_group, + int force) +{ + struct btrfs_trans_handle *trans; + u64 new_alloc_flags; + u64 calc; + + if (btrfs_block_group_used(&shrink_block_group->item) > 0) { + + trans = btrfs_start_transaction(root, 1); + new_alloc_flags = update_block_group_flags(root, + shrink_block_group->flags); + if (new_alloc_flags != shrink_block_group->flags) { + calc = + btrfs_block_group_used(&shrink_block_group->item); + } else { + calc = shrink_block_group->key.offset; + } + do_chunk_alloc(trans, root->fs_info->extent_root, + calc + 2 * 1024 * 1024, new_alloc_flags, force); + btrfs_end_transaction(trans, root); + } + return 0; +} + +int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start) { struct btrfs_trans_handle *trans; struct btrfs_root *tree_root = root->fs_info->tree_root; struct btrfs_path *path; u64 cur_byte; u64 total_found; + u64 shrink_last_byte; + struct btrfs_block_group_cache *shrink_block_group; struct btrfs_fs_info *info = root->fs_info; - struct extent_io_tree *block_group_cache; struct btrfs_key key; struct btrfs_key found_key; struct extent_buffer *leaf; u32 nritems; int ret; - int progress = 0; + int progress; - btrfs_set_super_total_bytes(&info->super_copy, new_size); - clear_extent_dirty(&info->free_space_cache, new_size, (u64)-1, - GFP_NOFS); - block_group_cache = &info->block_group_cache; + shrink_block_group = btrfs_lookup_block_group(root->fs_info, + shrink_start); + BUG_ON(!shrink_block_group); + + shrink_last_byte = shrink_block_group->key.objectid + + shrink_block_group->key.offset; + + shrink_block_group->space_info->total_bytes -= + shrink_block_group->key.offset; path = btrfs_alloc_path(); root = root->fs_info->extent_root; path->reada = 2; + printk("btrfs relocating block group %llu flags %llu\n", + (unsigned long long)shrink_start, + (unsigned long long)shrink_block_group->flags); + + __alloc_chunk_for_shrink(root, shrink_block_group, 1); + again: + + shrink_block_group->ro = 1; + total_found = 0; - key.objectid = new_size; + progress = 0; + key.objectid = shrink_start; key.offset = 0; key.type = 0; cur_byte = key.objectid; @@ -2497,10 +2925,12 @@ again: ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY); if (ret < 0) goto out; + if (ret == 0) { leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - if (found_key.objectid + found_key.offset > new_size) { + if (found_key.objectid + found_key.offset > shrink_start && + found_key.objectid < shrink_last_byte) { cur_byte = found_key.objectid; key.objectid = cur_byte; } @@ -2529,6 +2959,9 @@ next: btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + if (found_key.objectid >= shrink_last_byte) + break; + if (progress && need_resched()) { memcpy(&key, &found_key, sizeof(key)); mutex_unlock(&root->fs_info->fs_mutex); @@ -2543,6 +2976,8 @@ next: if (btrfs_key_type(&found_key) != BTRFS_EXTENT_ITEM_KEY || found_key.objectid + found_key.offset <= cur_byte) { + memcpy(&key, &found_key, sizeof(key)); + key.offset++; path->slots[0]++; goto next; } @@ -2552,11 +2987,15 @@ next: key.objectid = cur_byte; btrfs_release_path(root, path); ret = relocate_one_extent(root, path, &found_key); + __alloc_chunk_for_shrink(root, shrink_block_group, 0); } btrfs_release_path(root, path); if (total_found > 0) { + printk("btrfs relocate found %llu last extent was %llu\n", + (unsigned long long)total_found, + (unsigned long long)found_key.objectid); trans = btrfs_start_transaction(tree_root, 1); btrfs_commit_transaction(trans, tree_root); @@ -2569,82 +3008,45 @@ next: goto again; } + /* + * we've freed all the extents, now remove the block + * group item from the tree + */ trans = btrfs_start_transaction(root, 1); - key.objectid = new_size; - key.offset = 0; - key.type = 0; - while(1) { - u64 ptr; + memcpy(&key, &shrink_block_group->key, sizeof(key)); - ret = btrfs_search_slot(trans, root, &key, path, -1, 1); - if (ret < 0) - goto out; + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret > 0) + ret = -EIO; + if (ret < 0) + goto out; - leaf = path->nodes[0]; - nritems = btrfs_header_nritems(leaf); -bg_next: - if (path->slots[0] >= nritems) { - ret = btrfs_next_leaf(root, path); - if (ret < 0) - break; - if (ret == 1) { - ret = 0; - break; - } - leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + clear_extent_bits(&info->block_group_cache, key.objectid, + key.objectid + key.offset - 1, + (unsigned int)-1, GFP_NOFS); - /* - * btrfs_next_leaf doesn't cow buffers, we have to - * do the search again - */ - memcpy(&key, &found_key, sizeof(key)); - btrfs_release_path(root, path); - goto resched_check; - } - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - if (btrfs_key_type(&found_key) != BTRFS_BLOCK_GROUP_ITEM_KEY) { - printk("shrinker found key %Lu %u %Lu\n", - found_key.objectid, found_key.type, - found_key.offset); - path->slots[0]++; - goto bg_next; - } - ret = get_state_private(&info->block_group_cache, - found_key.objectid, &ptr); - if (!ret) - kfree((void *)(unsigned long)ptr); + clear_extent_bits(&info->free_space_cache, + key.objectid, key.objectid + key.offset - 1, + (unsigned int)-1, GFP_NOFS); - clear_extent_bits(&info->block_group_cache, found_key.objectid, - found_key.objectid + found_key.offset - 1, - (unsigned int)-1, GFP_NOFS); + memset(shrink_block_group, 0, sizeof(*shrink_block_group)); + kfree(shrink_block_group); - key.objectid = found_key.objectid + 1; - btrfs_del_item(trans, root, path); - btrfs_release_path(root, path); -resched_check: - if (need_resched()) { - mutex_unlock(&root->fs_info->fs_mutex); - cond_resched(); - mutex_lock(&root->fs_info->fs_mutex); - } - } - clear_extent_dirty(&info->free_space_cache, new_size, (u64)-1, - GFP_NOFS); + btrfs_del_item(trans, root, path); btrfs_commit_transaction(trans, root); + + /* the code to unpin extents might set a few bits in the free + * space cache for this range again + */ + clear_extent_bits(&info->free_space_cache, + key.objectid, key.objectid + key.offset - 1, + (unsigned int)-1, GFP_NOFS); out: btrfs_free_path(path); return ret; } -int btrfs_grow_extent_tree(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 new_size) -{ - btrfs_set_super_total_bytes(&root->fs_info->super_copy, new_size); - return 0; -} - int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *key) { @@ -2712,7 +3114,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - cache = kmalloc(sizeof(*cache), GFP_NOFS); + cache = kzalloc(sizeof(*cache), GFP_NOFS); if (!cache) { ret = -ENOMEM; break; @@ -2722,8 +3124,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) btrfs_item_ptr_offset(leaf, path->slots[0]), sizeof(cache->item)); memcpy(&cache->key, &found_key, sizeof(found_key)); - cache->cached = 0; - cache->pinned = 0; key.objectid = found_key.objectid + found_key.offset; btrfs_release_path(root, path); @@ -2736,16 +3136,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) { bit = BLOCK_GROUP_METADATA; } - if (cache->flags & BTRFS_BLOCK_GROUP_RAID0) { - if (cache->flags & BTRFS_BLOCK_GROUP_DATA) { - info->extra_data_alloc_bits = - BTRFS_BLOCK_GROUP_RAID0; - } - if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) { - info->extra_alloc_bits = - BTRFS_BLOCK_GROUP_RAID0; - } - } + set_avail_alloc_bits(info, cache->flags); ret = update_space_info(info, cache->flags, found_key.offset, btrfs_block_group_used(&cache->item), @@ -2772,7 +3163,7 @@ error: int btrfs_make_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytes_used, - u64 type, u64 chunk_tree, u64 chunk_objectid, + u64 type, u64 chunk_objectid, u64 chunk_offset, u64 size) { int ret; @@ -2784,16 +3175,13 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, extent_root = root->fs_info->extent_root; block_group_cache = &root->fs_info->block_group_cache; - cache = kmalloc(sizeof(*cache), GFP_NOFS); + cache = kzalloc(sizeof(*cache), GFP_NOFS); BUG_ON(!cache); - cache->key.objectid = chunk_objectid; + cache->key.objectid = chunk_offset; cache->key.offset = size; - cache->cached = 0; - cache->pinned = 0; btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY); - memset(&cache->item, 0, sizeof(cache->item)); + btrfs_set_block_group_used(&cache->item, bytes_used); - btrfs_set_block_group_chunk_tree(&cache->item, chunk_tree); btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); cache->flags = type; btrfs_set_block_group_flags(&cache->item, type); @@ -2802,19 +3190,13 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, &cache->space_info); BUG_ON(ret); - if (type & BTRFS_BLOCK_GROUP_DATA) { - bit = BLOCK_GROUP_DATA; - } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) { - bit = BLOCK_GROUP_SYSTEM; - } else if (type & BTRFS_BLOCK_GROUP_METADATA) { - bit = BLOCK_GROUP_METADATA; - } - set_extent_bits(block_group_cache, chunk_objectid, - chunk_objectid + size - 1, + bit = block_group_state_bits(type); + set_extent_bits(block_group_cache, chunk_offset, + chunk_offset + size - 1, bit | EXTENT_LOCKED, GFP_NOFS); - set_state_private(block_group_cache, chunk_objectid, - (unsigned long)cache); + set_state_private(block_group_cache, chunk_offset, + (unsigned long)cache); ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item, sizeof(cache->item)); BUG_ON(ret); @@ -2822,5 +3204,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, finish_current_insert(trans, extent_root); ret = del_pending_extents(trans, extent_root); BUG_ON(ret); + set_avail_alloc_bits(extent_root->fs_info, type); return 0; }