X-Git-Url: http://drtracing.org/?a=blobdiff_plain;f=fs%2Fbtrfs%2Fvolumes.c;h=538c5cfa005f161b45eaebf1e32c16788f5781de;hb=86db25785a6e7cacc1ee868fe437e8a2957eae94;hp=77620f2d8af9a9a23978db12e1121924efffccd4;hpb=1f0905ec156eec8f12cd593bc564551770319720;p=deliverable%2Flinux.git diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 77620f2d8af9..538c5cfa005f 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -650,6 +650,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) new_device->writeable = 0; new_device->in_fs_metadata = 0; new_device->can_discard = 0; + spin_lock_init(&new_device->io_lock); list_replace_rcu(&device->dev_list, &new_device->dev_list); call_rcu(&device->rcu, free_device); @@ -795,26 +796,76 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, return ret; } +/* + * Look for a btrfs signature on a device. This may be called out of the mount path + * and we are not allowed to call set_blocksize during the scan. The superblock + * is read via pagecache + */ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, struct btrfs_fs_devices **fs_devices_ret) { struct btrfs_super_block *disk_super; struct block_device *bdev; - struct buffer_head *bh; - int ret; + struct page *page; + void *p; + int ret = -EINVAL; u64 devid; u64 transid; u64 total_devices; + u64 bytenr; + pgoff_t index; + /* + * we would like to check all the supers, but that would make + * a btrfs mount succeed after a mkfs from a different FS. + * So, we need to add a special mount option to scan for + * later supers, using BTRFS_SUPER_MIRROR_MAX instead + */ + bytenr = btrfs_sb_offset(0); flags |= FMODE_EXCL; mutex_lock(&uuid_mutex); - ret = btrfs_get_bdev_and_sb(path, flags, holder, 0, &bdev, &bh); - if (ret) + + bdev = blkdev_get_by_path(path, flags, holder); + + if (IS_ERR(bdev)) { + ret = PTR_ERR(bdev); + printk(KERN_INFO "btrfs: open %s failed\n", path); goto error; - disk_super = (struct btrfs_super_block *)bh->b_data; + } + + /* make sure our super fits in the device */ + if (bytenr + PAGE_CACHE_SIZE >= i_size_read(bdev->bd_inode)) + goto error_bdev_put; + + /* make sure our super fits in the page */ + if (sizeof(*disk_super) > PAGE_CACHE_SIZE) + goto error_bdev_put; + + /* make sure our super doesn't straddle pages on disk */ + index = bytenr >> PAGE_CACHE_SHIFT; + if ((bytenr + sizeof(*disk_super) - 1) >> PAGE_CACHE_SHIFT != index) + goto error_bdev_put; + + /* pull in the page with our super */ + page = read_cache_page_gfp(bdev->bd_inode->i_mapping, + index, GFP_NOFS); + + if (IS_ERR_OR_NULL(page)) + goto error_bdev_put; + + p = kmap(page); + + /* align our pointer to the offset of the super block */ + disk_super = p + (bytenr & ~PAGE_CACHE_MASK); + + if (btrfs_super_bytenr(disk_super) != bytenr || + disk_super->magic != cpu_to_le64(BTRFS_MAGIC)) + goto error_unmap; + devid = btrfs_stack_device_id(&disk_super->dev_item); transid = btrfs_super_generation(disk_super); total_devices = btrfs_super_num_devices(disk_super); + if (disk_super->label[0]) { if (disk_super->label[BTRFS_LABEL_SIZE - 1]) disk_super->label[BTRFS_LABEL_SIZE - 1] = '\0'; @@ -822,12 +873,19 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, } else { printk(KERN_INFO "device fsid %pU ", disk_super->fsid); } + printk(KERN_CONT "devid %llu transid %llu %s\n", (unsigned long long)devid, (unsigned long long)transid, path); + ret = device_list_add(path, disk_super, devid, fs_devices_ret); if (!ret && fs_devices_ret) (*fs_devices_ret)->total_devices = total_devices; - brelse(bh); + +error_unmap: + kunmap(page); + page_cache_release(page); + +error_bdev_put: blkdev_put(bdev, flags); error: mutex_unlock(&uuid_mutex); @@ -1375,14 +1433,19 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) u64 devid; u64 num_devices; u8 *dev_uuid; + unsigned seq; int ret = 0; bool clear_super = false; mutex_lock(&uuid_mutex); - all_avail = root->fs_info->avail_data_alloc_bits | - root->fs_info->avail_system_alloc_bits | - root->fs_info->avail_metadata_alloc_bits; + do { + seq = read_seqbegin(&root->fs_info->profiles_lock); + + all_avail = root->fs_info->avail_data_alloc_bits | + root->fs_info->avail_system_alloc_bits | + root->fs_info->avail_metadata_alloc_bits; + } while (read_seqretry(&root->fs_info->profiles_lock, seq)); num_devices = root->fs_info->fs_devices->num_devices; btrfs_dev_replace_lock(&root->fs_info->dev_replace); @@ -1449,7 +1512,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) } } else { ret = btrfs_get_bdev_and_sb(device_path, - FMODE_READ | FMODE_EXCL, + FMODE_WRITE | FMODE_EXCL, root->fs_info->bdev_holder, 0, &bdev, &bh); if (ret) @@ -2633,7 +2696,14 @@ static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset, cache = btrfs_lookup_block_group(fs_info, chunk_offset); chunk_used = btrfs_block_group_used(&cache->item); - user_thresh = div_factor_fine(cache->key.offset, bargs->usage); + if (bargs->usage == 0) + user_thresh = 1; + else if (bargs->usage > 100) + user_thresh = cache->key.offset; + else + user_thresh = div_factor_fine(cache->key.offset, + bargs->usage); + if (chunk_used < user_thresh) ret = 0; @@ -2982,6 +3052,8 @@ static void __cancel_balance(struct btrfs_fs_info *fs_info) unset_balance_control(fs_info); ret = del_balance_item(fs_info->tree_root); BUG_ON(ret); + + atomic_set(&fs_info->mutually_exclusive_operation_running, 0); } void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock, @@ -2998,7 +3070,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl, int mixed = 0; int ret; u64 num_devices; - int cancel = 0; + unsigned seq; if (btrfs_fs_closing(fs_info) || atomic_read(&fs_info->balance_pause_req) || @@ -3086,23 +3158,26 @@ int btrfs_balance(struct btrfs_balance_control *bctl, BTRFS_BLOCK_GROUP_RAID10 | BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6; - - if (((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) && - (fs_info->avail_system_alloc_bits & allowed) && - !(bctl->sys.target & allowed)) || - ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) && - (fs_info->avail_metadata_alloc_bits & allowed) && - !(bctl->meta.target & allowed))) { - if (bctl->flags & BTRFS_BALANCE_FORCE) { - printk(KERN_INFO "btrfs: force reducing metadata " - "integrity\n"); - } else { - printk(KERN_ERR "btrfs: balance will reduce metadata " - "integrity, use force if you want this\n"); - ret = -EINVAL; - goto out; + do { + seq = read_seqbegin(&fs_info->profiles_lock); + + if (((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) && + (fs_info->avail_system_alloc_bits & allowed) && + !(bctl->sys.target & allowed)) || + ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) && + (fs_info->avail_metadata_alloc_bits & allowed) && + !(bctl->meta.target & allowed))) { + if (bctl->flags & BTRFS_BALANCE_FORCE) { + printk(KERN_INFO "btrfs: force reducing metadata " + "integrity\n"); + } else { + printk(KERN_ERR "btrfs: balance will reduce metadata " + "integrity, use force if you want this\n"); + ret = -EINVAL; + goto out; + } } - } + } while (read_seqretry(&fs_info->profiles_lock, seq)); if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) { int num_tolerated_disk_barrier_failures; @@ -3146,22 +3221,15 @@ int btrfs_balance(struct btrfs_balance_control *bctl, mutex_lock(&fs_info->balance_mutex); atomic_dec(&fs_info->balance_running); - if (bargs) { - memset(bargs, 0, sizeof(*bargs)); - update_ioctl_balance_args(fs_info, 0, bargs); - } - - if ((ret && ret != -ECANCELED && ret != -ENOSPC) || - balance_need_close(fs_info)) - cancel = 1; - if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) { fs_info->num_tolerated_disk_barrier_failures = btrfs_calc_num_tolerated_disk_barrier_failures(fs_info); } - if (cancel) - __cancel_balance(fs_info); + if (bargs) { + memset(bargs, 0, sizeof(*bargs)); + update_ioctl_balance_args(fs_info, 0, bargs); + } wake_up(&fs_info->balance_wait_q); @@ -3169,8 +3237,10 @@ int btrfs_balance(struct btrfs_balance_control *bctl, out: if (bctl->flags & BTRFS_BALANCE_RESUME) __cancel_balance(fs_info); - else + else { kfree(bctl); + atomic_set(&fs_info->mutually_exclusive_operation_running, 0); + } return ret; } @@ -3187,7 +3257,6 @@ static int balance_kthread(void *data) ret = btrfs_balance(fs_info->balance_ctl, NULL); } - atomic_set(&fs_info->mutually_exclusive_operation_running, 0); mutex_unlock(&fs_info->balance_mutex); mutex_unlock(&fs_info->volume_mutex); @@ -3210,7 +3279,6 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info) return 0; } - WARN_ON(atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)); tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance"); if (IS_ERR(tsk)) return PTR_ERR(tsk); @@ -3264,6 +3332,8 @@ int btrfs_recover_balance(struct btrfs_fs_info *fs_info) btrfs_balance_sys(leaf, item, &disk_bargs); btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs); + WARN_ON(atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)); + mutex_lock(&fs_info->volume_mutex); mutex_lock(&fs_info->balance_mutex); @@ -3523,21 +3593,62 @@ static int btrfs_cmp_device_info(const void *a, const void *b) } struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = { - /* - * sub_stripes info for map, - * dev_stripes -- stripes per dev, 2 for DUP, 1 other wise - * devs_max -- max devices per stripe, 0 for unlimited - * devs_min -- min devices per stripe - * devs_increment -- ndevs must be a multiple of this - * ncopies -- how many copies of the data we have - */ - { 2, 1, 0, 4, 2, 2 /* raid10 */ }, - { 1, 1, 2, 2, 2, 2 /* raid1 */ }, - { 1, 2, 1, 1, 1, 2 /* dup */ }, - { 1, 1, 0, 2, 1, 1 /* raid0 */ }, - { 1, 1, 0, 1, 1, 1 /* single */ }, - { 1, 1, 0, 2, 1, 2 /* raid5 */ }, - { 1, 1, 0, 3, 1, 3 /* raid6 */ }, + [BTRFS_RAID_RAID10] = { + .sub_stripes = 2, + .dev_stripes = 1, + .devs_max = 0, /* 0 == as many as possible */ + .devs_min = 4, + .devs_increment = 2, + .ncopies = 2, + }, + [BTRFS_RAID_RAID1] = { + .sub_stripes = 1, + .dev_stripes = 1, + .devs_max = 2, + .devs_min = 2, + .devs_increment = 2, + .ncopies = 2, + }, + [BTRFS_RAID_DUP] = { + .sub_stripes = 1, + .dev_stripes = 2, + .devs_max = 1, + .devs_min = 1, + .devs_increment = 1, + .ncopies = 2, + }, + [BTRFS_RAID_RAID0] = { + .sub_stripes = 1, + .dev_stripes = 1, + .devs_max = 0, + .devs_min = 2, + .devs_increment = 1, + .ncopies = 1, + }, + [BTRFS_RAID_SINGLE] = { + .sub_stripes = 1, + .dev_stripes = 1, + .devs_max = 1, + .devs_min = 1, + .devs_increment = 1, + .ncopies = 1, + }, + [BTRFS_RAID_RAID5] = { + .sub_stripes = 1, + .dev_stripes = 1, + .devs_max = 0, + .devs_min = 2, + .devs_increment = 1, + .ncopies = 2, + }, + [BTRFS_RAID_RAID6] = { + .sub_stripes = 1, + .dev_stripes = 1, + .devs_max = 0, + .devs_min = 3, + .devs_increment = 1, + .ncopies = 3, + }, }; static u32 find_raid56_stripe_len(u32 data_devices, u32 dev_stripe_target) @@ -3685,12 +3796,16 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, if (max_avail < BTRFS_STRIPE_LEN * dev_stripes) continue; + if (ndevs == fs_devices->rw_devices) { + WARN(1, "%s: found more than %llu devices\n", + __func__, fs_devices->rw_devices); + break; + } devices_info[ndevs].dev_offset = dev_offset; devices_info[ndevs].max_avail = max_avail; devices_info[ndevs].total_avail = total_avail; devices_info[ndevs].dev = device; ++ndevs; - WARN_ON(ndevs > fs_devices->rw_devices); } /* @@ -3722,10 +3837,6 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, */ data_stripes = num_stripes / ncopies; - if (stripe_size * ndevs > max_chunk_size * ncopies) { - stripe_size = max_chunk_size * ncopies; - do_div(stripe_size, ndevs); - } if (type & BTRFS_BLOCK_GROUP_RAID5) { raid_stripe_len = find_raid56_stripe_len(ndevs - 1, btrfs_super_stripesize(info->super_copy)); @@ -3736,6 +3847,27 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, btrfs_super_stripesize(info->super_copy)); data_stripes = num_stripes - 2; } + + /* + * Use the number of data stripes to figure out how big this chunk + * is really going to be in terms of logical address space, + * and compare that answer with the max chunk size + */ + if (stripe_size * data_stripes > max_chunk_size) { + u64 mask = (1ULL << 24) - 1; + stripe_size = max_chunk_size; + do_div(stripe_size, data_stripes); + + /* bump the answer up to a 16MB boundary */ + stripe_size = (stripe_size + mask) & ~mask; + + /* but don't go higher than the limits we found + * while searching for free extents + */ + if (stripe_size > devices_info[ndevs-1].max_avail) + stripe_size = devices_info[ndevs-1].max_avail; + } + do_div(stripe_size, dev_stripes); /* align to BTRFS_STRIPE_LEN */ @@ -3787,15 +3919,10 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, write_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); write_unlock(&em_tree->lock); - free_extent_map(em); - if (ret) - goto error; - - ret = btrfs_make_block_group(trans, extent_root, 0, type, - BTRFS_FIRST_CHUNK_TREE_OBJECTID, - start, num_bytes); - if (ret) + if (ret) { + free_extent_map(em); goto error; + } for (i = 0; i < map->num_stripes; ++i) { struct btrfs_device *device; @@ -3808,17 +3935,44 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, info->chunk_root->root_key.objectid, BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, dev_offset, stripe_size); - if (ret) { - btrfs_abort_transaction(trans, extent_root, ret); - goto error; - } + if (ret) + goto error_dev_extent; } + ret = btrfs_make_block_group(trans, extent_root, 0, type, + BTRFS_FIRST_CHUNK_TREE_OBJECTID, + start, num_bytes); + if (ret) { + i = map->num_stripes - 1; + goto error_dev_extent; + } + + free_extent_map(em); check_raid56_incompat_flag(extent_root->fs_info, type); kfree(devices_info); return 0; +error_dev_extent: + for (; i >= 0; i--) { + struct btrfs_device *device; + int err; + + device = map->stripes[i].dev; + err = btrfs_free_dev_extent(trans, device, start); + if (err) { + btrfs_abort_transaction(trans, extent_root, err); + break; + } + } + write_lock(&em_tree->lock); + remove_extent_mapping(em_tree, em); + write_unlock(&em_tree->lock); + + /* One for our allocation */ + free_extent_map(em); + /* One for the tree reference */ + free_extent_map(em); error: kfree(map); kfree(devices_info); @@ -3958,10 +4112,7 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans, if (ret) return ret; - alloc_profile = BTRFS_BLOCK_GROUP_METADATA | - fs_info->avail_metadata_alloc_bits; - alloc_profile = btrfs_reduce_alloc_profile(root, alloc_profile); - + alloc_profile = btrfs_get_alloc_profile(extent_root, 0); ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size, &stripe_size, chunk_offset, alloc_profile); if (ret) @@ -3969,10 +4120,7 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans, sys_chunk_offset = chunk_offset + chunk_size; - alloc_profile = BTRFS_BLOCK_GROUP_SYSTEM | - fs_info->avail_system_alloc_bits; - alloc_profile = btrfs_reduce_alloc_profile(root, alloc_profile); - + alloc_profile = btrfs_get_alloc_profile(fs_info->chunk_root, 0); ret = __btrfs_alloc_chunk(trans, extent_root, &sys_map, &sys_chunk_size, &sys_stripe_size, sys_chunk_offset, alloc_profile);