X-Git-Url: http://drtracing.org/?a=blobdiff_plain;f=fs%2Fbtrfs%2Fvolumes.c;h=4e7cee27aab565cef76d1e4041e643382d2438b1;hb=a74a4b97b61beede185b4b3ad359d7d378b0d312;hp=501d23d3ebfd2f0c0a23ad4672af2d445a9391a7;hpb=a0af469b58944f6e8c5c8ecbebb42997baf0cb9e;p=deliverable%2Flinux.git diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 501d23d3ebfd..4e7cee27aab5 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -27,6 +27,7 @@ #include "transaction.h" #include "print-tree.h" #include "volumes.h" +#include "async-thread.h" struct map_lookup { u64 type; @@ -110,6 +111,101 @@ static struct btrfs_fs_devices *find_fsid(u8 *fsid) return NULL; } +/* + * we try to collect pending bios for a device so we don't get a large + * number of procs sending bios down to the same device. This greatly + * improves the scheduler's ability to collect and merge the bios. + * + * But, it also turns into a long list of bios to process and that is sure + * to eventually make the worker thread block. The solution here is to + * make some progress and then put this work struct back at the end of + * the list if the block device is congested. This way, multiple devices + * can make progress from a single worker thread. + */ +int run_scheduled_bios(struct btrfs_device *device) +{ + struct bio *pending; + struct backing_dev_info *bdi; + struct bio *tail; + struct bio *cur; + int again = 0; + unsigned long num_run = 0; + + bdi = device->bdev->bd_inode->i_mapping->backing_dev_info; +loop: + spin_lock(&device->io_lock); + + /* take all the bios off the list at once and process them + * later on (without the lock held). 
But, remember the + * tail and other pointers so the bios can be properly reinserted + * into the list if we hit congestion + */ + pending = device->pending_bios; + tail = device->pending_bio_tail; + WARN_ON(pending && !tail); + device->pending_bios = NULL; + device->pending_bio_tail = NULL; + + /* + * if pending was null this time around, no bios need processing + * at all and we can stop. Otherwise it'll loop back up again + * and do an additional check so no bios are missed. + * + * device->running_pending is used to synchronize with the + * schedule_bio code. + */ + if (pending) { + again = 1; + device->running_pending = 1; + } else { + again = 0; + device->running_pending = 0; + } + spin_unlock(&device->io_lock); + + while(pending) { + cur = pending; + pending = pending->bi_next; + cur->bi_next = NULL; + atomic_dec(&device->dev_root->fs_info->nr_async_submits); + submit_bio(cur->bi_rw, cur); + num_run++; + + /* + * we made progress, there is more work to do and the bdi + * is now congested. 
Back off and let other work structs + * run instead + */ + if (pending && num_run && bdi_write_congested(bdi)) { + struct bio *old_head; + + spin_lock(&device->io_lock); + old_head = device->pending_bios; + device->pending_bios = pending; + if (device->pending_bio_tail) + tail->bi_next = old_head; + else + device->pending_bio_tail = tail; + + spin_unlock(&device->io_lock); + btrfs_requeue_work(&device->work); + goto done; + } + } + if (again) + goto loop; +done: + return 0; +} + +void pending_bios_fn(struct btrfs_work *work) +{ + struct btrfs_device *device; + + device = container_of(work, struct btrfs_device, work); + run_scheduled_bios(device); +} + static int device_list_add(const char *path, struct btrfs_super_block *disk_super, u64 devid, struct btrfs_fs_devices **fs_devices_ret) @@ -120,7 +216,7 @@ static int device_list_add(const char *path, fs_devices = find_fsid(disk_super->fsid); if (!fs_devices) { - fs_devices = kmalloc(sizeof(*fs_devices), GFP_NOFS); + fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS); if (!fs_devices) return -ENOMEM; INIT_LIST_HEAD(&fs_devices->devices); @@ -129,7 +225,6 @@ static int device_list_add(const char *path, memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE); fs_devices->latest_devid = devid; fs_devices->latest_trans = found_transid; - fs_devices->num_devices = 0; device = NULL; } else { device = __find_device(&fs_devices->devices, devid, @@ -142,6 +237,7 @@ static int device_list_add(const char *path, return -ENOMEM; } device->devid = devid; + device->work.func = pending_bios_fn; memcpy(device->uuid, disk_super->dev_item.uuid, BTRFS_UUID_SIZE); device->barriers = 1; @@ -175,13 +271,17 @@ again: list_for_each(cur, head) { device = list_entry(cur, struct btrfs_device, dev_list); if (!device->in_fs_metadata) { - if (device->bdev) { - close_bdev_excl(device->bdev); - fs_devices->open_devices--; - } + struct block_device *bdev; list_del(&device->dev_list); list_del(&device->dev_alloc_list); fs_devices->num_devices--; + 
if (device->bdev) { + bdev = device->bdev; + fs_devices->open_devices--; + mutex_unlock(&uuid_mutex); + close_bdev_excl(bdev); + mutex_lock(&uuid_mutex); + } kfree(device->name); kfree(device); goto again; @@ -262,7 +362,7 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, goto error_brelse; transid = btrfs_super_generation(disk_super); - if (transid > latest_transid) { + if (!latest_transid || transid > latest_transid) { latest_devid = devid; latest_transid = transid; latest_bdev = bdev; @@ -751,10 +851,6 @@ static int btrfs_rm_dev_item(struct btrfs_root *root, if (bdev == fs_devices->latest_bdev) fs_devices->latest_bdev = next_dev->bdev; - total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); - btrfs_set_super_total_bytes(&root->fs_info->super_copy, - total_bytes - device->total_bytes); - total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy); btrfs_set_super_num_devices(&root->fs_info->super_copy, total_bytes - 1); @@ -774,7 +870,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) u64 devid; int ret = 0; - mutex_lock(&root->fs_info->fs_mutex); + mutex_lock(&root->fs_info->alloc_mutex); + mutex_lock(&root->fs_info->chunk_mutex); mutex_lock(&uuid_mutex); all_avail = root->fs_info->avail_data_alloc_bits | @@ -850,6 +947,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) } root->fs_info->fs_devices->num_devices--; + root->fs_info->fs_devices->open_devices--; ret = btrfs_shrink_device(device, 0); if (ret) @@ -874,7 +972,6 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) if (device->bdev) { /* one close for the device struct or super_block */ close_bdev_excl(device->bdev); - root->fs_info->fs_devices->open_devices--; } if (bdev) { /* one close for us */ @@ -892,7 +989,8 @@ error_close: close_bdev_excl(bdev); out: mutex_unlock(&uuid_mutex); - mutex_unlock(&root->fs_info->fs_mutex); + mutex_unlock(&root->fs_info->chunk_mutex); + mutex_unlock(&root->fs_info->alloc_mutex); return ret; 
} @@ -911,7 +1009,10 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) if (!bdev) { return -EIO; } - mutex_lock(&root->fs_info->fs_mutex); + + mutex_lock(&root->fs_info->alloc_mutex); + mutex_lock(&root->fs_info->chunk_mutex); + trans = btrfs_start_transaction(root, 1); devices = &root->fs_info->fs_devices->devices; list_for_each(cur, devices) { @@ -930,6 +1031,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) } device->barriers = 1; + device->work.func = pending_bios_fn; generate_random_uuid(device->uuid); spin_lock_init(&device->io_lock); device->name = kstrdup(device_path, GFP_NOFS); @@ -964,7 +1066,9 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) root->fs_info->fs_devices->open_devices++; out: btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); + mutex_unlock(&root->fs_info->chunk_mutex); + mutex_unlock(&root->fs_info->alloc_mutex); + return ret; out_close_bdev: @@ -1204,9 +1308,10 @@ int btrfs_balance(struct btrfs_root *dev_root) struct btrfs_key found_key; + BUG(); /* FIXME, needs locking */ + dev_root = dev_root->fs_info->dev_root; - mutex_lock(&dev_root->fs_info->fs_mutex); /* step one make some room on all the devices */ list_for_each(cur, devices) { device = list_entry(cur, struct btrfs_device, dev_list); @@ -1275,7 +1380,6 @@ int btrfs_balance(struct btrfs_root *dev_root) ret = 0; error: btrfs_free_path(path); - mutex_unlock(&dev_root->fs_info->fs_mutex); return ret; } @@ -1451,7 +1555,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, return -ENOSPC; if (type & (BTRFS_BLOCK_GROUP_RAID0)) { - num_stripes = btrfs_super_num_devices(&info->super_copy); + num_stripes = extent_root->fs_info->fs_devices->open_devices; min_stripes = 2; } if (type & (BTRFS_BLOCK_GROUP_DUP)) { @@ -1460,13 +1564,13 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, } if (type & (BTRFS_BLOCK_GROUP_RAID1)) { num_stripes = min_t(u64, 2, - 
btrfs_super_num_devices(&info->super_copy)); + extent_root->fs_info->fs_devices->open_devices); if (num_stripes < 2) return -ENOSPC; min_stripes = 2; } if (type & (BTRFS_BLOCK_GROUP_RAID10)) { - num_stripes = btrfs_super_num_devices(&info->super_copy); + num_stripes = extent_root->fs_info->fs_devices->open_devices; if (num_stripes < 4) return -ENOSPC; num_stripes &= ~(u32)1; @@ -1970,8 +2074,62 @@ static int end_bio_multi_stripe(struct bio *bio, #endif } +struct async_sched { + struct bio *bio; + int rw; + struct btrfs_fs_info *info; + struct btrfs_work work; +}; + +/* + * see run_scheduled_bios for a description of why bios are collected for + * async submit. + * + * This will add one bio to the pending list for a device and make sure + * the work struct is scheduled. + */ +int schedule_bio(struct btrfs_root *root, struct btrfs_device *device, + int rw, struct bio *bio) +{ + int should_queue = 1; + + /* don't bother with additional async steps for reads, right now */ + if (!(rw & (1 << BIO_RW))) { + submit_bio(rw, bio); + return 0; + } + + /* + * nr_async_submits allows us to reliably return congestion to the + * higher layers. 
Otherwise, the async bio makes it appear we have + * made progress against dirty pages when we've really just put it + * on a queue for later + */ + atomic_inc(&root->fs_info->nr_async_submits); + bio->bi_next = NULL; + bio->bi_rw |= rw; + + spin_lock(&device->io_lock); + + if (device->pending_bio_tail) + device->pending_bio_tail->bi_next = bio; + + device->pending_bio_tail = bio; + if (!device->pending_bios) + device->pending_bios = bio; + if (device->running_pending) + should_queue = 0; + + spin_unlock(&device->io_lock); + + if (should_queue) + btrfs_queue_worker(&root->fs_info->submit_workers, + &device->work); + return 0; +} + int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, - int mirror_num) + int mirror_num, int async_submit) { struct btrfs_mapping_tree *map_tree; struct btrfs_device *dev; @@ -2017,10 +2175,10 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, dev = multi->stripes[dev_nr].dev; if (dev && dev->bdev) { bio->bi_bdev = dev->bdev; - spin_lock(&dev->io_lock); - dev->total_ios++; - spin_unlock(&dev->io_lock); - submit_bio(rw, bio); + if (async_submit) + schedule_bio(root, dev, rw, bio); + else + submit_bio(rw, bio); } else { bio->bi_bdev = root->fs_info->fs_devices->latest_bdev; bio->bi_sector = logical >> 9; @@ -2059,6 +2217,7 @@ static struct btrfs_device *add_missing_dev(struct btrfs_root *root, device->barriers = 1; device->dev_root = root->fs_info->dev_root; device->devid = devid; + device->work.func = pending_bios_fn; fs_devices->num_devices++; spin_lock_init(&device->io_lock); memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE);