Btrfs: fix double increment of path->slots[0] in btrfs_next_leaf
[deliverable/linux.git] / fs / btrfs / volumes.c
index e0913e4697284673b4c00cd58bf911fc44fe4019..f057730a72bbd8ce2470945fb5c0f5f3a3946bd9 100644 (file)
@@ -125,6 +125,20 @@ static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid)
        return NULL;
 }
 
+static void requeue_list(struct btrfs_pending_bios *pending_bios,
+                       struct bio *head, struct bio *tail)
+{
+
+       struct bio *old_head;
+
+       old_head = pending_bios->head;
+       pending_bios->head = head;
+       if (pending_bios->tail)
+               tail->bi_next = old_head;
+       else
+               pending_bios->tail = tail;
+}
+
 /*
  * we try to collect pending bios for a device so we don't get a large
  * number of procs sending bios down to the same device.  This greatly
@@ -141,32 +155,49 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
        struct bio *pending;
        struct backing_dev_info *bdi;
        struct btrfs_fs_info *fs_info;
+       struct btrfs_pending_bios *pending_bios;
        struct bio *tail;
        struct bio *cur;
        int again = 0;
-       unsigned long num_run = 0;
+       unsigned long num_run;
+       unsigned long num_sync_run;
+       unsigned long batch_run = 0;
        unsigned long limit;
        unsigned long last_waited = 0;
+       int force_reg = 0;
 
        bdi = blk_get_backing_dev_info(device->bdev);
        fs_info = device->dev_root->fs_info;
        limit = btrfs_async_submit_limit(fs_info);
        limit = limit * 2 / 3;
 
+       /* we want to make sure that every time we switch from the sync
+        * list to the normal list, we unplug
+        */
+       num_sync_run = 0;
+
 loop:
        spin_lock(&device->io_lock);
 
 loop_lock:
+       num_run = 0;
+
        /* take all the bios off the list at once and process them
         * later on (without the lock held).  But, remember the
         * tail and other pointers so the bios can be properly reinserted
         * into the list if we hit congestion
         */
-       pending = device->pending_bios;
-       tail = device->pending_bio_tail;
+       if (!force_reg && device->pending_sync_bios.head) {
+               pending_bios = &device->pending_sync_bios;
+               force_reg = 1;
+       } else {
+               pending_bios = &device->pending_bios;
+               force_reg = 0;
+       }
+
+       pending = pending_bios->head;
+       tail = pending_bios->tail;
        WARN_ON(pending && !tail);
-       device->pending_bios = NULL;
-       device->pending_bio_tail = NULL;
 
        /*
         * if pending was null this time around, no bios need processing
@@ -176,16 +207,45 @@ loop_lock:
         * device->running_pending is used to synchronize with the
         * schedule_bio code.
         */
-       if (pending) {
-               again = 1;
-               device->running_pending = 1;
-       } else {
+       if (device->pending_sync_bios.head == NULL &&
+           device->pending_bios.head == NULL) {
                again = 0;
                device->running_pending = 0;
+       } else {
+               again = 1;
+               device->running_pending = 1;
        }
+
+       pending_bios->head = NULL;
+       pending_bios->tail = NULL;
+
        spin_unlock(&device->io_lock);
 
+       /*
+        * if we're doing the regular priority list, make sure we unplug
+        * for any high prio bios we've sent down
+        */
+       if (pending_bios == &device->pending_bios && num_sync_run > 0) {
+               num_sync_run = 0;
+               blk_run_backing_dev(bdi, NULL);
+       }
+
        while (pending) {
+
+               rmb();
+               /* we want to work on both lists, but do more bios on the
+                * sync list than the regular list
+                */
+               if ((num_run > 32 &&
+                   pending_bios != &device->pending_sync_bios &&
+                   device->pending_sync_bios.head) ||
+                  (num_run > 64 && pending_bios == &device->pending_sync_bios &&
+                   device->pending_bios.head)) {
+                       spin_lock(&device->io_lock);
+                       requeue_list(pending_bios, pending, tail);
+                       goto loop_lock;
+               }
+
                cur = pending;
                pending = pending->bi_next;
                cur->bi_next = NULL;
@@ -196,19 +256,28 @@ loop_lock:
                        wake_up(&fs_info->async_submit_wait);
 
                BUG_ON(atomic_read(&cur->bi_cnt) == 0);
-               bio_get(cur);
                submit_bio(cur->bi_rw, cur);
-               bio_put(cur);
                num_run++;
+               batch_run++;
+
+               if (bio_sync(cur))
+                       num_sync_run++;
+
+               if (need_resched()) {
+                       if (num_sync_run) {
+                               blk_run_backing_dev(bdi, NULL);
+                               num_sync_run = 0;
+                       }
+                       cond_resched();
+               }
 
                /*
                 * we made progress, there is more work to do and the bdi
                 * is now congested.  Back off and let other work structs
                 * run instead
                 */
-               if (pending && bdi_write_congested(bdi) && num_run > 16 &&
+               if (pending && bdi_write_congested(bdi) && batch_run > 32 &&
                    fs_info->fs_devices->open_devices > 1) {
-                       struct bio *old_head;
                        struct io_context *ioc;
 
                        ioc = current->io_context;
@@ -233,17 +302,17 @@ loop_lock:
                                 * against it before looping
                                 */
                                last_waited = ioc->last_waited;
+                               if (need_resched()) {
+                                       if (num_sync_run) {
+                                               blk_run_backing_dev(bdi, NULL);
+                                               num_sync_run = 0;
+                                       }
+                                       cond_resched();
+                               }
                                continue;
                        }
                        spin_lock(&device->io_lock);
-
-                       old_head = device->pending_bios;
-                       device->pending_bios = pending;
-                       if (device->pending_bio_tail)
-                               tail->bi_next = old_head;
-                       else
-                               device->pending_bio_tail = tail;
-
+                       requeue_list(pending_bios, pending, tail);
                        device->running_pending = 1;
 
                        spin_unlock(&device->io_lock);
@@ -251,11 +320,18 @@ loop_lock:
                        goto done;
                }
        }
+
+       if (num_sync_run) {
+               num_sync_run = 0;
+               blk_run_backing_dev(bdi, NULL);
+       }
+
+       cond_resched();
        if (again)
                goto loop;
 
        spin_lock(&device->io_lock);
-       if (device->pending_bios)
+       if (device->pending_bios.head || device->pending_sync_bios.head)
                goto loop_lock;
        spin_unlock(&device->io_lock);
 
@@ -301,6 +377,7 @@ static noinline int device_list_add(const char *path,
                memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE);
                fs_devices->latest_devid = devid;
                fs_devices->latest_trans = found_transid;
+               mutex_init(&fs_devices->device_list_mutex);
                device = NULL;
        } else {
                device = __find_device(&fs_devices->devices, devid,
@@ -327,7 +404,11 @@ static noinline int device_list_add(const char *path,
                        return -ENOMEM;
                }
                INIT_LIST_HEAD(&device->dev_alloc_list);
+
+               mutex_lock(&fs_devices->device_list_mutex);
                list_add(&device->dev_list, &fs_devices->devices);
+               mutex_unlock(&fs_devices->device_list_mutex);
+
                device->fs_devices = fs_devices;
                fs_devices->num_devices++;
        }
@@ -353,10 +434,12 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
        INIT_LIST_HEAD(&fs_devices->devices);
        INIT_LIST_HEAD(&fs_devices->alloc_list);
        INIT_LIST_HEAD(&fs_devices->list);
+       mutex_init(&fs_devices->device_list_mutex);
        fs_devices->latest_devid = orig->latest_devid;
        fs_devices->latest_trans = orig->latest_trans;
        memcpy(fs_devices->fsid, orig->fsid, sizeof(fs_devices->fsid));
 
+       mutex_lock(&orig->device_list_mutex);
        list_for_each_entry(orig_dev, &orig->devices, dev_list) {
                device = kzalloc(sizeof(*device), GFP_NOFS);
                if (!device)
@@ -378,8 +461,10 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
                device->fs_devices = fs_devices;
                fs_devices->num_devices++;
        }
+       mutex_unlock(&orig->device_list_mutex);
        return fs_devices;
 error:
+       mutex_unlock(&orig->device_list_mutex);
        free_fs_devices(fs_devices);
        return ERR_PTR(-ENOMEM);
 }
@@ -390,6 +475,7 @@ int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices)
 
        mutex_lock(&uuid_mutex);
 again:
+       mutex_lock(&fs_devices->device_list_mutex);
        list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
                if (device->in_fs_metadata)
                        continue;
@@ -409,6 +495,7 @@ again:
                kfree(device->name);
                kfree(device);
        }
+       mutex_unlock(&fs_devices->device_list_mutex);
 
        if (fs_devices->seed) {
                fs_devices = fs_devices->seed;
@@ -529,6 +616,9 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
                device->in_fs_metadata = 0;
                device->mode = flags;
 
+               if (!blk_queue_nonrot(bdev_get_queue(bdev)))
+                       fs_devices->rotating = 1;
+
                fs_devices->open_devices++;
                if (device->writeable) {
                        fs_devices->rw_devices++;
@@ -1056,12 +1146,14 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
 
                device = NULL;
                devices = &root->fs_info->fs_devices->devices;
+               mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
                list_for_each_entry(tmp, devices, dev_list) {
                        if (tmp->in_fs_metadata && !tmp->bdev) {
                                device = tmp;
                                break;
                        }
                }
+               mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
                bdev = NULL;
                bh = NULL;
                disk_super = NULL;
@@ -1116,7 +1208,16 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
                goto error_brelse;
 
        device->in_fs_metadata = 0;
+
+       /*
+        * the device list mutex makes sure that we don't change
+        * the device list while someone else is writing out all
+        * the device supers.
+        */
+       mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
        list_del_init(&device->dev_list);
+       mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
+
        device->fs_devices->num_devices--;
 
        next_device = list_entry(root->fs_info->fs_devices->devices.next,
@@ -1210,6 +1311,7 @@ static int btrfs_prepare_sprout(struct btrfs_trans_handle *trans,
        seed_devices->opened = 1;
        INIT_LIST_HEAD(&seed_devices->devices);
        INIT_LIST_HEAD(&seed_devices->alloc_list);
+       mutex_init(&seed_devices->device_list_mutex);
        list_splice_init(&fs_devices->devices, &seed_devices->devices);
        list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list);
        list_for_each_entry(device, &seed_devices->devices, dev_list) {
@@ -1335,6 +1437,10 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
        mutex_lock(&root->fs_info->volume_mutex);
 
        devices = &root->fs_info->fs_devices->devices;
+       /*
+        * we have the volume lock, so we don't need the extra
+        * device list mutex while reading the list here.
+        */
        list_for_each_entry(device, devices, dev_list) {
                if (device->bdev == bdev) {
                        ret = -EEXIST;
@@ -1375,6 +1481,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
        device->io_align = root->sectorsize;
        device->sector_size = root->sectorsize;
        device->total_bytes = i_size_read(bdev->bd_inode);
+       device->disk_total_bytes = device->total_bytes;
        device->dev_root = root->fs_info->dev_root;
        device->bdev = bdev;
        device->in_fs_metadata = 1;
@@ -1388,6 +1495,12 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
        }
 
        device->fs_devices = root->fs_info->fs_devices;
+
+       /*
+        * we don't want write_supers to jump in here with our device
+        * half setup
+        */
+       mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
        list_add(&device->dev_list, &root->fs_info->fs_devices->devices);
        list_add(&device->dev_alloc_list,
                 &root->fs_info->fs_devices->alloc_list);
@@ -1396,6 +1509,9 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
        root->fs_info->fs_devices->rw_devices++;
        root->fs_info->fs_devices->total_rw_bytes += device->total_bytes;
 
+       if (!blk_queue_nonrot(bdev_get_queue(bdev)))
+               root->fs_info->fs_devices->rotating = 1;
+
        total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
        btrfs_set_super_total_bytes(&root->fs_info->super_copy,
                                    total_bytes + device->total_bytes);
@@ -1403,6 +1519,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
        total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy);
        btrfs_set_super_num_devices(&root->fs_info->super_copy,
                                    total_bytes + 1);
+       mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
 
        if (seeding_dev) {
                ret = init_first_rw_device(trans, root, device);
@@ -1478,7 +1595,7 @@ static noinline int btrfs_update_device(struct btrfs_trans_handle *trans,
        btrfs_set_device_io_align(leaf, dev_item, device->io_align);
        btrfs_set_device_io_width(leaf, dev_item, device->io_width);
        btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
-       btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
+       btrfs_set_device_total_bytes(leaf, dev_item, device->disk_total_bytes);
        btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
        btrfs_mark_buffer_dirty(leaf);
 
@@ -1605,8 +1722,6 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
        int ret;
        int i;
 
-       printk(KERN_INFO "btrfs relocating chunk %llu\n",
-              (unsigned long long)chunk_offset);
        root = root->fs_info->chunk_root;
        extent_root = root->fs_info->extent_root;
        em_tree = &root->fs_info->mapping_tree.map_tree;
@@ -1875,14 +1990,6 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
        device->total_bytes = new_size;
        if (device->writeable)
                device->fs_devices->total_rw_bytes -= diff;
-       ret = btrfs_update_device(trans, device);
-       if (ret) {
-               unlock_chunks(root);
-               btrfs_end_transaction(trans, root);
-               goto done;
-       }
-       WARN_ON(diff > old_total);
-       btrfs_set_super_total_bytes(super_copy, old_total - diff);
        unlock_chunks(root);
        btrfs_end_transaction(trans, root);
 
@@ -1900,7 +2007,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
                        goto done;
                if (ret) {
                        ret = 0;
-                       goto done;
+                       break;
                }
 
                l = path->nodes[0];
@@ -1908,13 +2015,13 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
                btrfs_item_key_to_cpu(l, &key, path->slots[0]);
 
                if (key.objectid != device->devid)
-                       goto done;
+                       break;
 
                dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
                length = btrfs_dev_extent_length(l, dev_extent);
 
                if (key.offset + length <= new_size)
-                       goto done;
+                       break;
 
                chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
                chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
@@ -1927,6 +2034,26 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
                        goto done;
        }
 
+       /* Shrinking succeeded, else we would be at "done". */
+       trans = btrfs_start_transaction(root, 1);
+       if (!trans) {
+               ret = -ENOMEM;
+               goto done;
+       }
+       lock_chunks(root);
+
+       device->disk_total_bytes = new_size;
+       /* Now btrfs_update_device() will change the on-disk size. */
+       ret = btrfs_update_device(trans, device);
+       if (ret) {
+               unlock_chunks(root);
+               btrfs_end_transaction(trans, root);
+               goto done;
+       }
+       WARN_ON(diff > old_total);
+       btrfs_set_super_total_bytes(super_copy, old_total - diff);
+       unlock_chunks(root);
+       btrfs_end_transaction(trans, root);
 done:
        btrfs_free_path(path);
        return ret;
@@ -2497,7 +2624,7 @@ again:
                        max_errors = 1;
                }
        }
-       if (multi_ret && rw == WRITE &&
+       if (multi_ret && (rw & (1 << BIO_RW)) &&
            stripes_allocated < stripes_required) {
                stripes_allocated = map->num_stripes;
                free_extent_map(em);
@@ -2762,6 +2889,7 @@ static noinline int schedule_bio(struct btrfs_root *root,
                                 int rw, struct bio *bio)
 {
        int should_queue = 1;
+       struct btrfs_pending_bios *pending_bios;
 
        /* don't bother with additional async steps for reads, right now */
        if (!(rw & (1 << BIO_RW))) {
@@ -2783,13 +2911,17 @@ static noinline int schedule_bio(struct btrfs_root *root,
        bio->bi_rw |= rw;
 
        spin_lock(&device->io_lock);
+       if (bio_sync(bio))
+               pending_bios = &device->pending_sync_bios;
+       else
+               pending_bios = &device->pending_bios;
 
-       if (device->pending_bio_tail)
-               device->pending_bio_tail->bi_next = bio;
+       if (pending_bios->tail)
+               pending_bios->tail->bi_next = bio;
 
-       device->pending_bio_tail = bio;
-       if (!device->pending_bios)
-               device->pending_bios = bio;
+       pending_bios->tail = bio;
+       if (!pending_bios->head)
+               pending_bios->head = bio;
        if (device->running_pending)
                should_queue = 0;
 
@@ -3006,7 +3138,8 @@ static int fill_device_from_item(struct extent_buffer *leaf,
        unsigned long ptr;
 
        device->devid = btrfs_device_id(leaf, dev_item);
-       device->total_bytes = btrfs_device_total_bytes(leaf, dev_item);
+       device->disk_total_bytes = btrfs_device_total_bytes(leaf, dev_item);
+       device->total_bytes = device->disk_total_bytes;
        device->bytes_used = btrfs_device_bytes_used(leaf, dev_item);
        device->type = btrfs_device_type(leaf, dev_item);
        device->io_align = btrfs_device_io_align(leaf, dev_item);
This page took 0.039803 seconds and 5 git commands to generate.