Fix btrfs_get_extent and get_block corner cases, and disable O_DIRECT reads
[deliverable/linux.git] / fs / btrfs / volumes.c
index e3ddd7fb8edd341c94ac23ad9c8e37e3dc4a1987..bccb5566fd84b8365d50906d0d7a8d639c95ae40 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/sched.h>
 #include <linux/bio.h>
 #include <linux/buffer_head.h>
+#include <linux/blkdev.h>
 #include <asm/div64.h>
 #include "ctree.h"
 #include "extent_map.h"
@@ -110,6 +111,7 @@ static int device_list_add(const char *path,
                if (!fs_devices)
                        return -ENOMEM;
                INIT_LIST_HEAD(&fs_devices->devices);
+               INIT_LIST_HEAD(&fs_devices->alloc_list);
                list_add(&fs_devices->list, &fs_uuids);
                memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE);
                fs_devices->latest_devid = devid;
@@ -138,6 +140,7 @@ static int device_list_add(const char *path,
                        return -ENOMEM;
                }
                list_add(&device->dev_list, &fs_devices->devices);
+               list_add(&device->dev_alloc_list, &fs_devices->alloc_list);
                fs_devices->num_devices++;
        }
 
@@ -659,12 +662,12 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
        struct btrfs_device *device = NULL;
        struct btrfs_chunk *chunk;
        struct list_head private_devs;
-       struct list_head *dev_list = &extent_root->fs_info->fs_devices->devices;
+       struct list_head *dev_list;
        struct list_head *cur;
        struct extent_map_tree *em_tree;
        struct map_lookup *map;
        struct extent_map *em;
-       int min_chunk_size = 8 * 1024 * 1024;
+       int min_stripe_size = 1 * 1024 * 1024;
        u64 physical;
        u64 calc_size = 1024 * 1024 * 1024;
        u64 max_chunk_size = calc_size;
@@ -673,6 +676,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
        u64 max_avail = 0;
        u64 percent_max;
        int num_stripes = 1;
+       int min_stripes = 1;
        int sub_stripes = 0;
        int looped = 0;
        int ret;
@@ -680,18 +684,24 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
        int stripe_len = 64 * 1024;
        struct btrfs_key key;
 
+       dev_list = &extent_root->fs_info->fs_devices->alloc_list;
        if (list_empty(dev_list))
                return -ENOSPC;
 
-       if (type & (BTRFS_BLOCK_GROUP_RAID0))
+       if (type & (BTRFS_BLOCK_GROUP_RAID0)) {
                num_stripes = btrfs_super_num_devices(&info->super_copy);
-       if (type & (BTRFS_BLOCK_GROUP_DUP))
+               min_stripes = 2;
+       }
+       if (type & (BTRFS_BLOCK_GROUP_DUP)) {
                num_stripes = 2;
+               min_stripes = 2;
+       }
        if (type & (BTRFS_BLOCK_GROUP_RAID1)) {
                num_stripes = min_t(u64, 2,
                                  btrfs_super_num_devices(&info->super_copy));
                if (num_stripes < 2)
                        return -ENOSPC;
+               min_stripes = 2;
        }
        if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
                num_stripes = btrfs_super_num_devices(&info->super_copy);
@@ -699,22 +709,26 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
                        return -ENOSPC;
                num_stripes &= ~(u32)1;
                sub_stripes = 2;
+               min_stripes = 4;
        }
 
        if (type & BTRFS_BLOCK_GROUP_DATA) {
                max_chunk_size = 10 * calc_size;
-               min_chunk_size = 256 * 1024 * 1024;
+               min_stripe_size = 64 * 1024 * 1024;
        } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
                max_chunk_size = 4 * calc_size;
-               min_chunk_size = 64 * 1024 * 1024;
-       } else {
-               min_chunk_size = 32 * 1024 * 1024;
+               min_stripe_size = 32 * 1024 * 1024;
+       } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
+               calc_size = 8 * 1024 * 1024;
+               max_chunk_size = calc_size * 2;
+               min_stripe_size = 1 * 1024 * 1024;
        }
 
        /* we don't want a chunk larger than 10% of the FS */
        percent_max = div_factor(btrfs_super_total_bytes(&info->super_copy), 1);
        max_chunk_size = min(percent_max, max_chunk_size);
 
+again:
        if (calc_size * num_stripes > max_chunk_size) {
                calc_size = max_chunk_size;
                do_div(calc_size, num_stripes);
@@ -722,12 +736,8 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
                calc_size *= stripe_len;
        }
        /* we don't want tiny stripes */
-       *num_bytes = chunk_bytes_by_type(type, calc_size,
-                                        num_stripes, sub_stripes);
-       calc_size = max_t(u64, chunk_bytes_by_type(type, min_chunk_size,
-                         num_stripes, sub_stripes), calc_size);
+       calc_size = max_t(u64, min_stripe_size, calc_size);
 
-again:
        do_div(calc_size, stripe_len);
        calc_size *= stripe_len;
 
@@ -740,25 +750,36 @@ again:
        else
                min_free = calc_size;
 
+       /* we add 1MB because we never use the first 1MB of the device */
+       min_free += 1024 * 1024;
+
        /* build a private list of devices we will allocate from */
        while(index < num_stripes) {
-               device = list_entry(cur, struct btrfs_device, dev_list);
+               device = list_entry(cur, struct btrfs_device, dev_alloc_list);
 
                avail = device->total_bytes - device->bytes_used;
                cur = cur->next;
-               if (avail > max_avail)
-                       max_avail = avail;
                if (avail >= min_free) {
-                       list_move_tail(&device->dev_list, &private_devs);
+                       list_move_tail(&device->dev_alloc_list, &private_devs);
                        index++;
                        if (type & BTRFS_BLOCK_GROUP_DUP)
                                index++;
-               }
+               } else if (avail > max_avail)
+                       max_avail = avail;
                if (cur == dev_list)
                        break;
        }
        if (index < num_stripes) {
                list_splice(&private_devs, dev_list);
+               if (index >= min_stripes) {
+                       num_stripes = index;
+                       if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
+                               num_stripes /= sub_stripes;
+                               num_stripes *= sub_stripes;
+                       }
+                       looped = 1;
+                       goto again;
+               }
                if (!looped && max_avail > 0) {
                        looped = 1;
                        calc_size = max_avail;
@@ -766,7 +787,6 @@ again:
                }
                return -ENOSPC;
        }
-
        key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
        key.type = BTRFS_CHUNK_ITEM_KEY;
        ret = find_next_chunk(chunk_root, BTRFS_FIRST_CHUNK_TREE_OBJECTID,
@@ -795,12 +815,12 @@ printk("new chunk type %Lu start %Lu size %Lu\n", type, key.offset, *num_bytes);
                struct btrfs_stripe *stripe;
                BUG_ON(list_empty(&private_devs));
                cur = private_devs.next;
-               device = list_entry(cur, struct btrfs_device, dev_list);
+               device = list_entry(cur, struct btrfs_device, dev_alloc_list);
 
                /* loop over this device again if we're doing a dup group */
                if (!(type & BTRFS_BLOCK_GROUP_DUP) ||
                    (index == num_stripes - 1))
-                       list_move_tail(&device->dev_list, dev_list);
+                       list_move_tail(&device->dev_alloc_list, dev_list);
 
                ret = btrfs_alloc_dev_extent(trans, device,
                             info->chunk_root->root_key.objectid,
@@ -914,9 +934,10 @@ int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len)
        return ret;
 }
 
-int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
-                   u64 logical, u64 *length,
-                   struct btrfs_multi_bio **multi_ret, int mirror_num)
+static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
+                            u64 logical, u64 *length,
+                            struct btrfs_multi_bio **multi_ret,
+                            int mirror_num, struct page *unplug_page)
 {
        struct extent_map *em;
        struct map_lookup *map;
@@ -928,6 +949,7 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
        int stripes_required = 1;
        int stripe_index;
        int i;
+       int num_stripes;
        struct btrfs_multi_bio *multi = NULL;
 
        if (multi_ret && !(rw & (1 << BIO_RW))) {
@@ -944,10 +966,14 @@ again:
        spin_lock(&em_tree->lock);
        em = lookup_extent_mapping(em_tree, logical, *length);
        spin_unlock(&em_tree->lock);
+
+       if (!em && unplug_page)
+               return 0;
+
        if (!em) {
                printk("unable to find logical %Lu\n", logical);
+               BUG();
        }
-       BUG_ON(!em);
 
        BUG_ON(em->start > logical || em->start + em->len < logical);
        map = (struct map_lookup *)em->bdev;
@@ -994,49 +1020,41 @@ again:
        } else {
                *length = em->len - offset;
        }
-       if (!multi_ret)
+
+       if (!multi_ret && !unplug_page)
                goto out;
 
-       multi->num_stripes = 1;
+       num_stripes = 1;
        stripe_index = 0;
        if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
-               if (rw & (1 << BIO_RW))
-                       multi->num_stripes = map->num_stripes;
+               if (unplug_page || (rw & (1 << BIO_RW)))
+                       num_stripes = map->num_stripes;
                else if (mirror_num) {
                        stripe_index = mirror_num - 1;
                } else {
-                       int i;
-                       u64 least = (u64)-1;
-                       struct btrfs_device *cur;
-
-                       for (i = 0; i < map->num_stripes; i++) {
-                               cur = map->stripes[i].dev;
-                               spin_lock(&cur->io_lock);
-                               if (cur->total_ios < least) {
-                                       least = cur->total_ios;
-                                       stripe_index = i;
-                               }
-                               spin_unlock(&cur->io_lock);
-                       }
+                       u64 orig_stripe_nr = stripe_nr;
+                       stripe_index = do_div(orig_stripe_nr, num_stripes);
                }
        } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
                if (rw & (1 << BIO_RW))
-                       multi->num_stripes = map->num_stripes;
+                       num_stripes = map->num_stripes;
                else if (mirror_num)
                        stripe_index = mirror_num - 1;
        } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
                int factor = map->num_stripes / map->sub_stripes;
-               int orig_stripe_nr = stripe_nr;
 
                stripe_index = do_div(stripe_nr, factor);
                stripe_index *= map->sub_stripes;
 
-               if (rw & (1 << BIO_RW))
-                       multi->num_stripes = map->sub_stripes;
+               if (unplug_page || (rw & (1 << BIO_RW)))
+                       num_stripes = map->sub_stripes;
                else if (mirror_num)
                        stripe_index += mirror_num - 1;
-               else
-                       stripe_index += orig_stripe_nr % map->sub_stripes;
+               else {
+                       u64 orig_stripe_nr = stripe_nr;
+                       stripe_index += do_div(orig_stripe_nr,
+                                              map->sub_stripes);
+               }
        } else {
                /*
                 * after this do_div call, stripe_nr is the number of stripes
@@ -1047,19 +1065,50 @@ again:
        }
        BUG_ON(stripe_index >= map->num_stripes);
 
-       for (i = 0; i < multi->num_stripes; i++) {
-               multi->stripes[i].physical =
-                       map->stripes[stripe_index].physical + stripe_offset +
-                       stripe_nr * map->stripe_len;
-               multi->stripes[i].dev = map->stripes[stripe_index].dev;
+       for (i = 0; i < num_stripes; i++) {
+               if (unplug_page) {
+                       struct btrfs_device *device;
+                       struct backing_dev_info *bdi;
+
+                       device = map->stripes[stripe_index].dev;
+                       bdi = blk_get_backing_dev_info(device->bdev);
+                       if (bdi->unplug_io_fn) {
+                               bdi->unplug_io_fn(bdi, unplug_page);
+                       }
+               } else {
+                       multi->stripes[i].physical =
+                               map->stripes[stripe_index].physical +
+                               stripe_offset + stripe_nr * map->stripe_len;
+                       multi->stripes[i].dev = map->stripes[stripe_index].dev;
+               }
                stripe_index++;
        }
-       *multi_ret = multi;
+       if (multi_ret) {
+               *multi_ret = multi;
+               multi->num_stripes = num_stripes;
+       }
 out:
        free_extent_map(em);
        return 0;
 }
 
+int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
+                     u64 logical, u64 *length,
+                     struct btrfs_multi_bio **multi_ret, int mirror_num)
+{ /* Public entry point: forwards every argument to __btrfs_map_block() and returns its result unchanged. */
+       return __btrfs_map_block(map_tree, rw, logical, length, multi_ret,
+                                mirror_num, NULL); /* NULL unplug_page: plain mapping, no device unplug */
+}
+
+int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree,
+                     u64 logical, struct page *page)
+{ /* Runs __btrfs_map_block() in unplug mode: with a non-NULL unplug_page it calls unplug_io_fn (when set) on each selected stripe's backing device instead of filling a multi_bio. */
+       u64 length = PAGE_CACHE_SIZE; /* length handed to the mapping lookup; passed by pointer below */
+       return __btrfs_map_block(map_tree, READ, logical, &length,
+                                NULL, 0, page); /* no multi_ret, mirror_num 0; returns 0 even if no mapping covers logical */
+}
+
+
 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
 static void end_bio_multi_stripe(struct bio *bio, int err)
 #else
@@ -1106,17 +1155,12 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
        u64 logical = bio->bi_sector << 9;
        u64 length = 0;
        u64 map_length;
-       struct bio_vec *bvec;
        struct btrfs_multi_bio *multi = NULL;
-       int i;
        int ret;
        int dev_nr = 0;
        int total_devs = 1;
 
-       bio_for_each_segment(bvec, bio, i) {
-               length += bvec->bv_len;
-       }
-
+       length = bio->bi_size;
        map_tree = &root->fs_info->mapping_tree;
        map_length = length;
 
@@ -1147,6 +1191,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
                }
                bio->bi_sector = multi->stripes[dev_nr].physical >> 9;
                dev = multi->stripes[dev_nr].dev;
+
                bio->bi_bdev = dev->bdev;
                spin_lock(&dev->io_lock);
                dev->total_ios++;
@@ -1287,6 +1332,8 @@ static int read_one_dev(struct btrfs_root *root,
                        return -ENOMEM;
                list_add(&device->dev_list,
                         &root->fs_info->fs_devices->devices);
+               list_add(&device->dev_alloc_list,
+                        &root->fs_info->fs_devices->alloc_list);
                device->barriers = 1;
                spin_lock_init(&device->io_lock);
        }
This page took 0.041447 seconds and 5 git commands to generate.