#include <linux/sched.h>
#include <linux/bio.h>
#include <linux/buffer_head.h>
+#include <linux/blkdev.h>
#include <asm/div64.h>
#include "ctree.h"
#include "extent_map.h"
if (!fs_devices)
return -ENOMEM;
INIT_LIST_HEAD(&fs_devices->devices);
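+	/* devices we are currently allowed to allocate chunks from */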
+ INIT_LIST_HEAD(&fs_devices->alloc_list);
list_add(&fs_devices->list, &fs_uuids);
memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE);
fs_devices->latest_devid = devid;
return -ENOMEM;
}
list_add(&device->dev_list, &fs_devices->devices);
+ list_add(&device->dev_alloc_list, &fs_devices->alloc_list);
fs_devices->num_devices++;
}
struct btrfs_device *device = NULL;
struct btrfs_chunk *chunk;
struct list_head private_devs;
- struct list_head *dev_list = &extent_root->fs_info->fs_devices->devices;
+ struct list_head *dev_list;
struct list_head *cur;
struct extent_map_tree *em_tree;
struct map_lookup *map;
struct extent_map *em;
- int min_chunk_size = 8 * 1024 * 1024;
+ int min_stripe_size = 1 * 1024 * 1024;
u64 physical;
u64 calc_size = 1024 * 1024 * 1024;
u64 max_chunk_size = calc_size;
u64 max_avail = 0;
u64 percent_max;
int num_stripes = 1;
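+	/* fewest stripes this chunk type can be built from */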
+ int min_stripes = 1;
int sub_stripes = 0;
int looped = 0;
int ret;
int stripe_len = 64 * 1024;
struct btrfs_key key;
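+
+	/* only devices on the alloc_list are candidates for new chunks */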
+ dev_list = &extent_root->fs_info->fs_devices->alloc_list;
if (list_empty(dev_list))
return -ENOSPC;
- if (type & (BTRFS_BLOCK_GROUP_RAID0))
+ if (type & (BTRFS_BLOCK_GROUP_RAID0)) {
num_stripes = btrfs_super_num_devices(&info->super_copy);
- if (type & (BTRFS_BLOCK_GROUP_DUP))
+ min_stripes = 2;
+ }
+ if (type & (BTRFS_BLOCK_GROUP_DUP)) {
num_stripes = 2;
+ min_stripes = 2;
+ }
if (type & (BTRFS_BLOCK_GROUP_RAID1)) {
num_stripes = min_t(u64, 2,
btrfs_super_num_devices(&info->super_copy));
if (num_stripes < 2)
return -ENOSPC;
+ min_stripes = 2;
}
if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
num_stripes = btrfs_super_num_devices(&info->super_copy);
		if (num_stripes < 4)
			return -ENOSPC;
num_stripes &= ~(u32)1;
sub_stripes = 2;
+ min_stripes = 4;
}
if (type & BTRFS_BLOCK_GROUP_DATA) {
max_chunk_size = 10 * calc_size;
- min_chunk_size = 256 * 1024 * 1024;
+ min_stripe_size = 64 * 1024 * 1024;
} else if (type & BTRFS_BLOCK_GROUP_METADATA) {
max_chunk_size = 4 * calc_size;
- min_chunk_size = 64 * 1024 * 1024;
- } else {
- min_chunk_size = 32 * 1024 * 1024;
+ min_stripe_size = 32 * 1024 * 1024;
+ } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
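+		/* system chunks hold the chunk tree itself, keep them small */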
+ calc_size = 8 * 1024 * 1024;
+ max_chunk_size = calc_size * 2;
+ min_stripe_size = 1 * 1024 * 1024;
}
/* we don't want a chunk larger than 10% of the FS */
percent_max = div_factor(btrfs_super_total_bytes(&info->super_copy), 1);
max_chunk_size = min(percent_max, max_chunk_size);
+again:
if (calc_size * num_stripes > max_chunk_size) {
calc_size = max_chunk_size;
do_div(calc_size, num_stripes);
		do_div(calc_size, stripe_len);
		calc_size *= stripe_len;
}
/* we don't want tiny stripes */
- *num_bytes = chunk_bytes_by_type(type, calc_size,
- num_stripes, sub_stripes);
- calc_size = max_t(u64, chunk_bytes_by_type(type, min_chunk_size,
- num_stripes, sub_stripes), calc_size);
+ calc_size = max_t(u64, min_stripe_size, calc_size);
-again:
do_div(calc_size, stripe_len);
calc_size *= stripe_len;
	if (type & BTRFS_BLOCK_GROUP_DUP)
		min_free = calc_size * 2;
	else
		min_free = calc_size;
+ /* we add 1MB because we never use the first 1MB of the device */
+ min_free += 1024 * 1024;
+
/* build a private list of devices we will allocate from */
while(index < num_stripes) {
- device = list_entry(cur, struct btrfs_device, dev_list);
+ device = list_entry(cur, struct btrfs_device, dev_alloc_list);
avail = device->total_bytes - device->bytes_used;
cur = cur->next;
- if (avail > max_avail)
- max_avail = avail;
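+		/*
+		 * take devices with enough free space; track the largest
+		 * region we skip so a retry can shrink calc_size to fit
+		 */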
if (avail >= min_free) {
- list_move_tail(&device->dev_list, &private_devs);
+ list_move_tail(&device->dev_alloc_list, &private_devs);
index++;
if (type & BTRFS_BLOCK_GROUP_DUP)
index++;
- }
+ } else if (avail > max_avail)
+ max_avail = avail;
if (cur == dev_list)
break;
}
if (index < num_stripes) {
list_splice(&private_devs, dev_list);
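+		/* enough devices for a smaller chunk? retry with fewer stripes */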
+ if (index >= min_stripes) {
+ num_stripes = index;
+ if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
+ num_stripes /= sub_stripes;
+ num_stripes *= sub_stripes;
+ }
+ looped = 1;
+ goto again;
+ }
if (!looped && max_avail > 0) {
looped = 1;
			calc_size = max_avail;
			goto again;
}
return -ENOSPC;
}
-
key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
key.type = BTRFS_CHUNK_ITEM_KEY;
	ret = find_next_chunk(chunk_root, BTRFS_FIRST_CHUNK_TREE_OBJECTID,
			      &key.offset);
struct btrfs_stripe *stripe;
BUG_ON(list_empty(&private_devs));
cur = private_devs.next;
- device = list_entry(cur, struct btrfs_device, dev_list);
+ device = list_entry(cur, struct btrfs_device, dev_alloc_list);
/* loop over this device again if we're doing a dup group */
if (!(type & BTRFS_BLOCK_GROUP_DUP) ||
(index == num_stripes - 1))
- list_move_tail(&device->dev_list, dev_list);
+ list_move_tail(&device->dev_alloc_list, dev_list);
ret = btrfs_alloc_dev_extent(trans, device,
info->chunk_root->root_key.objectid,
return ret;
}
-int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
- u64 logical, u64 *length,
- struct btrfs_multi_bio **multi_ret, int mirror_num)
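+/*
+ * map a logical extent to its physical stripes; with a non-NULL
+ * unplug_page no mapping is returned, we just unplug the queue of
+ * every device that might contain the page
+ */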
+static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
+ u64 logical, u64 *length,
+ struct btrfs_multi_bio **multi_ret,
+ int mirror_num, struct page *unplug_page)
{
struct extent_map *em;
struct map_lookup *map;
int stripes_required = 1;
int stripe_index;
int i;
+ int num_stripes;
struct btrfs_multi_bio *multi = NULL;
if (multi_ret && !(rw & (1 << BIO_RW))) {
spin_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, logical, *length);
spin_unlock(&em_tree->lock);
+
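+	/* an unplug request for an unmapped range is simply a no-op */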
+ if (!em && unplug_page)
+ return 0;
+
if (!em) {
printk("unable to find logical %Lu\n", logical);
+ BUG();
}
- BUG_ON(!em);
BUG_ON(em->start > logical || em->start + em->len < logical);
map = (struct map_lookup *)em->bdev;
} else {
*length = em->len - offset;
}
- if (!multi_ret)
+
+ if (!multi_ret && !unplug_page)
goto out;
- multi->num_stripes = 1;
+ num_stripes = 1;
stripe_index = 0;
if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
- if (rw & (1 << BIO_RW))
- multi->num_stripes = map->num_stripes;
+ if (unplug_page || (rw & (1 << BIO_RW)))
+ num_stripes = map->num_stripes;
else if (mirror_num) {
stripe_index = mirror_num - 1;
} else {
- int i;
- u64 least = (u64)-1;
- struct btrfs_device *cur;
-
- for (i = 0; i < map->num_stripes; i++) {
- cur = map->stripes[i].dev;
- spin_lock(&cur->io_lock);
- if (cur->total_ios < least) {
- least = cur->total_ios;
- stripe_index = i;
- }
- spin_unlock(&cur->io_lock);
- }
+ u64 orig_stripe_nr = stripe_nr;
+			stripe_index = do_div(orig_stripe_nr,
+					      map->num_stripes);
}
} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
if (rw & (1 << BIO_RW))
- multi->num_stripes = map->num_stripes;
+ num_stripes = map->num_stripes;
else if (mirror_num)
stripe_index = mirror_num - 1;
} else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
int factor = map->num_stripes / map->sub_stripes;
- int orig_stripe_nr = stripe_nr;
stripe_index = do_div(stripe_nr, factor);
stripe_index *= map->sub_stripes;
- if (rw & (1 << BIO_RW))
- multi->num_stripes = map->sub_stripes;
+ if (unplug_page || (rw & (1 << BIO_RW)))
+ num_stripes = map->sub_stripes;
else if (mirror_num)
stripe_index += mirror_num - 1;
- else
- stripe_index += orig_stripe_nr % map->sub_stripes;
+ else {
+ u64 orig_stripe_nr = stripe_nr;
+ stripe_index += do_div(orig_stripe_nr,
+ map->sub_stripes);
+ }
} else {
/*
		 * after this do_div call, stripe_nr is the number of stripes
		 * on this device, stripe_index is the number of our device
		 */
		stripe_index = do_div(stripe_nr, map->num_stripes);
	}
BUG_ON(stripe_index >= map->num_stripes);
- for (i = 0; i < multi->num_stripes; i++) {
- multi->stripes[i].physical =
- map->stripes[stripe_index].physical + stripe_offset +
- stripe_nr * map->stripe_len;
- multi->stripes[i].dev = map->stripes[stripe_index].dev;
+ for (i = 0; i < num_stripes; i++) {
+ if (unplug_page) {
+ struct btrfs_device *device;
+ struct backing_dev_info *bdi;
+
+ device = map->stripes[stripe_index].dev;
+ bdi = blk_get_backing_dev_info(device->bdev);
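+			/* not every backing device supplies an unplug hook */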
+ if (bdi->unplug_io_fn) {
+ bdi->unplug_io_fn(bdi, unplug_page);
+ }
+ } else {
+ multi->stripes[i].physical =
+ map->stripes[stripe_index].physical +
+ stripe_offset + stripe_nr * map->stripe_len;
+ multi->stripes[i].dev = map->stripes[stripe_index].dev;
+ }
stripe_index++;
}
- *multi_ret = multi;
+ if (multi_ret) {
+ *multi_ret = multi;
+ multi->num_stripes = num_stripes;
+ }
out:
free_extent_map(em);
return 0;
}
+int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
+ u64 logical, u64 *length,
+ struct btrfs_multi_bio **multi_ret, int mirror_num)
+{
+ return __btrfs_map_block(map_tree, rw, logical, length, multi_ret,
+ mirror_num, NULL);
+}
+
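+/* unplug only the device queues that can actually contain this page */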
+int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree,
+ u64 logical, struct page *page)
+{
+ u64 length = PAGE_CACHE_SIZE;
+ return __btrfs_map_block(map_tree, READ, logical, &length,
+ NULL, 0, page);
+}
+
#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
static void end_bio_multi_stripe(struct bio *bio, int err)
#else
	u64 logical = (u64)bio->bi_sector << 9;
u64 length = 0;
u64 map_length;
- struct bio_vec *bvec;
struct btrfs_multi_bio *multi = NULL;
- int i;
int ret;
int dev_nr = 0;
int total_devs = 1;
- bio_for_each_segment(bvec, bio, i) {
- length += bvec->bv_len;
- }
-
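+	/* bi_size already holds the byte count of the whole bio */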
+ length = bio->bi_size;
map_tree = &root->fs_info->mapping_tree;
map_length = length;
}
bio->bi_sector = multi->stripes[dev_nr].physical >> 9;
dev = multi->stripes[dev_nr].dev;
+
bio->bi_bdev = dev->bdev;
spin_lock(&dev->io_lock);
dev->total_ios++;
return -ENOMEM;
list_add(&device->dev_list,
&root->fs_info->fs_devices->devices);
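+	/* new devices are immediately eligible for chunk allocation */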
+ list_add(&device->dev_alloc_list,
+ &root->fs_info->fs_devices->alloc_list);
device->barriers = 1;
spin_lock_init(&device->io_lock);
}