#include <linux/sched.h>
#include <linux/bio.h>
#include <linux/buffer_head.h>
+#include <linux/blkdev.h>
+#include <asm/div64.h>
#include "ctree.h"
#include "extent_map.h"
#include "disk-io.h"
#include "volumes.h"
struct map_lookup {
- struct btrfs_device *dev;
- u64 physical;
+ u64 type;
+ int io_align;
+ int io_width;
+ int stripe_len;
+ int sector_size;
+ int num_stripes;
+ int sub_stripes;
+ struct btrfs_bio_stripe stripes[];
};
+
+#define map_lookup_size(n) (sizeof(struct map_lookup) + \
+ (sizeof(struct btrfs_bio_stripe) * (n)))
+
static DEFINE_MUTEX(uuid_mutex);
static LIST_HEAD(fs_uuids);
devices_cur = fs_devices->devices.next;
dev = list_entry(devices_cur, struct btrfs_device,
dev_list);
- printk("uuid cleanup finds %s\n", dev->name);
if (dev->bdev) {
- printk("closing\n");
close_bdev_excl(dev->bdev);
}
list_del(&dev->dev_list);
return 0;
}
-static struct btrfs_device *__find_device(struct list_head *head, u64 devid)
+static struct btrfs_device *__find_device(struct list_head *head, u64 devid,
+ u8 *uuid)
{
struct btrfs_device *dev;
struct list_head *cur;
list_for_each(cur, head) {
dev = list_entry(cur, struct btrfs_device, dev_list);
- if (dev->devid == devid)
+ if (dev->devid == devid &&
+ !memcmp(dev->uuid, uuid, BTRFS_UUID_SIZE)) {
return dev;
+ }
}
return NULL;
}
if (!fs_devices)
return -ENOMEM;
INIT_LIST_HEAD(&fs_devices->devices);
+ INIT_LIST_HEAD(&fs_devices->alloc_list);
list_add(&fs_devices->list, &fs_uuids);
memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE);
fs_devices->latest_devid = devid;
fs_devices->num_devices = 0;
device = NULL;
} else {
- device = __find_device(&fs_devices->devices, devid);
+ device = __find_device(&fs_devices->devices, devid,
+ disk_super->dev_item.uuid);
}
if (!device) {
device = kzalloc(sizeof(*device), GFP_NOFS);
return -ENOMEM;
}
device->devid = devid;
+ memcpy(device->uuid, disk_super->dev_item.uuid,
+ BTRFS_UUID_SIZE);
+ device->barriers = 1;
+ spin_lock_init(&device->io_lock);
device->name = kstrdup(path, GFP_NOFS);
if (!device->name) {
kfree(device);
return -ENOMEM;
}
list_add(&device->dev_list, &fs_devices->devices);
+ list_add(&device->dev_alloc_list, &fs_devices->alloc_list);
fs_devices->num_devices++;
}
}
if (fs_devices->lowest_devid > devid) {
fs_devices->lowest_devid = devid;
- printk("lowest devid now %Lu\n", devid);
}
*fs_devices_ret = fs_devices;
return 0;
device = list_entry(cur, struct btrfs_device, dev_list);
if (device->bdev) {
close_bdev_excl(device->bdev);
- printk("close devices closes %s\n", device->name);
}
device->bdev = NULL;
}
list_for_each(cur, head) {
device = list_entry(cur, struct btrfs_device, dev_list);
bdev = open_bdev_excl(device->name, flags, holder);
-printk("opening %s devid %Lu\n", device->name, device->devid);
+
if (IS_ERR(bdev)) {
printk("open %s failed\n", device->name);
ret = PTR_ERR(bdev);
fs_devices->latest_bdev = bdev;
if (device->devid == fs_devices->lowest_devid) {
fs_devices->lowest_bdev = bdev;
-printk("lowest bdev %s\n", device->name);
}
device->bdev = bdev;
}
struct buffer_head *bh;
int ret;
u64 devid;
+ u64 transid;
mutex_lock(&uuid_mutex);
- printk("scan one opens %s\n", path);
bdev = open_bdev_excl(path, flags, holder);
if (IS_ERR(bdev)) {
- printk("open failed\n");
ret = PTR_ERR(bdev);
goto error;
}
disk_super = (struct btrfs_super_block *)bh->b_data;
if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
sizeof(disk_super->magic))) {
- printk("no btrfs found on %s\n", path);
- ret = -ENOENT;
+ ret = -EINVAL;
goto error_brelse;
}
devid = le64_to_cpu(disk_super->dev_item.devid);
- printk("found device %Lu on %s\n", devid, path);
+ transid = btrfs_super_generation(disk_super);
+ if (disk_super->label[0])
+ printk("device label %s ", disk_super->label);
+ else {
+ /* FIXME, make a readl uuid parser */
+ printk("device fsid %llx-%llx ",
+ *(unsigned long long *)disk_super->fsid,
+ *(unsigned long long *)(disk_super->fsid + 8));
+ }
+ printk("devid %Lu transid %Lu %s\n", devid, transid, path);
ret = device_list_add(path, disk_super, devid, fs_devices_ret);
error_brelse:
brelse(bh);
error_close:
close_bdev_excl(bdev);
- printk("scan one closes bdev %s\n", path);
error:
mutex_unlock(&uuid_mutex);
return ret;
int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
struct btrfs_device *device,
- u64 owner, u64 num_bytes, u64 *start)
+ u64 chunk_tree, u64 chunk_objectid,
+ u64 chunk_offset,
+ u64 num_bytes, u64 *start)
{
int ret;
struct btrfs_path *path;
leaf = path->nodes[0];
extent = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_dev_extent);
- btrfs_set_dev_extent_owner(leaf, extent, owner);
+ btrfs_set_dev_extent_chunk_tree(leaf, extent, chunk_tree);
+ btrfs_set_dev_extent_chunk_objectid(leaf, extent, chunk_objectid);
+ btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);
+
+ write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
+ (unsigned long)btrfs_dev_extent_chunk_tree_uuid(extent),
+ BTRFS_UUID_SIZE);
+
btrfs_set_dev_extent_length(leaf, extent, num_bytes);
btrfs_mark_buffer_dirty(leaf);
err:
return ret;
}
-static int find_next_chunk(struct btrfs_root *root, u64 *objectid)
+static int find_next_chunk(struct btrfs_root *root, u64 objectid, u64 *offset)
{
struct btrfs_path *path;
int ret;
struct btrfs_key key;
+ struct btrfs_chunk *chunk;
struct btrfs_key found_key;
path = btrfs_alloc_path();
BUG_ON(!path);
- key.objectid = (u64)-1;
+ key.objectid = objectid;
key.offset = (u64)-1;
key.type = BTRFS_CHUNK_ITEM_KEY;
ret = btrfs_previous_item(root, path, 0, BTRFS_CHUNK_ITEM_KEY);
if (ret) {
- *objectid = 0;
+ *offset = 0;
} else {
btrfs_item_key_to_cpu(path->nodes[0], &found_key,
path->slots[0]);
- *objectid = found_key.objectid + found_key.offset;
+ if (found_key.objectid != objectid)
+ *offset = 0;
+ else {
+ chunk = btrfs_item_ptr(path->nodes[0], path->slots[0],
+ struct btrfs_chunk);
+ *offset = found_key.offset +
+ btrfs_chunk_length(path->nodes[0], chunk);
+ }
}
ret = 0;
error:
btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
+ btrfs_set_device_group(leaf, dev_item, 0);
+ btrfs_set_device_seek_speed(leaf, dev_item, 0);
+ btrfs_set_device_bandwidth(leaf, dev_item, 0);
ptr = (unsigned long)btrfs_device_uuid(dev_item);
- write_extent_buffer(leaf, device->uuid, ptr, BTRFS_DEV_UUID_SIZE);
+ write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
btrfs_mark_buffer_dirty(leaf);
ret = 0;
return 0;
}
+static u64 div_factor(u64 num, int factor)
+{
+ if (factor == 10)
+ return num;
+ num *= factor;
+ do_div(num, 10);
+ return num;
+}
+
+static u64 chunk_bytes_by_type(u64 type, u64 calc_size, int num_stripes,
+ int sub_stripes)
+{
+ if (type & (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP))
+ return calc_size;
+ else if (type & BTRFS_BLOCK_GROUP_RAID10)
+ return calc_size * (num_stripes / sub_stripes);
+ else
+ return calc_size * num_stripes;
+}
+
+
int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
struct btrfs_root *extent_root, u64 *start,
u64 *num_bytes, u64 type)
{
u64 dev_offset;
+ struct btrfs_fs_info *info = extent_root->fs_info;
struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root;
struct btrfs_stripe *stripes;
struct btrfs_device *device = NULL;
struct btrfs_chunk *chunk;
struct list_head private_devs;
- struct list_head *dev_list = &extent_root->fs_info->fs_devices->devices;
+ struct list_head *dev_list;
struct list_head *cur;
struct extent_map_tree *em_tree;
struct map_lookup *map;
struct extent_map *em;
+ int min_stripe_size = 1 * 1024 * 1024;
u64 physical;
u64 calc_size = 1024 * 1024 * 1024;
+ u64 max_chunk_size = calc_size;
+ u64 min_free;
u64 avail;
u64 max_avail = 0;
+ u64 percent_max;
int num_stripes = 1;
+ int min_stripes = 1;
+ int sub_stripes = 0;
int looped = 0;
int ret;
int index;
+ int stripe_len = 64 * 1024;
struct btrfs_key key;
+ dev_list = &extent_root->fs_info->fs_devices->alloc_list;
if (list_empty(dev_list))
return -ENOSPC;
+
+ if (type & (BTRFS_BLOCK_GROUP_RAID0)) {
+ num_stripes = btrfs_super_num_devices(&info->super_copy);
+ min_stripes = 2;
+ }
+ if (type & (BTRFS_BLOCK_GROUP_DUP)) {
+ num_stripes = 2;
+ min_stripes = 2;
+ }
+ if (type & (BTRFS_BLOCK_GROUP_RAID1)) {
+ num_stripes = min_t(u64, 2,
+ btrfs_super_num_devices(&info->super_copy));
+ if (num_stripes < 2)
+ return -ENOSPC;
+ min_stripes = 2;
+ }
+ if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
+ num_stripes = btrfs_super_num_devices(&info->super_copy);
+ if (num_stripes < 4)
+ return -ENOSPC;
+ num_stripes &= ~(u32)1;
+ sub_stripes = 2;
+ min_stripes = 4;
+ }
+
+ if (type & BTRFS_BLOCK_GROUP_DATA) {
+ max_chunk_size = 10 * calc_size;
+ min_stripe_size = 64 * 1024 * 1024;
+ } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
+ max_chunk_size = 4 * calc_size;
+ min_stripe_size = 32 * 1024 * 1024;
+ } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
+ calc_size = 8 * 1024 * 1024;
+ max_chunk_size = calc_size * 2;
+ min_stripe_size = 1 * 1024 * 1024;
+ }
+
+ /* we don't want a chunk larger than 10% of the FS */
+ percent_max = div_factor(btrfs_super_total_bytes(&info->super_copy), 1);
+ max_chunk_size = min(percent_max, max_chunk_size);
+
again:
+ if (calc_size * num_stripes > max_chunk_size) {
+ calc_size = max_chunk_size;
+ do_div(calc_size, num_stripes);
+ do_div(calc_size, stripe_len);
+ calc_size *= stripe_len;
+ }
+ /* we don't want tiny stripes */
+ calc_size = max_t(u64, min_stripe_size, calc_size);
+
+ do_div(calc_size, stripe_len);
+ calc_size *= stripe_len;
+
INIT_LIST_HEAD(&private_devs);
cur = dev_list->next;
index = 0;
+
+ if (type & BTRFS_BLOCK_GROUP_DUP)
+ min_free = calc_size * 2;
+ else
+ min_free = calc_size;
+
+ /* we add 1MB because we never use the first 1MB of the device */
+ min_free += 1024 * 1024;
+
/* build a private list of devices we will allocate from */
while(index < num_stripes) {
- device = list_entry(cur, struct btrfs_device, dev_list);
+ device = list_entry(cur, struct btrfs_device, dev_alloc_list);
+
avail = device->total_bytes - device->bytes_used;
cur = cur->next;
- if (avail > max_avail)
- max_avail = avail;
- if (avail >= calc_size) {
- list_move_tail(&device->dev_list, &private_devs);
+ if (avail >= min_free) {
+ list_move_tail(&device->dev_alloc_list, &private_devs);
index++;
- }
+ if (type & BTRFS_BLOCK_GROUP_DUP)
+ index++;
+ } else if (avail > max_avail)
+ max_avail = avail;
if (cur == dev_list)
break;
}
if (index < num_stripes) {
list_splice(&private_devs, dev_list);
+ if (index >= min_stripes) {
+ num_stripes = index;
+ if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
+ num_stripes /= sub_stripes;
+ num_stripes *= sub_stripes;
+ }
+ looped = 1;
+ goto again;
+ }
if (!looped && max_avail > 0) {
looped = 1;
calc_size = max_avail;
}
return -ENOSPC;
}
-
- ret = find_next_chunk(chunk_root, &key.objectid);
+ key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
+ key.type = BTRFS_CHUNK_ITEM_KEY;
+ ret = find_next_chunk(chunk_root, BTRFS_FIRST_CHUNK_TREE_OBJECTID,
+ &key.offset);
if (ret)
return ret;
if (!chunk)
return -ENOMEM;
+ map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
+ if (!map) {
+ kfree(chunk);
+ return -ENOMEM;
+ }
+
stripes = &chunk->stripe;
+ *num_bytes = chunk_bytes_by_type(type, calc_size,
+ num_stripes, sub_stripes);
+
- *num_bytes = calc_size;
index = 0;
+printk("new chunk type %Lu start %Lu size %Lu\n", type, key.offset, *num_bytes);
while(index < num_stripes) {
+ struct btrfs_stripe *stripe;
BUG_ON(list_empty(&private_devs));
cur = private_devs.next;
- device = list_entry(cur, struct btrfs_device, dev_list);
- list_move_tail(&device->dev_list, dev_list);
+ device = list_entry(cur, struct btrfs_device, dev_alloc_list);
+
+ /* loop over this device again if we're doing a dup group */
+ if (!(type & BTRFS_BLOCK_GROUP_DUP) ||
+ (index == num_stripes - 1))
+ list_move_tail(&device->dev_alloc_list, dev_list);
ret = btrfs_alloc_dev_extent(trans, device,
- key.objectid,
- calc_size, &dev_offset);
+ info->chunk_root->root_key.objectid,
+ BTRFS_FIRST_CHUNK_TREE_OBJECTID, key.offset,
+ calc_size, &dev_offset);
BUG_ON(ret);
-printk("alloc chunk size %Lu from dev %Lu\n", calc_size, device->devid);
+printk("alloc chunk start %Lu size %Lu from dev %Lu type %Lu\n", key.offset, calc_size, device->devid, type);
device->bytes_used += calc_size;
ret = btrfs_update_device(trans, device);
BUG_ON(ret);
- btrfs_set_stack_stripe_devid(stripes + index, device->devid);
- btrfs_set_stack_stripe_offset(stripes + index, dev_offset);
+ map->stripes[index].dev = device;
+ map->stripes[index].physical = dev_offset;
+ stripe = stripes + index;
+ btrfs_set_stack_stripe_devid(stripe, device->devid);
+ btrfs_set_stack_stripe_offset(stripe, dev_offset);
+ memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE);
physical = dev_offset;
index++;
}
BUG_ON(!list_empty(&private_devs));
- /* key.objectid was set above */
- key.offset = *num_bytes;
- key.type = BTRFS_CHUNK_ITEM_KEY;
+ /* key was set above */
+ btrfs_set_stack_chunk_length(chunk, *num_bytes);
btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid);
- btrfs_set_stack_chunk_stripe_len(chunk, 64 * 1024);
+ btrfs_set_stack_chunk_stripe_len(chunk, stripe_len);
btrfs_set_stack_chunk_type(chunk, type);
btrfs_set_stack_chunk_num_stripes(chunk, num_stripes);
- btrfs_set_stack_chunk_io_align(chunk, extent_root->sectorsize);
- btrfs_set_stack_chunk_io_width(chunk, extent_root->sectorsize);
+ btrfs_set_stack_chunk_io_align(chunk, stripe_len);
+ btrfs_set_stack_chunk_io_width(chunk, stripe_len);
btrfs_set_stack_chunk_sector_size(chunk, extent_root->sectorsize);
+ btrfs_set_stack_chunk_sub_stripes(chunk, sub_stripes);
+ map->sector_size = extent_root->sectorsize;
+ map->stripe_len = stripe_len;
+ map->io_align = stripe_len;
+ map->io_width = stripe_len;
+ map->type = type;
+ map->num_stripes = num_stripes;
+ map->sub_stripes = sub_stripes;
ret = btrfs_insert_item(trans, chunk_root, &key, chunk,
btrfs_chunk_item_size(num_stripes));
BUG_ON(ret);
- *start = key.objectid;
+ *start = key.offset;;
em = alloc_extent_map(GFP_NOFS);
if (!em)
return -ENOMEM;
- map = kmalloc(sizeof(*map), GFP_NOFS);
- if (!map) {
- free_extent_map(em);
- return -ENOMEM;
- }
-
em->bdev = (struct block_device *)map;
- em->start = key.objectid;
- em->len = key.offset;
+ em->start = key.offset;
+ em->len = *num_bytes;
em->block_start = 0;
- map->physical = physical;
- map->dev = device;
-
- if (!map->dev) {
- kfree(map);
- free_extent_map(em);
- return -EIO;
- }
kfree(chunk);
em_tree = &extent_root->fs_info->mapping_tree.map_tree;
spin_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
- BUG_ON(ret);
spin_unlock(&em_tree->lock);
+ BUG_ON(ret);
free_extent_map(em);
return ret;
}
}
}
-int btrfs_map_block(struct btrfs_mapping_tree *map_tree,
- u64 logical, u64 *phys, u64 *length,
- struct btrfs_device **dev)
+int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len)
+{
+ struct extent_map *em;
+ struct map_lookup *map;
+ struct extent_map_tree *em_tree = &map_tree->map_tree;
+ int ret;
+
+ spin_lock(&em_tree->lock);
+ em = lookup_extent_mapping(em_tree, logical, len);
+ spin_unlock(&em_tree->lock);
+ BUG_ON(!em);
+
+ BUG_ON(em->start > logical || em->start + em->len < logical);
+ map = (struct map_lookup *)em->bdev;
+ if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1))
+ ret = map->num_stripes;
+ else if (map->type & BTRFS_BLOCK_GROUP_RAID10)
+ ret = map->sub_stripes;
+ else
+ ret = 1;
+ free_extent_map(em);
+ return ret;
+}
+
+static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
+ u64 logical, u64 *length,
+ struct btrfs_multi_bio **multi_ret,
+ int mirror_num, struct page *unplug_page)
{
struct extent_map *em;
struct map_lookup *map;
struct extent_map_tree *em_tree = &map_tree->map_tree;
u64 offset;
+ u64 stripe_offset;
+ u64 stripe_nr;
+ int stripes_allocated = 8;
+ int stripes_required = 1;
+ int stripe_index;
+ int i;
+ int num_stripes;
+ struct btrfs_multi_bio *multi = NULL;
+ if (multi_ret && !(rw & (1 << BIO_RW))) {
+ stripes_allocated = 1;
+ }
+again:
+ if (multi_ret) {
+ multi = kzalloc(btrfs_multi_bio_size(stripes_allocated),
+ GFP_NOFS);
+ if (!multi)
+ return -ENOMEM;
+ }
spin_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, logical, *length);
- BUG_ON(!em);
+ spin_unlock(&em_tree->lock);
+
+ if (!em && unplug_page)
+ return 0;
+
+ if (!em) {
+ printk("unable to find logical %Lu\n", logical);
+ BUG();
+ }
BUG_ON(em->start > logical || em->start + em->len < logical);
map = (struct map_lookup *)em->bdev;
offset = logical - em->start;
- *phys = map->physical + offset;
- *length = em->len - offset;
- *dev = map->dev;
+
+ if (mirror_num > map->num_stripes)
+ mirror_num = 0;
+
+ /* if our multi bio struct is too small, back off and try again */
+ if (rw & (1 << BIO_RW)) {
+ if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
+ BTRFS_BLOCK_GROUP_DUP)) {
+ stripes_required = map->num_stripes;
+ } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
+ stripes_required = map->sub_stripes;
+ }
+ }
+ if (multi_ret && rw == WRITE &&
+ stripes_allocated < stripes_required) {
+ stripes_allocated = map->num_stripes;
+ free_extent_map(em);
+ kfree(multi);
+ goto again;
+ }
+ stripe_nr = offset;
+ /*
+ * stripe_nr counts the total number of stripes we have to stride
+ * to get to this block
+ */
+ do_div(stripe_nr, map->stripe_len);
+
+ stripe_offset = stripe_nr * map->stripe_len;
+ BUG_ON(offset < stripe_offset);
+
+ /* stripe_offset is the offset of this block in its stripe*/
+ stripe_offset = offset - stripe_offset;
+
+ if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
+ BTRFS_BLOCK_GROUP_RAID10 |
+ BTRFS_BLOCK_GROUP_DUP)) {
+ /* we limit the length of each bio to what fits in a stripe */
+ *length = min_t(u64, em->len - offset,
+ map->stripe_len - stripe_offset);
+ } else {
+ *length = em->len - offset;
+ }
+
+ if (!multi_ret && !unplug_page)
+ goto out;
+
+ num_stripes = 1;
+ stripe_index = 0;
+ if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
+ if (unplug_page || (rw & (1 << BIO_RW)))
+ num_stripes = map->num_stripes;
+ else if (mirror_num) {
+ stripe_index = mirror_num - 1;
+ } else {
+ u64 orig_stripe_nr = stripe_nr;
+ stripe_index = do_div(orig_stripe_nr, num_stripes);
+ }
+ } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
+ if (rw & (1 << BIO_RW))
+ num_stripes = map->num_stripes;
+ else if (mirror_num)
+ stripe_index = mirror_num - 1;
+ } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
+ int factor = map->num_stripes / map->sub_stripes;
+
+ stripe_index = do_div(stripe_nr, factor);
+ stripe_index *= map->sub_stripes;
+
+ if (unplug_page || (rw & (1 << BIO_RW)))
+ num_stripes = map->sub_stripes;
+ else if (mirror_num)
+ stripe_index += mirror_num - 1;
+ else {
+ u64 orig_stripe_nr = stripe_nr;
+ stripe_index += do_div(orig_stripe_nr,
+ map->sub_stripes);
+ }
+ } else {
+ /*
+ * after this do_div call, stripe_nr is the number of stripes
+ * on this device we have to walk to find the data, and
+ * stripe_index is the number of our device in the stripe array
+ */
+ stripe_index = do_div(stripe_nr, map->num_stripes);
+ }
+ BUG_ON(stripe_index >= map->num_stripes);
+
+ for (i = 0; i < num_stripes; i++) {
+ if (unplug_page) {
+ struct btrfs_device *device;
+ struct backing_dev_info *bdi;
+
+ device = map->stripes[stripe_index].dev;
+ bdi = blk_get_backing_dev_info(device->bdev);
+ if (bdi->unplug_io_fn) {
+ bdi->unplug_io_fn(bdi, unplug_page);
+ }
+ } else {
+ multi->stripes[i].physical =
+ map->stripes[stripe_index].physical +
+ stripe_offset + stripe_nr * map->stripe_len;
+ multi->stripes[i].dev = map->stripes[stripe_index].dev;
+ }
+ stripe_index++;
+ }
+ if (multi_ret) {
+ *multi_ret = multi;
+ multi->num_stripes = num_stripes;
+ }
+out:
free_extent_map(em);
- spin_unlock(&em_tree->lock);
return 0;
}
-int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio)
+int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
+ u64 logical, u64 *length,
+ struct btrfs_multi_bio **multi_ret, int mirror_num)
+{
+ return __btrfs_map_block(map_tree, rw, logical, length, multi_ret,
+ mirror_num, NULL);
+}
+
+int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree,
+ u64 logical, struct page *page)
+{
+ u64 length = PAGE_CACHE_SIZE;
+ return __btrfs_map_block(map_tree, READ, logical, &length,
+ NULL, 0, page);
+}
+
+
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
+static void end_bio_multi_stripe(struct bio *bio, int err)
+#else
+static int end_bio_multi_stripe(struct bio *bio,
+ unsigned int bytes_done, int err)
+#endif
+{
+ struct btrfs_multi_bio *multi = bio->bi_private;
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
+ if (bio->bi_size)
+ return 1;
+#endif
+ if (err)
+ multi->error = err;
+
+ if (atomic_dec_and_test(&multi->stripes_pending)) {
+ bio->bi_private = multi->private;
+ bio->bi_end_io = multi->end_io;
+
+ if (!err && multi->error)
+ err = multi->error;
+ kfree(multi);
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
+ bio_endio(bio, bio->bi_size, err);
+#else
+ bio_endio(bio, err);
+#endif
+ } else {
+ bio_put(bio);
+ }
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
+ return 0;
+#endif
+}
+
+int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
+ int mirror_num)
{
struct btrfs_mapping_tree *map_tree;
struct btrfs_device *dev;
+ struct bio *first_bio = bio;
u64 logical = bio->bi_sector << 9;
- u64 physical;
u64 length = 0;
u64 map_length;
- struct bio_vec *bvec;
- int i;
+ struct btrfs_multi_bio *multi = NULL;
int ret;
+ int dev_nr = 0;
+ int total_devs = 1;
- bio_for_each_segment(bvec, bio, i) {
- length += bvec->bv_len;
- }
+ length = bio->bi_size;
map_tree = &root->fs_info->mapping_tree;
map_length = length;
- ret = btrfs_map_block(map_tree, logical, &physical, &map_length, &dev);
+
+ ret = btrfs_map_block(map_tree, rw, logical, &map_length, &multi,
+ mirror_num);
+ BUG_ON(ret);
+
+ total_devs = multi->num_stripes;
if (map_length < length) {
- printk("mapping failed logical %Lu bio len %Lu physical %Lu "
- "len %Lu\n", logical, length, physical, map_length);
+ printk("mapping failed logical %Lu bio len %Lu "
+ "len %Lu\n", logical, length, map_length);
BUG();
}
- BUG_ON(map_length < length);
- bio->bi_sector = physical >> 9;
- bio->bi_bdev = dev->bdev;
- submit_bio(rw, bio);
+ multi->end_io = first_bio->bi_end_io;
+ multi->private = first_bio->bi_private;
+ atomic_set(&multi->stripes_pending, multi->num_stripes);
+
+ while(dev_nr < total_devs) {
+ if (total_devs > 1) {
+ if (dev_nr < total_devs - 1) {
+ bio = bio_clone(first_bio, GFP_NOFS);
+ BUG_ON(!bio);
+ } else {
+ bio = first_bio;
+ }
+ bio->bi_private = multi;
+ bio->bi_end_io = end_bio_multi_stripe;
+ }
+ bio->bi_sector = multi->stripes[dev_nr].physical >> 9;
+ dev = multi->stripes[dev_nr].dev;
+
+ bio->bi_bdev = dev->bdev;
+ spin_lock(&dev->io_lock);
+ dev->total_ios++;
+ spin_unlock(&dev->io_lock);
+ submit_bio(rw, bio);
+ dev_nr++;
+ }
+ if (total_devs == 1)
+ kfree(multi);
return 0;
}
-struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid)
+struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid,
+ u8 *uuid)
{
struct list_head *head = &root->fs_info->fs_devices->devices;
- return __find_device(head, devid);
+ return __find_device(head, devid, uuid);
}
static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
u64 logical;
u64 length;
u64 devid;
+ u8 uuid[BTRFS_UUID_SIZE];
+ int num_stripes;
int ret;
+ int i;
- logical = key->objectid;
- length = key->offset;
+ logical = key->offset;
+ length = btrfs_chunk_length(leaf, chunk);
spin_lock(&map_tree->map_tree.lock);
em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
+ spin_unlock(&map_tree->map_tree.lock);
/* already mapped? */
if (em && em->start <= logical && em->start + em->len > logical) {
free_extent_map(em);
- spin_unlock(&map_tree->map_tree.lock);
return 0;
} else if (em) {
free_extent_map(em);
}
- spin_unlock(&map_tree->map_tree.lock);
map = kzalloc(sizeof(*map), GFP_NOFS);
if (!map)
em = alloc_extent_map(GFP_NOFS);
if (!em)
return -ENOMEM;
- map = kmalloc(sizeof(*map), GFP_NOFS);
+ num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
+ map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
if (!map) {
free_extent_map(em);
return -ENOMEM;
em->len = length;
em->block_start = 0;
- map->physical = btrfs_stripe_offset_nr(leaf, chunk, 0);
- devid = btrfs_stripe_devid_nr(leaf, chunk, 0);
- map->dev = btrfs_find_device(root, devid);
- if (!map->dev) {
- kfree(map);
- free_extent_map(em);
- return -EIO;
+ map->num_stripes = num_stripes;
+ map->io_width = btrfs_chunk_io_width(leaf, chunk);
+ map->io_align = btrfs_chunk_io_align(leaf, chunk);
+ map->sector_size = btrfs_chunk_sector_size(leaf, chunk);
+ map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
+ map->type = btrfs_chunk_type(leaf, chunk);
+ map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
+ for (i = 0; i < num_stripes; i++) {
+ map->stripes[i].physical =
+ btrfs_stripe_offset_nr(leaf, chunk, i);
+ devid = btrfs_stripe_devid_nr(leaf, chunk, i);
+ read_extent_buffer(leaf, uuid, (unsigned long)
+ btrfs_stripe_dev_uuid_nr(chunk, i),
+ BTRFS_UUID_SIZE);
+ map->stripes[i].dev = btrfs_find_device(root, devid, uuid);
+ if (!map->stripes[i].dev) {
+ kfree(map);
+ free_extent_map(em);
+ return -EIO;
+ }
}
spin_lock(&map_tree->map_tree.lock);
ret = add_extent_mapping(&map_tree->map_tree, em);
- BUG_ON(ret);
spin_unlock(&map_tree->map_tree.lock);
+ BUG_ON(ret);
free_extent_map(em);
return 0;
device->sector_size = btrfs_device_sector_size(leaf, dev_item);
ptr = (unsigned long)btrfs_device_uuid(dev_item);
- read_extent_buffer(leaf, device->uuid, ptr, BTRFS_DEV_UUID_SIZE);
+ read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
return 0;
}
struct btrfs_device *device;
u64 devid;
int ret;
+ u8 dev_uuid[BTRFS_UUID_SIZE];
devid = btrfs_device_id(leaf, dev_item);
- device = btrfs_find_device(root, devid);
+ read_extent_buffer(leaf, dev_uuid,
+ (unsigned long)btrfs_device_uuid(dev_item),
+ BTRFS_UUID_SIZE);
+ device = btrfs_find_device(root, devid, dev_uuid);
if (!device) {
printk("warning devid %Lu not found already\n", devid);
- device = kmalloc(sizeof(*device), GFP_NOFS);
+ device = kzalloc(sizeof(*device), GFP_NOFS);
if (!device)
return -ENOMEM;
list_add(&device->dev_list,
&root->fs_info->fs_devices->devices);
+ list_add(&device->dev_alloc_list,
+ &root->fs_info->fs_devices->alloc_list);
+ device->barriers = 1;
+ spin_lock_init(&device->io_lock);
}
fill_device_from_item(leaf, dev_item, device);