2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
18 #include <linux/sched.h>
19 #include <linux/bio.h>
20 #include <linux/buffer_head.h>
21 #include <asm/div64.h>
23 #include "extent_map.h"
25 #include "transaction.h"
26 #include "print-tree.h"
37 struct btrfs_bio_stripe stripes
[];
40 #define map_lookup_size(n) (sizeof(struct map_lookup) + \
41 (sizeof(struct btrfs_bio_stripe) * (n)))
/* protects the global list of known filesystems (fs_uuids) below */
static DEFINE_MUTEX(uuid_mutex);
/* all btrfs_fs_devices ever seen by scanning, linked by ->list */
static LIST_HEAD(fs_uuids);
46 int btrfs_cleanup_fs_uuids(void)
48 struct btrfs_fs_devices
*fs_devices
;
49 struct list_head
*uuid_cur
;
50 struct list_head
*devices_cur
;
51 struct btrfs_device
*dev
;
53 list_for_each(uuid_cur
, &fs_uuids
) {
54 fs_devices
= list_entry(uuid_cur
, struct btrfs_fs_devices
,
56 while(!list_empty(&fs_devices
->devices
)) {
57 devices_cur
= fs_devices
->devices
.next
;
58 dev
= list_entry(devices_cur
, struct btrfs_device
,
60 printk("uuid cleanup finds %s\n", dev
->name
);
63 close_bdev_excl(dev
->bdev
);
65 list_del(&dev
->dev_list
);
72 static struct btrfs_device
*__find_device(struct list_head
*head
, u64 devid
)
74 struct btrfs_device
*dev
;
75 struct list_head
*cur
;
77 list_for_each(cur
, head
) {
78 dev
= list_entry(cur
, struct btrfs_device
, dev_list
);
79 if (dev
->devid
== devid
)
85 static struct btrfs_fs_devices
*find_fsid(u8
*fsid
)
87 struct list_head
*cur
;
88 struct btrfs_fs_devices
*fs_devices
;
90 list_for_each(cur
, &fs_uuids
) {
91 fs_devices
= list_entry(cur
, struct btrfs_fs_devices
, list
);
92 if (memcmp(fsid
, fs_devices
->fsid
, BTRFS_FSID_SIZE
) == 0)
98 static int device_list_add(const char *path
,
99 struct btrfs_super_block
*disk_super
,
100 u64 devid
, struct btrfs_fs_devices
**fs_devices_ret
)
102 struct btrfs_device
*device
;
103 struct btrfs_fs_devices
*fs_devices
;
104 u64 found_transid
= btrfs_super_generation(disk_super
);
106 fs_devices
= find_fsid(disk_super
->fsid
);
108 fs_devices
= kmalloc(sizeof(*fs_devices
), GFP_NOFS
);
111 INIT_LIST_HEAD(&fs_devices
->devices
);
112 list_add(&fs_devices
->list
, &fs_uuids
);
113 memcpy(fs_devices
->fsid
, disk_super
->fsid
, BTRFS_FSID_SIZE
);
114 fs_devices
->latest_devid
= devid
;
115 fs_devices
->latest_trans
= found_transid
;
116 fs_devices
->lowest_devid
= (u64
)-1;
117 fs_devices
->num_devices
= 0;
120 device
= __find_device(&fs_devices
->devices
, devid
);
123 device
= kzalloc(sizeof(*device
), GFP_NOFS
);
125 /* we can safely leave the fs_devices entry around */
128 device
->devid
= devid
;
129 device
->barriers
= 1;
130 spin_lock_init(&device
->io_lock
);
131 device
->name
= kstrdup(path
, GFP_NOFS
);
136 list_add(&device
->dev_list
, &fs_devices
->devices
);
137 fs_devices
->num_devices
++;
140 if (found_transid
> fs_devices
->latest_trans
) {
141 fs_devices
->latest_devid
= devid
;
142 fs_devices
->latest_trans
= found_transid
;
144 if (fs_devices
->lowest_devid
> devid
) {
145 fs_devices
->lowest_devid
= devid
;
146 printk("lowest devid now %Lu\n", devid
);
148 *fs_devices_ret
= fs_devices
;
152 int btrfs_close_devices(struct btrfs_fs_devices
*fs_devices
)
154 struct list_head
*head
= &fs_devices
->devices
;
155 struct list_head
*cur
;
156 struct btrfs_device
*device
;
158 mutex_lock(&uuid_mutex
);
159 list_for_each(cur
, head
) {
160 device
= list_entry(cur
, struct btrfs_device
, dev_list
);
162 close_bdev_excl(device
->bdev
);
163 printk("close devices closes %s\n", device
->name
);
167 mutex_unlock(&uuid_mutex
);
171 int btrfs_open_devices(struct btrfs_fs_devices
*fs_devices
,
172 int flags
, void *holder
)
174 struct block_device
*bdev
;
175 struct list_head
*head
= &fs_devices
->devices
;
176 struct list_head
*cur
;
177 struct btrfs_device
*device
;
180 mutex_lock(&uuid_mutex
);
181 list_for_each(cur
, head
) {
182 device
= list_entry(cur
, struct btrfs_device
, dev_list
);
183 bdev
= open_bdev_excl(device
->name
, flags
, holder
);
186 printk("open %s failed\n", device
->name
);
190 if (device
->devid
== fs_devices
->latest_devid
)
191 fs_devices
->latest_bdev
= bdev
;
192 if (device
->devid
== fs_devices
->lowest_devid
) {
193 fs_devices
->lowest_bdev
= bdev
;
197 mutex_unlock(&uuid_mutex
);
200 mutex_unlock(&uuid_mutex
);
201 btrfs_close_devices(fs_devices
);
205 int btrfs_scan_one_device(const char *path
, int flags
, void *holder
,
206 struct btrfs_fs_devices
**fs_devices_ret
)
208 struct btrfs_super_block
*disk_super
;
209 struct block_device
*bdev
;
210 struct buffer_head
*bh
;
215 mutex_lock(&uuid_mutex
);
217 printk("scan one opens %s\n", path
);
218 bdev
= open_bdev_excl(path
, flags
, holder
);
221 printk("open failed\n");
226 ret
= set_blocksize(bdev
, 4096);
229 bh
= __bread(bdev
, BTRFS_SUPER_INFO_OFFSET
/ 4096, 4096);
234 disk_super
= (struct btrfs_super_block
*)bh
->b_data
;
235 if (strncmp((char *)(&disk_super
->magic
), BTRFS_MAGIC
,
236 sizeof(disk_super
->magic
))) {
237 printk("no btrfs found on %s\n", path
);
241 devid
= le64_to_cpu(disk_super
->dev_item
.devid
);
242 transid
= btrfs_super_generation(disk_super
);
243 printk("found device %Lu transid %Lu on %s\n", devid
, transid
, path
);
244 ret
= device_list_add(path
, disk_super
, devid
, fs_devices_ret
);
249 close_bdev_excl(bdev
);
251 mutex_unlock(&uuid_mutex
);
256 * this uses a pretty simple search, the expectation is that it is
257 * called very infrequently and that a given device has a small number
260 static int find_free_dev_extent(struct btrfs_trans_handle
*trans
,
261 struct btrfs_device
*device
,
262 struct btrfs_path
*path
,
263 u64 num_bytes
, u64
*start
)
265 struct btrfs_key key
;
266 struct btrfs_root
*root
= device
->dev_root
;
267 struct btrfs_dev_extent
*dev_extent
= NULL
;
270 u64 search_start
= 0;
271 u64 search_end
= device
->total_bytes
;
275 struct extent_buffer
*l
;
280 /* FIXME use last free of some kind */
282 /* we don't want to overwrite the superblock on the drive,
283 * so we make sure to start at an offset of at least 1MB
285 search_start
= max((u64
)1024 * 1024, search_start
);
286 key
.objectid
= device
->devid
;
287 key
.offset
= search_start
;
288 key
.type
= BTRFS_DEV_EXTENT_KEY
;
289 ret
= btrfs_search_slot(trans
, root
, &key
, path
, 0, 0);
292 ret
= btrfs_previous_item(root
, path
, 0, key
.type
);
296 btrfs_item_key_to_cpu(l
, &key
, path
->slots
[0]);
299 slot
= path
->slots
[0];
300 if (slot
>= btrfs_header_nritems(l
)) {
301 ret
= btrfs_next_leaf(root
, path
);
308 if (search_start
>= search_end
) {
312 *start
= search_start
;
316 *start
= last_byte
> search_start
?
317 last_byte
: search_start
;
318 if (search_end
<= *start
) {
324 btrfs_item_key_to_cpu(l
, &key
, slot
);
326 if (key
.objectid
< device
->devid
)
329 if (key
.objectid
> device
->devid
)
332 if (key
.offset
>= search_start
&& key
.offset
> last_byte
&&
334 if (last_byte
< search_start
)
335 last_byte
= search_start
;
336 hole_size
= key
.offset
- last_byte
;
337 if (key
.offset
> last_byte
&&
338 hole_size
>= num_bytes
) {
343 if (btrfs_key_type(&key
) != BTRFS_DEV_EXTENT_KEY
) {
348 dev_extent
= btrfs_item_ptr(l
, slot
, struct btrfs_dev_extent
);
349 last_byte
= key
.offset
+ btrfs_dev_extent_length(l
, dev_extent
);
355 /* we have to make sure we didn't find an extent that has already
356 * been allocated by the map tree or the original allocation
358 btrfs_release_path(root
, path
);
359 BUG_ON(*start
< search_start
);
361 if (*start
+ num_bytes
> search_end
) {
365 /* check for pending inserts here */
369 btrfs_release_path(root
, path
);
373 int btrfs_alloc_dev_extent(struct btrfs_trans_handle
*trans
,
374 struct btrfs_device
*device
,
375 u64 chunk_tree
, u64 chunk_objectid
,
377 u64 num_bytes
, u64
*start
)
380 struct btrfs_path
*path
;
381 struct btrfs_root
*root
= device
->dev_root
;
382 struct btrfs_dev_extent
*extent
;
383 struct extent_buffer
*leaf
;
384 struct btrfs_key key
;
386 path
= btrfs_alloc_path();
390 ret
= find_free_dev_extent(trans
, device
, path
, num_bytes
, start
);
395 key
.objectid
= device
->devid
;
397 key
.type
= BTRFS_DEV_EXTENT_KEY
;
398 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
,
402 leaf
= path
->nodes
[0];
403 extent
= btrfs_item_ptr(leaf
, path
->slots
[0],
404 struct btrfs_dev_extent
);
405 btrfs_set_dev_extent_chunk_tree(leaf
, extent
, chunk_tree
);
406 btrfs_set_dev_extent_chunk_objectid(leaf
, extent
, chunk_objectid
);
407 btrfs_set_dev_extent_chunk_offset(leaf
, extent
, chunk_offset
);
409 write_extent_buffer(leaf
, root
->fs_info
->chunk_tree_uuid
,
410 (unsigned long)btrfs_dev_extent_chunk_tree_uuid(extent
),
413 btrfs_set_dev_extent_length(leaf
, extent
, num_bytes
);
414 btrfs_mark_buffer_dirty(leaf
);
416 btrfs_free_path(path
);
420 static int find_next_chunk(struct btrfs_root
*root
, u64 objectid
, u64
*offset
)
422 struct btrfs_path
*path
;
424 struct btrfs_key key
;
425 struct btrfs_chunk
*chunk
;
426 struct btrfs_key found_key
;
428 path
= btrfs_alloc_path();
431 key
.objectid
= objectid
;
432 key
.offset
= (u64
)-1;
433 key
.type
= BTRFS_CHUNK_ITEM_KEY
;
435 ret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
441 ret
= btrfs_previous_item(root
, path
, 0, BTRFS_CHUNK_ITEM_KEY
);
445 btrfs_item_key_to_cpu(path
->nodes
[0], &found_key
,
447 if (found_key
.objectid
!= objectid
)
450 chunk
= btrfs_item_ptr(path
->nodes
[0], path
->slots
[0],
452 *offset
= found_key
.offset
+
453 btrfs_chunk_length(path
->nodes
[0], chunk
);
458 btrfs_free_path(path
);
462 static int find_next_devid(struct btrfs_root
*root
, struct btrfs_path
*path
,
466 struct btrfs_key key
;
467 struct btrfs_key found_key
;
469 key
.objectid
= BTRFS_DEV_ITEMS_OBJECTID
;
470 key
.type
= BTRFS_DEV_ITEM_KEY
;
471 key
.offset
= (u64
)-1;
473 ret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
479 ret
= btrfs_previous_item(root
, path
, BTRFS_DEV_ITEMS_OBJECTID
,
484 btrfs_item_key_to_cpu(path
->nodes
[0], &found_key
,
486 *objectid
= found_key
.offset
+ 1;
490 btrfs_release_path(root
, path
);
495 * the device information is stored in the chunk root
496 * the btrfs_device struct should be fully filled in
498 int btrfs_add_device(struct btrfs_trans_handle
*trans
,
499 struct btrfs_root
*root
,
500 struct btrfs_device
*device
)
503 struct btrfs_path
*path
;
504 struct btrfs_dev_item
*dev_item
;
505 struct extent_buffer
*leaf
;
506 struct btrfs_key key
;
510 root
= root
->fs_info
->chunk_root
;
512 path
= btrfs_alloc_path();
516 ret
= find_next_devid(root
, path
, &free_devid
);
520 key
.objectid
= BTRFS_DEV_ITEMS_OBJECTID
;
521 key
.type
= BTRFS_DEV_ITEM_KEY
;
522 key
.offset
= free_devid
;
524 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
,
529 leaf
= path
->nodes
[0];
530 dev_item
= btrfs_item_ptr(leaf
, path
->slots
[0], struct btrfs_dev_item
);
532 device
->devid
= free_devid
;
533 btrfs_set_device_id(leaf
, dev_item
, device
->devid
);
534 btrfs_set_device_type(leaf
, dev_item
, device
->type
);
535 btrfs_set_device_io_align(leaf
, dev_item
, device
->io_align
);
536 btrfs_set_device_io_width(leaf
, dev_item
, device
->io_width
);
537 btrfs_set_device_sector_size(leaf
, dev_item
, device
->sector_size
);
538 btrfs_set_device_total_bytes(leaf
, dev_item
, device
->total_bytes
);
539 btrfs_set_device_bytes_used(leaf
, dev_item
, device
->bytes_used
);
540 btrfs_set_device_group(leaf
, dev_item
, 0);
541 btrfs_set_device_seek_speed(leaf
, dev_item
, 0);
542 btrfs_set_device_bandwidth(leaf
, dev_item
, 0);
544 ptr
= (unsigned long)btrfs_device_uuid(dev_item
);
545 write_extent_buffer(leaf
, device
->uuid
, ptr
, BTRFS_UUID_SIZE
);
546 btrfs_mark_buffer_dirty(leaf
);
550 btrfs_free_path(path
);
553 int btrfs_update_device(struct btrfs_trans_handle
*trans
,
554 struct btrfs_device
*device
)
557 struct btrfs_path
*path
;
558 struct btrfs_root
*root
;
559 struct btrfs_dev_item
*dev_item
;
560 struct extent_buffer
*leaf
;
561 struct btrfs_key key
;
563 root
= device
->dev_root
->fs_info
->chunk_root
;
565 path
= btrfs_alloc_path();
569 key
.objectid
= BTRFS_DEV_ITEMS_OBJECTID
;
570 key
.type
= BTRFS_DEV_ITEM_KEY
;
571 key
.offset
= device
->devid
;
573 ret
= btrfs_search_slot(trans
, root
, &key
, path
, 0, 1);
582 leaf
= path
->nodes
[0];
583 dev_item
= btrfs_item_ptr(leaf
, path
->slots
[0], struct btrfs_dev_item
);
585 btrfs_set_device_id(leaf
, dev_item
, device
->devid
);
586 btrfs_set_device_type(leaf
, dev_item
, device
->type
);
587 btrfs_set_device_io_align(leaf
, dev_item
, device
->io_align
);
588 btrfs_set_device_io_width(leaf
, dev_item
, device
->io_width
);
589 btrfs_set_device_sector_size(leaf
, dev_item
, device
->sector_size
);
590 btrfs_set_device_total_bytes(leaf
, dev_item
, device
->total_bytes
);
591 btrfs_set_device_bytes_used(leaf
, dev_item
, device
->bytes_used
);
592 btrfs_mark_buffer_dirty(leaf
);
595 btrfs_free_path(path
);
599 int btrfs_add_system_chunk(struct btrfs_trans_handle
*trans
,
600 struct btrfs_root
*root
,
601 struct btrfs_key
*key
,
602 struct btrfs_chunk
*chunk
, int item_size
)
604 struct btrfs_super_block
*super_copy
= &root
->fs_info
->super_copy
;
605 struct btrfs_disk_key disk_key
;
609 array_size
= btrfs_super_sys_array_size(super_copy
);
610 if (array_size
+ item_size
> BTRFS_SYSTEM_CHUNK_ARRAY_SIZE
)
613 ptr
= super_copy
->sys_chunk_array
+ array_size
;
614 btrfs_cpu_key_to_disk(&disk_key
, key
);
615 memcpy(ptr
, &disk_key
, sizeof(disk_key
));
616 ptr
+= sizeof(disk_key
);
617 memcpy(ptr
, chunk
, item_size
);
618 item_size
+= sizeof(disk_key
);
619 btrfs_set_super_sys_array_size(super_copy
, array_size
+ item_size
);
623 int btrfs_alloc_chunk(struct btrfs_trans_handle
*trans
,
624 struct btrfs_root
*extent_root
, u64
*start
,
625 u64
*num_bytes
, u64 type
)
628 struct btrfs_fs_info
*info
= extent_root
->fs_info
;
629 struct btrfs_root
*chunk_root
= extent_root
->fs_info
->chunk_root
;
630 struct btrfs_stripe
*stripes
;
631 struct btrfs_device
*device
= NULL
;
632 struct btrfs_chunk
*chunk
;
633 struct list_head private_devs
;
634 struct list_head
*dev_list
= &extent_root
->fs_info
->fs_devices
->devices
;
635 struct list_head
*cur
;
636 struct extent_map_tree
*em_tree
;
637 struct map_lookup
*map
;
638 struct extent_map
*em
;
640 u64 calc_size
= 1024 * 1024 * 1024;
641 u64 min_free
= calc_size
;
649 int stripe_len
= 64 * 1024;
650 struct btrfs_key key
;
652 if (list_empty(dev_list
))
655 if (type
& (BTRFS_BLOCK_GROUP_RAID0
))
656 num_stripes
= btrfs_super_num_devices(&info
->super_copy
);
657 if (type
& (BTRFS_BLOCK_GROUP_DUP
))
659 if (type
& (BTRFS_BLOCK_GROUP_RAID1
)) {
660 num_stripes
= min_t(u64
, 2,
661 btrfs_super_num_devices(&info
->super_copy
));
663 if (type
& (BTRFS_BLOCK_GROUP_RAID10
)) {
664 num_stripes
= btrfs_super_num_devices(&info
->super_copy
);
667 num_stripes
&= ~(u32
)1;
671 INIT_LIST_HEAD(&private_devs
);
672 cur
= dev_list
->next
;
675 if (type
& BTRFS_BLOCK_GROUP_DUP
)
676 min_free
= calc_size
* 2;
678 /* build a private list of devices we will allocate from */
679 while(index
< num_stripes
) {
680 device
= list_entry(cur
, struct btrfs_device
, dev_list
);
682 avail
= device
->total_bytes
- device
->bytes_used
;
684 if (avail
> max_avail
)
686 if (avail
>= min_free
) {
687 list_move_tail(&device
->dev_list
, &private_devs
);
689 if (type
& BTRFS_BLOCK_GROUP_DUP
)
695 if (index
< num_stripes
) {
696 list_splice(&private_devs
, dev_list
);
697 if (!looped
&& max_avail
> 0) {
699 calc_size
= max_avail
;
705 key
.objectid
= BTRFS_FIRST_CHUNK_TREE_OBJECTID
;
706 key
.type
= BTRFS_CHUNK_ITEM_KEY
;
707 ret
= find_next_chunk(chunk_root
, BTRFS_FIRST_CHUNK_TREE_OBJECTID
,
712 chunk
= kmalloc(btrfs_chunk_item_size(num_stripes
), GFP_NOFS
);
716 map
= kmalloc(map_lookup_size(num_stripes
), GFP_NOFS
);
722 stripes
= &chunk
->stripe
;
724 if (type
& (BTRFS_BLOCK_GROUP_RAID1
| BTRFS_BLOCK_GROUP_DUP
))
725 *num_bytes
= calc_size
;
726 else if (type
& BTRFS_BLOCK_GROUP_RAID10
)
727 *num_bytes
= calc_size
* num_stripes
/ sub_stripes
;
729 *num_bytes
= calc_size
* num_stripes
;
732 printk("new chunk type %Lu start %Lu size %Lu\n", type
, key
.offset
, *num_bytes
);
733 while(index
< num_stripes
) {
734 struct btrfs_stripe
*stripe
;
735 BUG_ON(list_empty(&private_devs
));
736 cur
= private_devs
.next
;
737 device
= list_entry(cur
, struct btrfs_device
, dev_list
);
739 /* loop over this device again if we're doing a dup group */
740 if (!(type
& BTRFS_BLOCK_GROUP_DUP
) ||
741 (index
== num_stripes
- 1))
742 list_move_tail(&device
->dev_list
, dev_list
);
744 ret
= btrfs_alloc_dev_extent(trans
, device
,
745 info
->chunk_root
->root_key
.objectid
,
746 BTRFS_FIRST_CHUNK_TREE_OBJECTID
, key
.offset
,
747 calc_size
, &dev_offset
);
749 printk("alloc chunk start %Lu size %Lu from dev %Lu type %Lu\n", key
.offset
, calc_size
, device
->devid
, type
);
750 device
->bytes_used
+= calc_size
;
751 ret
= btrfs_update_device(trans
, device
);
754 map
->stripes
[index
].dev
= device
;
755 map
->stripes
[index
].physical
= dev_offset
;
756 stripe
= stripes
+ index
;
757 btrfs_set_stack_stripe_devid(stripe
, device
->devid
);
758 btrfs_set_stack_stripe_offset(stripe
, dev_offset
);
759 memcpy(stripe
->dev_uuid
, device
->uuid
, BTRFS_UUID_SIZE
);
760 physical
= dev_offset
;
763 BUG_ON(!list_empty(&private_devs
));
765 /* key was set above */
766 btrfs_set_stack_chunk_length(chunk
, *num_bytes
);
767 btrfs_set_stack_chunk_owner(chunk
, extent_root
->root_key
.objectid
);
768 btrfs_set_stack_chunk_stripe_len(chunk
, stripe_len
);
769 btrfs_set_stack_chunk_type(chunk
, type
);
770 btrfs_set_stack_chunk_num_stripes(chunk
, num_stripes
);
771 btrfs_set_stack_chunk_io_align(chunk
, stripe_len
);
772 btrfs_set_stack_chunk_io_width(chunk
, stripe_len
);
773 btrfs_set_stack_chunk_sector_size(chunk
, extent_root
->sectorsize
);
774 btrfs_set_stack_chunk_sub_stripes(chunk
, sub_stripes
);
775 map
->sector_size
= extent_root
->sectorsize
;
776 map
->stripe_len
= stripe_len
;
777 map
->io_align
= stripe_len
;
778 map
->io_width
= stripe_len
;
780 map
->num_stripes
= num_stripes
;
781 map
->sub_stripes
= sub_stripes
;
783 ret
= btrfs_insert_item(trans
, chunk_root
, &key
, chunk
,
784 btrfs_chunk_item_size(num_stripes
));
786 *start
= key
.offset
;;
788 em
= alloc_extent_map(GFP_NOFS
);
791 em
->bdev
= (struct block_device
*)map
;
792 em
->start
= key
.offset
;
793 em
->len
= *num_bytes
;
798 em_tree
= &extent_root
->fs_info
->mapping_tree
.map_tree
;
799 spin_lock(&em_tree
->lock
);
800 ret
= add_extent_mapping(em_tree
, em
);
801 spin_unlock(&em_tree
->lock
);
807 void btrfs_mapping_init(struct btrfs_mapping_tree
*tree
)
809 extent_map_tree_init(&tree
->map_tree
, GFP_NOFS
);
812 void btrfs_mapping_tree_free(struct btrfs_mapping_tree
*tree
)
814 struct extent_map
*em
;
817 spin_lock(&tree
->map_tree
.lock
);
818 em
= lookup_extent_mapping(&tree
->map_tree
, 0, (u64
)-1);
820 remove_extent_mapping(&tree
->map_tree
, em
);
821 spin_unlock(&tree
->map_tree
.lock
);
827 /* once for the tree */
832 int btrfs_num_copies(struct btrfs_mapping_tree
*map_tree
, u64 logical
, u64 len
)
834 struct extent_map
*em
;
835 struct map_lookup
*map
;
836 struct extent_map_tree
*em_tree
= &map_tree
->map_tree
;
839 spin_lock(&em_tree
->lock
);
840 em
= lookup_extent_mapping(em_tree
, logical
, len
);
841 spin_unlock(&em_tree
->lock
);
844 BUG_ON(em
->start
> logical
|| em
->start
+ em
->len
< logical
);
845 map
= (struct map_lookup
*)em
->bdev
;
846 if (map
->type
& (BTRFS_BLOCK_GROUP_DUP
| BTRFS_BLOCK_GROUP_RAID1
))
847 ret
= map
->num_stripes
;
848 else if (map
->type
& BTRFS_BLOCK_GROUP_RAID10
)
849 ret
= map
->sub_stripes
;
856 int btrfs_map_block(struct btrfs_mapping_tree
*map_tree
, int rw
,
857 u64 logical
, u64
*length
,
858 struct btrfs_multi_bio
**multi_ret
, int mirror_num
)
860 struct extent_map
*em
;
861 struct map_lookup
*map
;
862 struct extent_map_tree
*em_tree
= &map_tree
->map_tree
;
866 int stripes_allocated
= 8;
867 int stripes_required
= 1;
870 struct btrfs_multi_bio
*multi
= NULL
;
872 if (multi_ret
&& !(rw
& (1 << BIO_RW
))) {
873 stripes_allocated
= 1;
877 multi
= kzalloc(btrfs_multi_bio_size(stripes_allocated
),
883 spin_lock(&em_tree
->lock
);
884 em
= lookup_extent_mapping(em_tree
, logical
, *length
);
885 spin_unlock(&em_tree
->lock
);
888 BUG_ON(em
->start
> logical
|| em
->start
+ em
->len
< logical
);
889 map
= (struct map_lookup
*)em
->bdev
;
890 offset
= logical
- em
->start
;
892 if (mirror_num
> map
->num_stripes
)
895 /* if our multi bio struct is too small, back off and try again */
896 if (rw
& (1 << BIO_RW
)) {
897 if (map
->type
& (BTRFS_BLOCK_GROUP_RAID1
|
898 BTRFS_BLOCK_GROUP_DUP
)) {
899 stripes_required
= map
->num_stripes
;
900 } else if (map
->type
& BTRFS_BLOCK_GROUP_RAID10
) {
901 stripes_required
= map
->sub_stripes
;
904 if (multi_ret
&& rw
== WRITE
&&
905 stripes_allocated
< stripes_required
) {
906 stripes_allocated
= map
->num_stripes
;
913 * stripe_nr counts the total number of stripes we have to stride
914 * to get to this block
916 do_div(stripe_nr
, map
->stripe_len
);
918 stripe_offset
= stripe_nr
* map
->stripe_len
;
919 BUG_ON(offset
< stripe_offset
);
921 /* stripe_offset is the offset of this block in its stripe*/
922 stripe_offset
= offset
- stripe_offset
;
924 if (map
->type
& (BTRFS_BLOCK_GROUP_RAID0
| BTRFS_BLOCK_GROUP_RAID1
|
925 BTRFS_BLOCK_GROUP_RAID10
|
926 BTRFS_BLOCK_GROUP_DUP
)) {
927 /* we limit the length of each bio to what fits in a stripe */
928 *length
= min_t(u64
, em
->len
- offset
,
929 map
->stripe_len
- stripe_offset
);
931 *length
= em
->len
- offset
;
936 multi
->num_stripes
= 1;
938 if (map
->type
& BTRFS_BLOCK_GROUP_RAID1
) {
939 if (rw
& (1 << BIO_RW
))
940 multi
->num_stripes
= map
->num_stripes
;
941 else if (mirror_num
) {
942 stripe_index
= mirror_num
- 1;
946 struct btrfs_device
*cur
;
948 for (i
= 0; i
< map
->num_stripes
; i
++) {
949 cur
= map
->stripes
[i
].dev
;
950 spin_lock(&cur
->io_lock
);
951 if (cur
->total_ios
< least
) {
952 least
= cur
->total_ios
;
955 spin_unlock(&cur
->io_lock
);
958 } else if (map
->type
& BTRFS_BLOCK_GROUP_DUP
) {
959 if (rw
& (1 << BIO_RW
))
960 multi
->num_stripes
= map
->num_stripes
;
962 stripe_index
= mirror_num
- 1;
963 } else if (map
->type
& BTRFS_BLOCK_GROUP_RAID10
) {
964 int factor
= map
->num_stripes
/ map
->sub_stripes
;
965 int orig_stripe_nr
= stripe_nr
;
967 stripe_index
= do_div(stripe_nr
, factor
);
968 stripe_index
*= map
->sub_stripes
;
970 if (rw
& (1 << BIO_RW
))
971 multi
->num_stripes
= map
->sub_stripes
;
973 stripe_index
+= mirror_num
- 1;
975 stripe_index
+= orig_stripe_nr
% map
->sub_stripes
;
978 * after this do_div call, stripe_nr is the number of stripes
979 * on this device we have to walk to find the data, and
980 * stripe_index is the number of our device in the stripe array
982 stripe_index
= do_div(stripe_nr
, map
->num_stripes
);
984 BUG_ON(stripe_index
>= map
->num_stripes
);
986 for (i
= 0; i
< multi
->num_stripes
; i
++) {
987 multi
->stripes
[i
].physical
=
988 map
->stripes
[stripe_index
].physical
+ stripe_offset
+
989 stripe_nr
* map
->stripe_len
;
990 multi
->stripes
[i
].dev
= map
->stripes
[stripe_index
].dev
;
999 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
1000 static void end_bio_multi_stripe(struct bio
*bio
, int err
)
1002 static int end_bio_multi_stripe(struct bio
*bio
,
1003 unsigned int bytes_done
, int err
)
1006 struct btrfs_multi_bio
*multi
= bio
->bi_private
;
1008 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1015 if (atomic_dec_and_test(&multi
->stripes_pending
)) {
1016 bio
->bi_private
= multi
->private;
1017 bio
->bi_end_io
= multi
->end_io
;
1019 if (!err
&& multi
->error
)
1023 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1024 bio_endio(bio
, bio
->bi_size
, err
);
1026 bio_endio(bio
, err
);
1031 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1036 int btrfs_map_bio(struct btrfs_root
*root
, int rw
, struct bio
*bio
,
1039 struct btrfs_mapping_tree
*map_tree
;
1040 struct btrfs_device
*dev
;
1041 struct bio
*first_bio
= bio
;
1042 u64 logical
= bio
->bi_sector
<< 9;
1045 struct bio_vec
*bvec
;
1046 struct btrfs_multi_bio
*multi
= NULL
;
1052 bio_for_each_segment(bvec
, bio
, i
) {
1053 length
+= bvec
->bv_len
;
1056 map_tree
= &root
->fs_info
->mapping_tree
;
1057 map_length
= length
;
1059 ret
= btrfs_map_block(map_tree
, rw
, logical
, &map_length
, &multi
,
1063 total_devs
= multi
->num_stripes
;
1064 if (map_length
< length
) {
1065 printk("mapping failed logical %Lu bio len %Lu "
1066 "len %Lu\n", logical
, length
, map_length
);
1069 multi
->end_io
= first_bio
->bi_end_io
;
1070 multi
->private = first_bio
->bi_private
;
1071 atomic_set(&multi
->stripes_pending
, multi
->num_stripes
);
1073 while(dev_nr
< total_devs
) {
1074 if (total_devs
> 1) {
1075 if (dev_nr
< total_devs
- 1) {
1076 bio
= bio_clone(first_bio
, GFP_NOFS
);
1081 bio
->bi_private
= multi
;
1082 bio
->bi_end_io
= end_bio_multi_stripe
;
1084 bio
->bi_sector
= multi
->stripes
[dev_nr
].physical
>> 9;
1085 dev
= multi
->stripes
[dev_nr
].dev
;
1086 bio
->bi_bdev
= dev
->bdev
;
1087 spin_lock(&dev
->io_lock
);
1089 spin_unlock(&dev
->io_lock
);
1090 submit_bio(rw
, bio
);
1093 if (total_devs
== 1)
1098 struct btrfs_device
*btrfs_find_device(struct btrfs_root
*root
, u64 devid
)
1100 struct list_head
*head
= &root
->fs_info
->fs_devices
->devices
;
1102 return __find_device(head
, devid
);
1105 static int read_one_chunk(struct btrfs_root
*root
, struct btrfs_key
*key
,
1106 struct extent_buffer
*leaf
,
1107 struct btrfs_chunk
*chunk
)
1109 struct btrfs_mapping_tree
*map_tree
= &root
->fs_info
->mapping_tree
;
1110 struct map_lookup
*map
;
1111 struct extent_map
*em
;
1119 logical
= key
->offset
;
1120 length
= btrfs_chunk_length(leaf
, chunk
);
1121 spin_lock(&map_tree
->map_tree
.lock
);
1122 em
= lookup_extent_mapping(&map_tree
->map_tree
, logical
, 1);
1123 spin_unlock(&map_tree
->map_tree
.lock
);
1125 /* already mapped? */
1126 if (em
&& em
->start
<= logical
&& em
->start
+ em
->len
> logical
) {
1127 free_extent_map(em
);
1130 free_extent_map(em
);
1133 map
= kzalloc(sizeof(*map
), GFP_NOFS
);
1137 em
= alloc_extent_map(GFP_NOFS
);
1140 num_stripes
= btrfs_chunk_num_stripes(leaf
, chunk
);
1141 map
= kmalloc(map_lookup_size(num_stripes
), GFP_NOFS
);
1143 free_extent_map(em
);
1147 em
->bdev
= (struct block_device
*)map
;
1148 em
->start
= logical
;
1150 em
->block_start
= 0;
1152 map
->num_stripes
= num_stripes
;
1153 map
->io_width
= btrfs_chunk_io_width(leaf
, chunk
);
1154 map
->io_align
= btrfs_chunk_io_align(leaf
, chunk
);
1155 map
->sector_size
= btrfs_chunk_sector_size(leaf
, chunk
);
1156 map
->stripe_len
= btrfs_chunk_stripe_len(leaf
, chunk
);
1157 map
->type
= btrfs_chunk_type(leaf
, chunk
);
1158 map
->sub_stripes
= btrfs_chunk_sub_stripes(leaf
, chunk
);
1159 for (i
= 0; i
< num_stripes
; i
++) {
1160 map
->stripes
[i
].physical
=
1161 btrfs_stripe_offset_nr(leaf
, chunk
, i
);
1162 devid
= btrfs_stripe_devid_nr(leaf
, chunk
, i
);
1163 map
->stripes
[i
].dev
= btrfs_find_device(root
, devid
);
1164 if (!map
->stripes
[i
].dev
) {
1166 free_extent_map(em
);
1171 spin_lock(&map_tree
->map_tree
.lock
);
1172 ret
= add_extent_mapping(&map_tree
->map_tree
, em
);
1173 spin_unlock(&map_tree
->map_tree
.lock
);
1175 free_extent_map(em
);
1180 static int fill_device_from_item(struct extent_buffer
*leaf
,
1181 struct btrfs_dev_item
*dev_item
,
1182 struct btrfs_device
*device
)
1186 device
->devid
= btrfs_device_id(leaf
, dev_item
);
1187 device
->total_bytes
= btrfs_device_total_bytes(leaf
, dev_item
);
1188 device
->bytes_used
= btrfs_device_bytes_used(leaf
, dev_item
);
1189 device
->type
= btrfs_device_type(leaf
, dev_item
);
1190 device
->io_align
= btrfs_device_io_align(leaf
, dev_item
);
1191 device
->io_width
= btrfs_device_io_width(leaf
, dev_item
);
1192 device
->sector_size
= btrfs_device_sector_size(leaf
, dev_item
);
1194 ptr
= (unsigned long)btrfs_device_uuid(dev_item
);
1195 read_extent_buffer(leaf
, device
->uuid
, ptr
, BTRFS_UUID_SIZE
);
1200 static int read_one_dev(struct btrfs_root
*root
,
1201 struct extent_buffer
*leaf
,
1202 struct btrfs_dev_item
*dev_item
)
1204 struct btrfs_device
*device
;
1207 devid
= btrfs_device_id(leaf
, dev_item
);
1208 device
= btrfs_find_device(root
, devid
);
1210 printk("warning devid %Lu not found already\n", devid
);
1211 device
= kzalloc(sizeof(*device
), GFP_NOFS
);
1214 list_add(&device
->dev_list
,
1215 &root
->fs_info
->fs_devices
->devices
);
1216 device
->barriers
= 1;
1217 spin_lock_init(&device
->io_lock
);
1220 fill_device_from_item(leaf
, dev_item
, device
);
1221 device
->dev_root
= root
->fs_info
->dev_root
;
1224 ret
= btrfs_open_device(device
);
1232 int btrfs_read_super_device(struct btrfs_root
*root
, struct extent_buffer
*buf
)
1234 struct btrfs_dev_item
*dev_item
;
1236 dev_item
= (struct btrfs_dev_item
*)offsetof(struct btrfs_super_block
,
1238 return read_one_dev(root
, buf
, dev_item
);
1241 int btrfs_read_sys_array(struct btrfs_root
*root
)
1243 struct btrfs_super_block
*super_copy
= &root
->fs_info
->super_copy
;
1244 struct extent_buffer
*sb
= root
->fs_info
->sb_buffer
;
1245 struct btrfs_disk_key
*disk_key
;
1246 struct btrfs_chunk
*chunk
;
1247 struct btrfs_key key
;
1252 unsigned long sb_ptr
;
1256 array_size
= btrfs_super_sys_array_size(super_copy
);
1259 * we do this loop twice, once for the device items and
1260 * once for all of the chunks. This way there are device
1261 * structs filled in for every chunk
1263 ptr
= super_copy
->sys_chunk_array
;
1264 sb_ptr
= offsetof(struct btrfs_super_block
, sys_chunk_array
);
1267 while (cur
< array_size
) {
1268 disk_key
= (struct btrfs_disk_key
*)ptr
;
1269 btrfs_disk_key_to_cpu(&key
, disk_key
);
1271 len
= sizeof(*disk_key
);
1276 if (key
.type
== BTRFS_CHUNK_ITEM_KEY
) {
1277 chunk
= (struct btrfs_chunk
*)sb_ptr
;
1278 ret
= read_one_chunk(root
, &key
, sb
, chunk
);
1280 num_stripes
= btrfs_chunk_num_stripes(sb
, chunk
);
1281 len
= btrfs_chunk_item_size(num_stripes
);
1292 int btrfs_read_chunk_tree(struct btrfs_root
*root
)
1294 struct btrfs_path
*path
;
1295 struct extent_buffer
*leaf
;
1296 struct btrfs_key key
;
1297 struct btrfs_key found_key
;
1301 root
= root
->fs_info
->chunk_root
;
1303 path
= btrfs_alloc_path();
1307 /* first we search for all of the device items, and then we
1308 * read in all of the chunk items. This way we can create chunk
1309 * mappings that reference all of the devices that are afound
1311 key
.objectid
= BTRFS_DEV_ITEMS_OBJECTID
;
1315 ret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
1317 leaf
= path
->nodes
[0];
1318 slot
= path
->slots
[0];
1319 if (slot
>= btrfs_header_nritems(leaf
)) {
1320 ret
= btrfs_next_leaf(root
, path
);
1327 btrfs_item_key_to_cpu(leaf
, &found_key
, slot
);
1328 if (key
.objectid
== BTRFS_DEV_ITEMS_OBJECTID
) {
1329 if (found_key
.objectid
!= BTRFS_DEV_ITEMS_OBJECTID
)
1331 if (found_key
.type
== BTRFS_DEV_ITEM_KEY
) {
1332 struct btrfs_dev_item
*dev_item
;
1333 dev_item
= btrfs_item_ptr(leaf
, slot
,
1334 struct btrfs_dev_item
);
1335 ret
= read_one_dev(root
, leaf
, dev_item
);
1338 } else if (found_key
.type
== BTRFS_CHUNK_ITEM_KEY
) {
1339 struct btrfs_chunk
*chunk
;
1340 chunk
= btrfs_item_ptr(leaf
, slot
, struct btrfs_chunk
);
1341 ret
= read_one_chunk(root
, &found_key
, leaf
, chunk
);
1345 if (key
.objectid
== BTRFS_DEV_ITEMS_OBJECTID
) {
1347 btrfs_release_path(root
, path
);
1351 btrfs_free_path(path
);