Btrfs: Allocator fix variety pack
fs/btrfs/volumes.c
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
#include <linux/sched.h>
#include <linux/bio.h>
#include <linux/buffer_head.h>
#include <linux/blkdev.h>
#include <linux/random.h>
#include <asm/div64.h>
#include "ctree.h"
#include "extent_map.h"
#include "disk-io.h"
#include "transaction.h"
#include "print-tree.h"
#include "volumes.h"

struct map_lookup {
	u64 type;
	int io_align;
	int io_width;
	int stripe_len;
	int sector_size;
	int num_stripes;
	int sub_stripes;
	struct btrfs_bio_stripe stripes[];
};

#define map_lookup_size(n) (sizeof(struct map_lookup) + \
			    (sizeof(struct btrfs_bio_stripe) * (n)))

static DEFINE_MUTEX(uuid_mutex);
static LIST_HEAD(fs_uuids);

void btrfs_lock_volumes(void)
{
	mutex_lock(&uuid_mutex);
}

void btrfs_unlock_volumes(void)
{
	mutex_unlock(&uuid_mutex);
}

int btrfs_cleanup_fs_uuids(void)
{
	struct btrfs_fs_devices *fs_devices;
	struct list_head *uuid_cur;
	struct list_head *devices_cur;
	struct btrfs_device *dev;

	list_for_each(uuid_cur, &fs_uuids) {
		fs_devices = list_entry(uuid_cur, struct btrfs_fs_devices,
					list);
		while (!list_empty(&fs_devices->devices)) {
			devices_cur = fs_devices->devices.next;
			dev = list_entry(devices_cur, struct btrfs_device,
					 dev_list);
			if (dev->bdev) {
				close_bdev_excl(dev->bdev);
				fs_devices->open_devices--;
			}
			list_del(&dev->dev_list);
			kfree(dev->name);
			kfree(dev);
		}
	}
	return 0;
}

static struct btrfs_device *__find_device(struct list_head *head, u64 devid,
					  u8 *uuid)
{
	struct btrfs_device *dev;
	struct list_head *cur;

	list_for_each(cur, head) {
		dev = list_entry(cur, struct btrfs_device, dev_list);
		if (dev->devid == devid &&
		    (!uuid || !memcmp(dev->uuid, uuid, BTRFS_UUID_SIZE))) {
			return dev;
		}
	}
	return NULL;
}

static struct btrfs_fs_devices *find_fsid(u8 *fsid)
{
	struct list_head *cur;
	struct btrfs_fs_devices *fs_devices;

	list_for_each(cur, &fs_uuids) {
		fs_devices = list_entry(cur, struct btrfs_fs_devices, list);
		if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0)
			return fs_devices;
	}
	return NULL;
}

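/*
 * add a device found by scanning to the list of devices for its fsid,
 * creating a new btrfs_fs_devices entry if this is the first device
 * seen for that filesystem
 */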
static int device_list_add(const char *path,
			   struct btrfs_super_block *disk_super,
			   u64 devid, struct btrfs_fs_devices **fs_devices_ret)
{
	struct btrfs_device *device;
	struct btrfs_fs_devices *fs_devices;
	u64 found_transid = btrfs_super_generation(disk_super);

	fs_devices = find_fsid(disk_super->fsid);
	if (!fs_devices) {
		fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
		if (!fs_devices)
			return -ENOMEM;
		INIT_LIST_HEAD(&fs_devices->devices);
		INIT_LIST_HEAD(&fs_devices->alloc_list);
		list_add(&fs_devices->list, &fs_uuids);
		memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE);
		fs_devices->latest_devid = devid;
		fs_devices->latest_trans = found_transid;
		device = NULL;
	} else {
		device = __find_device(&fs_devices->devices, devid,
				       disk_super->dev_item.uuid);
	}
	if (!device) {
		device = kzalloc(sizeof(*device), GFP_NOFS);
		if (!device) {
			/* we can safely leave the fs_devices entry around */
			return -ENOMEM;
		}
		device->devid = devid;
		memcpy(device->uuid, disk_super->dev_item.uuid,
		       BTRFS_UUID_SIZE);
		device->barriers = 1;
		spin_lock_init(&device->io_lock);
		device->name = kstrdup(path, GFP_NOFS);
		if (!device->name) {
			kfree(device);
			return -ENOMEM;
		}
		list_add(&device->dev_list, &fs_devices->devices);
		list_add(&device->dev_alloc_list, &fs_devices->alloc_list);
		fs_devices->num_devices++;
	}

	if (found_transid > fs_devices->latest_trans) {
		fs_devices->latest_devid = devid;
		fs_devices->latest_trans = found_transid;
	}
	*fs_devices_ret = fs_devices;
	return 0;
}

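/*
 * drop any devices that were scanned but are not referenced by the
 * filesystem metadata
 */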
int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices)
{
	struct list_head *head = &fs_devices->devices;
	struct list_head *cur;
	struct btrfs_device *device;

	mutex_lock(&uuid_mutex);
again:
	list_for_each(cur, head) {
		device = list_entry(cur, struct btrfs_device, dev_list);
		if (!device->in_fs_metadata) {
			if (device->bdev) {
				close_bdev_excl(device->bdev);
				fs_devices->open_devices--;
			}
			list_del(&device->dev_list);
			list_del(&device->dev_alloc_list);
			fs_devices->num_devices--;
			kfree(device->name);
			kfree(device);
			goto again;
		}
	}
	mutex_unlock(&uuid_mutex);
	return 0;
}

int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
{
	struct list_head *head = &fs_devices->devices;
	struct list_head *cur;
	struct btrfs_device *device;

	mutex_lock(&uuid_mutex);
	list_for_each(cur, head) {
		device = list_entry(cur, struct btrfs_device, dev_list);
		if (device->bdev) {
			close_bdev_excl(device->bdev);
			fs_devices->open_devices--;
		}
		device->bdev = NULL;
		device->in_fs_metadata = 0;
	}
	fs_devices->mounted = 0;
	mutex_unlock(&uuid_mutex);
	return 0;
}

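/*
 * open every device in the list, verify its superblock and remember
 * the device holding the most recent transaction as latest_bdev
 */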
int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
		       int flags, void *holder)
{
	struct block_device *bdev;
	struct list_head *head = &fs_devices->devices;
	struct list_head *cur;
	struct btrfs_device *device;
	struct block_device *latest_bdev = NULL;
	struct buffer_head *bh;
	struct btrfs_super_block *disk_super;
	u64 latest_devid = 0;
	u64 latest_transid = 0;
	u64 transid;
	u64 devid;
	int ret = 0;

	mutex_lock(&uuid_mutex);
	if (fs_devices->mounted)
		goto out;

	list_for_each(cur, head) {
		device = list_entry(cur, struct btrfs_device, dev_list);
		if (device->bdev)
			continue;

		if (!device->name)
			continue;

		bdev = open_bdev_excl(device->name, flags, holder);

		if (IS_ERR(bdev)) {
			printk("open %s failed\n", device->name);
			goto error;
		}
		set_blocksize(bdev, 4096);

		bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096);
		if (!bh)
			goto error_close;

		disk_super = (struct btrfs_super_block *)bh->b_data;
		if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
			    sizeof(disk_super->magic)))
			goto error_brelse;

		devid = le64_to_cpu(disk_super->dev_item.devid);
		if (devid != device->devid)
			goto error_brelse;

		transid = btrfs_super_generation(disk_super);
		if (!latest_transid || transid > latest_transid) {
			latest_devid = devid;
			latest_transid = transid;
			latest_bdev = bdev;
		}

		device->bdev = bdev;
		device->in_fs_metadata = 0;
		fs_devices->open_devices++;
		continue;

error_brelse:
		brelse(bh);
error_close:
		close_bdev_excl(bdev);
error:
		continue;
	}
	if (fs_devices->open_devices == 0) {
		ret = -EIO;
		goto out;
	}
	fs_devices->mounted = 1;
	fs_devices->latest_bdev = latest_bdev;
	fs_devices->latest_devid = latest_devid;
	fs_devices->latest_trans = latest_transid;
out:
	mutex_unlock(&uuid_mutex);
	return ret;
}

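/*
 * read the superblock from one device and, if it looks like btrfs,
 * record the device in the uuid list
 */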
int btrfs_scan_one_device(const char *path, int flags, void *holder,
			  struct btrfs_fs_devices **fs_devices_ret)
{
	struct btrfs_super_block *disk_super;
	struct block_device *bdev;
	struct buffer_head *bh;
	int ret;
	u64 devid;
	u64 transid;

	mutex_lock(&uuid_mutex);

	bdev = open_bdev_excl(path, flags, holder);

	if (IS_ERR(bdev)) {
		ret = PTR_ERR(bdev);
		goto error;
	}

	ret = set_blocksize(bdev, 4096);
	if (ret)
		goto error_close;
	bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096);
	if (!bh) {
		ret = -EIO;
		goto error_close;
	}
	disk_super = (struct btrfs_super_block *)bh->b_data;
	if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
		    sizeof(disk_super->magic))) {
		ret = -EINVAL;
		goto error_brelse;
	}
	devid = le64_to_cpu(disk_super->dev_item.devid);
	transid = btrfs_super_generation(disk_super);
	if (disk_super->label[0])
		printk("device label %s ", disk_super->label);
	else {
		/* FIXME, make a real uuid parser */
		printk("device fsid %llx-%llx ",
		       *(unsigned long long *)disk_super->fsid,
		       *(unsigned long long *)(disk_super->fsid + 8));
	}
	printk("devid %Lu transid %Lu %s\n", devid, transid, path);
	ret = device_list_add(path, disk_super, devid, fs_devices_ret);

error_brelse:
	brelse(bh);
error_close:
	close_bdev_excl(bdev);
error:
	mutex_unlock(&uuid_mutex);
	return ret;
}

/*
 * this uses a pretty simple search, the expectation is that it is
 * called very infrequently and that a given device has a small number
 * of extents
 */
static int find_free_dev_extent(struct btrfs_trans_handle *trans,
				struct btrfs_device *device,
				struct btrfs_path *path,
				u64 num_bytes, u64 *start)
{
	struct btrfs_key key;
	struct btrfs_root *root = device->dev_root;
	struct btrfs_dev_extent *dev_extent = NULL;
	u64 hole_size = 0;
	u64 last_byte = 0;
	u64 search_start = 0;
	u64 search_end = device->total_bytes;
	int ret;
	int slot = 0;
	int start_found;
	struct extent_buffer *l;

	start_found = 0;
	path->reada = 2;

	/* FIXME use last free of some kind */

	/* we don't want to overwrite the superblock on the drive,
	 * so we make sure to start at an offset of at least 1MB
	 */
	search_start = max((u64)1024 * 1024, search_start);

	if (root->fs_info->alloc_start + num_bytes <= device->total_bytes)
		search_start = max(root->fs_info->alloc_start, search_start);

	key.objectid = device->devid;
	key.offset = search_start;
	key.type = BTRFS_DEV_EXTENT_KEY;
	ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
	if (ret < 0)
		goto error;
	ret = btrfs_previous_item(root, path, 0, key.type);
	if (ret < 0)
		goto error;
	l = path->nodes[0];
	btrfs_item_key_to_cpu(l, &key, path->slots[0]);
	while (1) {
		l = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(l)) {
			ret = btrfs_next_leaf(root, path);
			if (ret == 0)
				continue;
			if (ret < 0)
				goto error;
no_more_items:
			if (!start_found) {
				if (search_start >= search_end) {
					ret = -ENOSPC;
					goto error;
				}
				*start = search_start;
				start_found = 1;
				goto check_pending;
			}
			*start = last_byte > search_start ?
				last_byte : search_start;
			if (search_end <= *start) {
				ret = -ENOSPC;
				goto error;
			}
			goto check_pending;
		}
		btrfs_item_key_to_cpu(l, &key, slot);

		if (key.objectid < device->devid)
			goto next;

		if (key.objectid > device->devid)
			goto no_more_items;

		if (key.offset >= search_start && key.offset > last_byte &&
		    start_found) {
			if (last_byte < search_start)
				last_byte = search_start;
			hole_size = key.offset - last_byte;
			if (key.offset > last_byte &&
			    hole_size >= num_bytes) {
				*start = last_byte;
				goto check_pending;
			}
		}
		if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) {
			goto next;
		}

		start_found = 1;
		dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
		last_byte = key.offset + btrfs_dev_extent_length(l, dev_extent);
next:
		path->slots[0]++;
		cond_resched();
	}
check_pending:
	/* we have to make sure we didn't find an extent that has already
	 * been allocated by the map tree or the original allocation
	 */
	btrfs_release_path(root, path);
	BUG_ON(*start < search_start);

	if (*start + num_bytes > search_end) {
		ret = -ENOSPC;
		goto error;
	}
	/* check for pending inserts here */
	return 0;

error:
	btrfs_release_path(root, path);
	return ret;
}

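/*
 * remove the device extent item at the given offset and credit the
 * space back to the device
 */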
int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
			  struct btrfs_device *device,
			  u64 start)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_root *root = device->dev_root;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct extent_buffer *leaf = NULL;
	struct btrfs_dev_extent *extent = NULL;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = device->devid;
	key.offset = start;
	key.type = BTRFS_DEV_EXTENT_KEY;

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret > 0) {
		ret = btrfs_previous_item(root, path, key.objectid,
					  BTRFS_DEV_EXTENT_KEY);
		BUG_ON(ret);
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		extent = btrfs_item_ptr(leaf, path->slots[0],
					struct btrfs_dev_extent);
		BUG_ON(found_key.offset > start || found_key.offset +
		       btrfs_dev_extent_length(leaf, extent) < start);
		ret = 0;
	} else if (ret == 0) {
		leaf = path->nodes[0];
		extent = btrfs_item_ptr(leaf, path->slots[0],
					struct btrfs_dev_extent);
	}
	BUG_ON(ret);

	if (device->bytes_used > 0)
		device->bytes_used -= btrfs_dev_extent_length(leaf, extent);
	ret = btrfs_del_item(trans, root, path);
	BUG_ON(ret);

	btrfs_free_path(path);
	return ret;
}

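/*
 * find a free area on the device and insert a device extent item
 * pointing back at the owning chunk
 */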
int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
			   struct btrfs_device *device,
			   u64 chunk_tree, u64 chunk_objectid,
			   u64 chunk_offset,
			   u64 num_bytes, u64 *start)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_root *root = device->dev_root;
	struct btrfs_dev_extent *extent;
	struct extent_buffer *leaf;
	struct btrfs_key key;

	WARN_ON(!device->in_fs_metadata);
	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = find_free_dev_extent(trans, device, path, num_bytes, start);
	if (ret) {
		goto err;
	}

	key.objectid = device->devid;
	key.offset = *start;
	key.type = BTRFS_DEV_EXTENT_KEY;
	ret = btrfs_insert_empty_item(trans, root, path, &key,
				      sizeof(*extent));
	BUG_ON(ret);

	leaf = path->nodes[0];
	extent = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_dev_extent);
	btrfs_set_dev_extent_chunk_tree(leaf, extent, chunk_tree);
	btrfs_set_dev_extent_chunk_objectid(leaf, extent, chunk_objectid);
	btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);

	write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
		    (unsigned long)btrfs_dev_extent_chunk_tree_uuid(extent),
		    BTRFS_UUID_SIZE);

	btrfs_set_dev_extent_length(leaf, extent, num_bytes);
	btrfs_mark_buffer_dirty(leaf);
err:
	btrfs_free_path(path);
	return ret;
}

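/*
 * find the offset just past the last chunk item for this objectid,
 * which is where the next chunk can be placed
 */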
static int find_next_chunk(struct btrfs_root *root, u64 objectid, u64 *offset)
{
	struct btrfs_path *path;
	int ret;
	struct btrfs_key key;
	struct btrfs_chunk *chunk;
	struct btrfs_key found_key;

	path = btrfs_alloc_path();
	BUG_ON(!path);

	key.objectid = objectid;
	key.offset = (u64)-1;
	key.type = BTRFS_CHUNK_ITEM_KEY;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto error;

	BUG_ON(ret == 0);

	ret = btrfs_previous_item(root, path, 0, BTRFS_CHUNK_ITEM_KEY);
	if (ret) {
		*offset = 0;
	} else {
		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
				      path->slots[0]);
		if (found_key.objectid != objectid)
			*offset = 0;
		else {
			chunk = btrfs_item_ptr(path->nodes[0], path->slots[0],
					       struct btrfs_chunk);
			*offset = found_key.offset +
				btrfs_chunk_length(path->nodes[0], chunk);
		}
	}
	ret = 0;
error:
	btrfs_free_path(path);
	return ret;
}

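/*
 * find the next available devid, one past the highest devid currently
 * recorded in the device tree
 */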
static int find_next_devid(struct btrfs_root *root, struct btrfs_path *path,
			   u64 *objectid)
{
	int ret;
	struct btrfs_key key;
	struct btrfs_key found_key;

	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
	key.type = BTRFS_DEV_ITEM_KEY;
	key.offset = (u64)-1;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto error;

	BUG_ON(ret == 0);

	ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID,
				  BTRFS_DEV_ITEM_KEY);
	if (ret) {
		*objectid = 1;
	} else {
		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
				      path->slots[0]);
		*objectid = found_key.offset + 1;
	}
	ret = 0;
error:
	btrfs_release_path(root, path);
	return ret;
}

/*
 * the device information is stored in the chunk root
 * the btrfs_device struct should be fully filled in
 */
int btrfs_add_device(struct btrfs_trans_handle *trans,
		     struct btrfs_root *root,
		     struct btrfs_device *device)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_dev_item *dev_item;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	unsigned long ptr;
	u64 free_devid = 0;

	root = root->fs_info->chunk_root;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = find_next_devid(root, path, &free_devid);
	if (ret)
		goto out;

	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
	key.type = BTRFS_DEV_ITEM_KEY;
	key.offset = free_devid;

	ret = btrfs_insert_empty_item(trans, root, path, &key,
				      sizeof(*dev_item));
	if (ret)
		goto out;

	leaf = path->nodes[0];
	dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);

	device->devid = free_devid;
	btrfs_set_device_id(leaf, dev_item, device->devid);
	btrfs_set_device_type(leaf, dev_item, device->type);
	btrfs_set_device_io_align(leaf, dev_item, device->io_align);
	btrfs_set_device_io_width(leaf, dev_item, device->io_width);
	btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
	btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
	btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
	btrfs_set_device_group(leaf, dev_item, 0);
	btrfs_set_device_seek_speed(leaf, dev_item, 0);
	btrfs_set_device_bandwidth(leaf, dev_item, 0);

	ptr = (unsigned long)btrfs_device_uuid(dev_item);
	write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
	btrfs_mark_buffer_dirty(leaf);
	ret = 0;

out:
	btrfs_free_path(path);
	return ret;
}

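/*
 * delete the dev item for a device from the chunk root and unhook the
 * device from the in-memory lists
 */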
static int btrfs_rm_dev_item(struct btrfs_root *root,
			     struct btrfs_device *device)
{
	int ret;
	struct btrfs_path *path;
	struct block_device *bdev = device->bdev;
	struct btrfs_device *next_dev;
	struct btrfs_key key;
	u64 total_bytes;
	struct btrfs_fs_devices *fs_devices;
	struct btrfs_trans_handle *trans;

	root = root->fs_info->chunk_root;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	trans = btrfs_start_transaction(root, 1);
	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
	key.type = BTRFS_DEV_ITEM_KEY;
	key.offset = device->devid;

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret < 0)
		goto out;

	if (ret > 0) {
		ret = -ENOENT;
		goto out;
	}

	ret = btrfs_del_item(trans, root, path);
	if (ret)
		goto out;

	/*
	 * at this point, the device is zero sized.  We want to
	 * remove it from the devices list and zero out the old super
	 */
	list_del_init(&device->dev_list);
	list_del_init(&device->dev_alloc_list);
	fs_devices = root->fs_info->fs_devices;

	next_dev = list_entry(fs_devices->devices.next, struct btrfs_device,
			      dev_list);
	if (bdev == root->fs_info->sb->s_bdev)
		root->fs_info->sb->s_bdev = next_dev->bdev;
	if (bdev == fs_devices->latest_bdev)
		fs_devices->latest_bdev = next_dev->bdev;

	total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy);
	btrfs_set_super_num_devices(&root->fs_info->super_copy,
				    total_bytes - 1);
out:
	btrfs_free_path(path);
	btrfs_commit_transaction(trans, root);
	return ret;
}

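/*
 * remove a device from the filesystem: relocate everything off of it,
 * delete its tree items and wipe its superblock magic.  The string
 * "missing" selects a device that is in the metadata but was not
 * found at mount time.
 */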
int btrfs_rm_device(struct btrfs_root *root, char *device_path)
{
	struct btrfs_device *device;
	struct block_device *bdev;
	struct buffer_head *bh = NULL;
	struct btrfs_super_block *disk_super;
	u64 all_avail;
	u64 devid;
	int ret = 0;

	mutex_lock(&root->fs_info->fs_mutex);
	mutex_lock(&uuid_mutex);

	all_avail = root->fs_info->avail_data_alloc_bits |
		    root->fs_info->avail_system_alloc_bits |
		    root->fs_info->avail_metadata_alloc_bits;

	if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) &&
	    btrfs_super_num_devices(&root->fs_info->super_copy) <= 4) {
		printk("btrfs: unable to go below four devices on raid10\n");
		ret = -EINVAL;
		goto out;
	}

	if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) &&
	    btrfs_super_num_devices(&root->fs_info->super_copy) <= 2) {
		printk("btrfs: unable to go below two devices on raid1\n");
		ret = -EINVAL;
		goto out;
	}

	if (strcmp(device_path, "missing") == 0) {
		struct list_head *cur;
		struct list_head *devices;
		struct btrfs_device *tmp;

		device = NULL;
		devices = &root->fs_info->fs_devices->devices;
		list_for_each(cur, devices) {
			tmp = list_entry(cur, struct btrfs_device, dev_list);
			if (tmp->in_fs_metadata && !tmp->bdev) {
				device = tmp;
				break;
			}
		}
		bdev = NULL;
		bh = NULL;
		disk_super = NULL;
		if (!device) {
			printk("btrfs: no missing devices found to remove\n");
			goto out;
		}

	} else {
		bdev = open_bdev_excl(device_path, 0,
				      root->fs_info->bdev_holder);
		if (IS_ERR(bdev)) {
			ret = PTR_ERR(bdev);
			goto out;
		}

		bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096);
		if (!bh) {
			ret = -EIO;
			goto error_close;
		}
		disk_super = (struct btrfs_super_block *)bh->b_data;
		if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
			    sizeof(disk_super->magic))) {
			ret = -ENOENT;
			goto error_brelse;
		}
		if (memcmp(disk_super->fsid, root->fs_info->fsid,
			   BTRFS_FSID_SIZE)) {
			ret = -ENOENT;
			goto error_brelse;
		}
		devid = le64_to_cpu(disk_super->dev_item.devid);
		device = btrfs_find_device(root, devid, NULL);
		if (!device) {
			ret = -ENOENT;
			goto error_brelse;
		}

	}
	root->fs_info->fs_devices->num_devices--;
	root->fs_info->fs_devices->open_devices--;

	ret = btrfs_shrink_device(device, 0);
	if (ret)
		goto error_brelse;

	ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device);
	if (ret)
		goto error_brelse;

	if (bh) {
		/* make sure this device isn't detected as part of
		 * the FS anymore
		 */
		memset(&disk_super->magic, 0, sizeof(disk_super->magic));
		set_buffer_dirty(bh);
		sync_dirty_buffer(bh);

		brelse(bh);
	}

	if (device->bdev) {
		/* one close for the device struct or super_block */
		close_bdev_excl(device->bdev);
	}
	if (bdev) {
		/* one close for us */
		close_bdev_excl(bdev);
	}
	kfree(device->name);
	kfree(device);
	ret = 0;
	goto out;

error_brelse:
	brelse(bh);
error_close:
	if (bdev)
		close_bdev_excl(bdev);
out:
	mutex_unlock(&uuid_mutex);
	mutex_unlock(&root->fs_info->fs_mutex);
	return ret;
}

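/*
 * add a freshly opened block device to a mounted filesystem and grow
 * the superblock totals to match
 */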
int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_device *device;
	struct block_device *bdev;
	struct list_head *cur;
	struct list_head *devices;
	u64 total_bytes;
	int ret = 0;

	bdev = open_bdev_excl(device_path, 0, root->fs_info->bdev_holder);
	if (IS_ERR(bdev)) {
		/* open_bdev_excl returns an ERR_PTR, never NULL */
		return PTR_ERR(bdev);
	}
	mutex_lock(&root->fs_info->fs_mutex);
	trans = btrfs_start_transaction(root, 1);
	devices = &root->fs_info->fs_devices->devices;
	list_for_each(cur, devices) {
		device = list_entry(cur, struct btrfs_device, dev_list);
		if (device->bdev == bdev) {
			ret = -EEXIST;
			goto out;
		}
	}

	device = kzalloc(sizeof(*device), GFP_NOFS);
	if (!device) {
		/* we can safely leave the fs_devices entry around */
		ret = -ENOMEM;
		goto out_close_bdev;
	}

	device->barriers = 1;
	generate_random_uuid(device->uuid);
	spin_lock_init(&device->io_lock);
	device->name = kstrdup(device_path, GFP_NOFS);
	if (!device->name) {
		kfree(device);
		ret = -ENOMEM;
		goto out_close_bdev;
	}
	device->io_width = root->sectorsize;
	device->io_align = root->sectorsize;
	device->sector_size = root->sectorsize;
	device->total_bytes = i_size_read(bdev->bd_inode);
	device->dev_root = root->fs_info->dev_root;
	device->bdev = bdev;
	device->in_fs_metadata = 1;

	ret = btrfs_add_device(trans, root, device);
	if (ret)
		goto out_close_bdev;

	total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
	btrfs_set_super_total_bytes(&root->fs_info->super_copy,
				    total_bytes + device->total_bytes);

	total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy);
	btrfs_set_super_num_devices(&root->fs_info->super_copy,
				    total_bytes + 1);

	list_add(&device->dev_list, &root->fs_info->fs_devices->devices);
	list_add(&device->dev_alloc_list,
		 &root->fs_info->fs_devices->alloc_list);
	root->fs_info->fs_devices->num_devices++;
	root->fs_info->fs_devices->open_devices++;
out:
	btrfs_end_transaction(trans, root);
	mutex_unlock(&root->fs_info->fs_mutex);
	return ret;

out_close_bdev:
	close_bdev_excl(bdev);
	goto out;
}

int btrfs_update_device(struct btrfs_trans_handle *trans,
			struct btrfs_device *device)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_root *root;
	struct btrfs_dev_item *dev_item;
	struct extent_buffer *leaf;
	struct btrfs_key key;

	root = device->dev_root->fs_info->chunk_root;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
	key.type = BTRFS_DEV_ITEM_KEY;
	key.offset = device->devid;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret < 0)
		goto out;

	if (ret > 0) {
		ret = -ENOENT;
		goto out;
	}

	leaf = path->nodes[0];
	dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);

	btrfs_set_device_id(leaf, dev_item, device->devid);
	btrfs_set_device_type(leaf, dev_item, device->type);
	btrfs_set_device_io_align(leaf, dev_item, device->io_align);
	btrfs_set_device_io_width(leaf, dev_item, device->io_width);
	btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
	btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
	btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
	btrfs_mark_buffer_dirty(leaf);

out:
	btrfs_free_path(path);
	return ret;
}

int btrfs_grow_device(struct btrfs_trans_handle *trans,
		      struct btrfs_device *device, u64 new_size)
{
	struct btrfs_super_block *super_copy =
		&device->dev_root->fs_info->super_copy;
	u64 old_total = btrfs_super_total_bytes(super_copy);
	u64 diff = new_size - device->total_bytes;

	btrfs_set_super_total_bytes(super_copy, old_total + diff);
	/* record the new size so btrfs_update_device writes it out */
	device->total_bytes = new_size;
	return btrfs_update_device(trans, device);
}

static int btrfs_free_chunk(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root,
			    u64 chunk_tree, u64 chunk_objectid,
			    u64 chunk_offset)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_key key;

	root = root->fs_info->chunk_root;
	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = chunk_objectid;
	key.offset = chunk_offset;
	key.type = BTRFS_CHUNK_ITEM_KEY;

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	BUG_ON(ret);

	ret = btrfs_del_item(trans, root, path);
	BUG_ON(ret);

	btrfs_free_path(path);
	return 0;
}

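/*
 * remove one chunk from the in-memory copy of the system chunk array
 * in the super block
 */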
int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid,
			u64 chunk_offset)
{
	struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
	struct btrfs_disk_key *disk_key;
	struct btrfs_chunk *chunk;
	u8 *ptr;
	int ret = 0;
	u32 num_stripes;
	u32 array_size;
	u32 len = 0;
	u32 cur;
	struct btrfs_key key;

	array_size = btrfs_super_sys_array_size(super_copy);

	ptr = super_copy->sys_chunk_array;
	cur = 0;

	while (cur < array_size) {
		disk_key = (struct btrfs_disk_key *)ptr;
		btrfs_disk_key_to_cpu(&key, disk_key);

		len = sizeof(*disk_key);

		if (key.type == BTRFS_CHUNK_ITEM_KEY) {
			chunk = (struct btrfs_chunk *)(ptr + len);
			num_stripes = btrfs_stack_chunk_num_stripes(chunk);
			len += btrfs_chunk_item_size(num_stripes);
		} else {
			ret = -EIO;
			break;
		}
		if (key.objectid == chunk_objectid &&
		    key.offset == chunk_offset) {
			memmove(ptr, ptr + len, array_size - (cur + len));
			array_size -= len;
			btrfs_set_super_sys_array_size(super_copy, array_size);
		} else {
			ptr += len;
			cur += len;
		}
	}
	return ret;
}

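/*
 * move all the extents out of a chunk and then remove its device
 * extents, chunk item and extent mapping
 */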
int btrfs_relocate_chunk(struct btrfs_root *root,
			 u64 chunk_tree, u64 chunk_objectid,
			 u64 chunk_offset)
{
	struct extent_map_tree *em_tree;
	struct btrfs_root *extent_root;
	struct btrfs_trans_handle *trans;
	struct extent_map *em;
	struct map_lookup *map;
	int ret;
	int i;

	printk("btrfs relocating chunk %llu\n",
	       (unsigned long long)chunk_offset);
	root = root->fs_info->chunk_root;
	extent_root = root->fs_info->extent_root;
	em_tree = &root->fs_info->mapping_tree.map_tree;

	/* step one, relocate all the extents inside this chunk */
	ret = btrfs_shrink_extent_tree(extent_root, chunk_offset);
	BUG_ON(ret);

	trans = btrfs_start_transaction(root, 1);
	BUG_ON(!trans);

	/*
	 * step two, delete the device extents and the
	 * chunk tree entries
	 */
	spin_lock(&em_tree->lock);
	em = lookup_extent_mapping(em_tree, chunk_offset, 1);
	spin_unlock(&em_tree->lock);

	BUG_ON(em->start > chunk_offset ||
	       em->start + em->len < chunk_offset);
	map = (struct map_lookup *)em->bdev;

	for (i = 0; i < map->num_stripes; i++) {
		ret = btrfs_free_dev_extent(trans, map->stripes[i].dev,
					    map->stripes[i].physical);
		BUG_ON(ret);

		if (map->stripes[i].dev) {
			ret = btrfs_update_device(trans, map->stripes[i].dev);
			BUG_ON(ret);
		}
	}
	ret = btrfs_free_chunk(trans, root, chunk_tree, chunk_objectid,
			       chunk_offset);

	BUG_ON(ret);

	if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
		ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset);
		BUG_ON(ret);
	}

	spin_lock(&em_tree->lock);
	remove_extent_mapping(em_tree, em);
	kfree(map);
	em->bdev = NULL;

	/* once for the tree */
	free_extent_map(em);
	spin_unlock(&em_tree->lock);

	/* once for us */
	free_extent_map(em);

	btrfs_end_transaction(trans, root);
	return 0;
}

static u64 div_factor(u64 num, int factor)
{
	if (factor == 10)
		return num;
	num *= factor;
	do_div(num, 10);
	return num;
}

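/*
 * shrink and regrow every device to free a little room on each, then
 * relocate every chunk so the allocator spreads them back out over
 * all the devices
 */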
int btrfs_balance(struct btrfs_root *dev_root)
{
	int ret;
	struct list_head *cur;
	struct list_head *devices = &dev_root->fs_info->fs_devices->devices;
	struct btrfs_device *device;
	u64 old_size;
	u64 size_to_free;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct btrfs_chunk *chunk;
	struct btrfs_root *chunk_root = dev_root->fs_info->chunk_root;
	struct btrfs_trans_handle *trans;
	struct btrfs_key found_key;

	dev_root = dev_root->fs_info->dev_root;

	mutex_lock(&dev_root->fs_info->fs_mutex);
	/* step one make some room on all the devices */
	list_for_each(cur, devices) {
		device = list_entry(cur, struct btrfs_device, dev_list);
		old_size = device->total_bytes;
		size_to_free = div_factor(old_size, 1);
		size_to_free = min(size_to_free, (u64)1 * 1024 * 1024);
		if (device->total_bytes - device->bytes_used > size_to_free)
			continue;

		ret = btrfs_shrink_device(device, old_size - size_to_free);
		BUG_ON(ret);

		trans = btrfs_start_transaction(dev_root, 1);
		BUG_ON(!trans);

		ret = btrfs_grow_device(trans, device, old_size);
		BUG_ON(ret);

		btrfs_end_transaction(trans, dev_root);
	}

	/* step two, relocate all the chunks */
	path = btrfs_alloc_path();
	BUG_ON(!path);

	key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
	key.offset = (u64)-1;
	key.type = BTRFS_CHUNK_ITEM_KEY;

	while (1) {
		ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
		if (ret < 0)
			goto error;

		/*
		 * this shouldn't happen, it means the last relocate
		 * failed
		 */
		if (ret == 0)
			break;

		ret = btrfs_previous_item(chunk_root, path, 0,
					  BTRFS_CHUNK_ITEM_KEY);
		if (ret) {
			break;
		}
		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
				      path->slots[0]);
		if (found_key.objectid != key.objectid)
			break;
		chunk = btrfs_item_ptr(path->nodes[0],
				       path->slots[0],
				       struct btrfs_chunk);
		key.offset = found_key.offset;
		/* chunk zero is special */
		if (key.offset == 0)
			break;

		ret = btrfs_relocate_chunk(chunk_root,
					   chunk_root->root_key.objectid,
					   found_key.objectid,
					   found_key.offset);
		BUG_ON(ret);
		btrfs_release_path(chunk_root, path);
	}
	ret = 0;
error:
	btrfs_free_path(path);
	mutex_unlock(&dev_root->fs_info->fs_mutex);
	return ret;
}

/*
 * shrinking a device means finding all of the device extents past
 * the new size, and then following the back refs to the chunks.
 * The chunk relocation code actually frees the device extent
 */
int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root = device->dev_root;
	struct btrfs_dev_extent *dev_extent = NULL;
	struct btrfs_path *path;
	u64 length;
	u64 chunk_tree;
	u64 chunk_objectid;
	u64 chunk_offset;
	int ret;
	int slot;
	struct extent_buffer *l;
	struct btrfs_key key;
	struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
	u64 old_total = btrfs_super_total_bytes(super_copy);
	u64 diff = device->total_bytes - new_size;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	trans = btrfs_start_transaction(root, 1);
	if (!trans) {
		ret = -ENOMEM;
		goto done;
	}

	path->reada = 2;

	device->total_bytes = new_size;
	ret = btrfs_update_device(trans, device);
	if (ret) {
		btrfs_end_transaction(trans, root);
		goto done;
	}
	WARN_ON(diff > old_total);
	btrfs_set_super_total_bytes(super_copy, old_total - diff);
	btrfs_end_transaction(trans, root);

	key.objectid = device->devid;
	key.offset = (u64)-1;
	key.type = BTRFS_DEV_EXTENT_KEY;

	while (1) {
		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret < 0)
			goto done;

		ret = btrfs_previous_item(root, path, 0, key.type);
		if (ret < 0)
			goto done;
		if (ret) {
			ret = 0;
			goto done;
		}

		l = path->nodes[0];
		slot = path->slots[0];
		btrfs_item_key_to_cpu(l, &key, path->slots[0]);

		if (key.objectid != device->devid)
			goto done;

		dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
		length = btrfs_dev_extent_length(l, dev_extent);

		if (key.offset + length <= new_size)
			goto done;

		chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
		chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
		chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
		btrfs_release_path(root, path);

		ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid,
					   chunk_offset);
		if (ret)
			goto done;
	}

done:
	btrfs_free_path(path);
	return ret;
}

int btrfs_add_system_chunk(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root,
			   struct btrfs_key *key,
			   struct btrfs_chunk *chunk, int item_size)
{
	struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
	struct btrfs_disk_key disk_key;
	u32 array_size;
	u8 *ptr;

	array_size = btrfs_super_sys_array_size(super_copy);
	if (array_size + item_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE)
		return -EFBIG;

	ptr = super_copy->sys_chunk_array + array_size;
	btrfs_cpu_key_to_disk(&disk_key, key);
	memcpy(ptr, &disk_key, sizeof(disk_key));
	ptr += sizeof(disk_key);
	memcpy(ptr, chunk, item_size);
	item_size += sizeof(disk_key);
	btrfs_set_super_sys_array_size(super_copy, array_size + item_size);
	return 0;
}

static u64 chunk_bytes_by_type(u64 type, u64 calc_size, int num_stripes,
			       int sub_stripes)
{
	if (type & (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP))
		return calc_size;
	else if (type & BTRFS_BLOCK_GROUP_RAID10)
		return calc_size * (num_stripes / sub_stripes);
	else
		return calc_size * num_stripes;
}

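/*
 * allocate a new chunk: pick a stripe size and count for the requested
 * raid type, grab a device extent on each chosen device and insert the
 * matching chunk item and extent mapping
 */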
int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
		      struct btrfs_root *extent_root, u64 *start,
		      u64 *num_bytes, u64 type)
{
	u64 dev_offset;
	struct btrfs_fs_info *info = extent_root->fs_info;
	struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root;
	struct btrfs_path *path;
	struct btrfs_stripe *stripes;
	struct btrfs_device *device = NULL;
	struct btrfs_chunk *chunk;
	struct list_head private_devs;
	struct list_head *dev_list;
	struct list_head *cur;
	struct extent_map_tree *em_tree;
	struct map_lookup *map;
	struct extent_map *em;
	int min_stripe_size = 1 * 1024 * 1024;
	u64 physical;
	u64 calc_size = 1024 * 1024 * 1024;
	u64 max_chunk_size = calc_size;
	u64 min_free;
	u64 avail;
	u64 max_avail = 0;
	u64 percent_max;
	int num_stripes = 1;
	int min_stripes = 1;
	int sub_stripes = 0;
	int looped = 0;
	int ret;
	int index;
	int stripe_len = 64 * 1024;
	struct btrfs_key key;

	if ((type & BTRFS_BLOCK_GROUP_RAID1) &&
	    (type & BTRFS_BLOCK_GROUP_DUP)) {
		WARN_ON(1);
		type &= ~BTRFS_BLOCK_GROUP_DUP;
	}
	dev_list = &extent_root->fs_info->fs_devices->alloc_list;
	if (list_empty(dev_list))
		return -ENOSPC;

	if (type & (BTRFS_BLOCK_GROUP_RAID0)) {
		num_stripes = extent_root->fs_info->fs_devices->open_devices;
		min_stripes = 2;
	}
	if (type & (BTRFS_BLOCK_GROUP_DUP)) {
		num_stripes = 2;
		min_stripes = 2;
	}
	if (type & (BTRFS_BLOCK_GROUP_RAID1)) {
		num_stripes = min_t(u64, 2,
			    extent_root->fs_info->fs_devices->open_devices);
		if (num_stripes < 2)
			return -ENOSPC;
		min_stripes = 2;
	}
	if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
		num_stripes = extent_root->fs_info->fs_devices->open_devices;
		if (num_stripes < 4)
			return -ENOSPC;
		num_stripes &= ~(u32)1;
		sub_stripes = 2;
		min_stripes = 4;
	}

	if (type & BTRFS_BLOCK_GROUP_DATA) {
		max_chunk_size = 10 * calc_size;
		min_stripe_size = 64 * 1024 * 1024;
	} else if (type & BTRFS_BLOCK_GROUP_METADATA) {
		max_chunk_size = 4 * calc_size;
		min_stripe_size = 32 * 1024 * 1024;
	} else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
		calc_size = 8 * 1024 * 1024;
		max_chunk_size = calc_size * 2;
		min_stripe_size = 1 * 1024 * 1024;
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* we don't want a chunk larger than 10% of the FS */
	percent_max = div_factor(btrfs_super_total_bytes(&info->super_copy), 1);
	max_chunk_size = min(percent_max, max_chunk_size);

again:
	if (calc_size * num_stripes > max_chunk_size) {
		calc_size = max_chunk_size;
		do_div(calc_size, num_stripes);
		do_div(calc_size, stripe_len);
		calc_size *= stripe_len;
	}
	/* we don't want tiny stripes */
	calc_size = max_t(u64, min_stripe_size, calc_size);

	do_div(calc_size, stripe_len);
	calc_size *= stripe_len;

	INIT_LIST_HEAD(&private_devs);
	cur = dev_list->next;
	index = 0;

	if (type & BTRFS_BLOCK_GROUP_DUP)
		min_free = calc_size * 2;
	else
		min_free = calc_size;

	/* we add 1MB because we never use the first 1MB of the device */
	min_free += 1024 * 1024;

	/* build a private list of devices we will allocate from */
	while (index < num_stripes) {
		device = list_entry(cur, struct btrfs_device, dev_alloc_list);

		if (device->total_bytes > device->bytes_used)
			avail = device->total_bytes - device->bytes_used;
		else
			avail = 0;
		cur = cur->next;

		if (device->in_fs_metadata && avail >= min_free) {
			u64 ignored_start = 0;
			ret = find_free_dev_extent(trans, device, path,
						   min_free,
						   &ignored_start);
			if (ret == 0) {
				list_move_tail(&device->dev_alloc_list,
					       &private_devs);
				index++;
				if (type & BTRFS_BLOCK_GROUP_DUP)
					index++;
			}
		} else if (device->in_fs_metadata && avail > max_avail)
			max_avail = avail;
		if (cur == dev_list)
			break;
	}
	if (index < num_stripes) {
		list_splice(&private_devs, dev_list);
		if (index >= min_stripes) {
			num_stripes = index;
			if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
				num_stripes /= sub_stripes;
				num_stripes *= sub_stripes;
			}
			looped = 1;
			goto again;
		}
		if (!looped && max_avail > 0) {
			looped = 1;
			calc_size = max_avail;
			goto again;
		}
		btrfs_free_path(path);
		return -ENOSPC;
	}
	key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
	key.type = BTRFS_CHUNK_ITEM_KEY;
	ret = find_next_chunk(chunk_root, BTRFS_FIRST_CHUNK_TREE_OBJECTID,
			      &key.offset);
	if (ret) {
		btrfs_free_path(path);
		return ret;
	}

	chunk = kmalloc(btrfs_chunk_item_size(num_stripes), GFP_NOFS);
	if (!chunk) {
		btrfs_free_path(path);
		return -ENOMEM;
	}

	map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
	if (!map) {
		kfree(chunk);
		btrfs_free_path(path);
		return -ENOMEM;
	}
	btrfs_free_path(path);
	path = NULL;

	stripes = &chunk->stripe;
	*num_bytes = chunk_bytes_by_type(type, calc_size,
					 num_stripes, sub_stripes);

	index = 0;
	while (index < num_stripes) {
		struct btrfs_stripe *stripe;
		BUG_ON(list_empty(&private_devs));
		cur = private_devs.next;
		device = list_entry(cur, struct btrfs_device, dev_alloc_list);

		/* loop over this device again if we're doing a dup group */
		if (!(type & BTRFS_BLOCK_GROUP_DUP) ||
		    (index == num_stripes - 1))
			list_move_tail(&device->dev_alloc_list, dev_list);

		ret = btrfs_alloc_dev_extent(trans, device,
			     info->chunk_root->root_key.objectid,
			     BTRFS_FIRST_CHUNK_TREE_OBJECTID, key.offset,
			     calc_size, &dev_offset);
		BUG_ON(ret);
		device->bytes_used += calc_size;
		ret = btrfs_update_device(trans, device);
		BUG_ON(ret);

		map->stripes[index].dev = device;
		map->stripes[index].physical = dev_offset;
		stripe = stripes + index;
		btrfs_set_stack_stripe_devid(stripe, device->devid);
		btrfs_set_stack_stripe_offset(stripe, dev_offset);
		memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE);
		physical = dev_offset;
		index++;
	}
	BUG_ON(!list_empty(&private_devs));

	/* key was set above */
	btrfs_set_stack_chunk_length(chunk, *num_bytes);
	btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid);
	btrfs_set_stack_chunk_stripe_len(chunk, stripe_len);
	btrfs_set_stack_chunk_type(chunk, type);
	btrfs_set_stack_chunk_num_stripes(chunk, num_stripes);
	btrfs_set_stack_chunk_io_align(chunk, stripe_len);
	btrfs_set_stack_chunk_io_width(chunk, stripe_len);
	btrfs_set_stack_chunk_sector_size(chunk, extent_root->sectorsize);
	btrfs_set_stack_chunk_sub_stripes(chunk, sub_stripes);
	map->sector_size = extent_root->sectorsize;
	map->stripe_len = stripe_len;
	map->io_align = stripe_len;
	map->io_width = stripe_len;
	map->type = type;
	map->num_stripes = num_stripes;
	map->sub_stripes = sub_stripes;

	ret = btrfs_insert_item(trans, chunk_root, &key, chunk,
				btrfs_chunk_item_size(num_stripes));
	BUG_ON(ret);
	*start = key.offset;

	em = alloc_extent_map(GFP_NOFS);
	if (!em) {
		/* nothing references chunk or map yet, free them both */
		kfree(chunk);
		kfree(map);
		return -ENOMEM;
	}
	em->bdev = (struct block_device *)map;
	em->start = key.offset;
	em->len = *num_bytes;
	em->block_start = 0;

	if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
		ret = btrfs_add_system_chunk(trans, chunk_root, &key,
				    chunk, btrfs_chunk_item_size(num_stripes));
		BUG_ON(ret);
	}
	kfree(chunk);

	em_tree = &extent_root->fs_info->mapping_tree.map_tree;
	spin_lock(&em_tree->lock);
	ret = add_extent_mapping(em_tree, em);
	spin_unlock(&em_tree->lock);
	BUG_ON(ret);
	free_extent_map(em);
	return ret;
}

void btrfs_mapping_init(struct btrfs_mapping_tree *tree)
{
	extent_map_tree_init(&tree->map_tree, GFP_NOFS);
}

void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree)
{
	struct extent_map *em;

	while (1) {
		spin_lock(&tree->map_tree.lock);
		em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1);
		if (em)
			remove_extent_mapping(&tree->map_tree, em);
		spin_unlock(&tree->map_tree.lock);
		if (!em)
			break;
		kfree(em->bdev);
		/* once for us */
		free_extent_map(em);
		/* once for the tree */
		free_extent_map(em);
	}
}

int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len)
{
	struct extent_map *em;
	struct map_lookup *map;
	struct extent_map_tree *em_tree = &map_tree->map_tree;
	int ret;

	spin_lock(&em_tree->lock);
	em = lookup_extent_mapping(em_tree, logical, len);
	spin_unlock(&em_tree->lock);
	BUG_ON(!em);

	BUG_ON(em->start > logical || em->start + em->len < logical);
	map = (struct map_lookup *)em->bdev;
	if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1))
		ret = map->num_stripes;
	else if (map->type & BTRFS_BLOCK_GROUP_RAID10)
		ret = map->sub_stripes;
	else
		ret = 1;
	free_extent_map(em);
	return ret;
}

static int find_live_mirror(struct map_lookup *map, int first, int num,
			    int optimal)
{
	int i;
	if (map->stripes[optimal].dev->bdev)
		return optimal;
	for (i = first; i < first + num; i++) {
		if (map->stripes[i].dev->bdev)
			return i;
	}
	/* we couldn't find one that doesn't fail.  Just return something
	 * and the io error handling code will clean up eventually
	 */
	return optimal;
}

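/*
 * map a logical byte range onto the physical stripes that hold it.
 * Reads get a single mirror, writes get every copy.  When unplug_page
 * is set this is only used to kick the queues of the relevant devices.
 */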
static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
			     u64 logical, u64 *length,
			     struct btrfs_multi_bio **multi_ret,
			     int mirror_num, struct page *unplug_page)
{
	struct extent_map *em;
	struct map_lookup *map;
	struct extent_map_tree *em_tree = &map_tree->map_tree;
	u64 offset;
	u64 stripe_offset;
	u64 stripe_nr;
	int stripes_allocated = 8;
	int stripes_required = 1;
	int stripe_index;
	int i;
	int num_stripes;
	int max_errors = 0;
	struct btrfs_multi_bio *multi = NULL;

	if (multi_ret && !(rw & (1 << BIO_RW))) {
		stripes_allocated = 1;
	}
again:
	if (multi_ret) {
		multi = kzalloc(btrfs_multi_bio_size(stripes_allocated),
				GFP_NOFS);
		if (!multi)
			return -ENOMEM;

		atomic_set(&multi->error, 0);
	}

	spin_lock(&em_tree->lock);
	em = lookup_extent_mapping(em_tree, logical, *length);
	spin_unlock(&em_tree->lock);

	if (!em && unplug_page) {
		/* don't leak the multi allocated above */
		kfree(multi);
		return 0;
	}

	if (!em) {
		printk("unable to find logical %Lu len %Lu\n", logical, *length);
		BUG();
	}

	BUG_ON(em->start > logical || em->start + em->len < logical);
	map = (struct map_lookup *)em->bdev;
	offset = logical - em->start;

	if (mirror_num > map->num_stripes)
		mirror_num = 0;

	/* if our multi bio struct is too small, back off and try again */
	if (rw & (1 << BIO_RW)) {
		if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
				 BTRFS_BLOCK_GROUP_DUP)) {
			stripes_required = map->num_stripes;
			max_errors = 1;
		} else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
			stripes_required = map->sub_stripes;
			max_errors = 1;
		}
	}
	if (multi_ret && rw == WRITE &&
	    stripes_allocated < stripes_required) {
		stripes_allocated = map->num_stripes;
		free_extent_map(em);
		kfree(multi);
		goto again;
	}
	stripe_nr = offset;
	/*
	 * stripe_nr counts the total number of stripes we have to stride
	 * to get to this block
	 */
	do_div(stripe_nr, map->stripe_len);

	stripe_offset = stripe_nr * map->stripe_len;
	BUG_ON(offset < stripe_offset);

	/* stripe_offset is the offset of this block in its stripe */
	stripe_offset = offset - stripe_offset;

	if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
			 BTRFS_BLOCK_GROUP_RAID10 |
			 BTRFS_BLOCK_GROUP_DUP)) {
		/* we limit the length of each bio to what fits in a stripe */
		*length = min_t(u64, em->len - offset,
				map->stripe_len - stripe_offset);
	} else {
		*length = em->len - offset;
	}

	if (!multi_ret && !unplug_page)
		goto out;

	num_stripes = 1;
	stripe_index = 0;
	if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
		if (unplug_page || (rw & (1 << BIO_RW)))
			num_stripes = map->num_stripes;
		else if (mirror_num)
			stripe_index = mirror_num - 1;
		else {
			stripe_index = find_live_mirror(map, 0,
					    map->num_stripes,
					    current->pid % map->num_stripes);
		}

	} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
		if (rw & (1 << BIO_RW))
			num_stripes = map->num_stripes;
		else if (mirror_num)
			stripe_index = mirror_num - 1;

	} else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
		int factor = map->num_stripes / map->sub_stripes;

		stripe_index = do_div(stripe_nr, factor);
		stripe_index *= map->sub_stripes;

		if (unplug_page || (rw & (1 << BIO_RW)))
			num_stripes = map->sub_stripes;
		else if (mirror_num)
			stripe_index += mirror_num - 1;
		else {
			stripe_index = find_live_mirror(map, stripe_index,
					      map->sub_stripes, stripe_index +
					      current->pid % map->sub_stripes);
		}
	} else {
		/*
		 * after this do_div call, stripe_nr is the number of stripes
		 * on this device we have to walk to find the data, and
		 * stripe_index is the number of our device in the stripe array
		 */
		stripe_index = do_div(stripe_nr, map->num_stripes);
	}
	BUG_ON(stripe_index >= map->num_stripes);

	for (i = 0; i < num_stripes; i++) {
		if (unplug_page) {
			struct btrfs_device *device;
			struct backing_dev_info *bdi;

			device = map->stripes[stripe_index].dev;
			if (device->bdev) {
				bdi = blk_get_backing_dev_info(device->bdev);
				if (bdi->unplug_io_fn) {
					bdi->unplug_io_fn(bdi, unplug_page);
				}
			}
		} else {
			multi->stripes[i].physical =
				map->stripes[stripe_index].physical +
				stripe_offset + stripe_nr * map->stripe_len;
			multi->stripes[i].dev = map->stripes[stripe_index].dev;
		}
		stripe_index++;
	}
	if (multi_ret) {
		*multi_ret = multi;
		multi->num_stripes = num_stripes;
		multi->max_errors = max_errors;
	}
out:
	free_extent_map(em);
	return 0;
}

int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
		    u64 logical, u64 *length,
		    struct btrfs_multi_bio **multi_ret, int mirror_num)
{
	return __btrfs_map_block(map_tree, rw, logical, length, multi_ret,
				 mirror_num, NULL);
}

int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree,
		      u64 logical, struct page *page)
{
	u64 length = PAGE_CACHE_SIZE;
	return __btrfs_map_block(map_tree, READ, logical, &length,
				 NULL, 0, page);
}

#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
static void end_bio_multi_stripe(struct bio *bio, int err)
#else
static int end_bio_multi_stripe(struct bio *bio,
				unsigned int bytes_done, int err)
#endif
{
	struct btrfs_multi_bio *multi = bio->bi_private;

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
	if (bio->bi_size)
		return 1;
#endif
	if (err)
		atomic_inc(&multi->error);

	if (atomic_dec_and_test(&multi->stripes_pending)) {
		bio->bi_private = multi->private;
		bio->bi_end_io = multi->end_io;
		/* only send an error to the higher layers if it is
		 * beyond the tolerance of the multi-bio
		 */
		if (atomic_read(&multi->error) > multi->max_errors) {
			err = -EIO;
		} else if (err) {
			/*
			 * this bio is actually up to date, we didn't
			 * go over the max number of errors
			 */
			set_bit(BIO_UPTODATE, &bio->bi_flags);
			err = 0;
		}
		kfree(multi);

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
		bio_endio(bio, bio->bi_size, err);
#else
		bio_endio(bio, err);
#endif
	} else {
		bio_put(bio);
	}
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
	return 0;
#endif
}

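/*
 * map a bio to the devices it needs to hit, cloning it once per extra
 * stripe.  Missing devices get their bio completed with -EIO.
 */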
1968 int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
1969 int mirror_num)
1970 {
1971 struct btrfs_mapping_tree *map_tree;
1972 struct btrfs_device *dev;
1973 struct bio *first_bio = bio;
1974 u64 logical = bio->bi_sector << 9;
1975 u64 length = 0;
1976 u64 map_length;
1977 struct btrfs_multi_bio *multi = NULL;
1978 int ret;
1979 int dev_nr = 0;
1980 int total_devs = 1;
1981
1982 length = bio->bi_size;
1983 map_tree = &root->fs_info->mapping_tree;
1984 map_length = length;
1985
1986 ret = btrfs_map_block(map_tree, rw, logical, &map_length, &multi,
1987 mirror_num);
1988 BUG_ON(ret);
1989
1990 total_devs = multi->num_stripes;
1991 if (map_length < length) {
1992 printk("mapping failed logical %Lu bio len %Lu "
1993 "len %Lu\n", logical, length, map_length);
1994 BUG();
1995 }
1996 multi->end_io = first_bio->bi_end_io;
1997 multi->private = first_bio->bi_private;
1998 atomic_set(&multi->stripes_pending, multi->num_stripes);
1999
2000 while(dev_nr < total_devs) {
2001 if (total_devs > 1) {
2002 if (dev_nr < total_devs - 1) {
2003 bio = bio_clone(first_bio, GFP_NOFS);
2004 BUG_ON(!bio);
2005 } else {
2006 bio = first_bio;
2007 }
2008 bio->bi_private = multi;
2009 bio->bi_end_io = end_bio_multi_stripe;
2010 }
2011 bio->bi_sector = multi->stripes[dev_nr].physical >> 9;
2012 dev = multi->stripes[dev_nr].dev;
2013 if (dev && dev->bdev) {
2014 bio->bi_bdev = dev->bdev;
2015 spin_lock(&dev->io_lock);
2016 dev->total_ios++;
2017 spin_unlock(&dev->io_lock);
2018 submit_bio(rw, bio);
2019 } else {
2020 bio->bi_bdev = root->fs_info->fs_devices->latest_bdev;
2021 bio->bi_sector = logical >> 9;
2022 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
2023 bio_endio(bio, bio->bi_size, -EIO);
2024 #else
2025 bio_endio(bio, -EIO);
2026 #endif
2027 }
2028 dev_nr++;
2029 }
2030 if (total_devs == 1)
2031 kfree(multi);
2032 return 0;
2033 }
2034
2035 struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid,
2036 u8 *uuid)
2037 {
2038 struct list_head *head = &root->fs_info->fs_devices->devices;
2039
2040 return __find_device(head, devid, uuid);
2041 }
2042
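/*
 * Create a placeholder btrfs_device (no bdev) for a device that is
 * referenced by metadata but not present, so degraded mounts can
 * proceed.
 */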
2043 static struct btrfs_device *add_missing_dev(struct btrfs_root *root,
2044 u64 devid, u8 *dev_uuid)
2045 {
2046 struct btrfs_device *device;
2047 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
2048
device = kzalloc(sizeof(*device), GFP_NOFS);
if (!device)
return NULL;
list_add(&device->dev_list, &fs_devices->devices);
list_add(&device->dev_alloc_list, &fs_devices->alloc_list);
2054 device->barriers = 1;
2055 device->dev_root = root->fs_info->dev_root;
2056 device->devid = devid;
2057 fs_devices->num_devices++;
2058 spin_lock_init(&device->io_lock);
2059 memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE);
2060 return device;
2061 }
2062
2063
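/*
 * Turn one chunk item into a map_lookup and insert it into the
 * logical->physical mapping tree as an extent_map.
 */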
2064 static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
2065 struct extent_buffer *leaf,
2066 struct btrfs_chunk *chunk)
2067 {
2068 struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
2069 struct map_lookup *map;
2070 struct extent_map *em;
2071 u64 logical;
2072 u64 length;
2073 u64 devid;
2074 u8 uuid[BTRFS_UUID_SIZE];
2075 int num_stripes;
2076 int ret;
2077 int i;
2078
2079 logical = key->offset;
2080 length = btrfs_chunk_length(leaf, chunk);
2081
2082 spin_lock(&map_tree->map_tree.lock);
2083 em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
2084 spin_unlock(&map_tree->map_tree.lock);
2085
2086 /* already mapped? */
2087 if (em && em->start <= logical && em->start + em->len > logical) {
2088 free_extent_map(em);
2089 return 0;
2090 } else if (em) {
2091 free_extent_map(em);
2092 }
2093
2098 em = alloc_extent_map(GFP_NOFS);
2099 if (!em)
2100 return -ENOMEM;
2101 num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
2102 map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
2103 if (!map) {
2104 free_extent_map(em);
2105 return -ENOMEM;
2106 }
2107
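/* the extent map's bdev field is reused to carry the stripe map;
 * this em never does real block IO through it
 */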
2108 em->bdev = (struct block_device *)map;
2109 em->start = logical;
2110 em->len = length;
2111 em->block_start = 0;
2112
2113 map->num_stripes = num_stripes;
2114 map->io_width = btrfs_chunk_io_width(leaf, chunk);
2115 map->io_align = btrfs_chunk_io_align(leaf, chunk);
2116 map->sector_size = btrfs_chunk_sector_size(leaf, chunk);
2117 map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
2118 map->type = btrfs_chunk_type(leaf, chunk);
2119 map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
2120 for (i = 0; i < num_stripes; i++) {
2121 map->stripes[i].physical =
2122 btrfs_stripe_offset_nr(leaf, chunk, i);
2123 devid = btrfs_stripe_devid_nr(leaf, chunk, i);
2124 read_extent_buffer(leaf, uuid, (unsigned long)
2125 btrfs_stripe_dev_uuid_nr(chunk, i),
2126 BTRFS_UUID_SIZE);
2127 map->stripes[i].dev = btrfs_find_device(root, devid, uuid);
2128
2129 if (!map->stripes[i].dev && !btrfs_test_opt(root, DEGRADED)) {
2130 kfree(map);
2131 free_extent_map(em);
2132 return -EIO;
2133 }
2134 if (!map->stripes[i].dev) {
2135 map->stripes[i].dev =
2136 add_missing_dev(root, devid, uuid);
2137 if (!map->stripes[i].dev) {
2138 kfree(map);
2139 free_extent_map(em);
2140 return -EIO;
2141 }
2142 }
2143 map->stripes[i].dev->in_fs_metadata = 1;
2144 }
2145
2146 spin_lock(&map_tree->map_tree.lock);
2147 ret = add_extent_mapping(&map_tree->map_tree, em);
2148 spin_unlock(&map_tree->map_tree.lock);
2149 BUG_ON(ret);
2150 free_extent_map(em);
2151
2152 return 0;
2153 }
2154
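/* copy the on-disk dev item fields into the in-memory btrfs_device */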
2155 static int fill_device_from_item(struct extent_buffer *leaf,
2156 struct btrfs_dev_item *dev_item,
2157 struct btrfs_device *device)
2158 {
2159 unsigned long ptr;
2160
2161 device->devid = btrfs_device_id(leaf, dev_item);
2162 device->total_bytes = btrfs_device_total_bytes(leaf, dev_item);
2163 device->bytes_used = btrfs_device_bytes_used(leaf, dev_item);
2164 device->type = btrfs_device_type(leaf, dev_item);
2165 device->io_align = btrfs_device_io_align(leaf, dev_item);
2166 device->io_width = btrfs_device_io_width(leaf, dev_item);
2167 device->sector_size = btrfs_device_sector_size(leaf, dev_item);
2168
2169 ptr = (unsigned long)btrfs_device_uuid(dev_item);
2170 read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
2171
2172 return 0;
2173 }
2174
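/*
 * Read one dev item from the chunk tree (or the super block) and
 * update or create the matching in-memory device.
 */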
2175 static int read_one_dev(struct btrfs_root *root,
2176 struct extent_buffer *leaf,
2177 struct btrfs_dev_item *dev_item)
2178 {
2179 struct btrfs_device *device;
2180 u64 devid;
2181 int ret;
2182 u8 dev_uuid[BTRFS_UUID_SIZE];
2183
2184 devid = btrfs_device_id(leaf, dev_item);
2185 read_extent_buffer(leaf, dev_uuid,
2186 (unsigned long)btrfs_device_uuid(dev_item),
2187 BTRFS_UUID_SIZE);
2188 device = btrfs_find_device(root, devid, dev_uuid);
2189 if (!device) {
printk(KERN_WARNING "btrfs: devid %Lu missing\n", devid);
2191 device = add_missing_dev(root, devid, dev_uuid);
2192 if (!device)
2193 return -ENOMEM;
2194 }
2195
2196 fill_device_from_item(leaf, dev_item, device);
2197 device->dev_root = root->fs_info->dev_root;
2198 device->in_fs_metadata = 1;
2199 ret = 0;
2200 #if 0
2201 ret = btrfs_open_device(device);
2202 if (ret) {
2203 kfree(device);
2204 }
2205 #endif
2206 return ret;
2207 }
2208
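/*
 * The dev_item here is the one embedded in the super block; its
 * offset within the buffer is cast to a pointer, which is the
 * convention the extent buffer accessors expect.
 */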
2209 int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf)
2210 {
2211 struct btrfs_dev_item *dev_item;
2212
2213 dev_item = (struct btrfs_dev_item *)offsetof(struct btrfs_super_block,
2214 dev_item);
2215 return read_one_dev(root, buf, dev_item);
2216 }
2217
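/*
 * The super block's sys_chunk_array is a packed sequence of
 * (btrfs_disk_key, btrfs_chunk + stripes) pairs describing the SYSTEM
 * chunks, which must be mapped before the chunk tree itself can be
 * read:
 *
 *   [key 0][chunk 0 + stripes][key 1][chunk 1 + stripes] ...
 */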
2218 int btrfs_read_sys_array(struct btrfs_root *root)
2219 {
2220 struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
2221 struct extent_buffer *sb;
2222 struct btrfs_disk_key *disk_key;
2223 struct btrfs_chunk *chunk;
2224 u8 *ptr;
2225 unsigned long sb_ptr;
2226 int ret = 0;
2227 u32 num_stripes;
2228 u32 array_size;
2229 u32 len = 0;
2230 u32 cur;
2231 struct btrfs_key key;
2232
2233 sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET,
2234 BTRFS_SUPER_INFO_SIZE);
2235 if (!sb)
2236 return -ENOMEM;
2237 btrfs_set_buffer_uptodate(sb);
2238 write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
2239 array_size = btrfs_super_sys_array_size(super_copy);
2240
2241 ptr = super_copy->sys_chunk_array;
2242 sb_ptr = offsetof(struct btrfs_super_block, sys_chunk_array);
2243 cur = 0;
2244
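/*
 * walk the packed pairs; sb_ptr tracks the matching offset inside
 * the extent buffer, again cast to a pointer for the chunk accessors
 */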
2245 while (cur < array_size) {
2246 disk_key = (struct btrfs_disk_key *)ptr;
2247 btrfs_disk_key_to_cpu(&key, disk_key);
2248
len = sizeof(*disk_key);
ptr += len;
sb_ptr += len;
cur += len;
2252
2253 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
2254 chunk = (struct btrfs_chunk *)sb_ptr;
2255 ret = read_one_chunk(root, &key, sb, chunk);
2256 if (ret)
2257 break;
2258 num_stripes = btrfs_chunk_num_stripes(sb, chunk);
2259 len = btrfs_chunk_item_size(num_stripes);
2260 } else {
2261 ret = -EIO;
2262 break;
2263 }
2264 ptr += len;
2265 sb_ptr += len;
2266 cur += len;
2267 }
2268 free_extent_buffer(sb);
2269 return ret;
2270 }
2271
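/*
 * Read the chunk tree in two passes: first every dev item, then every
 * chunk item, so each chunk mapping can resolve all of its devices.
 */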
2272 int btrfs_read_chunk_tree(struct btrfs_root *root)
2273 {
2274 struct btrfs_path *path;
2275 struct extent_buffer *leaf;
2276 struct btrfs_key key;
2277 struct btrfs_key found_key;
2278 int ret;
2279 int slot;
2280
2281 root = root->fs_info->chunk_root;
2282
2283 path = btrfs_alloc_path();
2284 if (!path)
2285 return -ENOMEM;
2286
/*
 * first we search for all of the device items, then we read in all
 * of the chunk items.  This way we can create chunk mappings that
 * reference all of the devices that were found.
 */
2291 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
2292 key.offset = 0;
2293 key.type = 0;
2294 again:
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
goto error;
while (1) {
2297 leaf = path->nodes[0];
2298 slot = path->slots[0];
2299 if (slot >= btrfs_header_nritems(leaf)) {
2300 ret = btrfs_next_leaf(root, path);
2301 if (ret == 0)
2302 continue;
2303 if (ret < 0)
2304 goto error;
2305 break;
2306 }
2307 btrfs_item_key_to_cpu(leaf, &found_key, slot);
2308 if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
2309 if (found_key.objectid != BTRFS_DEV_ITEMS_OBJECTID)
2310 break;
2311 if (found_key.type == BTRFS_DEV_ITEM_KEY) {
2312 struct btrfs_dev_item *dev_item;
2313 dev_item = btrfs_item_ptr(leaf, slot,
2314 struct btrfs_dev_item);
2315 ret = read_one_dev(root, leaf, dev_item);
2316 BUG_ON(ret);
2317 }
2318 } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
2319 struct btrfs_chunk *chunk;
2320 chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
2321 ret = read_one_chunk(root, &found_key, leaf, chunk);
2322 }
2323 path->slots[0]++;
2324 }
2325 if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
2326 key.objectid = 0;
2327 btrfs_release_path(root, path);
2328 goto again;
2329 }
2330
ret = 0;
error:
btrfs_free_path(path);
return ret;
2335 }
2336