fs/btrfs/disk-io.c
/*
 * Copyright (C) 2007 Oracle. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/scatterlist.h>
#include <linux/swap.h>
#include <linux/radix-tree.h>
#include <linux/writeback.h>
#include <linux/buffer_head.h> /* for block_sync_page */
#include <linux/workqueue.h>
#include "crc32c.h"
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
#include "volumes.h"
#include "print-tree.h"

#if 0
static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf)
{
	if (extent_buffer_blocknr(buf) != btrfs_header_blocknr(buf)) {
		printk(KERN_CRIT "buf blocknr(buf) is %llu, header is %llu\n",
		       (unsigned long long)extent_buffer_blocknr(buf),
		       (unsigned long long)btrfs_header_blocknr(buf));
		return 1;
	}
	return 0;
}
#endif

static struct extent_io_ops btree_extent_io_ops;
static struct workqueue_struct *end_io_workqueue;
static struct workqueue_struct *async_submit_workqueue;

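/*
 * End-of-I/O bookkeeping: the original bi_end_io/bi_private of a bio are
 * stashed here so that completion can be replayed after the checksum work
 * has run in the end_io workqueue.
 */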
struct end_io_wq {
	struct bio *bio;
	bio_end_io_t *end_io;
	void *private;
	struct btrfs_fs_info *info;
	int error;
	int metadata;
	struct list_head list;
};

struct async_submit_bio {
	struct inode *inode;
	struct bio *bio;
	struct list_head list;
	extent_submit_bio_hook_t *submit_bio_hook;
	int rw;
	int mirror_num;
};

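/*
 * The btree inode covers the whole device, so every metadata lookup maps
 * to a single extent: [0, i_size) at block_start 0 on the super block's
 * block device.
 */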
struct extent_map *btree_get_extent(struct inode *inode, struct page *page,
				    size_t page_offset, u64 start, u64 len,
				    int create)
{
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	struct extent_map *em;
	int ret;

again:
	spin_lock(&em_tree->lock);
	em = lookup_extent_mapping(em_tree, start, len);
	spin_unlock(&em_tree->lock);
	if (em) {
		goto out;
	}
	em = alloc_extent_map(GFP_NOFS);
	if (!em) {
		em = ERR_PTR(-ENOMEM);
		goto out;
	}
	em->start = 0;
	em->len = i_size_read(inode);
	em->block_start = 0;
	em->bdev = inode->i_sb->s_bdev;

	spin_lock(&em_tree->lock);
	ret = add_extent_mapping(em_tree, em);
	spin_unlock(&em_tree->lock);

	if (ret == -EEXIST) {
		free_extent_map(em);
		em = NULL;
		goto again;
	} else if (ret) {
		em = ERR_PTR(ret);
	}
out:
	return em;
}

u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len)
{
	return btrfs_crc32c(seed, data, len);
}

void btrfs_csum_final(u32 crc, char *result)
{
	*(__le32 *)result = ~cpu_to_le32(crc);
}

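/*
 * Checksum a tree block: crc32c everything after the csum field.  With
 * verify == 0 the result is written into the header; with verify == 1 it
 * is compared against the stored value and a mismatch is reported.
 */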
static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
			   int verify)
{
	char result[BTRFS_CRC32_SIZE];
	unsigned long len;
	unsigned long cur_len;
	unsigned long offset = BTRFS_CSUM_SIZE;
	char *map_token = NULL;
	char *kaddr;
	unsigned long map_start;
	unsigned long map_len;
	int err;
	u32 crc = ~(u32)0;

	len = buf->len - offset;
	while (len > 0) {
		err = map_private_extent_buffer(buf, offset, 32,
						&map_token, &kaddr,
						&map_start, &map_len, KM_USER0);
		if (err) {
			printk("failed to map extent buffer! %lu\n",
			       offset);
			return 1;
		}
		cur_len = min(len, map_len - (offset - map_start));
		crc = btrfs_csum_data(root, kaddr + offset - map_start,
				      crc, cur_len);
		len -= cur_len;
		offset += cur_len;
		unmap_extent_buffer(buf, map_token, KM_USER0);
	}
	btrfs_csum_final(crc, result);

	if (verify) {
		int from_this_trans = 0;

		if (root->fs_info->running_transaction &&
		    btrfs_header_generation(buf) ==
		    root->fs_info->running_transaction->transid)
			from_this_trans = 1;

		/* FIXME, this is not good */
		if (memcmp_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE)) {
			u32 val;
			u32 found = 0;
			memcpy(&found, result, BTRFS_CRC32_SIZE);

			read_extent_buffer(buf, &val, 0, BTRFS_CRC32_SIZE);
			printk("btrfs: %s checksum verify failed on %llu "
			       "wanted %X found %X from_this_trans %d "
			       "level %d\n",
			       root->fs_info->sb->s_id,
			       buf->start, val, found, from_this_trans,
			       btrfs_header_level(buf));
			return 1;
		}
	} else {
		write_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE);
	}
	return 0;
}

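/*
 * Read the pages of an extent buffer, falling back to the other mirrors
 * (RAID copies) one by one until a copy reads cleanly or every mirror
 * has been tried.
 */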
static int btree_read_extent_buffer_pages(struct btrfs_root *root,
					  struct extent_buffer *eb,
					  u64 start)
{
	struct extent_io_tree *io_tree;
	int ret;
	int num_copies = 0;
	int mirror_num = 0;

	io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree;
	while (1) {
		ret = read_extent_buffer_pages(io_tree, eb, start, 1,
					       btree_get_extent, mirror_num);
		if (!ret) {
			if (mirror_num)
				printk("good read %Lu mirror %d total %d\n",
				       eb->start, mirror_num, num_copies);
			return ret;
		}
		num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
					      eb->start, eb->len);
		printk("failed to read %Lu mirror %d total %d\n",
		       eb->start, mirror_num, num_copies);
		if (num_copies == 1) {
			printk("reading %Lu failed only one copy\n", eb->start);
			return ret;
		}
		mirror_num++;
		if (mirror_num > num_copies) {
			printk("bailing at mirror %d of %d\n",
			       mirror_num, num_copies);
			return ret;
		}
	}
	printk("read extent buffer page last\n");
	return -EIO;
}

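/*
 * Checksum a dirty btree page at write-out time.  The page's private
 * field carries the extent buffer length (shifted left by 2), which is
 * used to rebuild the extent_buffer before csumming it.
 */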
int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
{
	struct extent_io_tree *tree;
	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
	u64 found_start;
	int found_level;
	unsigned long len;
	struct extent_buffer *eb;
	int ret;

	tree = &BTRFS_I(page->mapping->host)->io_tree;

	if (page->private == EXTENT_PAGE_PRIVATE)
		goto out;
	if (!page->private)
		goto out;
	len = page->private >> 2;
	if (len == 0) {
		WARN_ON(1);
	}
	eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS);
	ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE);
	BUG_ON(ret);
	btrfs_clear_buffer_defrag(eb);
	found_start = btrfs_header_bytenr(eb);
	if (found_start != start) {
		printk("warning: eb start incorrect %Lu buffer %Lu len %lu\n",
		       start, found_start, len);
		WARN_ON(1);
		goto err;
	}
	if (eb->first_page != page) {
		printk("bad first page %lu %lu\n", eb->first_page->index,
		       page->index);
		WARN_ON(1);
		goto err;
	}
	if (!PageUptodate(page)) {
		printk("csum not up to date page %lu\n", page->index);
		WARN_ON(1);
		goto err;
	}
	found_level = btrfs_header_level(eb);
	spin_lock(&root->fs_info->hash_lock);
	btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
	spin_unlock(&root->fs_info->hash_lock);
	csum_tree_block(root, eb, 0);
err:
	free_extent_buffer(eb);
out:
	return 0;
}

static int btree_writepage_io_hook(struct page *page, u64 start, u64 end)
{
	struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;

	csum_dirty_buffer(root, page);
	return 0;
}

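/*
 * Read-completion hook for btree pages: verify the header bytenr and the
 * crc32c before the buffer is handed back to the caller.
 */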
int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
			       struct extent_state *state)
{
	struct extent_io_tree *tree;
	u64 found_start;
	int found_level;
	unsigned long len;
	struct extent_buffer *eb;
	struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
	int ret = 0;

	tree = &BTRFS_I(page->mapping->host)->io_tree;
	if (page->private == EXTENT_PAGE_PRIVATE)
		goto out;
	if (!page->private)
		goto out;
	len = page->private >> 2;
	if (len == 0) {
		WARN_ON(1);
	}
	eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS);

	btrfs_clear_buffer_defrag(eb);
	found_start = btrfs_header_bytenr(eb);
	if (found_start != start) {
		printk("bad start on %Lu found %Lu\n", eb->start, found_start);
		ret = -EIO;
		goto err;
	}
	if (eb->first_page != page) {
		printk("bad first page %lu %lu\n", eb->first_page->index,
		       page->index);
		WARN_ON(1);
		ret = -EIO;
		goto err;
	}
	found_level = btrfs_header_level(eb);

	ret = csum_tree_block(root, eb, 1);
	if (ret)
		ret = -EIO;

	end = min_t(u64, eb->len, PAGE_CACHE_SIZE);
	end = eb->start + end - 1;
	release_extent_buffer_tail_pages(eb);
err:
	free_extent_buffer(eb);
out:
	return ret;
}

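/*
 * bio completion handler that defers the real end_io work to the end_io
 * workqueue.  The bi_end_io prototype changed in 2.6.24, hence the #ifdefs.
 */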
#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
static void end_workqueue_bio(struct bio *bio, int err)
#else
static int end_workqueue_bio(struct bio *bio,
			     unsigned int bytes_done, int err)
#endif
{
	struct end_io_wq *end_io_wq = bio->bi_private;
	struct btrfs_fs_info *fs_info;
	unsigned long flags;

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
	if (bio->bi_size)
		return 1;
#endif

	fs_info = end_io_wq->info;
	spin_lock_irqsave(&fs_info->end_io_work_lock, flags);
	end_io_wq->error = err;
	list_add_tail(&end_io_wq->list, &fs_info->end_io_work_list);
	spin_unlock_irqrestore(&fs_info->end_io_work_lock, flags);
	queue_work(end_io_workqueue, &fs_info->end_io_work);

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
	return 0;
#endif
}

int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
			int metadata)
{
	struct end_io_wq *end_io_wq;
	end_io_wq = kmalloc(sizeof(*end_io_wq), GFP_NOFS);
	if (!end_io_wq)
		return -ENOMEM;

	end_io_wq->private = bio->bi_private;
	end_io_wq->end_io = bio->bi_end_io;
	end_io_wq->info = info;
	end_io_wq->error = 0;
	end_io_wq->bio = bio;
	end_io_wq->metadata = metadata;

	bio->bi_private = end_io_wq;
	bio->bi_end_io = end_workqueue_bio;
	return 0;
}

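/*
 * Hand a bio to the async submit workqueue instead of submitting it in
 * the caller's context; the worker calls submit_bio_hook later.
 */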
int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
			int rw, struct bio *bio, int mirror_num,
			extent_submit_bio_hook_t *submit_bio_hook)
{
	struct async_submit_bio *async;

	async = kmalloc(sizeof(*async), GFP_NOFS);
	if (!async)
		return -ENOMEM;

	async->inode = inode;
	async->rw = rw;
	async->bio = bio;
	async->mirror_num = mirror_num;
	async->submit_bio_hook = submit_bio_hook;

	spin_lock(&fs_info->async_submit_work_lock);
	list_add_tail(&async->list, &fs_info->async_submit_work_list);
	spin_unlock(&fs_info->async_submit_work_lock);

	queue_work(async_submit_workqueue, &fs_info->async_submit_work);
	return 0;
}

static int __btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
				   int mirror_num)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	u64 offset;
	int ret;

	offset = bio->bi_sector << 9;

	if (rw & (1 << BIO_RW)) {
		return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num);
	}

	ret = btrfs_bio_wq_end_io(root->fs_info, bio, 1);
	BUG_ON(ret);

	if (offset == BTRFS_SUPER_INFO_OFFSET) {
		bio->bi_bdev = root->fs_info->fs_devices->latest_bdev;
		submit_bio(rw, bio);
		return 0;
	}
	return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num);
}

static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
				 int mirror_num)
{
	if (!(rw & (1 << BIO_RW))) {
		return __btree_submit_bio_hook(inode, rw, bio, mirror_num);
	}
	return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
				   inode, rw, bio, mirror_num,
				   __btree_submit_bio_hook);
}

static int btree_writepage(struct page *page, struct writeback_control *wbc)
{
	struct extent_io_tree *tree;
	tree = &BTRFS_I(page->mapping->host)->io_tree;
	return extent_write_full_page(tree, page, btree_get_extent, wbc);
}

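/*
 * For background writeback, skip the flush unless enough btree pages are
 * dirty: pdflush waits for 96MB of dirty metadata, everyone else for 8MB.
 */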
static int btree_writepages(struct address_space *mapping,
			    struct writeback_control *wbc)
{
	struct extent_io_tree *tree;
	tree = &BTRFS_I(mapping->host)->io_tree;
	if (wbc->sync_mode == WB_SYNC_NONE) {
		u64 num_dirty;
		u64 start = 0;
		unsigned long thresh = 96 * 1024 * 1024;

		if (wbc->for_kupdate)
			return 0;

		if (current_is_pdflush()) {
			thresh = 96 * 1024 * 1024;
		} else {
			thresh = 8 * 1024 * 1024;
		}
		num_dirty = count_range_bits(tree, &start, (u64)-1,
					     thresh, EXTENT_DIRTY);
		if (num_dirty < thresh) {
			return 0;
		}
	}
	return extent_writepages(tree, mapping, btree_get_extent, wbc);
}

int btree_readpage(struct file *file, struct page *page)
{
	struct extent_io_tree *tree;
	tree = &BTRFS_I(page->mapping->host)->io_tree;
	return extent_read_full_page(tree, page, btree_get_extent);
}

static int btree_releasepage(struct page *page, gfp_t gfp_flags)
{
	struct extent_io_tree *tree;
	struct extent_map_tree *map;
	int ret;

	if (page_count(page) > 3) {
		/* once for page->private, once for the caller,
		 * once for the page cache
		 */
		return 0;
	}
	tree = &BTRFS_I(page->mapping->host)->io_tree;
	map = &BTRFS_I(page->mapping->host)->extent_tree;
	ret = try_release_extent_mapping(map, tree, page, gfp_flags);
	if (ret == 1) {
		invalidate_extent_lru(tree, page_offset(page), PAGE_CACHE_SIZE);
		ClearPagePrivate(page);
		set_page_private(page, 0);
		page_cache_release(page);
	}
	return ret;
}

static void btree_invalidatepage(struct page *page, unsigned long offset)
{
	struct extent_io_tree *tree;
	tree = &BTRFS_I(page->mapping->host)->io_tree;
	extent_invalidatepage(tree, page, offset);
	btree_releasepage(page, GFP_NOFS);
}

#if 0
static int btree_writepage(struct page *page, struct writeback_control *wbc)
{
	struct buffer_head *bh;
	struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
	struct buffer_head *head;
	if (!page_has_buffers(page)) {
		create_empty_buffers(page, root->fs_info->sb->s_blocksize,
				     (1 << BH_Dirty)|(1 << BH_Uptodate));
	}
	head = page_buffers(page);
	bh = head;
	do {
		if (buffer_dirty(bh))
			csum_tree_block(root, bh, 0);
		bh = bh->b_this_page;
	} while (bh != head);
	return block_write_full_page(page, btree_get_block, wbc);
}
#endif

static struct address_space_operations btree_aops = {
	.readpage	= btree_readpage,
	.writepage	= btree_writepage,
	.writepages	= btree_writepages,
	.releasepage	= btree_releasepage,
	.invalidatepage = btree_invalidatepage,
	.sync_page	= block_sync_page,
};

int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize)
{
	struct extent_buffer *buf = NULL;
	struct inode *btree_inode = root->fs_info->btree_inode;
	int ret = 0;

	buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
	if (!buf)
		return 0;
	read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
				 buf, 0, 0, btree_get_extent, 0);
	free_extent_buffer(buf);
	return ret;
}

static int close_all_devices(struct btrfs_fs_info *fs_info)
{
	struct list_head *list;
	struct list_head *next;
	struct btrfs_device *device;

	list = &fs_info->fs_devices->devices;
	list_for_each(next, list) {
		device = list_entry(next, struct btrfs_device, dev_list);
		if (device->bdev && device->bdev != fs_info->sb->s_bdev)
			close_bdev_excl(device->bdev);
		device->bdev = NULL;
	}
	return 0;
}

int btrfs_verify_block_csum(struct btrfs_root *root,
			    struct extent_buffer *buf)
{
	return btrfs_buffer_uptodate(buf);
}

struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
					    u64 bytenr, u32 blocksize)
{
	struct inode *btree_inode = root->fs_info->btree_inode;
	struct extent_buffer *eb;
	eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree,
				bytenr, blocksize, GFP_NOFS);
	return eb;
}

struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
						   u64 bytenr, u32 blocksize)
{
	struct inode *btree_inode = root->fs_info->btree_inode;
	struct extent_buffer *eb;

	eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree,
				 bytenr, blocksize, NULL, GFP_NOFS);
	return eb;
}


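/*
 * Read a tree block from disk (or the page cache), retrying the other
 * mirrors as needed, and mark it uptodate on success.
 */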
struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
				      u32 blocksize)
{
	struct extent_buffer *buf = NULL;
	struct inode *btree_inode = root->fs_info->btree_inode;
	struct extent_io_tree *io_tree;
	int ret;

	io_tree = &BTRFS_I(btree_inode)->io_tree;

	buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
	if (!buf)
		return NULL;

	ret = btree_read_extent_buffer_pages(root, buf, 0);

	if (ret == 0) {
		buf->flags |= EXTENT_UPTODATE;
	}
	return buf;
}

int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		     struct extent_buffer *buf)
{
	struct inode *btree_inode = root->fs_info->btree_inode;
	if (btrfs_header_generation(buf) ==
	    root->fs_info->running_transaction->transid)
		clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree,
					  buf);
	return 0;
}

int wait_on_tree_block_writeback(struct btrfs_root *root,
				 struct extent_buffer *buf)
{
	struct inode *btree_inode = root->fs_info->btree_inode;
	wait_on_extent_buffer_writeback(&BTRFS_I(btree_inode)->io_tree,
					buf);
	return 0;
}

static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
			u32 stripesize, struct btrfs_root *root,
			struct btrfs_fs_info *fs_info,
			u64 objectid)
{
	root->node = NULL;
	root->inode = NULL;
	root->commit_root = NULL;
	root->sectorsize = sectorsize;
	root->nodesize = nodesize;
	root->leafsize = leafsize;
	root->stripesize = stripesize;
	root->ref_cows = 0;
	root->track_dirty = 0;

	root->fs_info = fs_info;
	root->objectid = objectid;
	root->last_trans = 0;
	root->highest_inode = 0;
	root->last_inode_alloc = 0;
	root->name = NULL;
	root->in_sysfs = 0;

	INIT_LIST_HEAD(&root->dirty_list);
	memset(&root->root_key, 0, sizeof(root->root_key));
	memset(&root->root_item, 0, sizeof(root->root_item));
	memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
	memset(&root->root_kobj, 0, sizeof(root->root_kobj));
	init_completion(&root->kobj_unregister);
	root->defrag_running = 0;
	root->defrag_level = 0;
	root->root_key.objectid = objectid;
	return 0;
}

static int find_and_setup_root(struct btrfs_root *tree_root,
			       struct btrfs_fs_info *fs_info,
			       u64 objectid,
			       struct btrfs_root *root)
{
	int ret;
	u32 blocksize;

	__setup_root(tree_root->nodesize, tree_root->leafsize,
		     tree_root->sectorsize, tree_root->stripesize,
		     root, fs_info, objectid);
	ret = btrfs_find_last_root(tree_root, objectid,
				   &root->root_item, &root->root_key);
	BUG_ON(ret);

	blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
	root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
				     blocksize);
	BUG_ON(!root->node);
	return 0;
}

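/*
 * Load a subvolume root from the tree of tree roots: either the latest
 * version (offset == -1) via find_and_setup_root, or a specific snapshot
 * found with a regular tree search.
 */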
struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info,
					       struct btrfs_key *location)
{
	struct btrfs_root *root;
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_path *path;
	struct extent_buffer *l;
	u64 highest_inode;
	u32 blocksize;
	int ret = 0;

	root = kzalloc(sizeof(*root), GFP_NOFS);
	if (!root)
		return ERR_PTR(-ENOMEM);
	if (location->offset == (u64)-1) {
		ret = find_and_setup_root(tree_root, fs_info,
					  location->objectid, root);
		if (ret) {
			kfree(root);
			return ERR_PTR(ret);
		}
		goto insert;
	}

	__setup_root(tree_root->nodesize, tree_root->leafsize,
		     tree_root->sectorsize, tree_root->stripesize,
		     root, fs_info, location->objectid);

	path = btrfs_alloc_path();
	BUG_ON(!path);
	ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
	if (ret != 0) {
		if (ret > 0)
			ret = -ENOENT;
		goto out;
	}
	l = path->nodes[0];
	read_extent_buffer(l, &root->root_item,
			   btrfs_item_ptr_offset(l, path->slots[0]),
			   sizeof(root->root_item));
	memcpy(&root->root_key, location, sizeof(*location));
	ret = 0;
out:
	btrfs_release_path(root, path);
	btrfs_free_path(path);
	if (ret) {
		kfree(root);
		return ERR_PTR(ret);
	}
	blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
	root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
				     blocksize);
	BUG_ON(!root->node);
insert:
	root->ref_cows = 1;
	ret = btrfs_find_highest_inode(root, &highest_inode);
	if (ret == 0) {
		root->highest_inode = highest_inode;
		root->last_inode_alloc = highest_inode;
	}
	return root;
}

struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
					u64 root_objectid)
{
	struct btrfs_root *root;

	if (root_objectid == BTRFS_ROOT_TREE_OBJECTID)
		return fs_info->tree_root;
	if (root_objectid == BTRFS_EXTENT_TREE_OBJECTID)
		return fs_info->extent_root;

	root = radix_tree_lookup(&fs_info->fs_roots_radix,
				 (unsigned long)root_objectid);
	return root;
}

struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
					      struct btrfs_key *location)
{
	struct btrfs_root *root;
	int ret;

	if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
		return fs_info->tree_root;
	if (location->objectid == BTRFS_EXTENT_TREE_OBJECTID)
		return fs_info->extent_root;

	root = radix_tree_lookup(&fs_info->fs_roots_radix,
				 (unsigned long)location->objectid);
	if (root)
		return root;

	root = btrfs_read_fs_root_no_radix(fs_info, location);
	if (IS_ERR(root))
		return root;
	ret = radix_tree_insert(&fs_info->fs_roots_radix,
				(unsigned long)root->root_key.objectid,
				root);
	if (ret) {
		free_extent_buffer(root->node);
		kfree(root);
		return ERR_PTR(ret);
	}
	ret = btrfs_find_dead_roots(fs_info->tree_root,
				    root->root_key.objectid, root);
	BUG_ON(ret);

	return root;
}

struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
				      struct btrfs_key *location,
				      const char *name, int namelen)
{
	struct btrfs_root *root;
	int ret;

	root = btrfs_read_fs_root_no_name(fs_info, location);
	if (!root)
		return NULL;

	if (root->in_sysfs)
		return root;

	ret = btrfs_set_root_name(root, name, namelen);
	if (ret) {
		free_extent_buffer(root->node);
		kfree(root);
		return ERR_PTR(ret);
	}

	ret = btrfs_sysfs_add_root(root);
	if (ret) {
		free_extent_buffer(root->node);
		kfree(root->name);
		kfree(root);
		return ERR_PTR(ret);
	}
	root->in_sysfs = 1;
	return root;
}
#if 0
static int add_hasher(struct btrfs_fs_info *info, char *type) {
	struct btrfs_hasher *hasher;

	hasher = kmalloc(sizeof(*hasher), GFP_NOFS);
	if (!hasher)
		return -ENOMEM;
	hasher->hash_tfm = crypto_alloc_hash(type, 0, CRYPTO_ALG_ASYNC);
	if (!hasher->hash_tfm) {
		kfree(hasher);
		return -EINVAL;
	}
	spin_lock(&info->hash_lock);
	list_add(&hasher->list, &info->hashers);
	spin_unlock(&info->hash_lock);
	return 0;
}
#endif

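/*
 * A multi-device filesystem is congested when any of its member devices
 * is congested: poll each device's backing_dev_info.
 */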
static int btrfs_congested_fn(void *congested_data, int bdi_bits)
{
	struct btrfs_fs_info *info = (struct btrfs_fs_info *)congested_data;
	int ret = 0;
	struct list_head *cur;
	struct btrfs_device *device;
	struct backing_dev_info *bdi;

	list_for_each(cur, &info->fs_devices->devices) {
		device = list_entry(cur, struct btrfs_device, dev_list);
		bdi = blk_get_backing_dev_info(device->bdev);
		if (bdi && bdi_congested(bdi, bdi_bits)) {
			ret = 1;
			break;
		}
	}
	return ret;
}

void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
{
	struct list_head *cur;
	struct btrfs_device *device;
	struct btrfs_fs_info *info;

	info = (struct btrfs_fs_info *)bdi->unplug_io_data;
	list_for_each(cur, &info->fs_devices->devices) {
		device = list_entry(cur, struct btrfs_device, dev_list);
		bdi = blk_get_backing_dev_info(device->bdev);
		if (bdi->unplug_io_fn) {
			bdi->unplug_io_fn(bdi, page);
		}
	}
}

static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23)
	bdi_init(bdi);
#endif
	bdi->ra_pages = default_backing_dev_info.ra_pages * 4;
	bdi->state = 0;
	bdi->capabilities = default_backing_dev_info.capabilities;
	bdi->unplug_io_fn = btrfs_unplug_io_fn;
	bdi->unplug_io_data = info;
	bdi->congested_fn = btrfs_congested_fn;
	bdi->congested_data = info;
	return 0;
}

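/*
 * Extent buffers can span multiple pages and so multiple bios.  Checksums
 * can only run once every page of a buffer is available; this checks
 * whether the last buffer touched by the bio is fully contained in it,
 * or whether the remainder of its range is already uptodate.
 */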
static int bio_ready_for_csum(struct bio *bio)
{
	u64 length = 0;
	u64 buf_len = 0;
	u64 start = 0;
	struct page *page;
	struct extent_io_tree *io_tree = NULL;
	struct btrfs_fs_info *info = NULL;
	struct bio_vec *bvec;
	int i;
	int ret;

	bio_for_each_segment(bvec, bio, i) {
		page = bvec->bv_page;
		if (page->private == EXTENT_PAGE_PRIVATE) {
			length += bvec->bv_len;
			continue;
		}
		if (!page->private) {
			length += bvec->bv_len;
			continue;
		}
		length = bvec->bv_len;
		buf_len = page->private >> 2;
		start = page_offset(page) + bvec->bv_offset;
		io_tree = &BTRFS_I(page->mapping->host)->io_tree;
		info = BTRFS_I(page->mapping->host)->root->fs_info;
	}
	/* are we fully contained in this bio? */
	if (buf_len <= length)
		return 1;

	ret = extent_range_uptodate(io_tree, start + length,
				    start + buf_len - 1);
	if (ret == 1)
		return ret;
	return ret;
}

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
static void btrfs_end_io_csum(void *p)
#else
static void btrfs_end_io_csum(struct work_struct *work)
#endif
{
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
	struct btrfs_fs_info *fs_info = p;
#else
	struct btrfs_fs_info *fs_info = container_of(work,
						     struct btrfs_fs_info,
						     end_io_work);
#endif
	unsigned long flags;
	struct end_io_wq *end_io_wq;
	struct bio *bio;
	struct list_head *next;
	int error;
	int was_empty;

	while (1) {
		spin_lock_irqsave(&fs_info->end_io_work_lock, flags);
		if (list_empty(&fs_info->end_io_work_list)) {
			spin_unlock_irqrestore(&fs_info->end_io_work_lock,
					       flags);
			return;
		}
		next = fs_info->end_io_work_list.next;
		list_del(next);
		spin_unlock_irqrestore(&fs_info->end_io_work_lock, flags);

		end_io_wq = list_entry(next, struct end_io_wq, list);

		bio = end_io_wq->bio;
		if (end_io_wq->metadata && !bio_ready_for_csum(bio)) {
			spin_lock_irqsave(&fs_info->end_io_work_lock, flags);
			was_empty = list_empty(&fs_info->end_io_work_list);
			list_add_tail(&end_io_wq->list,
				      &fs_info->end_io_work_list);
			spin_unlock_irqrestore(&fs_info->end_io_work_lock,
					       flags);
			if (was_empty)
				return;
			continue;
		}
		error = end_io_wq->error;
		bio->bi_private = end_io_wq->private;
		bio->bi_end_io = end_io_wq->end_io;
		kfree(end_io_wq);
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
		bio_endio(bio, bio->bi_size, error);
#else
		bio_endio(bio, error);
#endif
	}
}

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
static void btrfs_async_submit_work(void *p)
#else
static void btrfs_async_submit_work(struct work_struct *work)
#endif
{
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
	struct btrfs_fs_info *fs_info = p;
#else
	struct btrfs_fs_info *fs_info = container_of(work,
						     struct btrfs_fs_info,
						     async_submit_work);
#endif
	struct async_submit_bio *async;
	struct list_head *next;

	while (1) {
		spin_lock(&fs_info->async_submit_work_lock);
		if (list_empty(&fs_info->async_submit_work_list)) {
			spin_unlock(&fs_info->async_submit_work_lock);
			return;
		}
		next = fs_info->async_submit_work_list.next;
		list_del(next);
		spin_unlock(&fs_info->async_submit_work_lock);

		async = list_entry(next, struct async_submit_bio, list);
		async->submit_bio_hook(async->inode, async->rw, async->bio,
				       async->mirror_num);
		kfree(async);
	}
}

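/*
 * Mount-time setup: allocate the in-memory roots, read the super block,
 * then bring up the chunk tree, the tree of tree roots, and the extent
 * and device trees.
 */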
struct btrfs_root *open_ctree(struct super_block *sb,
			      struct btrfs_fs_devices *fs_devices)
{
	u32 sectorsize;
	u32 nodesize;
	u32 leafsize;
	u32 blocksize;
	u32 stripesize;
	struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root),
						 GFP_NOFS);
	struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root),
					       GFP_NOFS);
	struct btrfs_fs_info *fs_info = kzalloc(sizeof(*fs_info),
						GFP_NOFS);
	struct btrfs_root *chunk_root = kmalloc(sizeof(struct btrfs_root),
						GFP_NOFS);
	struct btrfs_root *dev_root = kmalloc(sizeof(struct btrfs_root),
					      GFP_NOFS);
	int ret;
	int err = -EINVAL;
	struct btrfs_super_block *disk_super;

	if (!extent_root || !tree_root || !fs_info ||
	    !chunk_root || !dev_root) {
		err = -ENOMEM;
		goto fail;
	}
	end_io_workqueue = create_workqueue("btrfs-end-io");
	BUG_ON(!end_io_workqueue);
	async_submit_workqueue = create_workqueue("btrfs-async-submit");
	BUG_ON(!async_submit_workqueue);

	INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS);
	INIT_LIST_HEAD(&fs_info->trans_list);
	INIT_LIST_HEAD(&fs_info->dead_roots);
	INIT_LIST_HEAD(&fs_info->hashers);
	INIT_LIST_HEAD(&fs_info->end_io_work_list);
	INIT_LIST_HEAD(&fs_info->async_submit_work_list);
	spin_lock_init(&fs_info->hash_lock);
	spin_lock_init(&fs_info->end_io_work_lock);
	spin_lock_init(&fs_info->async_submit_work_lock);
	spin_lock_init(&fs_info->delalloc_lock);
	spin_lock_init(&fs_info->new_trans_lock);

	init_completion(&fs_info->kobj_unregister);
	sb_set_blocksize(sb, BTRFS_SUPER_INFO_SIZE);
	fs_info->tree_root = tree_root;
	fs_info->extent_root = extent_root;
	fs_info->chunk_root = chunk_root;
	fs_info->dev_root = dev_root;
	fs_info->fs_devices = fs_devices;
	INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
	INIT_LIST_HEAD(&fs_info->space_info);
	btrfs_mapping_init(&fs_info->mapping_tree);
	fs_info->sb = sb;
	fs_info->max_extent = (u64)-1;
	fs_info->max_inline = 8192 * 1024;
	setup_bdi(fs_info, &fs_info->bdi);
	fs_info->btree_inode = new_inode(sb);
	fs_info->btree_inode->i_ino = 1;
	fs_info->btree_inode->i_nlink = 1;
	fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size;
	fs_info->btree_inode->i_mapping->a_ops = &btree_aops;
	fs_info->btree_inode->i_mapping->backing_dev_info = &fs_info->bdi;

	extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree,
			    fs_info->btree_inode->i_mapping,
			    GFP_NOFS);
	extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree,
			     GFP_NOFS);

	BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops;

	extent_io_tree_init(&fs_info->free_space_cache,
			    fs_info->btree_inode->i_mapping, GFP_NOFS);
	extent_io_tree_init(&fs_info->block_group_cache,
			    fs_info->btree_inode->i_mapping, GFP_NOFS);
	extent_io_tree_init(&fs_info->pinned_extents,
			    fs_info->btree_inode->i_mapping, GFP_NOFS);
	extent_io_tree_init(&fs_info->pending_del,
			    fs_info->btree_inode->i_mapping, GFP_NOFS);
	extent_io_tree_init(&fs_info->extent_ins,
			    fs_info->btree_inode->i_mapping, GFP_NOFS);
	fs_info->do_barriers = 1;

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
	INIT_WORK(&fs_info->end_io_work, btrfs_end_io_csum, fs_info);
	INIT_WORK(&fs_info->async_submit_work, btrfs_async_submit_work,
		  fs_info);
	INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info);
#else
	INIT_WORK(&fs_info->end_io_work, btrfs_end_io_csum);
	INIT_WORK(&fs_info->async_submit_work, btrfs_async_submit_work);
	INIT_DELAYED_WORK(&fs_info->trans_work, btrfs_transaction_cleaner);
#endif
	BTRFS_I(fs_info->btree_inode)->root = tree_root;
	memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
	       sizeof(struct btrfs_key));
	insert_inode_hash(fs_info->btree_inode);
	mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);

	mutex_init(&fs_info->trans_mutex);
	mutex_init(&fs_info->fs_mutex);

#if 0
	ret = add_hasher(fs_info, "crc32c");
	if (ret) {
		printk("btrfs: failed hash setup, modprobe cryptomgr?\n");
		err = -ENOMEM;
		goto fail_iput;
	}
#endif
	__setup_root(4096, 4096, 4096, 4096, tree_root,
		     fs_info, BTRFS_ROOT_TREE_OBJECTID);

	fs_info->sb_buffer = read_tree_block(tree_root,
					     BTRFS_SUPER_INFO_OFFSET,
					     4096);

	if (!fs_info->sb_buffer)
		goto fail_iput;

	read_extent_buffer(fs_info->sb_buffer, &fs_info->super_copy, 0,
			   sizeof(fs_info->super_copy));

	read_extent_buffer(fs_info->sb_buffer, fs_info->fsid,
			   (unsigned long)btrfs_super_fsid(fs_info->sb_buffer),
			   BTRFS_FSID_SIZE);

	disk_super = &fs_info->super_copy;
	if (!btrfs_super_root(disk_super))
		goto fail_sb_buffer;

	if (btrfs_super_num_devices(disk_super) != fs_devices->num_devices) {
		printk("Btrfs: wanted %llu devices, but found %llu\n",
		       (unsigned long long)btrfs_super_num_devices(disk_super),
		       (unsigned long long)fs_devices->num_devices);
		goto fail_sb_buffer;
	}
	nodesize = btrfs_super_nodesize(disk_super);
	leafsize = btrfs_super_leafsize(disk_super);
	sectorsize = btrfs_super_sectorsize(disk_super);
	stripesize = btrfs_super_stripesize(disk_super);
	tree_root->nodesize = nodesize;
	tree_root->leafsize = leafsize;
	tree_root->sectorsize = sectorsize;
	tree_root->stripesize = stripesize;
	sb_set_blocksize(sb, sectorsize);

	i_size_write(fs_info->btree_inode,
		     btrfs_super_total_bytes(disk_super));

	if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
		    sizeof(disk_super->magic))) {
		printk("btrfs: valid FS not found on %s\n", sb->s_id);
		goto fail_sb_buffer;
	}

	mutex_lock(&fs_info->fs_mutex);

	ret = btrfs_read_sys_array(tree_root);
	BUG_ON(ret);

	blocksize = btrfs_level_size(tree_root,
				     btrfs_super_chunk_root_level(disk_super));

	__setup_root(nodesize, leafsize, sectorsize, stripesize,
		     chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID);

	chunk_root->node = read_tree_block(chunk_root,
					   btrfs_super_chunk_root(disk_super),
					   blocksize);
	BUG_ON(!chunk_root->node);

	read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid,
			   (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node),
			   BTRFS_UUID_SIZE);

	ret = btrfs_read_chunk_tree(chunk_root);
	BUG_ON(ret);

	blocksize = btrfs_level_size(tree_root,
				     btrfs_super_root_level(disk_super));

	tree_root->node = read_tree_block(tree_root,
					  btrfs_super_root(disk_super),
					  blocksize);
	if (!tree_root->node)
		goto fail_sb_buffer;

	ret = find_and_setup_root(tree_root, fs_info,
				  BTRFS_EXTENT_TREE_OBJECTID, extent_root);
	if (ret)
		goto fail_tree_root;
	extent_root->track_dirty = 1;

	ret = find_and_setup_root(tree_root, fs_info,
				  BTRFS_DEV_TREE_OBJECTID, dev_root);
	dev_root->track_dirty = 1;

	if (ret)
		goto fail_extent_root;

	btrfs_read_block_groups(extent_root);

	fs_info->generation = btrfs_super_generation(disk_super) + 1;
	fs_info->data_alloc_profile = (u64)-1;
	fs_info->metadata_alloc_profile = (u64)-1;
	fs_info->system_alloc_profile = fs_info->metadata_alloc_profile;

	mutex_unlock(&fs_info->fs_mutex);
	return tree_root;

fail_extent_root:
	free_extent_buffer(extent_root->node);
fail_tree_root:
	mutex_unlock(&fs_info->fs_mutex);
	free_extent_buffer(tree_root->node);
fail_sb_buffer:
	free_extent_buffer(fs_info->sb_buffer);
	extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree);
fail_iput:
	iput(fs_info->btree_inode);
fail:
	close_all_devices(fs_info);
	kfree(extent_root);
	kfree(tree_root);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23)
	bdi_destroy(&fs_info->bdi);
#endif
	kfree(fs_info);
	return ERR_PTR(err);
}

static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
{
	char b[BDEVNAME_SIZE];

	if (uptodate) {
		set_buffer_uptodate(bh);
	} else {
		if (!buffer_eopnotsupp(bh) && printk_ratelimit()) {
			printk(KERN_WARNING "lost page write due to "
			       "I/O error on %s\n",
			       bdevname(bh->b_bdev, b));
		}
		set_buffer_write_io_error(bh);
		clear_buffer_uptodate(bh);
	}
	unlock_buffer(bh);
	put_bh(bh);
}

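/*
 * Copy the superblock to every device and submit the writes, using a
 * barrier write where the device supports it; then wait on all of the
 * buffers, retrying with a plain write if a barrier was rejected.
 */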
int write_all_supers(struct btrfs_root *root)
{
	struct list_head *cur;
	struct list_head *head = &root->fs_info->fs_devices->devices;
	struct btrfs_device *dev;
	struct extent_buffer *sb;
	struct btrfs_dev_item *dev_item;
	struct buffer_head *bh;
	int ret;
	int do_barriers;

	do_barriers = !btrfs_test_opt(root, NOBARRIER);

	sb = root->fs_info->sb_buffer;
	dev_item = (struct btrfs_dev_item *)offsetof(struct btrfs_super_block,
						     dev_item);
	list_for_each(cur, head) {
		dev = list_entry(cur, struct btrfs_device, dev_list);
		btrfs_set_device_type(sb, dev_item, dev->type);
		btrfs_set_device_id(sb, dev_item, dev->devid);
		btrfs_set_device_total_bytes(sb, dev_item, dev->total_bytes);
		btrfs_set_device_bytes_used(sb, dev_item, dev->bytes_used);
		btrfs_set_device_io_align(sb, dev_item, dev->io_align);
		btrfs_set_device_io_width(sb, dev_item, dev->io_width);
		btrfs_set_device_sector_size(sb, dev_item, dev->sector_size);
		write_extent_buffer(sb, dev->uuid,
				    (unsigned long)btrfs_device_uuid(dev_item),
				    BTRFS_UUID_SIZE);

		btrfs_set_header_flag(sb, BTRFS_HEADER_FLAG_WRITTEN);
		csum_tree_block(root, sb, 0);

		bh = __getblk(dev->bdev, BTRFS_SUPER_INFO_OFFSET /
			      root->fs_info->sb->s_blocksize,
			      BTRFS_SUPER_INFO_SIZE);

		read_extent_buffer(sb, bh->b_data, 0, BTRFS_SUPER_INFO_SIZE);
		dev->pending_io = bh;

		get_bh(bh);
		set_buffer_uptodate(bh);
		lock_buffer(bh);
		bh->b_end_io = btrfs_end_buffer_write_sync;

		if (do_barriers && dev->barriers) {
			ret = submit_bh(WRITE_BARRIER, bh);
			if (ret == -EOPNOTSUPP) {
				printk("btrfs: disabling barriers on dev %s\n",
				       dev->name);
				set_buffer_uptodate(bh);
				dev->barriers = 0;
				get_bh(bh);
				lock_buffer(bh);
				ret = submit_bh(WRITE, bh);
			}
		} else {
			ret = submit_bh(WRITE, bh);
		}
		BUG_ON(ret);
	}

	list_for_each(cur, head) {
		dev = list_entry(cur, struct btrfs_device, dev_list);
		BUG_ON(!dev->pending_io);
		bh = dev->pending_io;
		wait_on_buffer(bh);
		if (!buffer_uptodate(dev->pending_io)) {
			if (do_barriers && dev->barriers) {
				printk("btrfs: disabling barriers on dev %s\n",
				       dev->name);
				set_buffer_uptodate(bh);
				get_bh(bh);
				lock_buffer(bh);
				dev->barriers = 0;
				ret = submit_bh(WRITE, bh);
				BUG_ON(ret);
				wait_on_buffer(bh);
				BUG_ON(!buffer_uptodate(bh));
			} else {
				BUG();
			}
		}
		dev->pending_io = NULL;
		brelse(bh);
	}
	return 0;
}

int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root)
{
	int ret;

	ret = write_all_supers(root);
#if 0
	if (!btrfs_test_opt(root, NOBARRIER))
		blkdev_issue_flush(sb->s_bdev, NULL);
	set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, super);
	ret = sync_page_range_nolock(btree_inode, btree_inode->i_mapping,
				     super->start, super->len);
	if (!btrfs_test_opt(root, NOBARRIER))
		blkdev_issue_flush(sb->s_bdev, NULL);
#endif
	return ret;
}

int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
{
	radix_tree_delete(&fs_info->fs_roots_radix,
			  (unsigned long)root->root_key.objectid);
	if (root->in_sysfs)
		btrfs_sysfs_del_root(root);
	if (root->inode)
		iput(root->inode);
	if (root->node)
		free_extent_buffer(root->node);
	if (root->commit_root)
		free_extent_buffer(root->commit_root);
	if (root->name)
		kfree(root->name);
	kfree(root);
	return 0;
}

static int del_fs_roots(struct btrfs_fs_info *fs_info)
{
	int ret;
	struct btrfs_root *gang[8];
	int i;

	while (1) {
		ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
					     (void **)gang, 0,
					     ARRAY_SIZE(gang));
		if (!ret)
			break;
		for (i = 0; i < ret; i++)
			btrfs_free_fs_root(fs_info, gang[i]);
	}
	return 0;
}

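/*
 * Unmount path: commit twice (the second commit drops the original
 * snapshot), write the supers, then tear down the roots, caches,
 * workqueues and devices.
 */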
int close_ctree(struct btrfs_root *root)
{
	int ret;
	struct btrfs_trans_handle *trans;
	struct btrfs_fs_info *fs_info = root->fs_info;

	fs_info->closing = 1;
	btrfs_transaction_flush_work(root);
	mutex_lock(&fs_info->fs_mutex);
	btrfs_defrag_dirty_roots(root->fs_info);
	trans = btrfs_start_transaction(root, 1);
	ret = btrfs_commit_transaction(trans, root);
	/* run commit again to drop the original snapshot */
	trans = btrfs_start_transaction(root, 1);
	btrfs_commit_transaction(trans, root);
	ret = btrfs_write_and_wait_transaction(NULL, root);
	BUG_ON(ret);
	write_ctree_super(NULL, root);
	mutex_unlock(&fs_info->fs_mutex);

	if (fs_info->delalloc_bytes) {
		printk("btrfs: at unmount delalloc count %Lu\n",
		       fs_info->delalloc_bytes);
	}
	if (fs_info->extent_root->node)
		free_extent_buffer(fs_info->extent_root->node);

	if (fs_info->tree_root->node)
		free_extent_buffer(fs_info->tree_root->node);

	if (root->fs_info->chunk_root->node)
		free_extent_buffer(root->fs_info->chunk_root->node);

	if (root->fs_info->dev_root->node)
		free_extent_buffer(root->fs_info->dev_root->node);

	free_extent_buffer(fs_info->sb_buffer);

	btrfs_free_block_groups(root->fs_info);
	del_fs_roots(fs_info);

	filemap_write_and_wait(fs_info->btree_inode->i_mapping);

	extent_io_tree_empty_lru(&fs_info->free_space_cache);
	extent_io_tree_empty_lru(&fs_info->block_group_cache);
	extent_io_tree_empty_lru(&fs_info->pinned_extents);
	extent_io_tree_empty_lru(&fs_info->pending_del);
	extent_io_tree_empty_lru(&fs_info->extent_ins);
	extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree);

	truncate_inode_pages(fs_info->btree_inode->i_mapping, 0);
	flush_workqueue(end_io_workqueue);
	destroy_workqueue(end_io_workqueue);

	flush_workqueue(async_submit_workqueue);
	destroy_workqueue(async_submit_workqueue);

	iput(fs_info->btree_inode);
#if 0
	while (!list_empty(&fs_info->hashers)) {
		struct btrfs_hasher *hasher;
		hasher = list_entry(fs_info->hashers.next, struct btrfs_hasher,
				    hashers);
		list_del(&hasher->hashers);
		crypto_free_hash(&fs_info->hash_tfm);
		kfree(hasher);
	}
#endif
	close_all_devices(fs_info);
	btrfs_mapping_tree_free(&fs_info->mapping_tree);

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23)
	bdi_destroy(&fs_info->bdi);
#endif

	kfree(fs_info->extent_root);
	kfree(fs_info->tree_root);
	kfree(fs_info->chunk_root);
	kfree(fs_info->dev_root);
	return 0;
}

int btrfs_buffer_uptodate(struct extent_buffer *buf)
{
	struct inode *btree_inode = buf->first_page->mapping->host;
	return extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf);
}

int btrfs_set_buffer_uptodate(struct extent_buffer *buf)
{
	struct inode *btree_inode = buf->first_page->mapping->host;
	return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree,
					  buf);
}

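/*
 * Mark a tree block dirty in the btree inode's io_tree.  A header
 * generation that doesn't match the running transaction suggests the
 * block was not properly COWed first, hence the WARN_ON.
 */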
void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
{
	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
	u64 transid = btrfs_header_generation(buf);
	struct inode *btree_inode = root->fs_info->btree_inode;

	if (transid != root->fs_info->generation) {
		printk(KERN_CRIT "transid mismatch buffer %llu, found %Lu running %Lu\n",
		       (unsigned long long)buf->start,
		       transid, root->fs_info->generation);
		WARN_ON(1);
	}
	set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf);
}

void btrfs_throttle(struct btrfs_root *root)
{
	struct backing_dev_info *bdi;

	bdi = root->fs_info->sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
	if (root->fs_info->throttles && bdi_write_congested(bdi)) {
#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
		congestion_wait(WRITE, HZ/20);
#else
		blk_congestion_wait(WRITE, HZ/20);
#endif
	}
}

void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
{
	balance_dirty_pages_ratelimited_nr(
			root->fs_info->btree_inode->i_mapping, 1);
}

void btrfs_set_buffer_defrag(struct extent_buffer *buf)
{
	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
	struct inode *btree_inode = root->fs_info->btree_inode;
	set_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start,
			buf->start + buf->len - 1, EXTENT_DEFRAG, GFP_NOFS);
}

void btrfs_set_buffer_defrag_done(struct extent_buffer *buf)
{
	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
	struct inode *btree_inode = root->fs_info->btree_inode;
	set_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start,
			buf->start + buf->len - 1, EXTENT_DEFRAG_DONE,
			GFP_NOFS);
}

int btrfs_buffer_defrag(struct extent_buffer *buf)
{
	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
	struct inode *btree_inode = root->fs_info->btree_inode;
	return test_range_bit(&BTRFS_I(btree_inode)->io_tree,
			buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, 0);
}

int btrfs_buffer_defrag_done(struct extent_buffer *buf)
{
	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
	struct inode *btree_inode = root->fs_info->btree_inode;
	return test_range_bit(&BTRFS_I(btree_inode)->io_tree,
			buf->start, buf->start + buf->len - 1,
			EXTENT_DEFRAG_DONE, 0);
}

int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf)
{
	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
	struct inode *btree_inode = root->fs_info->btree_inode;
	return clear_extent_bits(&BTRFS_I(btree_inode)->io_tree,
			buf->start, buf->start + buf->len - 1,
			EXTENT_DEFRAG_DONE, GFP_NOFS);
}

int btrfs_clear_buffer_defrag(struct extent_buffer *buf)
{
	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
	struct inode *btree_inode = root->fs_info->btree_inode;
	return clear_extent_bits(&BTRFS_I(btree_inode)->io_tree,
			buf->start, buf->start + buf->len - 1,
			EXTENT_DEFRAG, GFP_NOFS);
}

int btrfs_read_buffer(struct extent_buffer *buf)
{
	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
	int ret;
	ret = btree_read_extent_buffer_pages(root, buf, 0);
	if (ret == 0) {
		buf->flags |= EXTENT_UPTODATE;
	}
	return ret;
}

static struct extent_io_ops btree_extent_io_ops = {
	.writepage_io_hook = btree_writepage_io_hook,
	.readpage_end_io_hook = btree_readpage_end_io_hook,
	.submit_bio_hook = btree_submit_bio_hook,
	/* note we're sharing with inode.c for the merge bio hook */
	.merge_bio_hook = btrfs_merge_bio_hook,
};