/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
20 #include <linux/sched.h>
21 #include <linux/writeback.h>
22 #include <linux/pagemap.h>
25 #include "transaction.h"
/*
 * Sanity counter for transaction structures; put_transaction() warns if it
 * is already zero when the last reference to a transaction is dropped.
 */
static int total_trans = 0;

/* Slab caches for transaction handles and transactions, defined elsewhere. */
extern struct kmem_cache *btrfs_trans_handle_cachep;
extern struct kmem_cache *btrfs_transaction_cachep;

/*
 * Workqueue that runs fs_info->trans_work; created in
 * btrfs_init_transaction_sys() and destroyed in btrfs_exit_transaction_sys().
 */
static struct workqueue_struct *trans_wq;

/*
 * Tags used on fs_info->fs_roots_radix.  Both are set for a root by
 * record_root_in_trans(); the TRANS tag is cleared by add_dirty_roots() and
 * the DEFRAG tag by btrfs_defrag_root().
 */
#define BTRFS_ROOT_TRANS_TAG 0
#define BTRFS_ROOT_DEFRAG_TAG 1
/*
 * Drop one reference on @transaction.
 *
 * Warns if use_count is already zero, then decrements it.  When the count
 * reaches zero the transaction is unlinked from its list, zeroed, and
 * returned to btrfs_transaction_cachep (after warning if total_trans is
 * zero).
 *
 * NOTE(review): this listing is an incomplete extraction - the embedded
 * source line numbers skip 37, 42 and 46-48, so the braces and possibly one
 * statement are absent here.  Restore them from the real file before
 * building.
 */
36 static void put_transaction(struct btrfs_transaction
*transaction
)
38 WARN_ON(transaction
->use_count
== 0);
39 transaction
->use_count
--;
40 if (transaction
->use_count
== 0) {
41 WARN_ON(total_trans
== 0);
43 list_del_init(&transaction
->list
);
44 memset(transaction
, 0, sizeof(*transaction
));
45 kmem_cache_free(btrfs_transaction_cachep
, transaction
);
/*
 * Attach the caller to the filesystem's running transaction.
 *
 * Reads root->fs_info->running_transaction.  The visible statements show two
 * groups of work:
 *  - allocating a transaction from btrfs_transaction_cachep, bumping
 *    fs_info->generation, installing it as the running transaction,
 *    initialising its counters (num_writers = 1, num_joined = 0,
 *    use_count = 1, in_commit = commit_done = 0), its transid (the new
 *    generation), both wait queues and start_time, appending it to
 *    fs_info->trans_list and initialising its dirty_pages extent map tree
 *    on the btree inode's mapping;
 *  - incrementing num_writers and num_joined.
 *
 * NOTE(review): the controlling lines are missing from this listing
 * (embedded numbers skip 53, 55-57, 72-73, 76-80).  Presumably the first
 * group runs only when there is no running transaction and the second
 * otherwise - confirm against the real file.  The return statement is also
 * not visible.
 */
49 static int join_transaction(struct btrfs_root
*root
)
51 struct btrfs_transaction
*cur_trans
;
52 cur_trans
= root
->fs_info
->running_transaction
;
54 cur_trans
= kmem_cache_alloc(btrfs_transaction_cachep
,
58 root
->fs_info
->generation
++;
59 root
->fs_info
->running_transaction
= cur_trans
;
60 cur_trans
->num_writers
= 1;
61 cur_trans
->num_joined
= 0;
62 cur_trans
->transid
= root
->fs_info
->generation
;
63 init_waitqueue_head(&cur_trans
->writer_wait
);
64 init_waitqueue_head(&cur_trans
->commit_wait
);
65 cur_trans
->in_commit
= 0;
66 cur_trans
->use_count
= 1;
67 cur_trans
->commit_done
= 0;
68 cur_trans
->start_time
= get_seconds();
69 list_add_tail(&cur_trans
->list
, &root
->fs_info
->trans_list
);
70 extent_map_tree_init(&cur_trans
->dirty_pages
,
71 root
->fs_info
->btree_inode
->i_mapping
,
74 cur_trans
->num_writers
++;
75 cur_trans
->num_joined
++;
/*
 * Note that @root is being modified in the running transaction.
 *
 * When root->ref_cows is set and root->last_trans is older than the running
 * transaction's transid, the visible code warns if @root is the extent root
 * and, when the root item's refs are non-zero, tags the root in
 * fs_info->fs_roots_radix with both BTRFS_ROOT_TRANS_TAG and
 * BTRFS_ROOT_DEFRAG_TAG, points root->commit_root at root->node and takes
 * an extra reference on that extent buffer.  root->last_trans is set to the
 * running transid.
 *
 * NOTE(review): lines are missing from this listing (embedded numbers skip
 * 82, 95-97, 99-102), so any else-branch, the exact scope of the last_trans
 * assignment, and the return value cannot be confirmed from here.
 */
81 static int record_root_in_trans(struct btrfs_root
*root
)
83 u64 running_trans_id
= root
->fs_info
->running_transaction
->transid
;
84 if (root
->ref_cows
&& root
->last_trans
< running_trans_id
) {
85 WARN_ON(root
== root
->fs_info
->extent_root
);
86 if (root
->root_item
.refs
!= 0) {
87 radix_tree_tag_set(&root
->fs_info
->fs_roots_radix
,
88 (unsigned long)root
->root_key
.objectid
,
89 BTRFS_ROOT_TRANS_TAG
);
90 radix_tree_tag_set(&root
->fs_info
->fs_roots_radix
,
91 (unsigned long)root
->root_key
.objectid
,
92 BTRFS_ROOT_DEFRAG_TAG
);
93 root
->commit_root
= root
->node
;
94 extent_buffer_get(root
->node
);
98 root
->last_trans
= running_trans_id
;
/*
 * Allocate a transaction handle and join the running transaction.
 *
 * The handle comes from btrfs_trans_handle_cachep (GFP_NOFS).  With
 * fs_info->trans_mutex held, the function calls join_transaction() and
 * record_root_in_trans(), then fills the handle with the running
 * transaction and its transid, blocks_reserved = num_blocks, no block
 * group and zeroed alloc_exclude_* fields, and takes an extra use_count
 * reference on the running transaction before unlocking.
 *
 * NOTE(review): the rest of the parameter list (which declares num_blocks),
 * the declaration of ret, any error checks and the return statement are on
 * lines missing from this listing (embedded numbers skip 104-105, 108-109,
 * 112-113, 118, 124-126).
 */
103 struct btrfs_trans_handle
*btrfs_start_transaction(struct btrfs_root
*root
,
106 struct btrfs_trans_handle
*h
=
107 kmem_cache_alloc(btrfs_trans_handle_cachep
, GFP_NOFS
);
110 mutex_lock(&root
->fs_info
->trans_mutex
);
111 ret
= join_transaction(root
);
114 record_root_in_trans(root
);
115 h
->transid
= root
->fs_info
->running_transaction
->transid
;
116 h
->transaction
= root
->fs_info
->running_transaction
;
117 h
->blocks_reserved
= num_blocks
;
119 h
->block_group
= NULL
;
120 h
->alloc_exclude_nr
= 0;
121 h
->alloc_exclude_start
= 0;
122 root
->fs_info
->running_transaction
->use_count
++;
123 mutex_unlock(&root
->fs_info
->trans_mutex
);
/*
 * Release a transaction handle obtained from btrfs_start_transaction().
 *
 * With fs_info->trans_mutex held: warns if the running transaction is not
 * the one recorded in @trans or if it has no writers, decrements
 * num_writers, wakes any sleeper on writer_wait, and drops one reference
 * via put_transaction().  After unlocking, the handle is zeroed and
 * returned to btrfs_trans_handle_cachep, so @trans must not be used
 * afterwards.
 *
 * NOTE(review): the braces and the return statement are on lines missing
 * from this listing (embedded numbers skip 129, 131, 143-146).
 */
127 int btrfs_end_transaction(struct btrfs_trans_handle
*trans
,
128 struct btrfs_root
*root
)
130 struct btrfs_transaction
*cur_trans
;
132 mutex_lock(&root
->fs_info
->trans_mutex
);
133 cur_trans
= root
->fs_info
->running_transaction
;
134 WARN_ON(cur_trans
!= trans
->transaction
);
135 WARN_ON(cur_trans
->num_writers
< 1);
136 cur_trans
->num_writers
--;
137 if (waitqueue_active(&cur_trans
->writer_wait
))
138 wake_up(&cur_trans
->writer_wait
);
139 put_transaction(cur_trans
);
140 mutex_unlock(&root
->fs_info
->trans_mutex
);
141 memset(trans
, 0, sizeof(*trans
));
142 kmem_cache_free(btrfs_trans_handle_cachep
, trans
);
/*
 * Write out the btree pages dirtied by a transaction and wait for the I/O.
 *
 * If @trans or trans->transaction is NULL the function falls back to
 * filemap_write_and_wait() on the btree inode's mapping.  Otherwise it
 * works from the transaction's dirty_pages extent map tree: ranges are
 * located with find_first_extent_bit(), their dirty state is cleared, and
 * each page index covered by [start, end] is looked up with
 * find_lock_page(), waited on if it is under writeback, written with
 * write_one_page() and released.  filemap_fdatawait() is called on the
 * mapping at the end.
 *
 * NOTE(review): many lines are missing from this listing (local
 * declarations, the outer loop, NULL-page and error handling, the return),
 * so the exact loop structure and how err/ret are combined cannot be
 * confirmed from here.
 */
147 int btrfs_write_and_wait_transaction(struct btrfs_trans_handle
*trans
,
148 struct btrfs_root
*root
)
153 struct extent_map_tree
*dirty_pages
;
155 struct inode
*btree_inode
= root
->fs_info
->btree_inode
;
160 if (!trans
|| !trans
->transaction
) {
161 return filemap_write_and_wait(btree_inode
->i_mapping
);
163 dirty_pages
= &trans
->transaction
->dirty_pages
;
165 ret
= find_first_extent_bit(dirty_pages
, 0, &start
, &end
,
169 clear_extent_dirty(dirty_pages
, start
, end
, GFP_NOFS
);
170 while(start
<= end
) {
171 index
= start
>> PAGE_CACHE_SHIFT
;
172 start
= (index
+ 1) << PAGE_CACHE_SHIFT
;
173 page
= find_lock_page(btree_inode
->i_mapping
, index
);
176 if (PageWriteback(page
)) {
178 wait_on_page_writeback(page
);
181 page_cache_release(page
);
185 err
= write_one_page(page
, 0);
188 page_cache_release(page
);
191 err
= filemap_fdatawait(btree_inode
->i_mapping
);
/*
 * Bring the extent root's item in the tree root up to date for commit.
 *
 * The visible code writes dirty block groups, reads the byte number
 * recorded in the extent root's root item and compares it with the current
 * extent_root->node->start, stores the current node start and header level
 * into the root item, updates the item in the tree root with
 * btrfs_update_root(), and writes dirty block groups again.
 *
 * NOTE(review): the loop construct, the statement executed when the
 * recorded and current byte numbers match (presumably the loop exit),
 * error checks and the return are on lines missing from this listing
 * (embedded numbers skip 199-200, 205, 207, 210, 218, 220-223).
 */
197 int btrfs_commit_tree_roots(struct btrfs_trans_handle
*trans
,
198 struct btrfs_root
*root
)
201 u64 old_extent_block
;
202 struct btrfs_fs_info
*fs_info
= root
->fs_info
;
203 struct btrfs_root
*tree_root
= fs_info
->tree_root
;
204 struct btrfs_root
*extent_root
= fs_info
->extent_root
;
206 btrfs_write_dirty_block_groups(trans
, extent_root
);
208 old_extent_block
= btrfs_root_bytenr(&extent_root
->root_item
);
209 if (old_extent_block
== extent_root
->node
->start
)
211 btrfs_set_root_bytenr(&extent_root
->root_item
,
212 extent_root
->node
->start
);
213 btrfs_set_root_level(&extent_root
->root_item
,
214 btrfs_header_level(extent_root
->node
));
215 ret
= btrfs_update_root(trans
, tree_root
,
216 &extent_root
->root_key
,
217 &extent_root
->root_item
);
219 btrfs_write_dirty_block_groups(trans
, extent_root
);
/*
 * Sleep until @commit has its commit_done flag set.
 *
 * Takes fs_info->trans_mutex and loops while commit_done is clear: the task
 * is queued on commit->commit_wait in TASK_UNINTERRUPTIBLE state,
 * commit_done is re-checked, and the mutex is dropped and retaken around
 * the sleep.  After the loop the mutex is released and finish_wait() is
 * called.
 *
 * NOTE(review): the declaration of the wait entry, the statement after the
 * re-check (presumably a break), the call that actually sleeps between the
 * unlock and the lock, and the return are on lines missing from this
 * listing (embedded numbers skip 226-227, 233, 235, 237, 240-243).
 */
224 static int wait_for_commit(struct btrfs_root
*root
,
225 struct btrfs_transaction
*commit
)
228 mutex_lock(&root
->fs_info
->trans_mutex
);
229 while(!commit
->commit_done
) {
230 prepare_to_wait(&commit
->commit_wait
, &wait
,
231 TASK_UNINTERRUPTIBLE
);
232 if (commit
->commit_done
)
234 mutex_unlock(&root
->fs_info
->trans_mutex
);
236 mutex_lock(&root
->fs_info
->trans_mutex
);
238 mutex_unlock(&root
->fs_info
->trans_mutex
);
239 finish_wait(&commit
->commit_wait
, &wait
);
244 struct list_head list
;
245 struct btrfs_root
*root
;
246 struct btrfs_root
*latest_root
;
/*
 * Queue a root on @dead_list for later cleanup.
 *
 * Allocates a struct dirty_root with GFP_NOFS, records @latest as its
 * latest_root and adds the entry to the head of @dead_list.
 *
 * NOTE(review): lines are missing from this listing (embedded numbers skip
 * 252, 254, 256-258, 261-263); the allocation-failure check, the assignment
 * of dirty->root (presumably from @root) and the return value are not
 * visible here.
 */
249 int btrfs_add_dead_root(struct btrfs_root
*root
,
250 struct btrfs_root
*latest
,
251 struct list_head
*dead_list
)
253 struct dirty_root
*dirty
;
255 dirty
= kmalloc(sizeof(*dirty
), GFP_NOFS
);
259 dirty
->latest_root
= latest
;
260 list_add(&dirty
->list
, dead_list
);
/*
 * Process every root tagged BTRFS_ROOT_TRANS_TAG in @radix at commit time.
 *
 * Roots are fetched in batches of up to 8 with
 * radix_tree_gang_lookup_tag(), and the TRANS tag is cleared for each one.
 *
 * If root->commit_root still equals root->node, the code warns when the
 * node start disagrees with the byte number in the root item, drops the
 * commit_root reference and clears it; a btrfs_update_root() call on the
 * tree root follows.
 *
 * The remaining visible statements allocate a dirty_root and a copy of the
 * root (BUG_ON if the copy cannot be allocated), reset drop_progress and
 * drop_level in the root item, copy the root into dirty->root with its node
 * pointing at the old commit_root, remember the live root as latest_root
 * and clear root->commit_root.  The live root's key offset is set to
 * fs_info->generation, its item's bytenr/level are refreshed and it is
 * inserted into the tree root.  The old copy's refs are decremented and its
 * item updated, and the entry is added to @list.
 *
 * NOTE(review): many lines are missing from this listing (loop header,
 * else-branches, error checks, the condition guarding list_add, the
 * return), so the nesting described above is inferred from statement order
 * and must be confirmed against the real file.  The in-body comment about
 * updating the root on disk has lost its closing lines.
 */
264 static int add_dirty_roots(struct btrfs_trans_handle
*trans
,
265 struct radix_tree_root
*radix
,
266 struct list_head
*list
)
268 struct dirty_root
*dirty
;
269 struct btrfs_root
*gang
[8];
270 struct btrfs_root
*root
;
277 ret
= radix_tree_gang_lookup_tag(radix
, (void **)gang
, 0,
279 BTRFS_ROOT_TRANS_TAG
);
282 for (i
= 0; i
< ret
; i
++) {
284 radix_tree_tag_clear(radix
,
285 (unsigned long)root
->root_key
.objectid
,
286 BTRFS_ROOT_TRANS_TAG
);
287 if (root
->commit_root
== root
->node
) {
288 WARN_ON(root
->node
->start
!=
289 btrfs_root_bytenr(&root
->root_item
));
290 free_extent_buffer(root
->commit_root
);
291 root
->commit_root
= NULL
;
293 /* make sure to update the root on disk
294 * so we get any updates to the block used
297 err
= btrfs_update_root(trans
,
298 root
->fs_info
->tree_root
,
303 dirty
= kmalloc(sizeof(*dirty
), GFP_NOFS
);
305 dirty
->root
= kmalloc(sizeof(*dirty
->root
), GFP_NOFS
);
306 BUG_ON(!dirty
->root
);
308 memset(&root
->root_item
.drop_progress
, 0,
309 sizeof(struct btrfs_disk_key
));
310 root
->root_item
.drop_level
= 0;
312 memcpy(dirty
->root
, root
, sizeof(*root
));
313 dirty
->root
->node
= root
->commit_root
;
314 dirty
->latest_root
= root
;
315 root
->commit_root
= NULL
;
317 root
->root_key
.offset
= root
->fs_info
->generation
;
318 btrfs_set_root_bytenr(&root
->root_item
,
320 btrfs_set_root_level(&root
->root_item
,
321 btrfs_header_level(root
->node
));
322 err
= btrfs_insert_root(trans
, root
->fs_info
->tree_root
,
328 refs
= btrfs_root_refs(&dirty
->root
->root_item
);
329 btrfs_set_root_refs(&dirty
->root
->root_item
, refs
- 1);
330 err
= btrfs_update_root(trans
, root
->fs_info
->tree_root
,
331 &dirty
->root
->root_key
,
332 &dirty
->root
->root_item
);
336 list_add(&dirty
->list
, list
);
/*
 * Defragment the leaves of @root, one transaction at a time.
 *
 * root->defrag_running is tested on entry.  The visible code starts a
 * transaction, sets defrag_running, calls btrfs_defrag_leaves() with
 * @cacheonly, records trans->blocks_used, ends the transaction, drops
 * info->fs_mutex, calls btrfs_btree_balance_dirty() on the tree root,
 * retakes fs_mutex and starts another transaction.  Finally defrag_running
 * is cleared, the root's BTRFS_ROOT_DEFRAG_TAG is removed from
 * info->fs_roots_radix and the transaction is ended.
 *
 * NOTE(review): fs_mutex is unlocked here without a visible lock, so the
 * caller presumably holds it on entry - confirm.  The early-return for an
 * already-running defrag, the loop and its exit test, and the return value
 * are on lines missing from this listing.
 */
347 int btrfs_defrag_root(struct btrfs_root
*root
, int cacheonly
)
349 struct btrfs_fs_info
*info
= root
->fs_info
;
351 struct btrfs_trans_handle
*trans
;
354 if (root
->defrag_running
)
357 trans
= btrfs_start_transaction(root
, 1);
359 root
->defrag_running
= 1;
360 ret
= btrfs_defrag_leaves(trans
, root
, cacheonly
);
361 nr
= trans
->blocks_used
;
362 btrfs_end_transaction(trans
, root
);
363 mutex_unlock(&info
->fs_mutex
);
365 btrfs_btree_balance_dirty(info
->tree_root
, nr
);
368 mutex_lock(&info
->fs_mutex
);
369 trans
= btrfs_start_transaction(root
, 1);
373 root
->defrag_running
= 0;
374 radix_tree_tag_clear(&info
->fs_roots_radix
,
375 (unsigned long)root
->root_key
.objectid
,
376 BTRFS_ROOT_DEFRAG_TAG
);
377 btrfs_end_transaction(trans
, root
);
/*
 * Run a cache-only defrag on every root tagged BTRFS_ROOT_DEFRAG_TAG.
 *
 * Tagged roots are looked up in info->fs_roots_radix into a one-element
 * gang array; for each result the next search position is advanced past
 * the root's objectid and btrfs_defrag_root(root, 1) is called.  A call for
 * the extent root is present but commented out.
 *
 * NOTE(review): the outer loop, the remaining lookup arguments, local
 * declarations and the return are on lines missing from this listing
 * (embedded numbers skip 382, 385-390, 392-393, 395-396, 398, 401-402,
 * 404-406).
 */
381 int btrfs_defrag_dirty_roots(struct btrfs_fs_info
*info
)
383 struct btrfs_root
*gang
[1];
384 struct btrfs_root
*root
;
391 ret
= radix_tree_gang_lookup_tag(&info
->fs_roots_radix
,
394 BTRFS_ROOT_DEFRAG_TAG
);
397 for (i
= 0; i
< ret
; i
++) {
399 last
= root
->root_key
.objectid
+ 1;
400 btrfs_defrag_root(root
, 1);
403 // btrfs_defrag_root(info->extent_root, 1);
/*
 * Drop every old root queued on @list.
 *
 * While the list is non-empty, with tree_root->fs_info->fs_mutex held, the
 * first dirty_root is unlinked, the bytes used by its old root are sampled
 * and its latest_root is remembered.  A transaction is started on
 * @tree_root and btrfs_drop_snapshot() is called on the old root; ret is
 * compared against -EAGAIN.  The visible statements that follow update the
 * old root's item, end the transaction, and drop fs_mutex around
 * btrfs_btree_balance_dirty() before retaking it.
 *
 * Afterwards num_bytes becomes the difference in the old root's used bytes,
 * the latest root is recorded in the transaction and its used-bytes count
 * is reduced by that amount, the old root's item is deleted with
 * btrfs_del_root(), the transaction is ended, the old root's node reference
 * is dropped, fs_mutex is released and btrfs_btree_balance_dirty() is
 * called.
 *
 * NOTE(review): the inner retry loop, the action taken when ret is not
 * -EAGAIN, error checks, the freeing of the dirty_root and the return are
 * on lines missing from this listing; the flow above is inferred from
 * statement order and must be confirmed against the real file.
 */
407 static int drop_dirty_roots(struct btrfs_root
*tree_root
,
408 struct list_head
*list
)
410 struct dirty_root
*dirty
;
411 struct btrfs_trans_handle
*trans
;
418 while(!list_empty(list
)) {
419 struct btrfs_root
*root
;
421 mutex_lock(&tree_root
->fs_info
->fs_mutex
);
422 dirty
= list_entry(list
->next
, struct dirty_root
, list
);
423 list_del_init(&dirty
->list
);
425 num_bytes
= btrfs_root_used(&dirty
->root
->root_item
);
426 root
= dirty
->latest_root
;
429 trans
= btrfs_start_transaction(tree_root
, 1);
430 ret
= btrfs_drop_snapshot(trans
, dirty
->root
);
431 if (ret
!= -EAGAIN
) {
435 err
= btrfs_update_root(trans
,
437 &dirty
->root
->root_key
,
438 &dirty
->root
->root_item
);
441 nr
= trans
->blocks_used
;
442 ret
= btrfs_end_transaction(trans
, tree_root
);
444 mutex_unlock(&tree_root
->fs_info
->fs_mutex
);
445 btrfs_btree_balance_dirty(tree_root
, nr
);
448 mutex_lock(&tree_root
->fs_info
->fs_mutex
);
452 num_bytes
-= btrfs_root_used(&dirty
->root
->root_item
);
453 bytes_used
= btrfs_root_used(&root
->root_item
);
455 record_root_in_trans(root
);
456 btrfs_set_root_used(&root
->root_item
,
457 bytes_used
- num_bytes
);
459 ret
= btrfs_del_root(trans
, tree_root
, &dirty
->root
->root_key
);
464 nr
= trans
->blocks_used
;
465 ret
= btrfs_end_transaction(trans
, tree_root
);
468 free_extent_buffer(dirty
->root
->node
);
471 mutex_unlock(&tree_root
->fs_info
->fs_mutex
);
473 btrfs_btree_balance_dirty(tree_root
, nr
);
/*
 * Commit the transaction that @trans belongs to.
 *
 * Visible steps, in order:
 *  - Initialise a private pinned_copy extent map tree and an empty
 *    dirty_fs_roots list, then take fs_info->trans_mutex.
 *  - If the transaction is already in_commit: take a reference on it,
 *    end our own handle, drop fs_mutex, wait_for_commit(), drop the
 *    reference under trans_mutex and retake fs_mutex.
 *  - Otherwise set in_commit.  If an earlier transaction precedes this one
 *    on fs_info->trans_list and is not commit_done, reference it, drop both
 *    mutexes, wait for it, retake them and drop the reference.
 *  - Wait for other writers: record num_joined, queue on writer_wait, sleep
 *    via schedule_timeout() with both mutexes dropped (timeout starts at 1
 *    and becomes MAX_SCHEDULE_TIMEOUT when num_writers > 1), and repeat
 *    while num_writers > 1 or num_joined changed.
 *  - Call add_dirty_roots() and btrfs_commit_tree_roots(), clear
 *    fs_info->running_transaction, update generation/root/root level in
 *    super_copy, copy super_copy into sb_buffer and snapshot the pinned
 *    extents with btrfs_copy_pinned().
 *  - With both mutexes dropped, write the transaction's dirty pages and the
 *    super block; retake fs_mutex and call btrfs_finish_extent_commit().
 *  - Under trans_mutex: set commit_done, record last_trans_committed, wake
 *    commit_wait, and call put_transaction() twice.
 *  - Splice between fs_info->dead_roots and dirty_fs_roots depending on
 *    fs_info->closing, free the handle, and when closing run
 *    drop_dirty_roots() with fs_mutex released.
 *
 * NOTE(review): fs_mutex is released and retaken here but never initially
 * acquired in the visible lines, so the caller presumably holds it.  Many
 * lines are missing from this listing (declarations such as the wait entry
 * and ret, the "do {" of the writer loop, else-branches, error checks,
 * early return, final return); the branch structure above is inferred from
 * statement order and must be confirmed against the real file.
 */
479 int btrfs_commit_transaction(struct btrfs_trans_handle
*trans
,
480 struct btrfs_root
*root
)
482 unsigned long joined
= 0;
483 unsigned long timeout
= 1;
484 struct btrfs_transaction
*cur_trans
;
485 struct btrfs_transaction
*prev_trans
= NULL
;
486 struct list_head dirty_fs_roots
;
487 struct extent_map_tree pinned_copy
;
491 extent_map_tree_init(&pinned_copy
,
492 root
->fs_info
->btree_inode
->i_mapping
, GFP_NOFS
);
493 INIT_LIST_HEAD(&dirty_fs_roots
);
495 mutex_lock(&root
->fs_info
->trans_mutex
);
496 if (trans
->transaction
->in_commit
) {
497 cur_trans
= trans
->transaction
;
498 trans
->transaction
->use_count
++;
499 mutex_unlock(&root
->fs_info
->trans_mutex
);
500 btrfs_end_transaction(trans
, root
);
502 mutex_unlock(&root
->fs_info
->fs_mutex
);
503 ret
= wait_for_commit(root
, cur_trans
);
506 mutex_lock(&root
->fs_info
->trans_mutex
);
507 put_transaction(cur_trans
);
508 mutex_unlock(&root
->fs_info
->trans_mutex
);
510 mutex_lock(&root
->fs_info
->fs_mutex
);
513 trans
->transaction
->in_commit
= 1;
514 cur_trans
= trans
->transaction
;
515 if (cur_trans
->list
.prev
!= &root
->fs_info
->trans_list
) {
516 prev_trans
= list_entry(cur_trans
->list
.prev
,
517 struct btrfs_transaction
, list
);
518 if (!prev_trans
->commit_done
) {
519 prev_trans
->use_count
++;
520 mutex_unlock(&root
->fs_info
->fs_mutex
);
521 mutex_unlock(&root
->fs_info
->trans_mutex
);
523 wait_for_commit(root
, prev_trans
);
525 mutex_lock(&root
->fs_info
->fs_mutex
);
526 mutex_lock(&root
->fs_info
->trans_mutex
);
527 put_transaction(prev_trans
);
532 joined
= cur_trans
->num_joined
;
533 WARN_ON(cur_trans
!= trans
->transaction
);
534 prepare_to_wait(&cur_trans
->writer_wait
, &wait
,
535 TASK_UNINTERRUPTIBLE
);
537 if (cur_trans
->num_writers
> 1)
538 timeout
= MAX_SCHEDULE_TIMEOUT
;
542 mutex_unlock(&root
->fs_info
->fs_mutex
);
543 mutex_unlock(&root
->fs_info
->trans_mutex
);
545 schedule_timeout(timeout
);
547 mutex_lock(&root
->fs_info
->fs_mutex
);
548 mutex_lock(&root
->fs_info
->trans_mutex
);
549 finish_wait(&cur_trans
->writer_wait
, &wait
);
550 } while (cur_trans
->num_writers
> 1 ||
551 (cur_trans
->num_joined
!= joined
));
553 WARN_ON(cur_trans
!= trans
->transaction
);
554 ret
= add_dirty_roots(trans
, &root
->fs_info
->fs_roots_radix
,
558 ret
= btrfs_commit_tree_roots(trans
, root
);
561 cur_trans
= root
->fs_info
->running_transaction
;
562 root
->fs_info
->running_transaction
= NULL
;
563 btrfs_set_super_generation(&root
->fs_info
->super_copy
,
565 btrfs_set_super_root(&root
->fs_info
->super_copy
,
566 root
->fs_info
->tree_root
->node
->start
);
567 btrfs_set_super_root_level(&root
->fs_info
->super_copy
,
568 btrfs_header_level(root
->fs_info
->tree_root
->node
));
570 write_extent_buffer(root
->fs_info
->sb_buffer
,
571 &root
->fs_info
->super_copy
, 0,
572 sizeof(root
->fs_info
->super_copy
));
574 btrfs_copy_pinned(root
, &pinned_copy
);
576 mutex_unlock(&root
->fs_info
->trans_mutex
);
577 mutex_unlock(&root
->fs_info
->fs_mutex
);
578 ret
= btrfs_write_and_wait_transaction(trans
, root
);
580 write_ctree_super(trans
, root
);
581 mutex_lock(&root
->fs_info
->fs_mutex
);
582 btrfs_finish_extent_commit(trans
, root
, &pinned_copy
);
583 mutex_lock(&root
->fs_info
->trans_mutex
);
584 cur_trans
->commit_done
= 1;
585 root
->fs_info
->last_trans_committed
= cur_trans
->transid
;
586 wake_up(&cur_trans
->commit_wait
);
587 put_transaction(cur_trans
);
588 put_transaction(cur_trans
);
590 if (root
->fs_info
->closing
)
591 list_splice_init(&root
->fs_info
->dead_roots
, &dirty_fs_roots
);
593 list_splice_init(&dirty_fs_roots
, &root
->fs_info
->dead_roots
);
595 mutex_unlock(&root
->fs_info
->trans_mutex
);
596 kmem_cache_free(btrfs_trans_handle_cachep
, trans
);
598 if (root
->fs_info
->closing
) {
599 mutex_unlock(&root
->fs_info
->fs_mutex
);
600 drop_dirty_roots(root
->fs_info
->tree_root
, &dirty_fs_roots
);
601 mutex_lock(&root
->fs_info
->fs_mutex
);
/*
 * Drop all roots currently queued on fs_info->dead_roots.
 *
 * The dead_roots list is moved onto a local list while holding
 * fs_info->trans_mutex; if anything was queued, drop_dirty_roots() is
 * called on the local list after the mutex has been released.
 *
 * NOTE(review): the braces and the return statement are on lines missing
 * from this listing (embedded numbers skip 607, 610, 614, 617-619).
 */
606 int btrfs_clean_old_snapshots(struct btrfs_root
*root
)
608 struct list_head dirty_roots
;
609 INIT_LIST_HEAD(&dirty_roots
);
611 mutex_lock(&root
->fs_info
->trans_mutex
);
612 list_splice_init(&root
->fs_info
->dead_roots
, &dirty_roots
);
613 mutex_unlock(&root
->fs_info
->trans_mutex
);
615 if (!list_empty(&dirty_roots
)) {
616 drop_dirty_roots(root
, &dirty_roots
);
/*
 * Delayed-work handler that periodically commits the running transaction.
 *
 * Recovers the btrfs_fs_info from @work with container_of() and operates on
 * its tree root.  With fs_mutex and trans_mutex held it reads the running
 * transaction.  A transaction whose start_time is in the future, or less
 * than 30 seconds before "now", is left alone (trans_mutex is released on
 * that path).  Otherwise trans_mutex is released, tagged roots are
 * defragmented, and a new handle is started and committed.  fs_mutex is
 * then released, old snapshots are cleaned, and the work is requeued with a
 * delay of HZ * 30.
 *
 * NOTE(review): the member name passed to container_of(), the declaration
 * and assignment of "now", the check for no running transaction, the jump
 * targets and labels are on lines missing from this listing; which
 * statements are skipped on the early-exit paths cannot be confirmed from
 * here.
 */
620 void btrfs_transaction_cleaner(struct work_struct
*work
)
622 struct btrfs_fs_info
*fs_info
= container_of(work
,
623 struct btrfs_fs_info
,
626 struct btrfs_root
*root
= fs_info
->tree_root
;
627 struct btrfs_transaction
*cur
;
628 struct btrfs_trans_handle
*trans
;
630 unsigned long delay
= HZ
* 30;
633 mutex_lock(&root
->fs_info
->fs_mutex
);
634 mutex_lock(&root
->fs_info
->trans_mutex
);
635 cur
= root
->fs_info
->running_transaction
;
637 mutex_unlock(&root
->fs_info
->trans_mutex
);
641 if (now
< cur
->start_time
|| now
- cur
->start_time
< 30) {
642 mutex_unlock(&root
->fs_info
->trans_mutex
);
646 mutex_unlock(&root
->fs_info
->trans_mutex
);
647 btrfs_defrag_dirty_roots(root
->fs_info
);
648 trans
= btrfs_start_transaction(root
, 1);
649 ret
= btrfs_commit_transaction(trans
, root
);
651 mutex_unlock(&root
->fs_info
->fs_mutex
);
652 btrfs_clean_old_snapshots(root
);
653 btrfs_transaction_queue_work(root
, delay
);
656 void btrfs_transaction_queue_work(struct btrfs_root
*root
, int delay
)
658 queue_delayed_work(trans_wq
, &root
->fs_info
->trans_work
, delay
);
661 void btrfs_transaction_flush_work(struct btrfs_root
*root
)
663 cancel_rearming_delayed_workqueue(trans_wq
, &root
->fs_info
->trans_work
);
664 flush_workqueue(trans_wq
);
667 void __init
btrfs_init_transaction_sys(void)
669 trans_wq
= create_workqueue("btrfs");
672 void __exit
btrfs_exit_transaction_sys(void)
674 destroy_workqueue(trans_wq
);