/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
20 #include <linux/sched.h>
23 #include "transaction.h"
/* Count of live struct btrfs_transaction objects; only consulted by the
 * WARN_ON sanity checks in put_transaction(). */
static int total_trans = 0;

/* Slab caches for transaction handles/transactions, defined elsewhere. */
extern struct kmem_cache *btrfs_trans_handle_cachep;
extern struct kmem_cache *btrfs_transaction_cachep;

/* Workqueue that runs the periodic transaction cleaner (see
 * btrfs_transaction_queue_work / btrfs_transaction_cleaner below). */
static struct workqueue_struct *trans_wq;

/* Radix-tree tags set on fs roots in fs_info->fs_roots_radix: roots touched
 * by the running transaction, and roots that want defragging. */
#define BTRFS_ROOT_TRANS_TAG 0
#define BTRFS_ROOT_DEFRAG_TAG 1
34 static void put_transaction(struct btrfs_transaction
*transaction
)
36 WARN_ON(transaction
->use_count
== 0);
37 transaction
->use_count
--;
38 if (transaction
->use_count
== 0) {
39 WARN_ON(total_trans
== 0);
41 list_del_init(&transaction
->list
);
42 memset(transaction
, 0, sizeof(*transaction
));
43 kmem_cache_free(btrfs_transaction_cachep
, transaction
);
47 static int join_transaction(struct btrfs_root
*root
)
49 struct btrfs_transaction
*cur_trans
;
50 cur_trans
= root
->fs_info
->running_transaction
;
52 cur_trans
= kmem_cache_alloc(btrfs_transaction_cachep
,
56 root
->fs_info
->generation
++;
57 root
->fs_info
->running_transaction
= cur_trans
;
58 cur_trans
->num_writers
= 1;
59 cur_trans
->num_joined
= 0;
60 cur_trans
->transid
= root
->fs_info
->generation
;
61 init_waitqueue_head(&cur_trans
->writer_wait
);
62 init_waitqueue_head(&cur_trans
->commit_wait
);
63 cur_trans
->in_commit
= 0;
64 cur_trans
->use_count
= 1;
65 cur_trans
->commit_done
= 0;
66 cur_trans
->start_time
= get_seconds();
67 list_add_tail(&cur_trans
->list
, &root
->fs_info
->trans_list
);
68 init_bit_radix(&cur_trans
->dirty_pages
);
70 cur_trans
->num_writers
++;
71 cur_trans
->num_joined
++;
77 static int record_root_in_trans(struct btrfs_root
*root
)
79 u64 running_trans_id
= root
->fs_info
->running_transaction
->transid
;
80 if (root
->ref_cows
&& root
->last_trans
< running_trans_id
) {
81 WARN_ON(root
== root
->fs_info
->extent_root
);
82 if (root
->root_item
.refs
!= 0) {
83 radix_tree_tag_set(&root
->fs_info
->fs_roots_radix
,
84 (unsigned long)root
->root_key
.objectid
,
85 BTRFS_ROOT_TRANS_TAG
);
86 radix_tree_tag_set(&root
->fs_info
->fs_roots_radix
,
87 (unsigned long)root
->root_key
.objectid
,
88 BTRFS_ROOT_DEFRAG_TAG
);
89 root
->commit_root
= root
->node
;
94 root
->last_trans
= running_trans_id
;
99 struct btrfs_trans_handle
*btrfs_start_transaction(struct btrfs_root
*root
,
102 struct btrfs_trans_handle
*h
=
103 kmem_cache_alloc(btrfs_trans_handle_cachep
, GFP_NOFS
);
106 mutex_lock(&root
->fs_info
->trans_mutex
);
107 ret
= join_transaction(root
);
110 record_root_in_trans(root
);
111 h
->transid
= root
->fs_info
->running_transaction
->transid
;
112 h
->transaction
= root
->fs_info
->running_transaction
;
113 h
->blocks_reserved
= num_blocks
;
115 h
->block_group
= NULL
;
116 h
->alloc_exclude_nr
= 0;
117 h
->alloc_exclude_start
= 0;
118 root
->fs_info
->running_transaction
->use_count
++;
119 mutex_unlock(&root
->fs_info
->trans_mutex
);
123 int btrfs_end_transaction(struct btrfs_trans_handle
*trans
,
124 struct btrfs_root
*root
)
126 struct btrfs_transaction
*cur_trans
;
128 mutex_lock(&root
->fs_info
->trans_mutex
);
129 cur_trans
= root
->fs_info
->running_transaction
;
130 WARN_ON(cur_trans
!= trans
->transaction
);
131 WARN_ON(cur_trans
->num_writers
< 1);
132 cur_trans
->num_writers
--;
133 if (waitqueue_active(&cur_trans
->writer_wait
))
134 wake_up(&cur_trans
->writer_wait
);
135 put_transaction(cur_trans
);
136 mutex_unlock(&root
->fs_info
->trans_mutex
);
137 memset(trans
, 0, sizeof(*trans
));
138 kmem_cache_free(btrfs_trans_handle_cachep
, trans
);
143 int btrfs_write_and_wait_transaction(struct btrfs_trans_handle
*trans
,
144 struct btrfs_root
*root
)
146 unsigned long gang
[16];
152 struct radix_tree_root
*dirty_pages
;
153 struct inode
*btree_inode
= root
->fs_info
->btree_inode
;
155 if (!trans
|| !trans
->transaction
) {
156 return filemap_write_and_wait(btree_inode
->i_mapping
);
158 dirty_pages
= &trans
->transaction
->dirty_pages
;
160 ret
= find_first_radix_bit(dirty_pages
, gang
,
161 0, ARRAY_SIZE(gang
));
164 for (i
= 0; i
< ret
; i
++) {
166 clear_radix_bit(dirty_pages
, gang
[i
]);
167 page
= find_lock_page(btree_inode
->i_mapping
,
171 if (PageWriteback(page
)) {
173 wait_on_page_writeback(page
);
176 page_cache_release(page
);
180 err
= write_one_page(page
, 0);
183 page_cache_release(page
);
186 err
= filemap_fdatawait(btree_inode
->i_mapping
);
192 int btrfs_commit_tree_roots(struct btrfs_trans_handle
*trans
,
193 struct btrfs_root
*root
)
196 u64 old_extent_block
;
197 struct btrfs_fs_info
*fs_info
= root
->fs_info
;
198 struct btrfs_root
*tree_root
= fs_info
->tree_root
;
199 struct btrfs_root
*extent_root
= fs_info
->extent_root
;
201 btrfs_write_dirty_block_groups(trans
, extent_root
);
203 old_extent_block
= btrfs_root_blocknr(&extent_root
->root_item
);
204 if (old_extent_block
== bh_blocknr(extent_root
->node
))
206 btrfs_set_root_blocknr(&extent_root
->root_item
,
207 bh_blocknr(extent_root
->node
));
208 ret
= btrfs_update_root(trans
, tree_root
,
209 &extent_root
->root_key
,
210 &extent_root
->root_item
);
212 btrfs_write_dirty_block_groups(trans
, extent_root
);
217 static int wait_for_commit(struct btrfs_root
*root
,
218 struct btrfs_transaction
*commit
)
221 mutex_lock(&root
->fs_info
->trans_mutex
);
222 while(!commit
->commit_done
) {
223 prepare_to_wait(&commit
->commit_wait
, &wait
,
224 TASK_UNINTERRUPTIBLE
);
225 if (commit
->commit_done
)
227 mutex_unlock(&root
->fs_info
->trans_mutex
);
229 mutex_lock(&root
->fs_info
->trans_mutex
);
231 mutex_unlock(&root
->fs_info
->trans_mutex
);
232 finish_wait(&commit
->commit_wait
, &wait
);
237 struct list_head list
;
238 struct btrfs_root
*root
;
239 struct btrfs_root
*latest_root
;
242 int btrfs_add_dead_root(struct btrfs_root
*root
,
243 struct btrfs_root
*latest
,
244 struct list_head
*dead_list
)
246 struct dirty_root
*dirty
;
248 dirty
= kmalloc(sizeof(*dirty
), GFP_NOFS
);
252 dirty
->latest_root
= latest
;
253 list_add(&dirty
->list
, dead_list
);
257 static int add_dirty_roots(struct btrfs_trans_handle
*trans
,
258 struct radix_tree_root
*radix
,
259 struct list_head
*list
)
261 struct dirty_root
*dirty
;
262 struct btrfs_root
*gang
[8];
263 struct btrfs_root
*root
;
270 ret
= radix_tree_gang_lookup_tag(radix
, (void **)gang
, 0,
272 BTRFS_ROOT_TRANS_TAG
);
275 for (i
= 0; i
< ret
; i
++) {
277 radix_tree_tag_clear(radix
,
278 (unsigned long)root
->root_key
.objectid
,
279 BTRFS_ROOT_TRANS_TAG
);
280 if (root
->commit_root
== root
->node
) {
281 WARN_ON(bh_blocknr(root
->node
) !=
282 btrfs_root_blocknr(&root
->root_item
));
283 brelse(root
->commit_root
);
284 root
->commit_root
= NULL
;
286 /* make sure to update the root on disk
287 * so we get any updates to the block used
290 err
= btrfs_update_root(trans
,
291 root
->fs_info
->tree_root
,
296 dirty
= kmalloc(sizeof(*dirty
), GFP_NOFS
);
298 dirty
->root
= kmalloc(sizeof(*dirty
->root
), GFP_NOFS
);
299 BUG_ON(!dirty
->root
);
301 memset(&root
->root_item
.drop_progress
, 0,
302 sizeof(struct btrfs_disk_key
));
303 root
->root_item
.drop_level
= 0;
305 memcpy(dirty
->root
, root
, sizeof(*root
));
306 dirty
->root
->node
= root
->commit_root
;
307 dirty
->latest_root
= root
;
308 root
->commit_root
= NULL
;
310 root
->root_key
.offset
= root
->fs_info
->generation
;
311 btrfs_set_root_blocknr(&root
->root_item
,
312 bh_blocknr(root
->node
));
313 err
= btrfs_insert_root(trans
, root
->fs_info
->tree_root
,
319 refs
= btrfs_root_refs(&dirty
->root
->root_item
);
320 btrfs_set_root_refs(&dirty
->root
->root_item
, refs
- 1);
321 err
= btrfs_update_root(trans
, root
->fs_info
->tree_root
,
322 &dirty
->root
->root_key
,
323 &dirty
->root
->root_item
);
327 list_add(&dirty
->list
, list
);
338 int btrfs_defrag_root(struct btrfs_root
*root
, int cacheonly
)
340 struct btrfs_fs_info
*info
= root
->fs_info
;
342 struct btrfs_trans_handle
*trans
;
344 if (root
->defrag_running
)
347 trans
= btrfs_start_transaction(root
, 1);
349 root
->defrag_running
= 1;
350 ret
= btrfs_defrag_leaves(trans
, root
, cacheonly
);
351 btrfs_end_transaction(trans
, root
);
352 mutex_unlock(&info
->fs_mutex
);
354 btrfs_btree_balance_dirty(root
);
357 mutex_lock(&info
->fs_mutex
);
358 trans
= btrfs_start_transaction(root
, 1);
362 root
->defrag_running
= 0;
363 radix_tree_tag_clear(&info
->fs_roots_radix
,
364 (unsigned long)root
->root_key
.objectid
,
365 BTRFS_ROOT_DEFRAG_TAG
);
366 btrfs_end_transaction(trans
, root
);
370 int btrfs_defrag_dirty_roots(struct btrfs_fs_info
*info
)
372 struct btrfs_root
*gang
[1];
373 struct btrfs_root
*root
;
380 ret
= radix_tree_gang_lookup_tag(&info
->fs_roots_radix
,
383 BTRFS_ROOT_DEFRAG_TAG
);
386 for (i
= 0; i
< ret
; i
++) {
388 last
= root
->root_key
.objectid
+ 1;
389 btrfs_defrag_root(root
, 1);
392 btrfs_defrag_root(info
->extent_root
, 1);
396 static int drop_dirty_roots(struct btrfs_root
*tree_root
,
397 struct list_head
*list
)
399 struct dirty_root
*dirty
;
400 struct btrfs_trans_handle
*trans
;
406 while(!list_empty(list
)) {
407 struct btrfs_root
*root
;
409 mutex_lock(&tree_root
->fs_info
->fs_mutex
);
410 dirty
= list_entry(list
->next
, struct dirty_root
, list
);
411 list_del_init(&dirty
->list
);
413 num_blocks
= btrfs_root_blocks_used(&dirty
->root
->root_item
);
414 root
= dirty
->latest_root
;
417 trans
= btrfs_start_transaction(tree_root
, 1);
418 ret
= btrfs_drop_snapshot(trans
, dirty
->root
);
419 if (ret
!= -EAGAIN
) {
423 err
= btrfs_update_root(trans
,
425 &dirty
->root
->root_key
,
426 &dirty
->root
->root_item
);
429 ret
= btrfs_end_transaction(trans
, tree_root
);
431 mutex_unlock(&tree_root
->fs_info
->fs_mutex
);
433 btrfs_btree_balance_dirty(tree_root
);
436 mutex_lock(&tree_root
->fs_info
->fs_mutex
);
440 num_blocks
-= btrfs_root_blocks_used(&dirty
->root
->root_item
);
441 blocks_used
= btrfs_root_blocks_used(&root
->root_item
);
443 record_root_in_trans(root
);
444 btrfs_set_root_blocks_used(&root
->root_item
,
445 blocks_used
- num_blocks
);
447 ret
= btrfs_del_root(trans
, tree_root
, &dirty
->root
->root_key
);
452 ret
= btrfs_end_transaction(trans
, tree_root
);
457 mutex_unlock(&tree_root
->fs_info
->fs_mutex
);
458 btrfs_btree_balance_dirty(tree_root
);
464 int btrfs_commit_transaction(struct btrfs_trans_handle
*trans
,
465 struct btrfs_root
*root
)
467 unsigned long joined
= 0;
468 unsigned long timeout
= 1;
469 struct btrfs_transaction
*cur_trans
;
470 struct btrfs_transaction
*prev_trans
= NULL
;
471 struct list_head dirty_fs_roots
;
472 struct radix_tree_root pinned_copy
;
476 init_bit_radix(&pinned_copy
);
477 INIT_LIST_HEAD(&dirty_fs_roots
);
479 mutex_lock(&root
->fs_info
->trans_mutex
);
480 if (trans
->transaction
->in_commit
) {
481 cur_trans
= trans
->transaction
;
482 trans
->transaction
->use_count
++;
483 mutex_unlock(&root
->fs_info
->trans_mutex
);
484 btrfs_end_transaction(trans
, root
);
486 mutex_unlock(&root
->fs_info
->fs_mutex
);
487 ret
= wait_for_commit(root
, cur_trans
);
490 mutex_lock(&root
->fs_info
->trans_mutex
);
491 put_transaction(cur_trans
);
492 mutex_unlock(&root
->fs_info
->trans_mutex
);
494 mutex_lock(&root
->fs_info
->fs_mutex
);
497 trans
->transaction
->in_commit
= 1;
498 cur_trans
= trans
->transaction
;
499 if (cur_trans
->list
.prev
!= &root
->fs_info
->trans_list
) {
500 prev_trans
= list_entry(cur_trans
->list
.prev
,
501 struct btrfs_transaction
, list
);
502 if (!prev_trans
->commit_done
) {
503 prev_trans
->use_count
++;
504 mutex_unlock(&root
->fs_info
->fs_mutex
);
505 mutex_unlock(&root
->fs_info
->trans_mutex
);
507 wait_for_commit(root
, prev_trans
);
509 mutex_lock(&root
->fs_info
->fs_mutex
);
510 mutex_lock(&root
->fs_info
->trans_mutex
);
511 put_transaction(prev_trans
);
516 joined
= cur_trans
->num_joined
;
517 WARN_ON(cur_trans
!= trans
->transaction
);
518 prepare_to_wait(&cur_trans
->writer_wait
, &wait
,
519 TASK_UNINTERRUPTIBLE
);
521 if (cur_trans
->num_writers
> 1)
522 timeout
= MAX_SCHEDULE_TIMEOUT
;
526 mutex_unlock(&root
->fs_info
->fs_mutex
);
527 mutex_unlock(&root
->fs_info
->trans_mutex
);
529 schedule_timeout(timeout
);
531 mutex_lock(&root
->fs_info
->fs_mutex
);
532 mutex_lock(&root
->fs_info
->trans_mutex
);
533 finish_wait(&cur_trans
->writer_wait
, &wait
);
534 } while (cur_trans
->num_writers
> 1 ||
535 (cur_trans
->num_joined
!= joined
));
537 WARN_ON(cur_trans
!= trans
->transaction
);
538 ret
= add_dirty_roots(trans
, &root
->fs_info
->fs_roots_radix
,
542 ret
= btrfs_commit_tree_roots(trans
, root
);
545 cur_trans
= root
->fs_info
->running_transaction
;
546 root
->fs_info
->running_transaction
= NULL
;
547 btrfs_set_super_generation(&root
->fs_info
->super_copy
,
549 btrfs_set_super_root(&root
->fs_info
->super_copy
,
550 bh_blocknr(root
->fs_info
->tree_root
->node
));
551 memcpy(root
->fs_info
->disk_super
, &root
->fs_info
->super_copy
,
552 sizeof(root
->fs_info
->super_copy
));
554 btrfs_copy_pinned(root
, &pinned_copy
);
556 mutex_unlock(&root
->fs_info
->trans_mutex
);
557 mutex_unlock(&root
->fs_info
->fs_mutex
);
558 ret
= btrfs_write_and_wait_transaction(trans
, root
);
560 write_ctree_super(trans
, root
);
561 mutex_lock(&root
->fs_info
->fs_mutex
);
562 btrfs_finish_extent_commit(trans
, root
, &pinned_copy
);
563 mutex_lock(&root
->fs_info
->trans_mutex
);
564 cur_trans
->commit_done
= 1;
565 root
->fs_info
->last_trans_committed
= cur_trans
->transid
;
566 wake_up(&cur_trans
->commit_wait
);
567 put_transaction(cur_trans
);
568 put_transaction(cur_trans
);
570 if (root
->fs_info
->closing
)
571 list_splice_init(&root
->fs_info
->dead_roots
, &dirty_fs_roots
);
573 list_splice_init(&dirty_fs_roots
, &root
->fs_info
->dead_roots
);
575 mutex_unlock(&root
->fs_info
->trans_mutex
);
576 kmem_cache_free(btrfs_trans_handle_cachep
, trans
);
578 if (root
->fs_info
->closing
) {
579 mutex_unlock(&root
->fs_info
->fs_mutex
);
580 drop_dirty_roots(root
->fs_info
->tree_root
, &dirty_fs_roots
);
581 mutex_lock(&root
->fs_info
->fs_mutex
);
586 int btrfs_clean_old_snapshots(struct btrfs_root
*root
)
588 struct list_head dirty_roots
;
589 INIT_LIST_HEAD(&dirty_roots
);
591 mutex_lock(&root
->fs_info
->trans_mutex
);
592 list_splice_init(&root
->fs_info
->dead_roots
, &dirty_roots
);
593 mutex_unlock(&root
->fs_info
->trans_mutex
);
595 if (!list_empty(&dirty_roots
)) {
596 drop_dirty_roots(root
, &dirty_roots
);
600 void btrfs_transaction_cleaner(struct work_struct
*work
)
602 struct btrfs_fs_info
*fs_info
= container_of(work
,
603 struct btrfs_fs_info
,
606 struct btrfs_root
*root
= fs_info
->tree_root
;
607 struct btrfs_transaction
*cur
;
608 struct btrfs_trans_handle
*trans
;
610 unsigned long delay
= HZ
* 30;
613 mutex_lock(&root
->fs_info
->fs_mutex
);
614 mutex_lock(&root
->fs_info
->trans_mutex
);
615 cur
= root
->fs_info
->running_transaction
;
617 mutex_unlock(&root
->fs_info
->trans_mutex
);
621 if (now
< cur
->start_time
|| now
- cur
->start_time
< 30) {
622 mutex_unlock(&root
->fs_info
->trans_mutex
);
626 mutex_unlock(&root
->fs_info
->trans_mutex
);
627 btrfs_defrag_dirty_roots(root
->fs_info
);
628 trans
= btrfs_start_transaction(root
, 1);
629 ret
= btrfs_commit_transaction(trans
, root
);
631 mutex_unlock(&root
->fs_info
->fs_mutex
);
632 btrfs_clean_old_snapshots(root
);
633 btrfs_transaction_queue_work(root
, delay
);
636 void btrfs_transaction_queue_work(struct btrfs_root
*root
, int delay
)
638 queue_delayed_work(trans_wq
, &root
->fs_info
->trans_work
, delay
);
641 void btrfs_transaction_flush_work(struct btrfs_root
*root
)
643 cancel_rearming_delayed_workqueue(trans_wq
, &root
->fs_info
->trans_work
);
644 flush_workqueue(trans_wq
);
647 void __init
btrfs_init_transaction_sys(void)
649 trans_wq
= create_workqueue("btrfs");
652 void __exit
btrfs_exit_transaction_sys(void)
654 destroy_workqueue(trans_wq
);