/*
 * Copyright (C) 2007 Oracle. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
19 #include <linux/module.h>
21 #include <linux/sched.h>
24 #include "transaction.h"
/*
 * Counter consulted by put_transaction(), which warns if it is already zero
 * when a transaction is about to be freed.
 * NOTE(review): presumably the number of live transactions - confirm against
 * the places that modify it.
 */
static int total_trans = 0;

/* Slab caches for transaction handles and transactions, defined elsewhere. */
extern struct kmem_cache *btrfs_trans_handle_cachep;
extern struct kmem_cache *btrfs_transaction_cachep;

/* Workqueue on which fs_info->trans_work is queued. */
static struct workqueue_struct *trans_wq;

/* Radix-tree tag set on a root in fs_roots_radix by btrfs_start_transaction(). */
#define BTRFS_ROOT_TRANS_TAG 0
/*
 * Drop one reference on @transaction.
 *
 * use_count is decremented; when it reaches zero the transaction is removed
 * from the list it is linked on, zeroed with memset() and handed back to
 * btrfs_transaction_cachep. A warning is raised if use_count is already zero
 * on entry, or if total_trans is zero when the last reference goes away.
 *
 * Every caller visible in this file invokes this with
 * fs_info->trans_mutex held.
 *
 * NOTE(review): this listing has lost lines (the braces and listing line 40
 * are absent); restore them from the original file before building.
 */
34 static void put_transaction(struct btrfs_transaction
*transaction
)
36 WARN_ON(transaction
->use_count
== 0);
37 transaction
->use_count
--;
38 if (transaction
->use_count
== 0) {
39 WARN_ON(total_trans
== 0);
41 list_del_init(&transaction
->list
);
42 memset(transaction
, 0, sizeof(*transaction
));
43 kmem_cache_free(btrfs_transaction_cachep
, transaction
);
/*
 * Register the caller as one more writer on the running transaction of
 * @root's filesystem.
 *
 * The visible code reads fs_info->running_transaction, contains a path that
 * allocates a fresh transaction from btrfs_transaction_cachep, bumps
 * fs_info->generation, installs the new transaction as the running one,
 * initialises its fields (transid taken from the new generation, use_count
 * of 1, start_time from get_seconds()) and appends it to fs_info->trans_list.
 * In all cases num_writers is incremented at the end.
 *
 * The only caller visible here, btrfs_start_transaction(), calls this with
 * fs_info->trans_mutex held.
 *
 * NOTE(review): the condition guarding the allocation path, the allocation
 * flags, any allocation-failure handling and the return statement are missing
 * from this listing; presumably the allocation only happens when no
 * transaction is running - confirm against the original file.
 */
47 static int join_transaction(struct btrfs_root
*root
)
49 struct btrfs_transaction
*cur_trans
;
50 cur_trans
= root
->fs_info
->running_transaction
;
52 cur_trans
= kmem_cache_alloc(btrfs_transaction_cachep
,
56 root
->fs_info
->generation
++;
57 root
->fs_info
->running_transaction
= cur_trans
;
58 cur_trans
->num_writers
= 0;
59 cur_trans
->transid
= root
->fs_info
->generation
;
60 init_waitqueue_head(&cur_trans
->writer_wait
);
61 init_waitqueue_head(&cur_trans
->commit_wait
);
62 cur_trans
->in_commit
= 0;
/* the transaction itself holds the first reference */
63 cur_trans
->use_count
= 1;
64 cur_trans
->commit_done
= 0;
65 cur_trans
->start_time
= get_seconds();
66 list_add_tail(&cur_trans
->list
, &root
->fs_info
->trans_list
);
67 init_bit_radix(&cur_trans
->dirty_pages
);
69 cur_trans
->num_writers
++;
/*
 * Allocate a transaction handle and attach it to the running transaction of
 * @root's filesystem.
 *
 * The handle comes from btrfs_trans_handle_cachep (GFP_NOFS). Under
 * fs_info->trans_mutex the function joins the running transaction via
 * join_transaction() and records its transid. For a root other than the tree
 * root (subject to a last_trans comparison whose right-hand side is cut off
 * in this listing), a root whose root_item.refs is non-zero is tagged with
 * BTRFS_ROOT_TRANS_TAG in fs_info->fs_roots_radix and its current node is
 * remembered as commit_root. The handle is then filled in (transid,
 * transaction pointer, blocks_reserved from num_blocks, block_group = NULL)
 * and one extra use_count reference is taken on the running transaction.
 *
 * NOTE(review): the second parameter line, the local declarations, the
 * checks after the allocation and after join_transaction(), listing lines
 * 93-97 and the return statement are missing from this listing; whether the
 * allocation result is checked cannot be determined from here.
 */
73 struct btrfs_trans_handle
*btrfs_start_transaction(struct btrfs_root
*root
,
76 struct btrfs_trans_handle
*h
=
77 kmem_cache_alloc(btrfs_trans_handle_cachep
, GFP_NOFS
);
81 mutex_lock(&root
->fs_info
->trans_mutex
);
82 ret
= join_transaction(root
);
84 running_trans_id
= root
->fs_info
->running_transaction
->transid
;
86 if (root
!= root
->fs_info
->tree_root
&& root
->last_trans
<
88 if (root
->root_item
.refs
!= 0) {
89 radix_tree_tag_set(&root
->fs_info
->fs_roots_radix
,
90 (unsigned long)root
->root_key
.objectid
,
91 BTRFS_ROOT_TRANS_TAG
);
92 root
->commit_root
= root
->node
;
98 root
->last_trans
= running_trans_id
;
99 h
->transid
= running_trans_id
;
100 h
->transaction
= root
->fs_info
->running_transaction
;
101 h
->blocks_reserved
= num_blocks
;
103 h
->block_group
= NULL
;
/* reference owned by the handle */
104 root
->fs_info
->running_transaction
->use_count
++;
105 mutex_unlock(&root
->fs_info
->trans_mutex
);
109 int btrfs_end_transaction(struct btrfs_trans_handle
*trans
,
110 struct btrfs_root
*root
)
112 struct btrfs_transaction
*cur_trans
;
114 mutex_lock(&root
->fs_info
->trans_mutex
);
115 cur_trans
= root
->fs_info
->running_transaction
;
116 WARN_ON(cur_trans
->num_writers
< 1);
117 if (waitqueue_active(&cur_trans
->writer_wait
))
118 wake_up(&cur_trans
->writer_wait
);
119 cur_trans
->num_writers
--;
120 put_transaction(cur_trans
);
121 mutex_unlock(&root
->fs_info
->trans_mutex
);
122 memset(trans
, 0, sizeof(*trans
));
123 kmem_cache_free(btrfs_trans_handle_cachep
, trans
);
/*
 * Write out the btree pages dirtied by a transaction and wait for the I/O.
 *
 * Without a handle, or with a handle that has no transaction attached, the
 * whole btree inode mapping is written and waited on with
 * filemap_write_and_wait(). Otherwise the transaction's dirty_pages bit radix
 * is scanned in batches of up to ARRAY_SIZE(gang) == 16 entries; each entry
 * is cleared from the radix, the page is looked up locked in the btree
 * inode's mapping, written with write_one_page() and released. Finally
 * filemap_fdatawait() waits on the mapping.
 *
 * NOTE(review): the outer loop, the handling of err/ret, the second argument
 * of find_lock_page() and the return statement are missing from this
 * listing - confirm against the original file.
 */
128 int btrfs_write_and_wait_transaction(struct btrfs_trans_handle
*trans
,
129 struct btrfs_root
*root
)
131 unsigned long gang
[16];
137 struct radix_tree_root
*dirty_pages
;
138 struct inode
*btree_inode
= root
->fs_info
->btree_inode
;
140 if (!trans
|| !trans
->transaction
) {
141 return filemap_write_and_wait(btree_inode
->i_mapping
);
143 dirty_pages
= &trans
->transaction
->dirty_pages
;
145 ret
= find_first_radix_bit(dirty_pages
, gang
,
146 0, ARRAY_SIZE(gang
));
149 for (i
= 0; i
< ret
; i
++) {
151 clear_radix_bit(dirty_pages
, gang
[i
]);
152 page
= find_lock_page(btree_inode
->i_mapping
,
156 err
= write_one_page(page
, 0);
159 page_cache_release(page
);
162 err
= filemap_fdatawait(btree_inode
->i_mapping
);
/*
 * Bring the extent root's item in the tree root up to date for a commit.
 *
 * Dirty block groups are written through the extent root first. The block
 * number recorded in the extent root's root_item is then compared with the
 * block number of its current node; the root_item is pointed at the current
 * node and stored in the tree root with btrfs_update_root(), after which
 * dirty block groups are written again.
 *
 * NOTE(review): the surrounding loop (presumably repeating until the
 * recorded block number stops changing), the action taken when the two block
 * numbers are equal, the error checks and the return statement are missing
 * from this listing - confirm against the original file.
 */
168 int btrfs_commit_tree_roots(struct btrfs_trans_handle
*trans
,
169 struct btrfs_root
*root
)
172 u64 old_extent_block
;
173 struct btrfs_fs_info
*fs_info
= root
->fs_info
;
174 struct btrfs_root
*tree_root
= fs_info
->tree_root
;
175 struct btrfs_root
*extent_root
= fs_info
->extent_root
;
177 btrfs_write_dirty_block_groups(trans
, extent_root
);
179 old_extent_block
= btrfs_root_blocknr(&extent_root
->root_item
);
180 if (old_extent_block
== bh_blocknr(extent_root
->node
))
182 btrfs_set_root_blocknr(&extent_root
->root_item
,
183 bh_blocknr(extent_root
->node
));
184 ret
= btrfs_update_root(trans
, tree_root
,
185 &extent_root
->root_key
,
186 &extent_root
->root_item
);
188 btrfs_write_dirty_block_groups(trans
, extent_root
);
/*
 * Wait until @commit has its commit_done flag set.
 *
 * The caller is queued on commit->commit_wait in TASK_UNINTERRUPTIBLE state
 * and commit_done is re-checked after queuing. fs_info->trans_mutex is
 * dropped and re-taken inside the loop, so the function expects to be entered
 * with that mutex held and returns with it held.
 *
 * NOTE(review): the declaration of the wait entry, the statement executed
 * between the unlock and the lock (presumably the actual sleep) and the
 * return statement are missing from this listing - confirm against the
 * original file.
 */
193 static int wait_for_commit(struct btrfs_root
*root
,
194 struct btrfs_transaction
*commit
)
197 while(!commit
->commit_done
) {
198 prepare_to_wait(&commit
->commit_wait
, &wait
,
199 TASK_UNINTERRUPTIBLE
);
200 if (commit
->commit_done
)
202 mutex_unlock(&root
->fs_info
->trans_mutex
);
204 mutex_lock(&root
->fs_info
->trans_mutex
);
206 finish_wait(&commit
->commit_wait
, &wait
);
/*
 * Members of the record queued for each root whose old tree must be dropped.
 * NOTE(review): the "struct dirty_root {" header line and at least one
 * further member (free_on_drop is used by the functions below) are missing
 * from this listing.
 */
/* linkage on a dead/dirty root list */
211 struct list_head list
;
/* key copied from the root's root_key when the record is created */
212 struct btrfs_key snap_key
;
/* buffer head of the old root node */
213 struct buffer_head
*commit_root
;
/* root passed to btrfs_drop_snapshot() by drop_dirty_roots() */
214 struct btrfs_root
*root
;
/*
 * Queue @root on @dead_list so that its tree is dropped later.
 *
 * A dirty_root record is allocated with kmalloc(GFP_NOFS), the root's key is
 * copied into snap_key, the root's current node becomes commit_root,
 * free_on_drop is set to 1 and the record is added at the head of
 * @dead_list.
 *
 * NOTE(review): the allocation-failure check, listing line 227 (presumably
 * the assignment of dirty->root) and the return statement are missing from
 * this listing - confirm against the original file.
 */
218 int btrfs_add_dead_root(struct btrfs_root
*root
, struct list_head
*dead_list
)
220 struct dirty_root
*dirty
;
222 dirty
= kmalloc(sizeof(*dirty
), GFP_NOFS
);
225 memcpy(&dirty
->snap_key
, &root
->root_key
, sizeof(root
->root_key
));
226 dirty
->commit_root
= root
->node
;
228 dirty
->free_on_drop
= 1;
229 list_add(&dirty
->list
, dead_list
);
/*
 * Collect every root tagged BTRFS_ROOT_TRANS_TAG in @radix and queue the
 * ones that changed on @list.
 *
 * Tagged roots are fetched with radix_tree_gang_lookup_tag() into an array
 * of 8 pointers and the tag is cleared for each one. A root whose
 * commit_root still equals its current node only has that commit_root
 * reference released (brelse) and reset to NULL, after a warning if the
 * node's block number differs from the one in root_item. For the other roots
 * a dirty_root record is allocated that keeps the old key and the old
 * commit_root (free_on_drop = 0); the old root_item is saved in tmp_item, the
 * root's key offset is set to fs_info->generation, its root_item is pointed
 * at the current node and inserted into the tree root, the saved item gets
 * its reference count decremented and is written back with
 * btrfs_update_root(), and the record is added to @list.
 *
 * NOTE(review): the outer lookup loop, the assignment of root from gang[i],
 * the control flow after the unchanged-root case, the allocation-failure and
 * error checks, the trailing arguments of several calls and the return
 * statement are missing from this listing - confirm against the original
 * file.
 */
233 static int add_dirty_roots(struct btrfs_trans_handle
*trans
,
234 struct radix_tree_root
*radix
,
235 struct list_head
*list
)
237 struct dirty_root
*dirty
;
238 struct btrfs_root
*gang
[8];
239 struct btrfs_root
*root
;
240 struct btrfs_root_item tmp_item
;
247 ret
= radix_tree_gang_lookup_tag(radix
, (void **)gang
, 0,
249 BTRFS_ROOT_TRANS_TAG
);
252 for (i
= 0; i
< ret
; i
++) {
254 radix_tree_tag_clear(radix
,
255 (unsigned long)root
->root_key
.objectid
,
256 BTRFS_ROOT_TRANS_TAG
);
257 if (root
->commit_root
== root
->node
) {
258 WARN_ON(bh_blocknr(root
->node
) !=
259 btrfs_root_blocknr(&root
->root_item
));
260 brelse(root
->commit_root
);
261 root
->commit_root
= NULL
;
264 dirty
= kmalloc(sizeof(*dirty
), GFP_NOFS
);
266 memcpy(&dirty
->snap_key
, &root
->root_key
,
267 sizeof(root
->root_key
));
268 dirty
->commit_root
= root
->commit_root
;
269 root
->commit_root
= NULL
;
271 dirty
->free_on_drop
= 0;
272 memcpy(&tmp_item
, &root
->root_item
, sizeof(tmp_item
));
274 root
->root_key
.offset
= root
->fs_info
->generation
;
275 btrfs_set_root_blocknr(&root
->root_item
,
276 bh_blocknr(root
->node
));
277 err
= btrfs_insert_root(trans
, root
->fs_info
->tree_root
,
283 refs
= btrfs_root_refs(&tmp_item
);
284 btrfs_set_root_refs(&tmp_item
, refs
- 1);
285 err
= btrfs_update_root(trans
, root
->fs_info
->tree_root
,
291 list_add(&dirty
->list
, list
);
/*
 * Drain @list, dropping the old tree recorded in each dirty_root entry.
 *
 * For every entry the function takes fs_info->fs_mutex, unlinks the entry,
 * starts a one-block transaction on @tree_root, calls btrfs_drop_snapshot()
 * for the entry's root, deletes the root item identified by snap_key from
 * the tree root and ends the transaction. fs_mutex is released and
 * btrfs_btree_balance_dirty() is called before the next entry is processed.
 *
 * The function takes fs_mutex itself, so callers must not already hold it.
 *
 * NOTE(review): the remaining arguments of btrfs_drop_snapshot(), the error
 * checks, the body of the free_on_drop branch, the freeing of the entry and
 * the return statement are missing from this listing - confirm against the
 * original file.
 */
299 static int drop_dirty_roots(struct btrfs_root
*tree_root
,
300 struct list_head
*list
)
302 struct dirty_root
*dirty
;
303 struct btrfs_trans_handle
*trans
;
305 while(!list_empty(list
)) {
306 mutex_lock(&tree_root
->fs_info
->fs_mutex
);
307 dirty
= list_entry(list
->next
, struct dirty_root
, list
);
308 list_del_init(&dirty
->list
);
310 trans
= btrfs_start_transaction(tree_root
, 1);
311 ret
= btrfs_drop_snapshot(trans
, dirty
->root
,
314 ret
= btrfs_del_root(trans
, tree_root
, &dirty
->snap_key
);
317 ret
= btrfs_end_transaction(trans
, tree_root
);
320 if (dirty
->free_on_drop
)
323 mutex_unlock(&tree_root
->fs_info
->fs_mutex
);
324 btrfs_btree_balance_dirty(tree_root
);
/*
 * Commit the transaction that @trans belongs to.
 *
 * Visible steps, in order:
 *  - Under fs_info->trans_mutex: if the transaction is already in_commit,
 *    take a temporary reference, end the handle, wait for that commit to
 *    finish, drop the reference and unlock.
 *  - Otherwise mark the transaction in_commit and wait on writer_wait until
 *    num_writers drops to 1 (the committer itself), dropping trans_mutex
 *    around each wait.
 *  - Collect changed roots with add_dirty_roots() and update the tree roots
 *    with btrfs_commit_tree_roots().
 *  - Detach the running transaction from fs_info; if an earlier transaction
 *    is still on trans_list, look at its commit_done flag and take a
 *    reference on it.
 *  - Update super_copy (generation, root block number) and copy it to
 *    disk_super, then release trans_mutex and fs_mutex.
 *  - Write and wait on the transaction's dirty pages, wait for the previous
 *    transaction (if referenced), write the super block, re-take fs_mutex
 *    and call btrfs_finish_extent_commit().
 *  - Under trans_mutex: set commit_done, wake commit_wait sleepers, drop two
 *    references on the transaction, move dead roots between
 *    fs_info->dead_roots and the local list depending on fs_info->closing,
 *    then free the handle.
 *  - When the filesystem is closing, drop the collected roots right away
 *    with fs_mutex released.
 *
 * fs_mutex is unlocked and re-locked here without a preceding lock in this
 * function, so the caller is expected to hold fs_info->fs_mutex on entry.
 *
 * NOTE(review): in the in_commit path btrfs_end_transaction() is called
 * while trans_mutex is held, yet btrfs_end_transaction() locks trans_mutex
 * itself - this looks like a self-deadlock unless a missing line releases the
 * mutex first; verify against the original file.
 *
 * NOTE(review): the declaration of the wait entry, several error checks,
 * the sleep inside the writer wait loop, the trailing arguments of some
 * calls, the branches around prev_trans and the return statements are
 * missing from this listing.
 */
329 int btrfs_commit_transaction(struct btrfs_trans_handle
*trans
,
330 struct btrfs_root
*root
)
333 struct btrfs_transaction
*cur_trans
;
334 struct btrfs_transaction
*prev_trans
= NULL
;
335 struct list_head dirty_fs_roots
;
338 INIT_LIST_HEAD(&dirty_fs_roots
);
340 mutex_lock(&root
->fs_info
->trans_mutex
);
341 if (trans
->transaction
->in_commit
) {
342 cur_trans
= trans
->transaction
;
/* keep the transaction alive while waiting for the other committer */
343 trans
->transaction
->use_count
++;
344 btrfs_end_transaction(trans
, root
);
345 ret
= wait_for_commit(root
, cur_trans
);
347 put_transaction(cur_trans
);
348 mutex_unlock(&root
->fs_info
->trans_mutex
);
351 cur_trans
= trans
->transaction
;
352 trans
->transaction
->in_commit
= 1;
353 while (trans
->transaction
->num_writers
> 1) {
354 WARN_ON(cur_trans
!= trans
->transaction
);
355 prepare_to_wait(&trans
->transaction
->writer_wait
, &wait
,
356 TASK_UNINTERRUPTIBLE
);
357 if (trans
->transaction
->num_writers
<= 1)
359 mutex_unlock(&root
->fs_info
->trans_mutex
);
361 mutex_lock(&root
->fs_info
->trans_mutex
);
362 finish_wait(&trans
->transaction
->writer_wait
, &wait
);
364 finish_wait(&trans
->transaction
->writer_wait
, &wait
);
365 WARN_ON(cur_trans
!= trans
->transaction
);
366 ret
= add_dirty_roots(trans
, &root
->fs_info
->fs_roots_radix
,
370 ret
= btrfs_commit_tree_roots(trans
, root
);
373 cur_trans
= root
->fs_info
->running_transaction
;
374 root
->fs_info
->running_transaction
= NULL
;
/* is there an older transaction ahead of this one on trans_list? */
375 if (cur_trans
->list
.prev
!= &root
->fs_info
->trans_list
) {
376 prev_trans
= list_entry(cur_trans
->list
.prev
,
377 struct btrfs_transaction
, list
);
378 if (prev_trans
->commit_done
)
381 prev_trans
->use_count
++;
383 btrfs_set_super_generation(&root
->fs_info
->super_copy
,
385 btrfs_set_super_root(&root
->fs_info
->super_copy
,
386 bh_blocknr(root
->fs_info
->tree_root
->node
));
387 memcpy(root
->fs_info
->disk_super
, &root
->fs_info
->super_copy
,
388 sizeof(root
->fs_info
->super_copy
));
389 mutex_unlock(&root
->fs_info
->trans_mutex
);
390 mutex_unlock(&root
->fs_info
->fs_mutex
);
391 ret
= btrfs_write_and_wait_transaction(trans
, root
);
393 mutex_lock(&root
->fs_info
->trans_mutex
);
394 wait_for_commit(root
, prev_trans
);
395 put_transaction(prev_trans
);
396 mutex_unlock(&root
->fs_info
->trans_mutex
);
399 write_ctree_super(trans
, root
);
401 mutex_lock(&root
->fs_info
->fs_mutex
);
402 btrfs_finish_extent_commit(trans
, root
);
403 mutex_lock(&root
->fs_info
->trans_mutex
);
404 cur_trans
->commit_done
= 1;
405 wake_up(&cur_trans
->commit_wait
);
/*
 * Two references are dropped here; presumably the one taken for the handle
 * in btrfs_start_transaction() and the initial one set in join_transaction().
 */
406 put_transaction(cur_trans
);
407 put_transaction(cur_trans
);
408 if (root
->fs_info
->closing
)
409 list_splice_init(&root
->fs_info
->dead_roots
, &dirty_fs_roots
);
411 list_splice_init(&dirty_fs_roots
, &root
->fs_info
->dead_roots
);
412 mutex_unlock(&root
->fs_info
->trans_mutex
);
413 kmem_cache_free(btrfs_trans_handle_cachep
, trans
);
415 if (root
->fs_info
->closing
) {
/* drop_dirty_roots() takes fs_mutex itself */
416 mutex_unlock(&root
->fs_info
->fs_mutex
);
417 drop_dirty_roots(root
->fs_info
->tree_root
, &dirty_fs_roots
);
418 mutex_lock(&root
->fs_info
->fs_mutex
);
/*
 * Work function for fs_info's transaction work item.
 *
 * With fs_mutex and trans_mutex held it looks at the running transaction.
 * A transaction whose start_time is less than 30 seconds before "now" (or
 * lies after "now") is left alone; otherwise a one-block transaction is
 * started on the tree root and committed immediately. After fs_mutex is
 * released, fs_info->dead_roots is spliced onto a local list under
 * trans_mutex and, if non-empty, processed with drop_dirty_roots(). The work
 * item is finally re-queued with a delay of HZ * 30.
 *
 * NOTE(review): the remaining container_of() arguments, the declarations of
 * now/ret, the assignment of now, the no-running-transaction check and the
 * jumps between the branches are missing from this listing - confirm against
 * the original file.
 */
423 void btrfs_transaction_cleaner(struct work_struct
*work
)
425 struct btrfs_fs_info
*fs_info
= container_of(work
,
426 struct btrfs_fs_info
,
429 struct btrfs_root
*root
= fs_info
->tree_root
;
430 struct btrfs_transaction
*cur
;
431 struct btrfs_trans_handle
*trans
;
432 struct list_head dirty_roots
;
434 unsigned long delay
= HZ
* 30;
437 INIT_LIST_HEAD(&dirty_roots
);
438 mutex_lock(&root
->fs_info
->fs_mutex
);
439 mutex_lock(&root
->fs_info
->trans_mutex
);
440 cur
= root
->fs_info
->running_transaction
;
442 mutex_unlock(&root
->fs_info
->trans_mutex
);
446 if (now
< cur
->start_time
|| now
- cur
->start_time
< 30) {
447 mutex_unlock(&root
->fs_info
->trans_mutex
);
451 mutex_unlock(&root
->fs_info
->trans_mutex
);
452 trans
= btrfs_start_transaction(root
, 1);
453 ret
= btrfs_commit_transaction(trans
, root
);
455 mutex_unlock(&root
->fs_info
->fs_mutex
);
457 mutex_lock(&root
->fs_info
->trans_mutex
);
458 list_splice_init(&root
->fs_info
->dead_roots
, &dirty_roots
);
459 mutex_unlock(&root
->fs_info
->trans_mutex
);
461 if (!list_empty(&dirty_roots
)) {
462 drop_dirty_roots(root
, &dirty_roots
);
/* re-arm for the next pass */
464 btrfs_transaction_queue_work(root
, delay
);
467 void btrfs_transaction_queue_work(struct btrfs_root
*root
, int delay
)
469 queue_delayed_work(trans_wq
, &root
->fs_info
->trans_work
, delay
);
472 void btrfs_transaction_flush_work(struct btrfs_root
*root
)
474 cancel_rearming_delayed_workqueue(trans_wq
, &root
->fs_info
->trans_work
);
475 flush_workqueue(trans_wq
);
478 void __init
btrfs_init_transaction_sys(void)
480 trans_wq
= create_workqueue("btrfs");
483 void __exit
btrfs_exit_transaction_sys(void)
485 destroy_workqueue(trans_wq
);