/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
20 #include <linux/sched.h>
21 #include <linux/writeback.h>
22 #include <linux/pagemap.h>
23 #include <linux/blkdev.h>
26 #include "transaction.h"
28 #include "ref-cache.h"
/* slab caches used below for transaction handles and transactions */
extern struct kmem_cache *btrfs_trans_handle_cachep;
extern struct kmem_cache *btrfs_transaction_cachep;

/* radix tree tag set on fs_roots_radix entries recorded in a transaction */
#define BTRFS_ROOT_TRANS_TAG 0
36 static noinline
void put_transaction(struct btrfs_transaction
*transaction
)
38 WARN_ON(transaction
->use_count
== 0);
39 transaction
->use_count
--;
40 if (transaction
->use_count
== 0) {
41 list_del_init(&transaction
->list
);
42 memset(transaction
, 0, sizeof(*transaction
));
43 kmem_cache_free(btrfs_transaction_cachep
, transaction
);
48 * either allocate a new transaction or hop into the existing one
50 static noinline
int join_transaction(struct btrfs_root
*root
)
52 struct btrfs_transaction
*cur_trans
;
53 cur_trans
= root
->fs_info
->running_transaction
;
55 cur_trans
= kmem_cache_alloc(btrfs_transaction_cachep
,
58 root
->fs_info
->generation
++;
59 root
->fs_info
->last_alloc
= 0;
60 root
->fs_info
->last_data_alloc
= 0;
61 cur_trans
->num_writers
= 1;
62 cur_trans
->num_joined
= 0;
63 cur_trans
->transid
= root
->fs_info
->generation
;
64 init_waitqueue_head(&cur_trans
->writer_wait
);
65 init_waitqueue_head(&cur_trans
->commit_wait
);
66 cur_trans
->in_commit
= 0;
67 cur_trans
->blocked
= 0;
68 cur_trans
->use_count
= 1;
69 cur_trans
->commit_done
= 0;
70 cur_trans
->start_time
= get_seconds();
71 INIT_LIST_HEAD(&cur_trans
->pending_snapshots
);
72 list_add_tail(&cur_trans
->list
, &root
->fs_info
->trans_list
);
73 extent_io_tree_init(&cur_trans
->dirty_pages
,
74 root
->fs_info
->btree_inode
->i_mapping
,
76 spin_lock(&root
->fs_info
->new_trans_lock
);
77 root
->fs_info
->running_transaction
= cur_trans
;
78 spin_unlock(&root
->fs_info
->new_trans_lock
);
80 cur_trans
->num_writers
++;
81 cur_trans
->num_joined
++;
88 * this does all the record keeping required to make sure that a
89 * reference counted root is properly recorded in a given transaction.
90 * This is required to make sure the old root from before we joined the transaction
91 * is deleted when the transaction commits
93 noinline
int btrfs_record_root_in_trans(struct btrfs_root
*root
)
95 struct btrfs_dirty_root
*dirty
;
96 u64 running_trans_id
= root
->fs_info
->running_transaction
->transid
;
97 if (root
->ref_cows
&& root
->last_trans
< running_trans_id
) {
98 WARN_ON(root
== root
->fs_info
->extent_root
);
99 if (root
->root_item
.refs
!= 0) {
100 radix_tree_tag_set(&root
->fs_info
->fs_roots_radix
,
101 (unsigned long)root
->root_key
.objectid
,
102 BTRFS_ROOT_TRANS_TAG
);
104 dirty
= kmalloc(sizeof(*dirty
), GFP_NOFS
);
106 dirty
->root
= kmalloc(sizeof(*dirty
->root
), GFP_NOFS
);
107 BUG_ON(!dirty
->root
);
108 dirty
->latest_root
= root
;
109 INIT_LIST_HEAD(&dirty
->list
);
111 root
->commit_root
= btrfs_root_node(root
);
113 memcpy(dirty
->root
, root
, sizeof(*root
));
114 spin_lock_init(&dirty
->root
->node_lock
);
115 spin_lock_init(&dirty
->root
->list_lock
);
116 mutex_init(&dirty
->root
->objectid_mutex
);
117 mutex_init(&dirty
->root
->log_mutex
);
118 INIT_LIST_HEAD(&dirty
->root
->dead_list
);
119 dirty
->root
->node
= root
->commit_root
;
120 dirty
->root
->commit_root
= NULL
;
122 spin_lock(&root
->list_lock
);
123 list_add(&dirty
->root
->dead_list
, &root
->dead_list
);
124 spin_unlock(&root
->list_lock
);
126 root
->dirty_root
= dirty
;
130 root
->last_trans
= running_trans_id
;
135 /* wait for commit against the current transaction to become unblocked
136 * when this is done, it is safe to start a new transaction, but the current
137 * transaction might not be fully on disk.
139 static void wait_current_trans(struct btrfs_root
*root
)
141 struct btrfs_transaction
*cur_trans
;
143 cur_trans
= root
->fs_info
->running_transaction
;
144 if (cur_trans
&& cur_trans
->blocked
) {
146 cur_trans
->use_count
++;
148 prepare_to_wait(&root
->fs_info
->transaction_wait
, &wait
,
149 TASK_UNINTERRUPTIBLE
);
150 if (cur_trans
->blocked
) {
151 mutex_unlock(&root
->fs_info
->trans_mutex
);
153 mutex_lock(&root
->fs_info
->trans_mutex
);
154 finish_wait(&root
->fs_info
->transaction_wait
,
157 finish_wait(&root
->fs_info
->transaction_wait
,
162 put_transaction(cur_trans
);
166 static struct btrfs_trans_handle
*start_transaction(struct btrfs_root
*root
,
167 int num_blocks
, int wait
)
169 struct btrfs_trans_handle
*h
=
170 kmem_cache_alloc(btrfs_trans_handle_cachep
, GFP_NOFS
);
173 mutex_lock(&root
->fs_info
->trans_mutex
);
174 if (!root
->fs_info
->log_root_recovering
&&
175 ((wait
== 1 && !root
->fs_info
->open_ioctl_trans
) || wait
== 2))
176 wait_current_trans(root
);
177 ret
= join_transaction(root
);
180 btrfs_record_root_in_trans(root
);
181 h
->transid
= root
->fs_info
->running_transaction
->transid
;
182 h
->transaction
= root
->fs_info
->running_transaction
;
183 h
->blocks_reserved
= num_blocks
;
185 h
->block_group
= NULL
;
186 h
->alloc_exclude_nr
= 0;
187 h
->alloc_exclude_start
= 0;
188 root
->fs_info
->running_transaction
->use_count
++;
189 mutex_unlock(&root
->fs_info
->trans_mutex
);
/*
 * start a transaction handle using wait mode 1: wait for a blocked commit
 * to unblock first, unless an ioctl transaction is open.
 */
struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
						   int num_blocks)
{
	return start_transaction(root, num_blocks, 1);
}
/*
 * start a transaction handle using wait mode 0: join the running
 * transaction without waiting for a blocked commit.
 */
struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
						   int num_blocks)
{
	return start_transaction(root, num_blocks, 0);
}
/*
 * start a transaction handle using wait mode 2: wait for a blocked commit
 * even when an ioctl transaction is open.
 */
struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
							 int num_blocks)
{
	return start_transaction(r, num_blocks, 2);
}
210 /* wait for a transaction commit to be fully complete */
211 static noinline
int wait_for_commit(struct btrfs_root
*root
,
212 struct btrfs_transaction
*commit
)
215 mutex_lock(&root
->fs_info
->trans_mutex
);
216 while(!commit
->commit_done
) {
217 prepare_to_wait(&commit
->commit_wait
, &wait
,
218 TASK_UNINTERRUPTIBLE
);
219 if (commit
->commit_done
)
221 mutex_unlock(&root
->fs_info
->trans_mutex
);
223 mutex_lock(&root
->fs_info
->trans_mutex
);
225 mutex_unlock(&root
->fs_info
->trans_mutex
);
226 finish_wait(&commit
->commit_wait
, &wait
);
231 * rate limit against the drop_snapshot code. This helps to slow down new operations
232 * if the drop_snapshot code isn't able to keep up.
234 static void throttle_on_drops(struct btrfs_root
*root
)
236 struct btrfs_fs_info
*info
= root
->fs_info
;
237 int harder_count
= 0;
240 if (atomic_read(&info
->throttles
)) {
243 thr
= atomic_read(&info
->throttle_gen
);
246 prepare_to_wait(&info
->transaction_throttle
,
247 &wait
, TASK_UNINTERRUPTIBLE
);
248 if (!atomic_read(&info
->throttles
)) {
249 finish_wait(&info
->transaction_throttle
, &wait
);
253 finish_wait(&info
->transaction_throttle
, &wait
);
254 } while (thr
== atomic_read(&info
->throttle_gen
));
257 if (root
->fs_info
->total_ref_cache_size
> 1 * 1024 * 1024 &&
261 if (root
->fs_info
->total_ref_cache_size
> 5 * 1024 * 1024 &&
265 if (root
->fs_info
->total_ref_cache_size
> 10 * 1024 * 1024 &&
271 void btrfs_throttle(struct btrfs_root
*root
)
273 mutex_lock(&root
->fs_info
->trans_mutex
);
274 if (!root
->fs_info
->open_ioctl_trans
)
275 wait_current_trans(root
);
276 mutex_unlock(&root
->fs_info
->trans_mutex
);
278 throttle_on_drops(root
);
281 static int __btrfs_end_transaction(struct btrfs_trans_handle
*trans
,
282 struct btrfs_root
*root
, int throttle
)
284 struct btrfs_transaction
*cur_trans
;
285 struct btrfs_fs_info
*info
= root
->fs_info
;
287 mutex_lock(&info
->trans_mutex
);
288 cur_trans
= info
->running_transaction
;
289 WARN_ON(cur_trans
!= trans
->transaction
);
290 WARN_ON(cur_trans
->num_writers
< 1);
291 cur_trans
->num_writers
--;
293 if (waitqueue_active(&cur_trans
->writer_wait
))
294 wake_up(&cur_trans
->writer_wait
);
295 put_transaction(cur_trans
);
296 mutex_unlock(&info
->trans_mutex
);
297 memset(trans
, 0, sizeof(*trans
));
298 kmem_cache_free(btrfs_trans_handle_cachep
, trans
);
301 throttle_on_drops(root
);
/* release a transaction handle without throttling the caller */
int btrfs_end_transaction(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root)
{
	return __btrfs_end_transaction(trans, root, 0);
}
/* release a transaction handle, then rate limit against snapshot drops */
int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root)
{
	return __btrfs_end_transaction(trans, root, 1);
}
319 * when btree blocks are allocated, they have some corresponding bits set for
320 * them in one of two extent_io trees. This is used to make sure all of
321 * those extents are on disk for transaction or log commit
323 int btrfs_write_and_wait_marked_extents(struct btrfs_root
*root
,
324 struct extent_io_tree
*dirty_pages
)
330 struct inode
*btree_inode
= root
->fs_info
->btree_inode
;
336 ret
= find_first_extent_bit(dirty_pages
, start
, &start
, &end
,
340 while(start
<= end
) {
343 index
= start
>> PAGE_CACHE_SHIFT
;
344 start
= (u64
)(index
+ 1) << PAGE_CACHE_SHIFT
;
345 page
= find_get_page(btree_inode
->i_mapping
, index
);
349 btree_lock_page_hook(page
);
350 if (!page
->mapping
) {
352 page_cache_release(page
);
356 if (PageWriteback(page
)) {
358 wait_on_page_writeback(page
);
361 page_cache_release(page
);
365 err
= write_one_page(page
, 0);
368 page_cache_release(page
);
372 ret
= find_first_extent_bit(dirty_pages
, 0, &start
, &end
,
377 clear_extent_dirty(dirty_pages
, start
, end
, GFP_NOFS
);
378 while(start
<= end
) {
379 index
= start
>> PAGE_CACHE_SHIFT
;
380 start
= (u64
)(index
+ 1) << PAGE_CACHE_SHIFT
;
381 page
= find_get_page(btree_inode
->i_mapping
, index
);
384 if (PageDirty(page
)) {
385 btree_lock_page_hook(page
);
386 wait_on_page_writeback(page
);
387 err
= write_one_page(page
, 0);
391 wait_on_page_writeback(page
);
392 page_cache_release(page
);
401 int btrfs_write_and_wait_transaction(struct btrfs_trans_handle
*trans
,
402 struct btrfs_root
*root
)
404 if (!trans
|| !trans
->transaction
) {
405 struct inode
*btree_inode
;
406 btree_inode
= root
->fs_info
->btree_inode
;
407 return filemap_write_and_wait(btree_inode
->i_mapping
);
409 return btrfs_write_and_wait_marked_extents(root
,
410 &trans
->transaction
->dirty_pages
);
414 * this is used to update the root pointer in the tree of tree roots.
416 * But, in the case of the extent allocation tree, updating the root
417 * pointer may allocate blocks which may change the root of the extent
420 * So, this loops and repeats and makes sure the cowonly root didn't
421 * change while the root pointer was being updated in the metadata.
423 static int update_cowonly_root(struct btrfs_trans_handle
*trans
,
424 struct btrfs_root
*root
)
428 struct btrfs_root
*tree_root
= root
->fs_info
->tree_root
;
430 btrfs_extent_post_op(trans
, root
);
431 btrfs_write_dirty_block_groups(trans
, root
);
432 btrfs_extent_post_op(trans
, root
);
435 old_root_bytenr
= btrfs_root_bytenr(&root
->root_item
);
436 if (old_root_bytenr
== root
->node
->start
)
438 btrfs_set_root_bytenr(&root
->root_item
,
440 btrfs_set_root_level(&root
->root_item
,
441 btrfs_header_level(root
->node
));
442 btrfs_set_root_generation(&root
->root_item
, trans
->transid
);
444 btrfs_extent_post_op(trans
, root
);
446 ret
= btrfs_update_root(trans
, tree_root
,
450 btrfs_write_dirty_block_groups(trans
, root
);
451 btrfs_extent_post_op(trans
, root
);
457 * update all the cowonly tree roots on disk
459 int btrfs_commit_tree_roots(struct btrfs_trans_handle
*trans
,
460 struct btrfs_root
*root
)
462 struct btrfs_fs_info
*fs_info
= root
->fs_info
;
463 struct list_head
*next
;
464 struct extent_buffer
*eb
;
466 btrfs_extent_post_op(trans
, fs_info
->tree_root
);
468 eb
= btrfs_lock_root_node(fs_info
->tree_root
);
469 btrfs_cow_block(trans
, fs_info
->tree_root
, eb
, NULL
, 0, &eb
, 0);
470 btrfs_tree_unlock(eb
);
471 free_extent_buffer(eb
);
473 btrfs_extent_post_op(trans
, fs_info
->tree_root
);
475 while(!list_empty(&fs_info
->dirty_cowonly_roots
)) {
476 next
= fs_info
->dirty_cowonly_roots
.next
;
478 root
= list_entry(next
, struct btrfs_root
, dirty_list
);
480 update_cowonly_root(trans
, root
);
486 * dead roots are old snapshots that need to be deleted. This allocates
487 * a dirty root struct and adds it into the list of dead roots that need to
490 int btrfs_add_dead_root(struct btrfs_root
*root
, struct btrfs_root
*latest
)
492 struct btrfs_dirty_root
*dirty
;
494 dirty
= kmalloc(sizeof(*dirty
), GFP_NOFS
);
498 dirty
->latest_root
= latest
;
500 mutex_lock(&root
->fs_info
->trans_mutex
);
501 list_add(&dirty
->list
, &latest
->fs_info
->dead_roots
);
502 mutex_unlock(&root
->fs_info
->trans_mutex
);
507 * at transaction commit time we need to schedule the old roots for
508 * deletion via btrfs_drop_snapshot. This runs through all the
509 * reference counted roots that were modified in the current
510 * transaction and puts them into the drop list
512 static noinline
int add_dirty_roots(struct btrfs_trans_handle
*trans
,
513 struct radix_tree_root
*radix
,
514 struct list_head
*list
)
516 struct btrfs_dirty_root
*dirty
;
517 struct btrfs_root
*gang
[8];
518 struct btrfs_root
*root
;
525 ret
= radix_tree_gang_lookup_tag(radix
, (void **)gang
, 0,
527 BTRFS_ROOT_TRANS_TAG
);
530 for (i
= 0; i
< ret
; i
++) {
532 radix_tree_tag_clear(radix
,
533 (unsigned long)root
->root_key
.objectid
,
534 BTRFS_ROOT_TRANS_TAG
);
536 BUG_ON(!root
->ref_tree
);
537 dirty
= root
->dirty_root
;
539 btrfs_free_log(trans
, root
);
540 btrfs_free_reloc_root(trans
, root
);
542 if (root
->commit_root
== root
->node
) {
543 WARN_ON(root
->node
->start
!=
544 btrfs_root_bytenr(&root
->root_item
));
546 free_extent_buffer(root
->commit_root
);
547 root
->commit_root
= NULL
;
548 root
->dirty_root
= NULL
;
550 spin_lock(&root
->list_lock
);
551 list_del_init(&dirty
->root
->dead_list
);
552 spin_unlock(&root
->list_lock
);
557 /* make sure to update the root on disk
558 * so we get any updates to the block used
561 err
= btrfs_update_root(trans
,
562 root
->fs_info
->tree_root
,
568 memset(&root
->root_item
.drop_progress
, 0,
569 sizeof(struct btrfs_disk_key
));
570 root
->root_item
.drop_level
= 0;
571 root
->commit_root
= NULL
;
572 root
->dirty_root
= NULL
;
573 root
->root_key
.offset
= root
->fs_info
->generation
;
574 btrfs_set_root_bytenr(&root
->root_item
,
576 btrfs_set_root_level(&root
->root_item
,
577 btrfs_header_level(root
->node
));
578 btrfs_set_root_generation(&root
->root_item
,
579 root
->root_key
.offset
);
581 err
= btrfs_insert_root(trans
, root
->fs_info
->tree_root
,
587 refs
= btrfs_root_refs(&dirty
->root
->root_item
);
588 btrfs_set_root_refs(&dirty
->root
->root_item
, refs
- 1);
589 err
= btrfs_update_root(trans
, root
->fs_info
->tree_root
,
590 &dirty
->root
->root_key
,
591 &dirty
->root
->root_item
);
595 list_add(&dirty
->list
, list
);
598 free_extent_buffer(dirty
->root
->node
);
608 * defrag a given btree. If cacheonly == 1, this won't read from the disk,
609 * otherwise every leaf in the btree is read and defragged.
611 int btrfs_defrag_root(struct btrfs_root
*root
, int cacheonly
)
613 struct btrfs_fs_info
*info
= root
->fs_info
;
615 struct btrfs_trans_handle
*trans
;
619 if (root
->defrag_running
)
621 trans
= btrfs_start_transaction(root
, 1);
623 root
->defrag_running
= 1;
624 ret
= btrfs_defrag_leaves(trans
, root
, cacheonly
);
625 nr
= trans
->blocks_used
;
626 btrfs_end_transaction(trans
, root
);
627 btrfs_btree_balance_dirty(info
->tree_root
, nr
);
630 trans
= btrfs_start_transaction(root
, 1);
631 if (root
->fs_info
->closing
|| ret
!= -EAGAIN
)
634 root
->defrag_running
= 0;
636 btrfs_end_transaction(trans
, root
);
641 * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on
644 static noinline
int drop_dirty_roots(struct btrfs_root
*tree_root
,
645 struct list_head
*list
)
647 struct btrfs_dirty_root
*dirty
;
648 struct btrfs_trans_handle
*trans
;
656 while(!list_empty(list
)) {
657 struct btrfs_root
*root
;
659 dirty
= list_entry(list
->prev
, struct btrfs_dirty_root
, list
);
660 list_del_init(&dirty
->list
);
662 num_bytes
= btrfs_root_used(&dirty
->root
->root_item
);
663 root
= dirty
->latest_root
;
664 atomic_inc(&root
->fs_info
->throttles
);
667 trans
= btrfs_start_transaction(tree_root
, 1);
668 mutex_lock(&root
->fs_info
->drop_mutex
);
669 ret
= btrfs_drop_snapshot(trans
, dirty
->root
);
670 if (ret
!= -EAGAIN
) {
673 mutex_unlock(&root
->fs_info
->drop_mutex
);
675 err
= btrfs_update_root(trans
,
677 &dirty
->root
->root_key
,
678 &dirty
->root
->root_item
);
681 nr
= trans
->blocks_used
;
682 ret
= btrfs_end_transaction(trans
, tree_root
);
685 btrfs_btree_balance_dirty(tree_root
, nr
);
689 atomic_dec(&root
->fs_info
->throttles
);
690 wake_up(&root
->fs_info
->transaction_throttle
);
692 num_bytes
-= btrfs_root_used(&dirty
->root
->root_item
);
693 bytes_used
= btrfs_root_used(&root
->root_item
);
695 btrfs_record_root_in_trans(root
);
696 btrfs_set_root_used(&root
->root_item
,
697 bytes_used
- num_bytes
);
700 ret
= btrfs_del_root(trans
, tree_root
, &dirty
->root
->root_key
);
705 mutex_unlock(&root
->fs_info
->drop_mutex
);
707 spin_lock(&root
->list_lock
);
708 list_del_init(&dirty
->root
->dead_list
);
709 if (!list_empty(&root
->dead_list
)) {
710 struct btrfs_root
*oldest
;
711 oldest
= list_entry(root
->dead_list
.prev
,
712 struct btrfs_root
, dead_list
);
713 max_useless
= oldest
->root_key
.offset
- 1;
715 max_useless
= root
->root_key
.offset
- 1;
717 spin_unlock(&root
->list_lock
);
719 nr
= trans
->blocks_used
;
720 ret
= btrfs_end_transaction(trans
, tree_root
);
723 ret
= btrfs_remove_leaf_refs(root
, max_useless
, 0);
726 free_extent_buffer(dirty
->root
->node
);
730 btrfs_btree_balance_dirty(tree_root
, nr
);
737 * new snapshots need to be created at a very specific time in the
738 * transaction commit. This does the actual creation
740 static noinline
int create_pending_snapshot(struct btrfs_trans_handle
*trans
,
741 struct btrfs_fs_info
*fs_info
,
742 struct btrfs_pending_snapshot
*pending
)
744 struct btrfs_key key
;
745 struct btrfs_root_item
*new_root_item
;
746 struct btrfs_root
*tree_root
= fs_info
->tree_root
;
747 struct btrfs_root
*root
= pending
->root
;
748 struct extent_buffer
*tmp
;
749 struct extent_buffer
*old
;
753 new_root_item
= kmalloc(sizeof(*new_root_item
), GFP_NOFS
);
754 if (!new_root_item
) {
758 ret
= btrfs_find_free_objectid(trans
, tree_root
, 0, &objectid
);
762 btrfs_record_root_in_trans(root
);
763 btrfs_set_root_last_snapshot(&root
->root_item
, trans
->transid
);
764 memcpy(new_root_item
, &root
->root_item
, sizeof(*new_root_item
));
766 key
.objectid
= objectid
;
767 key
.offset
= trans
->transid
;
768 btrfs_set_key_type(&key
, BTRFS_ROOT_ITEM_KEY
);
770 old
= btrfs_lock_root_node(root
);
771 btrfs_cow_block(trans
, root
, old
, NULL
, 0, &old
, 0);
773 btrfs_copy_root(trans
, root
, old
, &tmp
, objectid
);
774 btrfs_tree_unlock(old
);
775 free_extent_buffer(old
);
777 btrfs_set_root_bytenr(new_root_item
, tmp
->start
);
778 btrfs_set_root_level(new_root_item
, btrfs_header_level(tmp
));
779 btrfs_set_root_generation(new_root_item
, trans
->transid
);
780 ret
= btrfs_insert_root(trans
, root
->fs_info
->tree_root
, &key
,
782 btrfs_tree_unlock(tmp
);
783 free_extent_buffer(tmp
);
787 key
.offset
= (u64
)-1;
788 memcpy(&pending
->root_key
, &key
, sizeof(key
));
790 kfree(new_root_item
);
794 static noinline
int finish_pending_snapshot(struct btrfs_fs_info
*fs_info
,
795 struct btrfs_pending_snapshot
*pending
)
800 struct btrfs_trans_handle
*trans
;
801 struct inode
*parent_inode
;
803 struct btrfs_root
*parent_root
;
805 parent_inode
= pending
->dentry
->d_parent
->d_inode
;
806 parent_root
= BTRFS_I(parent_inode
)->root
;
807 trans
= btrfs_start_transaction(parent_root
, 1);
810 * insert the directory item
812 namelen
= strlen(pending
->name
);
813 ret
= btrfs_set_inode_index(parent_inode
, &index
);
814 ret
= btrfs_insert_dir_item(trans
, parent_root
,
815 pending
->name
, namelen
,
817 &pending
->root_key
, BTRFS_FT_DIR
, index
);
822 /* add the backref first */
823 ret
= btrfs_add_root_ref(trans
, parent_root
->fs_info
->tree_root
,
824 pending
->root_key
.objectid
,
825 BTRFS_ROOT_BACKREF_KEY
,
826 parent_root
->root_key
.objectid
,
827 parent_inode
->i_ino
, index
, pending
->name
,
832 /* now add the forward ref */
833 ret
= btrfs_add_root_ref(trans
, parent_root
->fs_info
->tree_root
,
834 parent_root
->root_key
.objectid
,
836 pending
->root_key
.objectid
,
837 parent_inode
->i_ino
, index
, pending
->name
,
840 inode
= btrfs_lookup_dentry(parent_inode
, pending
->dentry
);
841 d_instantiate(pending
->dentry
, inode
);
843 btrfs_end_transaction(trans
, fs_info
->fs_root
);
848 * create all the snapshots we've scheduled for creation
850 static noinline
int create_pending_snapshots(struct btrfs_trans_handle
*trans
,
851 struct btrfs_fs_info
*fs_info
)
853 struct btrfs_pending_snapshot
*pending
;
854 struct list_head
*head
= &trans
->transaction
->pending_snapshots
;
855 struct list_head
*cur
;
858 list_for_each(cur
, head
) {
859 pending
= list_entry(cur
, struct btrfs_pending_snapshot
, list
);
860 ret
= create_pending_snapshot(trans
, fs_info
, pending
);
866 static noinline
int finish_pending_snapshots(struct btrfs_trans_handle
*trans
,
867 struct btrfs_fs_info
*fs_info
)
869 struct btrfs_pending_snapshot
*pending
;
870 struct list_head
*head
= &trans
->transaction
->pending_snapshots
;
873 while(!list_empty(head
)) {
874 pending
= list_entry(head
->next
,
875 struct btrfs_pending_snapshot
, list
);
876 ret
= finish_pending_snapshot(fs_info
, pending
);
878 list_del(&pending
->list
);
879 kfree(pending
->name
);
885 int btrfs_commit_transaction(struct btrfs_trans_handle
*trans
,
886 struct btrfs_root
*root
)
888 unsigned long joined
= 0;
889 unsigned long timeout
= 1;
890 struct btrfs_transaction
*cur_trans
;
891 struct btrfs_transaction
*prev_trans
= NULL
;
892 struct btrfs_root
*chunk_root
= root
->fs_info
->chunk_root
;
893 struct list_head dirty_fs_roots
;
894 struct extent_io_tree
*pinned_copy
;
898 INIT_LIST_HEAD(&dirty_fs_roots
);
899 mutex_lock(&root
->fs_info
->trans_mutex
);
900 if (trans
->transaction
->in_commit
) {
901 cur_trans
= trans
->transaction
;
902 trans
->transaction
->use_count
++;
903 mutex_unlock(&root
->fs_info
->trans_mutex
);
904 btrfs_end_transaction(trans
, root
);
906 ret
= wait_for_commit(root
, cur_trans
);
909 mutex_lock(&root
->fs_info
->trans_mutex
);
910 put_transaction(cur_trans
);
911 mutex_unlock(&root
->fs_info
->trans_mutex
);
916 pinned_copy
= kmalloc(sizeof(*pinned_copy
), GFP_NOFS
);
920 extent_io_tree_init(pinned_copy
,
921 root
->fs_info
->btree_inode
->i_mapping
, GFP_NOFS
);
923 trans
->transaction
->in_commit
= 1;
924 trans
->transaction
->blocked
= 1;
925 cur_trans
= trans
->transaction
;
926 if (cur_trans
->list
.prev
!= &root
->fs_info
->trans_list
) {
927 prev_trans
= list_entry(cur_trans
->list
.prev
,
928 struct btrfs_transaction
, list
);
929 if (!prev_trans
->commit_done
) {
930 prev_trans
->use_count
++;
931 mutex_unlock(&root
->fs_info
->trans_mutex
);
933 wait_for_commit(root
, prev_trans
);
935 mutex_lock(&root
->fs_info
->trans_mutex
);
936 put_transaction(prev_trans
);
941 int snap_pending
= 0;
942 joined
= cur_trans
->num_joined
;
943 if (!list_empty(&trans
->transaction
->pending_snapshots
))
946 WARN_ON(cur_trans
!= trans
->transaction
);
947 prepare_to_wait(&cur_trans
->writer_wait
, &wait
,
948 TASK_UNINTERRUPTIBLE
);
950 if (cur_trans
->num_writers
> 1)
951 timeout
= MAX_SCHEDULE_TIMEOUT
;
955 mutex_unlock(&root
->fs_info
->trans_mutex
);
958 ret
= btrfs_wait_ordered_extents(root
, 1);
962 schedule_timeout(timeout
);
964 mutex_lock(&root
->fs_info
->trans_mutex
);
965 finish_wait(&cur_trans
->writer_wait
, &wait
);
966 } while (cur_trans
->num_writers
> 1 ||
967 (cur_trans
->num_joined
!= joined
));
969 ret
= create_pending_snapshots(trans
, root
->fs_info
);
972 WARN_ON(cur_trans
!= trans
->transaction
);
974 /* btrfs_commit_tree_roots is responsible for getting the
975 * various roots consistent with each other. Every pointer
976 * in the tree of tree roots has to point to the most up to date
977 * root for every subvolume and other tree. So, we have to keep
978 * the tree logging code from jumping in and changing any
981 * At this point in the commit, there can't be any tree-log
982 * writers, but a little lower down we drop the trans mutex
983 * and let new people in. By holding the tree_log_mutex
984 * from now until after the super is written, we avoid races
985 * with the tree-log code.
987 mutex_lock(&root
->fs_info
->tree_log_mutex
);
989 * keep tree reloc code from adding new reloc trees
991 mutex_lock(&root
->fs_info
->tree_reloc_mutex
);
994 ret
= add_dirty_roots(trans
, &root
->fs_info
->fs_roots_radix
,
998 /* add_dirty_roots gets rid of all the tree log roots, it is now
999 * safe to free the root of tree log roots
1001 btrfs_free_log_root_tree(trans
, root
->fs_info
);
1003 ret
= btrfs_commit_tree_roots(trans
, root
);
1006 cur_trans
= root
->fs_info
->running_transaction
;
1007 spin_lock(&root
->fs_info
->new_trans_lock
);
1008 root
->fs_info
->running_transaction
= NULL
;
1009 spin_unlock(&root
->fs_info
->new_trans_lock
);
1010 btrfs_set_super_generation(&root
->fs_info
->super_copy
,
1011 cur_trans
->transid
);
1012 btrfs_set_super_root(&root
->fs_info
->super_copy
,
1013 root
->fs_info
->tree_root
->node
->start
);
1014 btrfs_set_super_root_level(&root
->fs_info
->super_copy
,
1015 btrfs_header_level(root
->fs_info
->tree_root
->node
));
1017 btrfs_set_super_chunk_root(&root
->fs_info
->super_copy
,
1018 chunk_root
->node
->start
);
1019 btrfs_set_super_chunk_root_level(&root
->fs_info
->super_copy
,
1020 btrfs_header_level(chunk_root
->node
));
1021 btrfs_set_super_chunk_root_generation(&root
->fs_info
->super_copy
,
1022 btrfs_header_generation(chunk_root
->node
));
1024 if (!root
->fs_info
->log_root_recovering
) {
1025 btrfs_set_super_log_root(&root
->fs_info
->super_copy
, 0);
1026 btrfs_set_super_log_root_level(&root
->fs_info
->super_copy
, 0);
1029 memcpy(&root
->fs_info
->super_for_commit
, &root
->fs_info
->super_copy
,
1030 sizeof(root
->fs_info
->super_copy
));
1032 btrfs_copy_pinned(root
, pinned_copy
);
1034 trans
->transaction
->blocked
= 0;
1035 wake_up(&root
->fs_info
->transaction_throttle
);
1036 wake_up(&root
->fs_info
->transaction_wait
);
1038 mutex_unlock(&root
->fs_info
->trans_mutex
);
1039 ret
= btrfs_write_and_wait_transaction(trans
, root
);
1041 write_ctree_super(trans
, root
, 0);
1044 * the super is written, we can safely allow the tree-loggers
1045 * to go about their business
1047 mutex_unlock(&root
->fs_info
->tree_log_mutex
);
1049 btrfs_finish_extent_commit(trans
, root
, pinned_copy
);
1052 btrfs_drop_dead_reloc_roots(root
);
1053 mutex_unlock(&root
->fs_info
->tree_reloc_mutex
);
1055 /* do the directory inserts of any pending snapshot creations */
1056 finish_pending_snapshots(trans
, root
->fs_info
);
1058 mutex_lock(&root
->fs_info
->trans_mutex
);
1060 cur_trans
->commit_done
= 1;
1061 root
->fs_info
->last_trans_committed
= cur_trans
->transid
;
1062 wake_up(&cur_trans
->commit_wait
);
1064 put_transaction(cur_trans
);
1065 put_transaction(cur_trans
);
1067 list_splice_init(&dirty_fs_roots
, &root
->fs_info
->dead_roots
);
1068 if (root
->fs_info
->closing
)
1069 list_splice_init(&root
->fs_info
->dead_roots
, &dirty_fs_roots
);
1071 mutex_unlock(&root
->fs_info
->trans_mutex
);
1073 kmem_cache_free(btrfs_trans_handle_cachep
, trans
);
1075 if (root
->fs_info
->closing
) {
1076 drop_dirty_roots(root
->fs_info
->tree_root
, &dirty_fs_roots
);
1082 * interface function to delete all the snapshots we have scheduled for deletion
1084 int btrfs_clean_old_snapshots(struct btrfs_root
*root
)
1086 struct list_head dirty_roots
;
1087 INIT_LIST_HEAD(&dirty_roots
);
1089 mutex_lock(&root
->fs_info
->trans_mutex
);
1090 list_splice_init(&root
->fs_info
->dead_roots
, &dirty_roots
);
1091 mutex_unlock(&root
->fs_info
->trans_mutex
);
1093 if (!list_empty(&dirty_roots
)) {
1094 drop_dirty_roots(root
, &dirty_roots
);