* Boston, MA 021110-1307, USA.
*/
+#include <linux/version.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/scatterlist.h>
#include <linux/writeback.h>
#include <linux/buffer_head.h> // for block_sync_page
#include <linux/workqueue.h>
+#include <linux/kthread.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
+# include <linux/freezer.h>
+#else
+# include <linux/sched.h>
+#endif
#include "crc32c.h"
#include "ctree.h"
#include "disk-io.h"
#include "volumes.h"
#include "print-tree.h"
#include "async-thread.h"
+#include "locking.h"
#if 0
static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf)
{
struct inode *btree_inode = root->fs_info->btree_inode;
if (btrfs_header_generation(buf) ==
- root->fs_info->running_transaction->transid)
+ root->fs_info->running_transaction->transid) {
+ WARN_ON(!btrfs_tree_locked(buf));
clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree,
buf);
+ }
return 0;
}
root->in_sysfs = 0;
INIT_LIST_HEAD(&root->dirty_list);
+ spin_lock_init(&root->node_lock);
+ mutex_init(&root->objectid_mutex);
memset(&root->root_key, 0, sizeof(root->root_key));
memset(&root->root_item, 0, sizeof(root->root_item));
memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
#endif
}
+static int cleaner_kthread(void *arg)
+{
+ struct btrfs_root *root = arg;
+
+ do {
+ smp_mb();
+ if (root->fs_info->closing)
+ break;
+
+ vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
+ mutex_lock(&root->fs_info->cleaner_mutex);
+printk("cleaner awake\n");
+ btrfs_clean_old_snapshots(root);
+printk("cleaner done\n");
+ mutex_unlock(&root->fs_info->cleaner_mutex);
+
+ if (freezing(current)) {
+ refrigerator();
+ } else {
+ smp_mb();
+ if (root->fs_info->closing)
+ break;
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule();
+ __set_current_state(TASK_RUNNING);
+ }
+ } while (!kthread_should_stop());
+ return 0;
+}
+
+static int transaction_kthread(void *arg)
+{
+ struct btrfs_root *root = arg;
+ struct btrfs_trans_handle *trans;
+ struct btrfs_transaction *cur;
+ unsigned long now;
+ unsigned long delay;
+ int ret;
+
+ do {
+ smp_mb();
+ if (root->fs_info->closing)
+ break;
+
+ delay = HZ * 30;
+ vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
+ mutex_lock(&root->fs_info->transaction_kthread_mutex);
+
+ mutex_lock(&root->fs_info->trans_mutex);
+ cur = root->fs_info->running_transaction;
+ if (!cur) {
+ mutex_unlock(&root->fs_info->trans_mutex);
+ goto sleep;
+ }
+ now = get_seconds();
+ if (now < cur->start_time || now - cur->start_time < 30) {
+ mutex_unlock(&root->fs_info->trans_mutex);
+ delay = HZ * 5;
+ goto sleep;
+ }
+ mutex_unlock(&root->fs_info->trans_mutex);
+ btrfs_defrag_dirty_roots(root->fs_info);
+ trans = btrfs_start_transaction(root, 1);
+ ret = btrfs_commit_transaction(trans, root);
+sleep:
+ wake_up_process(root->fs_info->cleaner_kthread);
+ mutex_unlock(&root->fs_info->transaction_kthread_mutex);
+
+ if (freezing(current)) {
+ refrigerator();
+ } else {
+ if (root->fs_info->closing)
+ break;
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(delay);
+ __set_current_state(TASK_RUNNING);
+ }
+ } while (!kthread_should_stop());
+ return 0;
+}
+
struct btrfs_root *open_ctree(struct super_block *sb,
struct btrfs_fs_devices *fs_devices,
char *options)
GFP_NOFS);
int ret;
int err = -EINVAL;
+
struct btrfs_super_block *disk_super;
if (!extent_root || !tree_root || !fs_info) {
INIT_LIST_HEAD(&fs_info->space_info);
btrfs_mapping_init(&fs_info->mapping_tree);
atomic_set(&fs_info->nr_async_submits, 0);
+ atomic_set(&fs_info->throttles, 0);
fs_info->sb = sb;
fs_info->max_extent = (u64)-1;
fs_info->max_inline = 8192 * 1024;
fs_info->btree_inode = new_inode(sb);
fs_info->btree_inode->i_ino = 1;
fs_info->btree_inode->i_nlink = 1;
+ fs_info->thread_pool_size = min(num_online_cpus() + 2, 8);
sb->s_blocksize = 4096;
sb->s_blocksize_bits = blksize_bits(4096);
fs_info->btree_inode->i_mapping, GFP_NOFS);
fs_info->do_barriers = 1;
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
- INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info);
-#else
- INIT_DELAYED_WORK(&fs_info->trans_work, btrfs_transaction_cleaner);
-#endif
BTRFS_I(fs_info->btree_inode)->root = tree_root;
memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
sizeof(struct btrfs_key));
mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
mutex_init(&fs_info->trans_mutex);
- mutex_init(&fs_info->fs_mutex);
-
- /* we need to start all the end_io workers up front because the
- * queue work function gets called at interrupt time. The endio
- * workers don't normally start IO, so some number of them <= the
- * number of cpus is fine. They handle checksumming after a read.
- *
- * The other worker threads do start IO, so the max is larger than
- * the number of CPUs. FIXME, tune this for huge machines
- */
- btrfs_init_workers(&fs_info->workers, num_online_cpus() * 2);
- btrfs_init_workers(&fs_info->endio_workers, num_online_cpus());
- btrfs_start_workers(&fs_info->workers, 1);
- btrfs_start_workers(&fs_info->endio_workers, num_online_cpus());
+ mutex_init(&fs_info->drop_mutex);
+ mutex_init(&fs_info->alloc_mutex);
+ mutex_init(&fs_info->chunk_mutex);
+ mutex_init(&fs_info->transaction_kthread_mutex);
+ mutex_init(&fs_info->cleaner_mutex);
#if 0
ret = add_hasher(fs_info, "crc32c");
if (err)
goto fail_sb_buffer;
+ /*
+ * we need to start all the end_io workers up front because the
+ * queue work function gets called at interrupt time, and so it
+ * cannot dynamically grow.
+ */
+ btrfs_init_workers(&fs_info->workers, fs_info->thread_pool_size);
+ btrfs_init_workers(&fs_info->submit_workers, fs_info->thread_pool_size);
+ btrfs_init_workers(&fs_info->endio_workers, fs_info->thread_pool_size);
+ btrfs_start_workers(&fs_info->workers, 1);
+ btrfs_start_workers(&fs_info->submit_workers, 1);
+ btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size);
+
err = -EINVAL;
if (btrfs_super_num_devices(disk_super) > fs_devices->open_devices) {
printk("Btrfs: wanted %llu devices, but found %llu\n",
goto fail_sb_buffer;
}
- mutex_lock(&fs_info->fs_mutex);
-
+ mutex_lock(&fs_info->chunk_mutex);
ret = btrfs_read_sys_array(tree_root);
+ mutex_unlock(&fs_info->chunk_mutex);
if (ret) {
printk("btrfs: failed to read the system array on %s\n",
sb->s_id);
(unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node),
BTRFS_UUID_SIZE);
+ mutex_lock(&fs_info->chunk_mutex);
ret = btrfs_read_chunk_tree(chunk_root);
+ mutex_unlock(&fs_info->chunk_mutex);
BUG_ON(ret);
btrfs_close_extra_devices(fs_devices);
fs_info->data_alloc_profile = (u64)-1;
fs_info->metadata_alloc_profile = (u64)-1;
fs_info->system_alloc_profile = fs_info->metadata_alloc_profile;
+ fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
+ "btrfs-cleaner");
+ if (!fs_info->cleaner_kthread)
+ goto fail_extent_root;
+
+ fs_info->transaction_kthread = kthread_run(transaction_kthread,
+ tree_root,
+ "btrfs-transaction");
+ if (!fs_info->transaction_kthread)
+ goto fail_trans_kthread;
+
- mutex_unlock(&fs_info->fs_mutex);
return tree_root;
+fail_trans_kthread:
+ kthread_stop(fs_info->cleaner_kthread);
fail_extent_root:
free_extent_buffer(extent_root->node);
fail_tree_root:
free_extent_buffer(tree_root->node);
fail_sys_array:
- mutex_unlock(&fs_info->fs_mutex);
fail_sb_buffer:
extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree);
-fail_iput:
- iput(fs_info->btree_inode);
btrfs_stop_workers(&fs_info->workers);
btrfs_stop_workers(&fs_info->endio_workers);
+ btrfs_stop_workers(&fs_info->submit_workers);
+fail_iput:
+ iput(fs_info->btree_inode);
fail:
btrfs_close_devices(fs_info->fs_devices);
btrfs_mapping_tree_free(&fs_info->mapping_tree);
struct btrfs_fs_info *fs_info = root->fs_info;
fs_info->closing = 1;
- btrfs_transaction_flush_work(root);
- mutex_lock(&fs_info->fs_mutex);
+ smp_mb();
+
+ kthread_stop(root->fs_info->transaction_kthread);
+ kthread_stop(root->fs_info->cleaner_kthread);
+
btrfs_defrag_dirty_roots(root->fs_info);
+ btrfs_clean_old_snapshots(root);
trans = btrfs_start_transaction(root, 1);
ret = btrfs_commit_transaction(trans, root);
/* run commit again to drop the original snapshot */
BUG_ON(ret);
write_ctree_super(NULL, root);
- mutex_unlock(&fs_info->fs_mutex);
-
- btrfs_transaction_flush_work(root);
if (fs_info->delalloc_bytes) {
printk("btrfs: at unmount delalloc count %Lu\n",
btrfs_stop_workers(&fs_info->workers);
btrfs_stop_workers(&fs_info->endio_workers);
+ btrfs_stop_workers(&fs_info->submit_workers);
iput(fs_info->btree_inode);
#if 0
u64 transid = btrfs_header_generation(buf);
struct inode *btree_inode = root->fs_info->btree_inode;
+ WARN_ON(!btrfs_tree_locked(buf));
if (transid != root->fs_info->generation) {
printk(KERN_CRIT "transid mismatch buffer %llu, found %Lu running %Lu\n",
(unsigned long long)buf->start,
set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf);
}
-void btrfs_throttle(struct btrfs_root *root)
-{
- struct backing_dev_info *bdi;
-
- bdi = &root->fs_info->bdi;
- if (root->fs_info->throttles && bdi_write_congested(bdi)) {
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
- congestion_wait(WRITE, HZ/20);
-#else
- blk_congestion_wait(WRITE, HZ/20);
-#endif
- }
-}
-
void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
{
/*