It is possible that many tasks sync the log tree at the same time, but
only one task can do the sync work, the others will wait for it. But those
wait tasks didn't get the result of the log sync, and returned 0 when they
ended the wait. It caused those tasks skipped the error handle, and the
serious problem was they told the users the file sync succeeded but in
fact they failed.
This patch fixes this problem by introducing a log context structure,
we insert it into the a global list. When the sync fails, we will set
the error number of every log context in the list, then the waiting tasks
get the error number of the log context and handle the error if need.
Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Josef Bacik <jbacik@fb.com>
struct mutex log_mutex;
wait_queue_head_t log_writer_wait;
wait_queue_head_t log_commit_wait[2];
struct mutex log_mutex;
wait_queue_head_t log_writer_wait;
wait_queue_head_t log_commit_wait[2];
+ struct list_head log_ctxs[2];
atomic_t log_writers;
atomic_t log_commit[2];
atomic_t log_batch;
atomic_t log_writers;
atomic_t log_commit[2];
atomic_t log_batch;
init_waitqueue_head(&root->log_writer_wait);
init_waitqueue_head(&root->log_commit_wait[0]);
init_waitqueue_head(&root->log_commit_wait[1]);
init_waitqueue_head(&root->log_writer_wait);
init_waitqueue_head(&root->log_commit_wait[0]);
init_waitqueue_head(&root->log_commit_wait[1]);
+ INIT_LIST_HEAD(&root->log_ctxs[0]);
+ INIT_LIST_HEAD(&root->log_ctxs[1]);
atomic_set(&root->log_commit[0], 0);
atomic_set(&root->log_commit[1], 0);
atomic_set(&root->log_writers, 0);
atomic_set(&root->log_commit[0], 0);
atomic_set(&root->log_commit[1], 0);
atomic_set(&root->log_writers, 0);
struct dentry *dentry = file->f_path.dentry;
struct inode *inode = dentry->d_inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct dentry *dentry = file->f_path.dentry;
struct inode *inode = dentry->d_inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
struct btrfs_trans_handle *trans;
+ struct btrfs_log_ctx ctx;
+ int ret = 0;
bool full_sync = 0;
trace_btrfs_sync_file(file, datasync);
bool full_sync = 0;
trace_btrfs_sync_file(file, datasync);
- ret = btrfs_log_dentry_safe(trans, root, dentry);
+ btrfs_init_log_ctx(&ctx);
+
+ ret = btrfs_log_dentry_safe(trans, root, dentry, &ctx);
if (ret < 0) {
/* Fallthrough and commit/free transaction. */
ret = 1;
if (ret < 0) {
/* Fallthrough and commit/free transaction. */
ret = 1;
if (ret != BTRFS_NO_LOG_SYNC) {
if (!ret) {
if (ret != BTRFS_NO_LOG_SYNC) {
if (!ret) {
- ret = btrfs_sync_log(trans, root);
+ ret = btrfs_sync_log(trans, root, &ctx);
if (!ret) {
ret = btrfs_end_transaction(trans, root);
goto out;
if (!ret) {
ret = btrfs_end_transaction(trans, root);
goto out;
* syncing the tree wait for us to finish
*/
static int start_log_trans(struct btrfs_trans_handle *trans,
* syncing the tree wait for us to finish
*/
static int start_log_trans(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
+ struct btrfs_root *root,
+ struct btrfs_log_ctx *ctx)
int ret;
mutex_lock(&root->log_mutex);
int ret;
mutex_lock(&root->log_mutex);
atomic_inc(&root->log_batch);
atomic_inc(&root->log_writers);
atomic_inc(&root->log_batch);
atomic_inc(&root->log_writers);
+ if (ctx) {
+ index = root->log_transid % 2;
+ list_add_tail(&ctx->list, &root->log_ctxs[index]);
+ }
mutex_unlock(&root->log_mutex);
return 0;
}
mutex_unlock(&root->log_mutex);
return 0;
}
root->log_start_pid = current->pid;
atomic_inc(&root->log_batch);
atomic_inc(&root->log_writers);
root->log_start_pid = current->pid;
atomic_inc(&root->log_batch);
atomic_inc(&root->log_writers);
+ if (ctx) {
+ index = root->log_transid % 2;
+ list_add_tail(&ctx->list, &root->log_ctxs[index]);
+ }
out:
mutex_unlock(&root->log_mutex);
return ret;
out:
mutex_unlock(&root->log_mutex);
return ret;
-static int wait_log_commit(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, int transid)
+static void wait_log_commit(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, int transid)
{
DEFINE_WAIT(wait);
int index = transid % 2;
{
DEFINE_WAIT(wait);
int index = transid % 2;
/*
* we only allow two pending log transactions at a time,
/*
* we only allow two pending log transactions at a time,
* current transaction, we're done
*/
do {
* current transaction, we're done
*/
do {
- if (ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) ==
- trans->transid) {
- ret = -EAGAIN;
- break;
- }
-
prepare_to_wait(&root->log_commit_wait[index],
&wait, TASK_UNINTERRUPTIBLE);
mutex_unlock(&root->log_mutex);
prepare_to_wait(&root->log_commit_wait[index],
&wait, TASK_UNINTERRUPTIBLE);
mutex_unlock(&root->log_mutex);
mutex_lock(&root->log_mutex);
} while (root->log_transid < transid + 2 &&
atomic_read(&root->log_commit[index]));
mutex_lock(&root->log_mutex);
} while (root->log_transid < transid + 2 &&
atomic_read(&root->log_commit[index]));
}
static void wait_for_writer(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
DEFINE_WAIT(wait);
}
static void wait_for_writer(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
DEFINE_WAIT(wait);
- while (ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) !=
- trans->transid && atomic_read(&root->log_writers)) {
+
+ while (atomic_read(&root->log_writers)) {
prepare_to_wait(&root->log_writer_wait,
&wait, TASK_UNINTERRUPTIBLE);
mutex_unlock(&root->log_mutex);
prepare_to_wait(&root->log_writer_wait,
&wait, TASK_UNINTERRUPTIBLE);
mutex_unlock(&root->log_mutex);
- if (ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) !=
- trans->transid && atomic_read(&root->log_writers))
+ if (atomic_read(&root->log_writers))
schedule();
mutex_lock(&root->log_mutex);
finish_wait(&root->log_writer_wait, &wait);
}
}
schedule();
mutex_lock(&root->log_mutex);
finish_wait(&root->log_writer_wait, &wait);
}
}
+static inline void btrfs_remove_log_ctx(struct btrfs_root *root,
+ struct btrfs_log_ctx *ctx)
+{
+ if (!ctx)
+ return;
+
+ mutex_lock(&root->log_mutex);
+ list_del_init(&ctx->list);
+ mutex_unlock(&root->log_mutex);
+}
+
+/*
+ * Invoked in log mutex context, or be sure there is no other task which
+ * can access the list.
+ */
+static inline void btrfs_remove_all_log_ctxs(struct btrfs_root *root,
+ int index, int error)
+{
+ struct btrfs_log_ctx *ctx;
+
+ if (!error) {
+ INIT_LIST_HEAD(&root->log_ctxs[index]);
+ return;
+ }
+
+ list_for_each_entry(ctx, &root->log_ctxs[index], list)
+ ctx->log_ret = error;
+
+ INIT_LIST_HEAD(&root->log_ctxs[index]);
+}
+
/*
* btrfs_sync_log does sends a given tree log down to the disk and
* updates the super blocks to record it. When this call is done,
/*
* btrfs_sync_log does sends a given tree log down to the disk and
* updates the super blocks to record it. When this call is done,
* that has happened.
*/
int btrfs_sync_log(struct btrfs_trans_handle *trans,
* that has happened.
*/
int btrfs_sync_log(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
+ struct btrfs_root *root, struct btrfs_log_ctx *ctx)
{
int index1;
int index2;
{
int index1;
int index2;
struct btrfs_root *log = root->log_root;
struct btrfs_root *log_root_tree = root->fs_info->log_root_tree;
int log_transid = 0;
struct btrfs_root *log = root->log_root;
struct btrfs_root *log_root_tree = root->fs_info->log_root_tree;
int log_transid = 0;
+ struct btrfs_log_ctx root_log_ctx;
struct blk_plug plug;
mutex_lock(&root->log_mutex);
log_transid = root->log_transid;
index1 = root->log_transid % 2;
if (atomic_read(&root->log_commit[index1])) {
struct blk_plug plug;
mutex_lock(&root->log_mutex);
log_transid = root->log_transid;
index1 = root->log_transid % 2;
if (atomic_read(&root->log_commit[index1])) {
- ret = wait_log_commit(trans, root, root->log_transid);
+ wait_log_commit(trans, root, root->log_transid);
mutex_unlock(&root->log_mutex);
mutex_unlock(&root->log_mutex);
}
atomic_set(&root->log_commit[index1], 1);
}
atomic_set(&root->log_commit[index1], 1);
}
index2 = log_root_tree->log_transid % 2;
}
index2 = log_root_tree->log_transid % 2;
+
+ btrfs_init_log_ctx(&root_log_ctx);
+ list_add_tail(&root_log_ctx.list, &log_root_tree->log_ctxs[index2]);
+
if (atomic_read(&log_root_tree->log_commit[index2])) {
blk_finish_plug(&plug);
btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
if (atomic_read(&log_root_tree->log_commit[index2])) {
blk_finish_plug(&plug);
btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
- ret = wait_log_commit(trans, log_root_tree,
- log_root_tree->log_transid);
+ wait_log_commit(trans, log_root_tree,
+ log_root_tree->log_transid);
btrfs_free_logged_extents(log, log_transid);
mutex_unlock(&log_root_tree->log_mutex);
btrfs_free_logged_extents(log, log_transid);
mutex_unlock(&log_root_tree->log_mutex);
+ ret = root_log_ctx.log_ret;
goto out;
}
atomic_set(&log_root_tree->log_commit[index2], 1);
goto out;
}
atomic_set(&log_root_tree->log_commit[index2], 1);
mutex_unlock(&root->log_mutex);
out_wake_log_root:
mutex_unlock(&root->log_mutex);
out_wake_log_root:
+ /*
+ * We needn't get log_mutex here because we are sure all
+ * the other tasks are blocked.
+ */
+ btrfs_remove_all_log_ctxs(log_root_tree, index2, ret);
+
+ /*
+ * It is dangerous if log_commit is changed before we set
+ * ->log_ret of log ctx. Because the readers may not get
+ * the return value.
+ */
+ smp_wmb();
+
atomic_set(&log_root_tree->log_commit[index2], 0);
smp_mb();
if (waitqueue_active(&log_root_tree->log_commit_wait[index2]))
wake_up(&log_root_tree->log_commit_wait[index2]);
out:
atomic_set(&log_root_tree->log_commit[index2], 0);
smp_mb();
if (waitqueue_active(&log_root_tree->log_commit_wait[index2]))
wake_up(&log_root_tree->log_commit_wait[index2]);
out:
+ /* See above. */
+ btrfs_remove_all_log_ctxs(root, index1, ret);
+
+ /* See above. */
+ smp_wmb();
atomic_set(&root->log_commit[index1], 0);
atomic_set(&root->log_commit[index1], 0);
smp_mb();
if (waitqueue_active(&root->log_commit_wait[index1]))
wake_up(&root->log_commit_wait[index1]);
smp_mb();
if (waitqueue_active(&root->log_commit_wait[index1]))
wake_up(&root->log_commit_wait[index1]);
*/
static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode,
*/
static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode,
- struct dentry *parent, int exists_only)
+ struct dentry *parent, int exists_only,
+ struct btrfs_log_ctx *ctx)
{
int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL;
struct super_block *sb;
{
int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL;
struct super_block *sb;
- ret = start_log_trans(trans, root);
+ ret = start_log_trans(trans, root, ctx);
if (ret)
goto end_no_trans;
if (ret)
goto end_no_trans;
root->fs_info->last_trans_log_full_commit = trans->transid;
ret = 1;
}
root->fs_info->last_trans_log_full_commit = trans->transid;
ret = 1;
}
+
+ if (ret)
+ btrfs_remove_log_ctx(root, ctx);
btrfs_end_log_trans(root);
end_no_trans:
return ret;
btrfs_end_log_trans(root);
end_no_trans:
return ret;
* data on disk.
*/
int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
* data on disk.
*/
int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct dentry *dentry)
+ struct btrfs_root *root, struct dentry *dentry,
+ struct btrfs_log_ctx *ctx)
{
struct dentry *parent = dget_parent(dentry);
int ret;
{
struct dentry *parent = dget_parent(dentry);
int ret;
- ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent, 0);
+ ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent,
+ 0, ctx);
dput(parent);
return ret;
dput(parent);
return ret;
root->fs_info->last_trans_committed))
return 0;
root->fs_info->last_trans_committed))
return 0;
- return btrfs_log_inode_parent(trans, root, inode, parent, 1);
+ return btrfs_log_inode_parent(trans, root, inode, parent, 1, NULL);
/* return value for btrfs_log_dentry_safe that means we don't need to log it at all */
#define BTRFS_NO_LOG_SYNC 256
/* return value for btrfs_log_dentry_safe that means we don't need to log it at all */
#define BTRFS_NO_LOG_SYNC 256
+struct btrfs_log_ctx {
+ int log_ret;
+ struct list_head list;
+};
+
+static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx)
+{
+ ctx->log_ret = 0;
+ INIT_LIST_HEAD(&ctx->list);
+}
+
int btrfs_sync_log(struct btrfs_trans_handle *trans,
int btrfs_sync_log(struct btrfs_trans_handle *trans,
- struct btrfs_root *root);
+ struct btrfs_root *root, struct btrfs_log_ctx *ctx);
int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root);
int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_recover_log_trees(struct btrfs_root *tree_root);
int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root);
int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_recover_log_trees(struct btrfs_root *tree_root);
int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct dentry *dentry);
+ struct btrfs_root *root, struct dentry *dentry,
+ struct btrfs_log_ctx *ctx);
int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
const char *name, int name_len,
int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
const char *name, int name_len,