Btrfs: fix fallocate deadlock on inode extent lock
[deliverable/linux.git] / fs / btrfs / file.c
index dc78954861b333d75e831e27c6bbe7760f6e1de9..da3ed965c956a43382f0a2ed4939243ab351ecaf 100644 (file)
@@ -363,15 +363,16 @@ out:
  */
 noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans,
                       struct btrfs_root *root, struct inode *inode,
-                      u64 start, u64 end, u64 inline_limit, u64 *hint_byte)
+                      u64 start, u64 end, u64 locked_end,
+                      u64 inline_limit, u64 *hint_byte)
 {
        u64 extent_end = 0;
-       u64 locked_end = end;
        u64 search_start = start;
        u64 leaf_start;
        u64 ram_bytes = 0;
        u64 orig_parent = 0;
        u64 disk_bytenr = 0;
+       u64 orig_locked_end = locked_end;
        u8 compression;
        u8 encryption;
        u16 other_encoding = 0;
@@ -606,6 +607,7 @@ next_slot:
                        btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);
 
                        btrfs_release_path(root, path);
+                       path->leave_spinning = 1;
                        ret = btrfs_insert_empty_item(trans, root, path, &ins,
                                                      sizeof(*extent));
                        BUG_ON(ret);
@@ -639,17 +641,22 @@ next_slot:
                                                        ram_bytes);
                        btrfs_set_file_extent_type(leaf, extent, found_type);
 
+                       btrfs_unlock_up_safe(path, 1);
                        btrfs_mark_buffer_dirty(path->nodes[0]);
+                       btrfs_set_lock_blocking(path->nodes[0]);
 
                        if (disk_bytenr != 0) {
                                ret = btrfs_update_extent_ref(trans, root,
-                                               disk_bytenr, orig_parent,
+                                               disk_bytenr,
+                                               le64_to_cpu(old.disk_num_bytes),
+                                               orig_parent,
                                                leaf->start,
                                                root->root_key.objectid,
                                                trans->transid, ins.objectid);
 
                                BUG_ON(ret);
                        }
+                       path->leave_spinning = 0;
                        btrfs_release_path(root, path);
                        if (disk_bytenr != 0)
                                inode_add_bytes(inode, extent_end - end);
@@ -678,9 +685,9 @@ next_slot:
        }
 out:
        btrfs_free_path(path);
-       if (locked_end > end) {
-               unlock_extent(&BTRFS_I(inode)->io_tree, end, locked_end - 1,
-                             GFP_NOFS);
+       if (locked_end > orig_locked_end) {
+               unlock_extent(&BTRFS_I(inode)->io_tree, orig_locked_end,
+                             locked_end - 1, GFP_NOFS);
        }
        btrfs_check_file(root, inode);
        return ret;
@@ -824,7 +831,7 @@ again:
 
                ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
                BUG_ON(ret);
-               goto done;
+               goto release;
        } else if (split == start) {
                if (locked_end < extent_end) {
                        ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
@@ -912,7 +919,7 @@ again:
        btrfs_set_file_extent_other_encoding(leaf, fi, 0);
 
        if (orig_parent != leaf->start) {
-               ret = btrfs_update_extent_ref(trans, root, bytenr,
+               ret = btrfs_update_extent_ref(trans, root, bytenr, num_bytes,
                                              orig_parent, leaf->start,
                                              root->root_key.objectid,
                                              trans->transid, inode->i_ino);
@@ -920,6 +927,8 @@ again:
        }
 done:
        btrfs_mark_buffer_dirty(leaf);
+
+release:
        btrfs_release_path(root, path);
        if (split_end && split == start) {
                split = end;
@@ -1125,7 +1134,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
                if (will_write) {
                        btrfs_fdatawrite_range(inode->i_mapping, pos,
                                               pos + write_bytes - 1,
-                                              WB_SYNC_NONE);
+                                              WB_SYNC_ALL);
                } else {
                        balance_dirty_pages_ratelimited_nr(inode->i_mapping,
                                                           num_pages);
@@ -1155,6 +1164,20 @@ out_nolock:
                page_cache_release(pinned[1]);
        *ppos = pos;
 
+       /*
+        * we want to make sure fsync finds this change,
+        * but we have not joined the currently running transaction.
+        *
+        * Later on, someone is sure to update the inode and get the
+        * real transid recorded.
+        *
+        * We set last_trans now to the fs_info generation + 1,
+        * this will either be one more than the running transaction
+        * or the generation used for the next transaction if there isn't
+        * one running right now.
+        */
+       BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
+
        if (num_written > 0 && will_write) {
                struct btrfs_trans_handle *trans;
 
@@ -1167,8 +1190,11 @@ out_nolock:
                        ret = btrfs_log_dentry_safe(trans, root,
                                                    file->f_dentry);
                        if (ret == 0) {
-                               btrfs_sync_log(trans, root);
-                               btrfs_end_transaction(trans, root);
+                               ret = btrfs_sync_log(trans, root);
+                               if (ret == 0)
+                                       btrfs_end_transaction(trans, root);
+                               else
+                                       btrfs_commit_transaction(trans, root);
                        } else {
                                btrfs_commit_transaction(trans, root);
                        }
@@ -1185,6 +1211,18 @@ out_nolock:
 
 int btrfs_release_file(struct inode *inode, struct file *filp)
 {
+       /*
+        * ordered_data_close is set by setattr when we are about to truncate
+        * a file from a non-zero size to a zero size.  This tries to
+        * flush down new bytes that may have been written if the
+        * application were using truncate to replace a file in place.
+        */
+       if (BTRFS_I(inode)->ordered_data_close) {
+               BTRFS_I(inode)->ordered_data_close = 0;
+               btrfs_add_ordered_operation(NULL, BTRFS_I(inode)->root, inode);
+               if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
+                       filemap_flush(inode->i_mapping);
+       }
        if (filp->private_data)
                btrfs_ioctl_trans_end(filp);
        return 0;
@@ -1260,8 +1298,11 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
        if (ret > 0) {
                ret = btrfs_commit_transaction(trans, root);
        } else {
-               btrfs_sync_log(trans, root);
-               ret = btrfs_end_transaction(trans, root);
+               ret = btrfs_sync_log(trans, root);
+               if (ret == 0)
+                       ret = btrfs_end_transaction(trans, root);
+               else
+                       ret = btrfs_commit_transaction(trans, root);
        }
        mutex_lock(&dentry->d_inode->i_mutex);
 out:
This page took 0.028009 seconds and 5 git commands to generate.