Btrfs: fix lots of orphan inodes when the space is not enough
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 657f16d9c78b38ba6a425ec0a3e53dc591dd5ec9..1ea0988b82e1a25cd699c2e6b96d793a4ff12623 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -88,13 +88,17 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
        [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
 };
 
-static int btrfs_setsize(struct inode *inode, loff_t newsize);
+static int btrfs_setsize(struct inode *inode, struct iattr *attr);
 static int btrfs_truncate(struct inode *inode);
 static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
 static noinline int cow_file_range(struct inode *inode,
                                   struct page *locked_page,
                                   u64 start, u64 end, int *page_started,
                                   unsigned long *nr_written, int unlock);
+static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
+                                          u64 len, u64 orig_start,
+                                          u64 block_start, u64 block_len,
+                                          u64 orig_block_len, int type);
 
 static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
                                     struct inode *inode,  struct inode *dir,
@@ -699,14 +703,19 @@ retry:
 
                em->block_start = ins.objectid;
                em->block_len = ins.offset;
+               em->orig_block_len = ins.offset;
                em->bdev = root->fs_info->fs_devices->latest_bdev;
                em->compress_type = async_extent->compress_type;
                set_bit(EXTENT_FLAG_PINNED, &em->flags);
                set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
+               em->generation = -1;
 
                while (1) {
                        write_lock(&em_tree->lock);
                        ret = add_extent_mapping(em_tree, em);
+                       if (!ret)
+                               list_move(&em->list,
+                                         &em_tree->modified_extents);
                        write_unlock(&em_tree->lock);
                        if (ret != -EEXIST) {
                                free_extent_map(em);
@@ -886,12 +895,17 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
 
                em->block_start = ins.objectid;
                em->block_len = ins.offset;
+               em->orig_block_len = ins.offset;
                em->bdev = root->fs_info->fs_devices->latest_bdev;
                set_bit(EXTENT_FLAG_PINNED, &em->flags);
+               em->generation = -1;
 
                while (1) {
                        write_lock(&em_tree->lock);
                        ret = add_extent_mapping(em_tree, em);
+                       if (!ret)
+                               list_move(&em->list,
+                                         &em_tree->modified_extents);
                        write_unlock(&em_tree->lock);
                        if (ret != -EEXIST) {
                                free_extent_map(em);
@@ -1143,6 +1157,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
        u64 extent_offset;
        u64 disk_bytenr;
        u64 num_bytes;
+       u64 disk_num_bytes;
        int extent_type;
        int ret, err;
        int type;
@@ -1245,6 +1260,8 @@ next_slot:
                        extent_offset = btrfs_file_extent_offset(leaf, fi);
                        extent_end = found_key.offset +
                                btrfs_file_extent_num_bytes(leaf, fi);
+                       disk_num_bytes =
+                               btrfs_file_extent_disk_num_bytes(leaf, fi);
                        if (extent_end <= start) {
                                path->slots[0]++;
                                goto next_slot;
@@ -1315,16 +1332,21 @@ out_check:
                        em = alloc_extent_map();
                        BUG_ON(!em); /* -ENOMEM */
                        em->start = cur_offset;
-                       em->orig_start = em->start;
+                       em->orig_start = found_key.offset - extent_offset;
                        em->len = num_bytes;
                        em->block_len = num_bytes;
                        em->block_start = disk_bytenr;
+                       em->orig_block_len = disk_num_bytes;
                        em->bdev = root->fs_info->fs_devices->latest_bdev;
                        set_bit(EXTENT_FLAG_PINNED, &em->flags);
-                       set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
+                       set_bit(EXTENT_FLAG_FILLING, &em->flags);
+                       em->generation = -1;
                        while (1) {
                                write_lock(&em_tree->lock);
                                ret = add_extent_mapping(em_tree, em);
+                               if (!ret)
+                                       list_move(&em->list,
+                                                 &em_tree->modified_extents);
                                write_unlock(&em_tree->lock);
                                if (ret != -EEXIST) {
                                        free_extent_map(em);
@@ -1622,6 +1644,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
        int ret = 0;
        int skip_sum;
        int metadata = 0;
+       int async = !atomic_read(&BTRFS_I(inode)->sync_writers);
 
        skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
 
@@ -1644,7 +1667,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
                                goto out;
                }
                goto mapit;
-       } else if (!skip_sum) {
+       } else if (async && !skip_sum) {
                /* csum items have already been cloned */
                if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
                        goto mapit;
@@ -1655,6 +1678,10 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
                                   __btrfs_submit_bio_start,
                                   __btrfs_submit_bio_done);
                goto out;
+       } else if (!skip_sum) {
+               ret = btrfs_csum_one_bio(root, inode, bio, 0, 0);
+               if (ret)
+                       goto out;
        }
 
 mapit:
@@ -1895,22 +1922,20 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
 
        if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
                BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */
-               ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
-               if (!ret) {
-                       if (nolock)
-                               trans = btrfs_join_transaction_nolock(root);
-                       else
-                               trans = btrfs_join_transaction(root);
-                       if (IS_ERR(trans)) {
-                               ret = PTR_ERR(trans);
-                               trans = NULL;
-                               goto out;
-                       }
-                       trans->block_rsv = &root->fs_info->delalloc_block_rsv;
-                       ret = btrfs_update_inode_fallback(trans, root, inode);
-                       if (ret) /* -ENOMEM or corruption */
-                               btrfs_abort_transaction(trans, root, ret);
+               btrfs_ordered_update_i_size(inode, 0, ordered_extent);
+               if (nolock)
+                       trans = btrfs_join_transaction_nolock(root);
+               else
+                       trans = btrfs_join_transaction(root);
+               if (IS_ERR(trans)) {
+                       ret = PTR_ERR(trans);
+                       trans = NULL;
+                       goto out;
                }
+               trans->block_rsv = &root->fs_info->delalloc_block_rsv;
+               ret = btrfs_update_inode_fallback(trans, root, inode);
+               if (ret) /* -ENOMEM or corruption */
+                       btrfs_abort_transaction(trans, root, ret);
                goto out;
        }
 
@@ -1959,15 +1984,11 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
        add_pending_csums(trans, inode, ordered_extent->file_offset,
                          &ordered_extent->list);
 
-       ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
-       if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
-               ret = btrfs_update_inode_fallback(trans, root, inode);
-               if (ret) { /* -ENOMEM or corruption */
-                       btrfs_abort_transaction(trans, root, ret);
-                       goto out_unlock;
-               }
-       } else {
-               btrfs_set_inode_last_trans(trans, inode);
+       btrfs_ordered_update_i_size(inode, 0, ordered_extent);
+       ret = btrfs_update_inode_fallback(trans, root, inode);
+       if (ret) { /* -ENOMEM or corruption */
+               btrfs_abort_transaction(trans, root, ret);
+               goto out_unlock;
        }
        ret = 0;
 out_unlock:
@@ -2457,6 +2478,18 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
                                continue;
                        }
                        nr_truncate++;
+
+                       /* 1 for the orphan item deletion. */
+                       trans = btrfs_start_transaction(root, 1);
+                       if (IS_ERR(trans)) {
+                               ret = PTR_ERR(trans);
+                               goto out;
+                       }
+                       ret = btrfs_orphan_add(trans, inode);
+                       btrfs_end_transaction(trans, root);
+                       if (ret)
+                               goto out;
+
                        ret = btrfs_truncate(inode);
                } else {
                        nr_unlink++;
@@ -3521,11 +3554,11 @@ int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len,
        if (ret)
                goto out;
 
-       ret = -ENOMEM;
 again:
        page = find_or_create_page(mapping, index, mask);
        if (!page) {
                btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
+               ret = -ENOMEM;
                goto out;
        }
 
@@ -3574,7 +3607,6 @@ again:
                goto out_unlock;
        }
 
-       ret = 0;
        if (offset != PAGE_CACHE_SIZE) {
                if (!len)
                        len = PAGE_CACHE_SIZE - offset;
@@ -3645,6 +3677,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
                                block_end - cur_offset, 0);
                if (IS_ERR(em)) {
                        err = PTR_ERR(em);
+                       em = NULL;
                        break;
                }
                last_byte = min(extent_map_end(em), block_end);
@@ -3692,6 +3725,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
 
                        hole_em->block_start = EXTENT_MAP_HOLE;
                        hole_em->block_len = 0;
+                       hole_em->orig_block_len = 0;
                        hole_em->bdev = root->fs_info->fs_devices->latest_bdev;
                        hole_em->compress_type = BTRFS_COMPRESS_NONE;
                        hole_em->generation = trans->transid;
@@ -3727,16 +3761,27 @@ next:
        return err;
 }
 
-static int btrfs_setsize(struct inode *inode, loff_t newsize)
+static int btrfs_setsize(struct inode *inode, struct iattr *attr)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_trans_handle *trans;
        loff_t oldsize = i_size_read(inode);
+       loff_t newsize = attr->ia_size;
+       int mask = attr->ia_valid;
        int ret;
 
        if (newsize == oldsize)
                return 0;
 
+       /*
+        * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a
+        * special case where we need to update the times despite not having
+        * these flags set.  For all other operations the VFS sets these flags
+        * explicitly if it wants a timestamp update.
+        */
+       if (newsize != oldsize && (!(mask & (ATTR_CTIME | ATTR_MTIME))))
+               inode->i_ctime = inode->i_mtime = current_fs_time(inode->i_sb);
+
        if (newsize > oldsize) {
                truncate_pagecache(inode, oldsize, newsize);
                ret = btrfs_cont_expand(inode, oldsize, newsize);
@@ -3762,9 +3807,34 @@ static int btrfs_setsize(struct inode *inode, loff_t newsize)
                        set_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
                                &BTRFS_I(inode)->runtime_flags);
 
+               /*
+                * 1 for the orphan item we're going to add
+                * 1 for the orphan item deletion.
+                */
+               trans = btrfs_start_transaction(root, 2);
+               if (IS_ERR(trans))
+                       return PTR_ERR(trans);
+
+               /*
+                * We need to do this in case we fail at _any_ point during the
+                * actual truncate.  Once we do the truncate_setsize we could
+                * invalidate pages which forces any outstanding ordered io to
+                * be instantly completed which will give us extents that need
+                * to be truncated.  If we fail to get an orphan inode down we
+                * could have left over extents that were never meant to live,
+                * so we need to guarantee from this point on that everything
+                * will be consistent.
+                */
+               ret = btrfs_orphan_add(trans, inode);
+               btrfs_end_transaction(trans, root);
+               if (ret)
+                       return ret;
+
                /* we don't support swapfiles, so vmtruncate shouldn't fail */
                truncate_setsize(inode, newsize);
                ret = btrfs_truncate(inode);
+               if (ret && inode->i_nlink)
+                       btrfs_orphan_del(NULL, inode);
        }
 
        return ret;
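
The comment in the btrfs_setsize() hunks above notes that a plain truncate() arrives without ATTR_CTIME and ATTR_MTIME set, yet a size-changing truncate is still expected to bump both timestamps, which is why the hunk updates i_ctime/i_mtime itself. A minimal userspace sketch of that user-visible expectation (not part of the patch; the /tmp path is only a placeholder):

        /*
         * Not part of the patch: check that a size-changing (f)truncate
         * bumps both st_mtime and st_ctime, which is the behaviour the
         * btrfs_setsize() change preserves when only ATTR_SIZE is set.
         */
        #include <fcntl.h>
        #include <stdio.h>
        #include <sys/stat.h>
        #include <unistd.h>

        int main(void)
        {
                const char *path = "/tmp/trunc-times-demo";     /* placeholder file */
                struct stat before, after;
                int fd = open(path, O_CREAT | O_WRONLY, 0644);

                if (fd < 0)
                        return 1;
                if (ftruncate(fd, 4096) || fstat(fd, &before))
                        return 1;
                sleep(1);                               /* make a timestamp change visible */
                if (ftruncate(fd, 0) || fstat(fd, &after))      /* size change -> times bump */
                        return 1;
                printf("mtime +%lds  ctime +%lds\n",
                       (long)(after.st_mtime - before.st_mtime),
                       (long)(after.st_ctime - before.st_ctime));
                close(fd);
                unlink(path);
                return 0;
        }

On any POSIX filesystem both printed deltas should be positive after the size change; the hunk keeps btrfs consistent with that even when the VFS passes no time flags.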
@@ -3784,7 +3854,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
                return err;
 
        if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
-               err = btrfs_setsize(inode, attr->ia_size);
+               err = btrfs_setsize(inode, attr);
                if (err)
                        return err;
        }
@@ -3834,6 +3904,12 @@ void btrfs_evict_inode(struct inode *inode)
                goto no_delete;
        }
 
+       ret = btrfs_commit_inode_delayed_inode(inode);
+       if (ret) {
+               btrfs_orphan_del(NULL, inode);
+               goto no_delete;
+       }
+
        rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP);
        if (!rsv) {
                btrfs_orphan_del(NULL, inode);
@@ -3871,7 +3947,7 @@ void btrfs_evict_inode(struct inode *inode)
                        goto no_delete;
                }
 
-               trans = btrfs_start_transaction_lflush(root, 1);
+               trans = btrfs_join_transaction(root);
                if (IS_ERR(trans)) {
                        btrfs_orphan_del(NULL, inode);
                        btrfs_free_block_rsv(root, rsv);
@@ -3885,9 +3961,6 @@ void btrfs_evict_inode(struct inode *inode)
                        break;
 
                trans->block_rsv = &root->fs_info->trans_block_rsv;
-               ret = btrfs_update_inode(trans, root, inode);
-               BUG_ON(ret);
-
                btrfs_end_transaction(trans, root);
                trans = NULL;
                btrfs_btree_balance_dirty(root);
@@ -4797,8 +4870,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
        if (S_ISREG(mode)) {
                if (btrfs_test_opt(root, NODATASUM))
                        BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
-               if (btrfs_test_opt(root, NODATACOW) ||
-                   (BTRFS_I(dir)->flags & BTRFS_INODE_NODATACOW))
+               if (btrfs_test_opt(root, NODATACOW))
                        BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
        }
 
@@ -4864,7 +4936,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
        ret = btrfs_insert_dir_item(trans, root, name, name_len,
                                    parent_inode, &key,
                                    btrfs_inode_type(inode), index);
-       if (ret == -EEXIST)
+       if (ret == -EEXIST || ret == -EOVERFLOW)
                goto fail_dir_item;
        else if (ret) {
                btrfs_abort_transaction(trans, root, ret);
@@ -5020,6 +5092,10 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
        if (err)
                goto out_unlock;
 
+       err = btrfs_update_inode(trans, root, inode);
+       if (err)
+               goto out_unlock;
+
        /*
        * If the active LSM wants to access the inode during
        * d_instantiate it needs these. Smack checks to see
@@ -5084,6 +5160,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
        inode_inc_iversion(inode);
        inode->i_ctime = CURRENT_TIME;
        ihold(inode);
+       set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags);
 
        err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index);
 
@@ -5359,6 +5436,7 @@ again:
                if (start + len <= found_key.offset)
                        goto not_found;
                em->start = start;
+               em->orig_start = start;
                em->len = found_key.offset - start;
                goto not_found_em;
        }
@@ -5369,6 +5447,8 @@ again:
                em->len = extent_end - extent_start;
                em->orig_start = extent_start -
                                 btrfs_file_extent_offset(leaf, item);
+               em->orig_block_len = btrfs_file_extent_disk_num_bytes(leaf,
+                                                                     item);
                bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
                if (bytenr == 0) {
                        em->block_start = EXTENT_MAP_HOLE;
@@ -5378,8 +5458,7 @@ again:
                        set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
                        em->compress_type = compress_type;
                        em->block_start = bytenr;
-                       em->block_len = btrfs_file_extent_disk_num_bytes(leaf,
-                                                                        item);
+                       em->block_len = em->orig_block_len;
                } else {
                        bytenr += btrfs_file_extent_offset(leaf, item);
                        em->block_start = bytenr;
@@ -5409,7 +5488,8 @@ again:
                em->start = extent_start + extent_offset;
                em->len = (copy_size + root->sectorsize - 1) &
                        ~((u64)root->sectorsize - 1);
-               em->orig_start = EXTENT_MAP_INLINE;
+               em->orig_block_len = em->len;
+               em->orig_start = em->start;
                if (compress_type) {
                        set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
                        em->compress_type = compress_type;
@@ -5462,6 +5542,7 @@ again:
        }
 not_found:
        em->start = start;
+       em->orig_start = start;
        em->len = len;
 not_found_em:
        em->block_start = EXTENT_MAP_HOLE;
@@ -5557,10 +5638,13 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag
                return em;
        if (em) {
                /*
-                * if our em maps to a hole, there might
-                * actually be delalloc bytes behind it
+                * if our em maps to
+                * -  a hole or
+                * -  a pre-alloc extent,
+                * there might actually be delalloc bytes behind it.
                 */
-               if (em->block_start != EXTENT_MAP_HOLE)
+               if (em->block_start != EXTENT_MAP_HOLE &&
+                   !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
                        return em;
                else
                        hole_em = em;
@@ -5642,6 +5726,8 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag
                         */
                        em->block_start = hole_em->block_start;
                        em->block_len = hole_len;
+                       if (test_bit(EXTENT_FLAG_PREALLOC, &hole_em->flags))
+                               set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
                } else {
                        em->start = range_start;
                        em->len = found;
@@ -5663,38 +5749,19 @@ out:
 }
 
 static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
-                                                 struct extent_map *em,
                                                  u64 start, u64 len)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_trans_handle *trans;
-       struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+       struct extent_map *em;
        struct btrfs_key ins;
        u64 alloc_hint;
        int ret;
-       bool insert = false;
-
-       /*
-        * Ok if the extent map we looked up is a hole and is for the exact
-        * range we want, there is no reason to allocate a new one, however if
-        * it is not right then we need to free this one and drop the cache for
-        * our range.
-        */
-       if (em->block_start != EXTENT_MAP_HOLE || em->start != start ||
-           em->len != len) {
-               free_extent_map(em);
-               em = NULL;
-               insert = true;
-               btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
-       }
 
        trans = btrfs_join_transaction(root);
        if (IS_ERR(trans))
                return ERR_CAST(trans);
 
-       if (start <= BTRFS_I(inode)->disk_i_size && len < 64 * 1024)
-               btrfs_add_inode_defrag(trans, inode);
-
        trans->block_rsv = &root->fs_info->delalloc_block_rsv;
 
        alloc_hint = get_extent_allocation_hint(inode, start, len);
@@ -5705,37 +5772,10 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
                goto out;
        }
 
-       if (!em) {
-               em = alloc_extent_map();
-               if (!em) {
-                       em = ERR_PTR(-ENOMEM);
-                       goto out;
-               }
-       }
-
-       em->start = start;
-       em->orig_start = em->start;
-       em->len = ins.offset;
-
-       em->block_start = ins.objectid;
-       em->block_len = ins.offset;
-       em->bdev = root->fs_info->fs_devices->latest_bdev;
-
-       /*
-        * We need to do this because if we're using the original em we searched
-        * for, we could have EXTENT_FLAG_VACANCY set, and we don't want that.
-        */
-       em->flags = 0;
-       set_bit(EXTENT_FLAG_PINNED, &em->flags);
-
-       while (insert) {
-               write_lock(&em_tree->lock);
-               ret = add_extent_mapping(em_tree, em);
-               write_unlock(&em_tree->lock);
-               if (ret != -EEXIST)
-                       break;
-               btrfs_drop_extent_cache(inode, start, start + em->len - 1, 0);
-       }
+       em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
+                             ins.offset, ins.offset, 0);
+       if (IS_ERR(em))
+               goto out;
 
        ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
                                           ins.offset, ins.offset, 0);
@@ -5912,7 +5952,7 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
 static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
                                           u64 len, u64 orig_start,
                                           u64 block_start, u64 block_len,
-                                          int type)
+                                          u64 orig_block_len, int type)
 {
        struct extent_map_tree *em_tree;
        struct extent_map *em;
@@ -5930,15 +5970,20 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
        em->block_len = block_len;
        em->block_start = block_start;
        em->bdev = root->fs_info->fs_devices->latest_bdev;
+       em->orig_block_len = orig_block_len;
+       em->generation = -1;
        set_bit(EXTENT_FLAG_PINNED, &em->flags);
        if (type == BTRFS_ORDERED_PREALLOC)
-               set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
+               set_bit(EXTENT_FLAG_FILLING, &em->flags);
 
        do {
                btrfs_drop_extent_cache(inode, em->start,
                                em->start + em->len - 1, 0);
                write_lock(&em_tree->lock);
                ret = add_extent_mapping(em_tree, em);
+               if (!ret)
+                       list_move(&em->list,
+                                 &em_tree->modified_extents);
                write_unlock(&em_tree->lock);
        } while (ret == -EEXIST);
 
@@ -6065,13 +6110,15 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
                        goto must_cow;
 
                if (can_nocow_odirect(trans, inode, start, len) == 1) {
-                       u64 orig_start = em->start;
+                       u64 orig_start = em->orig_start;
+                       u64 orig_block_len = em->orig_block_len;
 
                        if (type == BTRFS_ORDERED_PREALLOC) {
                                free_extent_map(em);
                                em = create_pinned_em(inode, start, len,
                                                       orig_start,
-                                                      block_start, len, type);
+                                                      block_start, len,
+                                                      orig_block_len, type);
                                if (IS_ERR(em)) {
                                        btrfs_end_transaction(trans, root);
                                        goto unlock_err;
@@ -6095,7 +6142,8 @@ must_cow:
         * it above
         */
        len = bh_result->b_size;
-       em = btrfs_new_extent_direct(inode, em, start, len);
+       free_extent_map(em);
+       em = btrfs_new_extent_direct(inode, start, len);
        if (IS_ERR(em)) {
                ret = PTR_ERR(em);
                goto unlock_err;
@@ -6336,6 +6384,9 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
        struct btrfs_root *root = BTRFS_I(inode)->root;
        int ret;
 
+       if (async_submit)
+               async_submit = !atomic_read(&BTRFS_I(inode)->sync_writers);
+
        bio_get(bio);
 
        if (!write) {
@@ -6935,11 +6986,9 @@ static int btrfs_truncate(struct inode *inode)
 
        /*
         * 1 for the truncate slack space
-        * 1 for the orphan item we're going to add
-        * 1 for the orphan item deletion
         * 1 for updating the inode.
         */
-       trans = btrfs_start_transaction(root, 4);
+       trans = btrfs_start_transaction(root, 2);
        if (IS_ERR(trans)) {
                err = PTR_ERR(trans);
                goto out;
@@ -6950,12 +6999,6 @@ static int btrfs_truncate(struct inode *inode)
                                      min_size);
        BUG_ON(ret);
 
-       ret = btrfs_orphan_add(trans, inode);
-       if (ret) {
-               btrfs_end_transaction(trans, root);
-               goto out;
-       }
-
        /*
         * setattr is responsible for setting the ordered_data_close flag,
         * but that is only tested during the last file release.  That
@@ -7024,12 +7067,6 @@ static int btrfs_truncate(struct inode *inode)
                ret = btrfs_orphan_del(trans, inode);
                if (ret)
                        err = ret;
-       } else if (ret && inode->i_nlink > 0) {
-               /*
-                * Failed to do the truncate, remove us from the in memory
-                * orphan list.
-                */
-               ret = btrfs_orphan_del(NULL, inode);
        }
 
        if (trans) {
@@ -7116,6 +7153,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
        extent_io_tree_init(&ei->io_failure_tree, &inode->i_data);
        ei->io_tree.track_uptodate = 1;
        ei->io_failure_tree.track_uptodate = 1;
+       atomic_set(&ei->sync_writers, 0);
        mutex_init(&ei->log_mutex);
        mutex_init(&ei->delalloc_mutex);
        btrfs_ordered_inode_tree_init(&ei->ordered_tree);
@@ -7340,6 +7378,28 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        if (S_ISDIR(old_inode->i_mode) && new_inode &&
            new_inode->i_size > BTRFS_EMPTY_DIR_SIZE)
                return -ENOTEMPTY;
+
+
+       /* check for collisions, even if the name isn't there */
+       ret = btrfs_check_dir_item_collision(root, new_dir->i_ino,
+                            new_dentry->d_name.name,
+                            new_dentry->d_name.len);
+
+       if (ret) {
+               if (ret == -EEXIST) {
+                       /* we shouldn't get -EEXIST
+                        * without a new_inode */
+                       if (!new_inode) {
+                               WARN_ON(1);
+                               return ret;
+                       }
+               } else {
+                       /* maybe -EOVERFLOW */
+                       return ret;
+               }
+       }
+       ret = 0;
+
        /*
         * we're using rename to replace one file with another.
         * and the replacement file is large.  Start IO on it now so
@@ -7528,41 +7588,61 @@ void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work)
  */
 int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
 {
-       struct list_head *head = &root->fs_info->delalloc_inodes;
        struct btrfs_inode *binode;
        struct inode *inode;
        struct btrfs_delalloc_work *work, *next;
        struct list_head works;
+       struct list_head splice;
        int ret = 0;
 
        if (root->fs_info->sb->s_flags & MS_RDONLY)
                return -EROFS;
 
        INIT_LIST_HEAD(&works);
-
+       INIT_LIST_HEAD(&splice);
+again:
        spin_lock(&root->fs_info->delalloc_lock);
-       while (!list_empty(head)) {
-               binode = list_entry(head->next, struct btrfs_inode,
+       list_splice_init(&root->fs_info->delalloc_inodes, &splice);
+       while (!list_empty(&splice)) {
+               binode = list_entry(splice.next, struct btrfs_inode,
                                    delalloc_inodes);
+
+               list_del_init(&binode->delalloc_inodes);
+
                inode = igrab(&binode->vfs_inode);
                if (!inode)
-                       list_del_init(&binode->delalloc_inodes);
+                       continue;
+
+               list_add_tail(&binode->delalloc_inodes,
+                             &root->fs_info->delalloc_inodes);
                spin_unlock(&root->fs_info->delalloc_lock);
-               if (inode) {
-                       work = btrfs_alloc_delalloc_work(inode, 0, delay_iput);
-                       if (!work) {
-                               ret = -ENOMEM;
-                               goto out;
-                       }
-                       list_add_tail(&work->list, &works);
-                       btrfs_queue_worker(&root->fs_info->flush_workers,
-                                          &work->work);
+
+               work = btrfs_alloc_delalloc_work(inode, 0, delay_iput);
+               if (unlikely(!work)) {
+                       ret = -ENOMEM;
+                       goto out;
                }
+               list_add_tail(&work->list, &works);
+               btrfs_queue_worker(&root->fs_info->flush_workers,
+                                  &work->work);
+
                cond_resched();
                spin_lock(&root->fs_info->delalloc_lock);
        }
        spin_unlock(&root->fs_info->delalloc_lock);
 
+       list_for_each_entry_safe(work, next, &works, list) {
+               list_del_init(&work->list);
+               btrfs_wait_and_free_delalloc_work(work);
+       }
+
+       spin_lock(&root->fs_info->delalloc_lock);
+       if (!list_empty(&root->fs_info->delalloc_inodes)) {
+               spin_unlock(&root->fs_info->delalloc_lock);
+               goto again;
+       }
+       spin_unlock(&root->fs_info->delalloc_lock);
+
        /* the filemap_flush will queue IO into the worker threads, but
         * we have to make sure the IO is actually started and that
         * ordered extents get created before we return
@@ -7575,11 +7655,18 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
                    atomic_read(&root->fs_info->async_delalloc_pages) == 0));
        }
        atomic_dec(&root->fs_info->async_submit_draining);
+       return 0;
 out:
        list_for_each_entry_safe(work, next, &works, list) {
                list_del_init(&work->list);
                btrfs_wait_and_free_delalloc_work(work);
        }
+
+       if (!list_empty_careful(&splice)) {
+               spin_lock(&root->fs_info->delalloc_lock);
+               list_splice_tail(&splice, &root->fs_info->delalloc_inodes);
+               spin_unlock(&root->fs_info->delalloc_lock);
+       }
        return ret;
 }
 
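
The reworked btrfs_start_delalloc_inodes() above splices the shared delalloc list onto a private "splice" list under delalloc_lock, queues the work with the lock dropped, and on the error path splices the untouched remainder back so no inode is lost. A stand-alone sketch of that splice-and-requeue pattern, assuming a toy singly linked list and a pthread mutex in place of the kernel's list_head and spinlock (not btrfs code):

        #include <pthread.h>
        #include <stdio.h>
        #include <stdlib.h>

        struct item {
                int id;
                struct item *next;
        };

        static struct item *pending;                    /* shared list, like delalloc_inodes */
        static pthread_mutex_t pending_lock = PTHREAD_MUTEX_INITIALIZER;

        static int process(struct item *it)
        {
                printf("processing %d\n", it->id);
                return it->id == 3 ? -1 : 0;            /* pretend item 3 fails */
        }

        static int drain_pending(void)
        {
                struct item *splice, *it;
                int ret = 0;

                /* Take the whole shared list privately, then drop the lock. */
                pthread_mutex_lock(&pending_lock);
                splice = pending;
                pending = NULL;
                pthread_mutex_unlock(&pending_lock);

                while ((it = splice) != NULL) {
                        splice = it->next;
                        ret = process(it);              /* runs with the lock dropped */
                        free(it);
                        if (ret)
                                break;
                }

                /* Error path: put the unprocessed tail back so nothing is lost. */
                if (ret && splice) {
                        pthread_mutex_lock(&pending_lock);
                        for (it = splice; it->next; it = it->next)
                                ;
                        it->next = pending;
                        pending = splice;
                        pthread_mutex_unlock(&pending_lock);
                }
                return ret;
        }

        int main(void)
        {
                for (int i = 5; i >= 1; i--) {          /* push items 1..5, 1 at the head */
                        struct item *it = malloc(sizeof(*it));

                        it->id = i;
                        it->next = pending;
                        pending = it;
                }
                printf("drain_pending() = %d\n", drain_pending());
                return 0;
        }

As in the patch, the per-entry work (here process(), there btrfs_alloc_delalloc_work() plus btrfs_queue_worker()) runs with the lock dropped because it can fail or block, and the out: path splices the remainder back onto the shared list.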
@@ -7765,6 +7852,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
                em->len = ins.offset;
                em->block_start = ins.objectid;
                em->block_len = ins.offset;
+               em->orig_block_len = ins.offset;
                em->bdev = root->fs_info->fs_devices->latest_bdev;
                set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
                em->generation = trans->transid;