Btrfs: log changed inodes based on the extent map tree
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 5caf285c6e4d0f1cb7adf82d8af911ab614a807a..c56088ece50041487cc9c318f515da912196b2b4 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -39,7 +39,9 @@
 #include "tree-log.h"
 #include "locking.h"
 #include "compat.h"
+#include "volumes.h"
 
+static struct kmem_cache *btrfs_inode_defrag_cachep;
 /*
  * when auto defrag is enabled we
  * queue up these defrag structs to remember which
@@ -89,7 +91,7 @@ static int __compare_inode_defrag(struct inode_defrag *defrag1,
  * If an existing record is found the defrag item you
  * pass in is freed
  */
-static void __btrfs_add_inode_defrag(struct inode *inode,
+static int __btrfs_add_inode_defrag(struct inode *inode,
                                    struct inode_defrag *defrag)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -117,18 +119,24 @@ static void __btrfs_add_inode_defrag(struct inode *inode,
                                entry->transid = defrag->transid;
                        if (defrag->last_offset > entry->last_offset)
                                entry->last_offset = defrag->last_offset;
-                       goto exists;
+                       return -EEXIST;
                }
        }
        set_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
        rb_link_node(&defrag->rb_node, parent, p);
        rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes);
-       return;
+       return 0;
+}
 
-exists:
-       kfree(defrag);
-       return;
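+/*
+ * Auto defrag is only worth doing when the mount option is set and the
+ * filesystem isn't in the middle of being unmounted.
+ */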
+static inline int __need_auto_defrag(struct btrfs_root *root)
+{
+       if (!btrfs_test_opt(root, AUTO_DEFRAG))
+               return 0;
+
+       if (btrfs_fs_closing(root->fs_info))
+               return 0;
 
+       return 1;
 }
 
 /*
@@ -141,11 +149,9 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct inode_defrag *defrag;
        u64 transid;
+       int ret;
 
-       if (!btrfs_test_opt(root, AUTO_DEFRAG))
-               return 0;
-
-       if (btrfs_fs_closing(root->fs_info))
+       if (!__need_auto_defrag(root))
                return 0;
 
        if (test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags))
@@ -156,7 +162,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
        else
                transid = BTRFS_I(inode)->root->last_trans;
 
-       defrag = kzalloc(sizeof(*defrag), GFP_NOFS);
+       defrag = kmem_cache_zalloc(btrfs_inode_defrag_cachep, GFP_NOFS);
        if (!defrag)
                return -ENOMEM;
 
@@ -165,20 +171,56 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
        defrag->root = root->root_key.objectid;
 
        spin_lock(&root->fs_info->defrag_inodes_lock);
-       if (!test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags))
-               __btrfs_add_inode_defrag(inode, defrag);
-       else
-               kfree(defrag);
+       if (!test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags)) {
+               /*
+                * If we set the IN_DEFRAG flag and the inode is then evicted
+                * and re-read, the new in-memory inode won't have the flag set.
+                * In that case we may still find an existing defrag record.
+                */
+               ret = __btrfs_add_inode_defrag(inode, defrag);
+               if (ret)
+                       kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
+       } else {
+               kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
+       }
        spin_unlock(&root->fs_info->defrag_inodes_lock);
        return 0;
 }
 
 /*
- * must be called with the defrag_inodes lock held
+ * Requeue the defrag object. If there is a defrag object that points to
+ * the same inode in the tree, we will merge them together (by
+ * __btrfs_add_inode_defrag()) and free the one that we want to requeue.
+ */
+void btrfs_requeue_inode_defrag(struct inode *inode,
+                               struct inode_defrag *defrag)
+{
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+       int ret;
+
+       if (!__need_auto_defrag(root))
+               goto out;
+
+       /*
+        * Here we don't check the IN_DEFRAG flag, because we need to merge
+        * the two records together.
+        */
+       spin_lock(&root->fs_info->defrag_inodes_lock);
+       ret = __btrfs_add_inode_defrag(inode, defrag);
+       spin_unlock(&root->fs_info->defrag_inodes_lock);
+       if (ret)
+               goto out;
+       return;
+out:
+       kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
+}
+
+/*
+ * Pick the defraggable inode that we want; if it doesn't exist, we will get
+ * the next one.
  */
-struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info,
-                                            u64 root, u64 ino,
-                                            struct rb_node **next)
+static struct inode_defrag *
+btrfs_pick_defrag_inode(struct btrfs_fs_info *fs_info, u64 root, u64 ino)
 {
        struct inode_defrag *entry = NULL;
        struct inode_defrag tmp;
@@ -189,7 +231,8 @@ struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info,
        tmp.ino = ino;
        tmp.root = root;
 
-       p = info->defrag_inodes.rb_node;
+       spin_lock(&fs_info->defrag_inodes_lock);
+       p = fs_info->defrag_inodes.rb_node;
        while (p) {
                parent = p;
                entry = rb_entry(parent, struct inode_defrag, rb_node);
@@ -200,52 +243,131 @@ struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info,
                else if (ret > 0)
                        p = parent->rb_right;
                else
-                       return entry;
+                       goto out;
        }
 
-       if (next) {
-               while (parent && __compare_inode_defrag(&tmp, entry) > 0) {
-                       parent = rb_next(parent);
+       if (parent && __compare_inode_defrag(&tmp, entry) > 0) {
+               parent = rb_next(parent);
+               if (parent)
                        entry = rb_entry(parent, struct inode_defrag, rb_node);
-               }
-               *next = parent;
+               else
+                       entry = NULL;
        }
-       return NULL;
+out:
+       if (entry)
+               rb_erase(parent, &fs_info->defrag_inodes);
+       spin_unlock(&fs_info->defrag_inodes_lock);
+       return entry;
 }
 
-/*
- * run through the list of inodes in the FS that need
- * defragging
- */
-int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
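+/*
+ * Free every queued defrag record, dropping the lock periodically so that
+ * we don't hog the CPU while tearing down a large tree.
+ */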
+void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info)
 {
        struct inode_defrag *defrag;
+       struct rb_node *node;
+
+       spin_lock(&fs_info->defrag_inodes_lock);
+       node = rb_first(&fs_info->defrag_inodes);
+       while (node) {
+               rb_erase(node, &fs_info->defrag_inodes);
+               defrag = rb_entry(node, struct inode_defrag, rb_node);
+               kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
+
+               if (need_resched()) {
+                       spin_unlock(&fs_info->defrag_inodes_lock);
+                       cond_resched();
+                       spin_lock(&fs_info->defrag_inodes_lock);
+               }
+
+               node = rb_first(&fs_info->defrag_inodes);
+       }
+       spin_unlock(&fs_info->defrag_inodes_lock);
+}
+
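+/*
+ * Number of pages btrfs_defrag_file() works on in one pass before the
+ * inode is requeued.
+ */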
+#define BTRFS_DEFRAG_BATCH     1024
+
+static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
+                                   struct inode_defrag *defrag)
+{
        struct btrfs_root *inode_root;
        struct inode *inode;
-       struct rb_node *n;
        struct btrfs_key key;
        struct btrfs_ioctl_defrag_range_args range;
-       u64 first_ino = 0;
-       u64 root_objectid = 0;
        int num_defrag;
-       int defrag_batch = 1024;
 
+       /* get the inode */
+       key.objectid = defrag->root;
+       btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
+       key.offset = (u64)-1;
+       inode_root = btrfs_read_fs_root_no_name(fs_info, &key);
+       if (IS_ERR(inode_root)) {
+               kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
+               return PTR_ERR(inode_root);
+       }
+
+       key.objectid = defrag->ino;
+       btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
+       key.offset = 0;
+       inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL);
+       if (IS_ERR(inode)) {
+               kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
+               return PTR_ERR(inode);
+       }
+
+       /* do a chunk of defrag */
+       clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
        memset(&range, 0, sizeof(range));
        range.len = (u64)-1;
+       range.start = defrag->last_offset;
+
+       sb_start_write(fs_info->sb);
+       num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid,
+                                      BTRFS_DEFRAG_BATCH);
+       sb_end_write(fs_info->sb);
+       /*
+        * if we filled the whole defrag batch, there
+        * must be more work to do.  Queue this defrag
+        * again
+        */
+       if (num_defrag == BTRFS_DEFRAG_BATCH) {
+               defrag->last_offset = range.start;
+               btrfs_requeue_inode_defrag(inode, defrag);
+       } else if (defrag->last_offset && !defrag->cycled) {
+               /*
+                * we didn't fill our defrag batch, but
+                * we didn't start at zero.  Make sure we loop
+                * around to the start of the file.
+                */
+               defrag->last_offset = 0;
+               defrag->cycled = 1;
+               btrfs_requeue_inode_defrag(inode, defrag);
+       } else {
+               kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
+       }
+
+       iput(inode);
+       return 0;
+}
+
+/*
+ * run through the list of inodes in the FS that need
+ * defragging
+ */
+int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
+{
+       struct inode_defrag *defrag;
+       u64 first_ino = 0;
+       u64 root_objectid = 0;
 
        atomic_inc(&fs_info->defrag_running);
-       spin_lock(&fs_info->defrag_inodes_lock);
        while(1) {
-               n = NULL;
+               if (!__need_auto_defrag(fs_info->tree_root))
+                       break;
 
                /* find an inode to defrag */
-               defrag = btrfs_find_defrag_inode(fs_info, root_objectid,
-                                                first_ino, &n);
+               defrag = btrfs_pick_defrag_inode(fs_info, root_objectid,
+                                                first_ino);
                if (!defrag) {
-                       if (n) {
-                               defrag = rb_entry(n, struct inode_defrag,
-                                                 rb_node);
-                       } else if (root_objectid || first_ino) {
+                       if (root_objectid || first_ino) {
                                root_objectid = 0;
                                first_ino = 0;
                                continue;
@@ -254,70 +376,11 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
                        }
                }
 
-               /* remove it from the rbtree */
                first_ino = defrag->ino + 1;
                root_objectid = defrag->root;
-               rb_erase(&defrag->rb_node, &fs_info->defrag_inodes);
-
-               if (btrfs_fs_closing(fs_info))
-                       goto next_free;
-
-               spin_unlock(&fs_info->defrag_inodes_lock);
-
-               /* get the inode */
-               key.objectid = defrag->root;
-               btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
-               key.offset = (u64)-1;
-               inode_root = btrfs_read_fs_root_no_name(fs_info, &key);
-               if (IS_ERR(inode_root))
-                       goto next;
-
-               key.objectid = defrag->ino;
-               btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
-               key.offset = 0;
 
-               inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL);
-               if (IS_ERR(inode))
-                       goto next;
-
-               /* do a chunk of defrag */
-               clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
-               range.start = defrag->last_offset;
-               num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid,
-                                              defrag_batch);
-               /*
-                * if we filled the whole defrag batch, there
-                * must be more work to do.  Queue this defrag
-                * again
-                */
-               if (num_defrag == defrag_batch) {
-                       defrag->last_offset = range.start;
-                       __btrfs_add_inode_defrag(inode, defrag);
-                       /*
-                        * we don't want to kfree defrag, we added it back to
-                        * the rbtree
-                        */
-                       defrag = NULL;
-               } else if (defrag->last_offset && !defrag->cycled) {
-                       /*
-                        * we didn't fill our defrag batch, but
-                        * we didn't start at zero.  Make sure we loop
-                        * around to the start of the file.
-                        */
-                       defrag->last_offset = 0;
-                       defrag->cycled = 1;
-                       __btrfs_add_inode_defrag(inode, defrag);
-                       defrag = NULL;
-               }
-
-               iput(inode);
-next:
-               spin_lock(&fs_info->defrag_inodes_lock);
-next_free:
-               kfree(defrag);
+               __btrfs_run_defrag_inode(fs_info, defrag);
        }
-       spin_unlock(&fs_info->defrag_inodes_lock);
-
        atomic_dec(&fs_info->defrag_running);
 
        /*
@@ -458,14 +521,15 @@ int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
  * this drops all the extents in the cache that intersect the range
  * [start, end].  Existing extents are split as required.
  */
-int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
-                           int skip_pinned)
+void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
+                            int skip_pinned)
 {
        struct extent_map *em;
        struct extent_map *split = NULL;
        struct extent_map *split2 = NULL;
        struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
        u64 len = end - start + 1;
+       u64 gen;
        int ret;
        int testend = 1;
        unsigned long flags;
@@ -477,11 +541,14 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
                testend = 0;
        }
        while (1) {
+               int no_splits = 0;
+
                if (!split)
                        split = alloc_extent_map();
                if (!split2)
                        split2 = alloc_extent_map();
-               BUG_ON(!split || !split2); /* -ENOMEM */
+               if (!split || !split2)
+                       no_splits = 1;
 
                write_lock(&em_tree->lock);
                em = lookup_extent_mapping(em_tree, start, len);
@@ -490,6 +557,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
                        break;
                }
                flags = em->flags;
+               gen = em->generation;
                if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
                        if (testend && em->start + em->len >= start + len) {
                                free_extent_map(em);
@@ -506,6 +574,8 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
                compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
                clear_bit(EXTENT_FLAG_PINNED, &em->flags);
                remove_extent_mapping(em_tree, em);
+               if (no_splits)
+                       goto next;
 
                if (em->block_start < EXTENT_MAP_LAST_BYTE &&
                    em->start < start) {
@@ -518,12 +588,15 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
                                split->block_len = em->block_len;
                        else
                                split->block_len = split->len;
-
+                       split->orig_block_len = max(split->block_len,
+                                                   em->orig_block_len);
+                       split->generation = gen;
                        split->bdev = em->bdev;
                        split->flags = flags;
                        split->compress_type = em->compress_type;
                        ret = add_extent_mapping(em_tree, split);
                        BUG_ON(ret); /* Logic error */
+                       list_move(&split->list, &em_tree->modified_extents);
                        free_extent_map(split);
                        split = split2;
                        split2 = NULL;
@@ -537,6 +610,9 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
                        split->bdev = em->bdev;
                        split->flags = flags;
                        split->compress_type = em->compress_type;
+                       split->generation = gen;
+                       split->orig_block_len = max(em->block_len,
+                                                   em->orig_block_len);
 
                        if (compressed) {
                                split->block_len = em->block_len;
@@ -545,14 +621,16 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
                        } else {
                                split->block_len = split->len;
                                split->block_start = em->block_start + diff;
-                               split->orig_start = split->start;
+                               split->orig_start = em->orig_start;
                        }
 
                        ret = add_extent_mapping(em_tree, split);
                        BUG_ON(ret); /* Logic error */
+                       list_move(&split->list, &em_tree->modified_extents);
                        free_extent_map(split);
                        split = NULL;
                }
+next:
                write_unlock(&em_tree->lock);
 
                /* once for us */
@@ -564,7 +642,6 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
                free_extent_map(split);
        if (split2)
                free_extent_map(split2);
-       return 0;
 }
 
 /*
@@ -576,13 +653,13 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
  * it is either truncated or split.  Anything entirely inside the range
  * is deleted from the tree.
  */
-int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
-                      u64 start, u64 end, u64 *hint_byte, int drop_cache)
+int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
+                        struct btrfs_root *root, struct inode *inode,
+                        struct btrfs_path *path, u64 start, u64 end,
+                        u64 *drop_end, int drop_cache)
 {
-       struct btrfs_root *root = BTRFS_I(inode)->root;
        struct extent_buffer *leaf;
        struct btrfs_file_extent_item *fi;
-       struct btrfs_path *path;
        struct btrfs_key key;
        struct btrfs_key new_key;
        u64 ino = btrfs_ino(inode);
@@ -597,14 +674,12 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
        int recow;
        int ret;
        int modify_tree = -1;
+       int update_refs = (root->ref_cows || root == root->fs_info->tree_root);
+       int found = 0;
 
        if (drop_cache)
                btrfs_drop_extent_cache(inode, start, end - 1, 0);
 
-       path = btrfs_alloc_path();
-       if (!path)
-               return -ENOMEM;
-
        if (start >= BTRFS_I(inode)->disk_i_size)
                modify_tree = 0;
 
@@ -666,6 +741,7 @@ next_slot:
                        goto next_slot;
                }
 
+               found = 1;
                search_start = max(key.offset, start);
                if (recow || !modify_tree) {
                        modify_tree = -1;
@@ -707,14 +783,13 @@ next_slot:
                                                        extent_end - start);
                        btrfs_mark_buffer_dirty(leaf);
 
-                       if (disk_bytenr > 0) {
+                       if (update_refs && disk_bytenr > 0) {
                                ret = btrfs_inc_extent_ref(trans, root,
                                                disk_bytenr, num_bytes, 0,
                                                root->root_key.objectid,
                                                new_key.objectid,
                                                start - extent_offset, 0);
                                BUG_ON(ret); /* -ENOMEM */
-                               *hint_byte = disk_bytenr;
                        }
                        key.offset = start;
                }
@@ -734,10 +809,8 @@ next_slot:
                        btrfs_set_file_extent_num_bytes(leaf, fi,
                                                        extent_end - end);
                        btrfs_mark_buffer_dirty(leaf);
-                       if (disk_bytenr > 0) {
+                       if (update_refs && disk_bytenr > 0)
                                inode_sub_bytes(inode, end - key.offset);
-                               *hint_byte = disk_bytenr;
-                       }
                        break;
                }
 
@@ -753,10 +826,8 @@ next_slot:
                        btrfs_set_file_extent_num_bytes(leaf, fi,
                                                        start - key.offset);
                        btrfs_mark_buffer_dirty(leaf);
-                       if (disk_bytenr > 0) {
+                       if (update_refs && disk_bytenr > 0)
                                inode_sub_bytes(inode, extent_end - start);
-                               *hint_byte = disk_bytenr;
-                       }
                        if (end == extent_end)
                                break;
 
@@ -777,12 +848,13 @@ next_slot:
                                del_nr++;
                        }
 
-                       if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
+                       if (update_refs &&
+                           extent_type == BTRFS_FILE_EXTENT_INLINE) {
                                inode_sub_bytes(inode,
                                                extent_end - key.offset);
                                extent_end = ALIGN(extent_end,
                                                   root->sectorsize);
-                       } else if (disk_bytenr > 0) {
+                       } else if (update_refs && disk_bytenr > 0) {
                                ret = btrfs_free_extent(trans, root,
                                                disk_bytenr, num_bytes, 0,
                                                root->root_key.objectid,
@@ -791,7 +863,6 @@ next_slot:
                                BUG_ON(ret); /* -ENOMEM */
                                inode_sub_bytes(inode,
                                                extent_end - key.offset);
-                               *hint_byte = disk_bytenr;
                        }
 
                        if (end == extent_end)
@@ -806,7 +877,7 @@ next_slot:
                                              del_nr);
                        if (ret) {
                                btrfs_abort_transaction(trans, root, ret);
-                               goto out;
+                               break;
                        }
 
                        del_nr = 0;
@@ -825,7 +896,24 @@ next_slot:
                        btrfs_abort_transaction(trans, root, ret);
        }
 
-out:
+       if (drop_end)
+               *drop_end = found ? min(end, extent_end) : end;
+       btrfs_release_path(path);
+       return ret;
+}
+
+int btrfs_drop_extents(struct btrfs_trans_handle *trans,
+                      struct btrfs_root *root, struct inode *inode, u64 start,
+                      u64 end, int drop_cache)
+{
+       struct btrfs_path *path;
+       int ret;
+
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+       ret = __btrfs_drop_extents(trans, root, inode, path, start, end, NULL,
+                                  drop_cache);
        btrfs_free_path(path);
        return ret;
 }
@@ -892,8 +980,6 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
        int ret;
        u64 ino = btrfs_ino(inode);
 
-       btrfs_drop_extent_cache(inode, start, end - 1, 0);
-
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
@@ -935,12 +1021,16 @@ again:
                        btrfs_set_item_key_safe(trans, root, path, &new_key);
                        fi = btrfs_item_ptr(leaf, path->slots[0],
                                            struct btrfs_file_extent_item);
+                       btrfs_set_file_extent_generation(leaf, fi,
+                                                        trans->transid);
                        btrfs_set_file_extent_num_bytes(leaf, fi,
                                                        extent_end - end);
                        btrfs_set_file_extent_offset(leaf, fi,
                                                     end - orig_offset);
                        fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
                                            struct btrfs_file_extent_item);
+                       btrfs_set_file_extent_generation(leaf, fi,
+                                                        trans->transid);
                        btrfs_set_file_extent_num_bytes(leaf, fi,
                                                        end - other_start);
                        btrfs_mark_buffer_dirty(leaf);
@@ -958,12 +1048,16 @@ again:
                                            struct btrfs_file_extent_item);
                        btrfs_set_file_extent_num_bytes(leaf, fi,
                                                        start - key.offset);
+                       btrfs_set_file_extent_generation(leaf, fi,
+                                                        trans->transid);
                        path->slots[0]++;
                        new_key.offset = start;
                        btrfs_set_item_key_safe(trans, root, path, &new_key);
 
                        fi = btrfs_item_ptr(leaf, path->slots[0],
                                            struct btrfs_file_extent_item);
+                       btrfs_set_file_extent_generation(leaf, fi,
+                                                        trans->transid);
                        btrfs_set_file_extent_num_bytes(leaf, fi,
                                                        other_end - start);
                        btrfs_set_file_extent_offset(leaf, fi,
@@ -991,12 +1085,14 @@ again:
                leaf = path->nodes[0];
                fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
                                    struct btrfs_file_extent_item);
+               btrfs_set_file_extent_generation(leaf, fi, trans->transid);
                btrfs_set_file_extent_num_bytes(leaf, fi,
                                                split - key.offset);
 
                fi = btrfs_item_ptr(leaf, path->slots[0],
                                    struct btrfs_file_extent_item);
 
+               btrfs_set_file_extent_generation(leaf, fi, trans->transid);
                btrfs_set_file_extent_offset(leaf, fi, split - orig_offset);
                btrfs_set_file_extent_num_bytes(leaf, fi,
                                                extent_end - split);
@@ -1056,12 +1152,14 @@ again:
                           struct btrfs_file_extent_item);
                btrfs_set_file_extent_type(leaf, fi,
                                           BTRFS_FILE_EXTENT_REG);
+               btrfs_set_file_extent_generation(leaf, fi, trans->transid);
                btrfs_mark_buffer_dirty(leaf);
        } else {
                fi = btrfs_item_ptr(leaf, del_slot - 1,
                           struct btrfs_file_extent_item);
                btrfs_set_file_extent_type(leaf, fi,
                                           BTRFS_FILE_EXTENT_REG);
+               btrfs_set_file_extent_generation(leaf, fi, trans->transid);
                btrfs_set_file_extent_num_bytes(leaf, fi,
                                                extent_end - key.offset);
                btrfs_mark_buffer_dirty(leaf);
@@ -1173,8 +1271,8 @@ again:
 
                clear_extent_bit(&BTRFS_I(inode)->io_tree, start_pos,
                                  last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC |
-                                 EXTENT_DO_ACCOUNTING, 0, 0, &cached_state,
-                                 GFP_NOFS);
+                                 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
+                                 0, 0, &cached_state, GFP_NOFS);
                unlock_extent_cached(&BTRFS_I(inode)->io_tree,
                                     start_pos, last_pos - 1, &cached_state,
                                     GFP_NOFS);
@@ -1317,7 +1415,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
                balance_dirty_pages_ratelimited_nr(inode->i_mapping,
                                                   dirty_pages);
                if (dirty_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
-                       btrfs_btree_balance_dirty(root, 1);
+                       btrfs_btree_balance_dirty(root);
 
                pos += copied;
                num_written += copied;
@@ -1378,6 +1476,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
        ssize_t num_written = 0;
        ssize_t err = 0;
        size_t count, ocount;
+       bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
 
        sb_start_write(inode->i_sb);
 
@@ -1435,6 +1534,9 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
                }
        }
 
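+       /*
+        * Note: sync_writers tells the lower layers that a sync writer is in
+        * flight, presumably so checksumming is done inline instead of being
+        * handed off to async helpers.
+        */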
+       if (sync)
+               atomic_inc(&BTRFS_I(inode)->sync_writers);
+
        if (unlikely(file->f_flags & O_DIRECT)) {
                num_written = __btrfs_direct_write(iocb, iov, nr_segs,
                                                   pos, ppos, count, ocount);
@@ -1469,6 +1571,8 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
                        num_written = err;
        }
 out:
+       if (sync)
+               atomic_dec(&BTRFS_I(inode)->sync_writers);
        sb_end_write(inode->i_sb);
        current->backing_dev_info = NULL;
        return num_written ? num_written : err;
@@ -1514,16 +1618,26 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 
        trace_btrfs_sync_file(file, datasync);
 
+       /*
+        * We write the dirty pages in the range and wait until they complete
+        * outside of the ->i_mutex, so that several tasks can flush dirty
+        * pages concurrently, which improves performance.
+        */
+       atomic_inc(&BTRFS_I(inode)->sync_writers);
+       ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+       atomic_dec(&BTRFS_I(inode)->sync_writers);
+       if (ret)
+               return ret;
+
        mutex_lock(&inode->i_mutex);
 
        /*
-        * we wait first, since the writeback may change the inode, also wait
-        * ordered range does a filemape_write_and_wait_range which is why we
-        * don't do it above like other file systems.
+        * Flush the dirty pages again to make sure that no dirty pages in
+        * the range are left behind.
         */
-       root->log_batch++;
-       btrfs_wait_ordered_range(inode, start, end);
-       root->log_batch++;
+       atomic_inc(&root->log_batch);
+       btrfs_wait_ordered_range(inode, start, end - start + 1);
+       atomic_inc(&root->log_batch);
 
        /*
         * check the transaction that last modified this inode
@@ -1544,6 +1658,14 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
            BTRFS_I(inode)->last_trans <=
            root->fs_info->last_trans_committed) {
                BTRFS_I(inode)->last_trans = 0;
+
+               /*
+                * We've had everything committed since the last time we were
+                * modified, so clear this flag in case it was set for whatever
+                * reason; it's no longer relevant.
+                */
+               clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
+                         &BTRFS_I(inode)->runtime_flags);
                mutex_unlock(&inode->i_mutex);
                goto out;
        }
@@ -1599,6 +1721,7 @@ out:
 static const struct vm_operations_struct btrfs_file_vm_ops = {
        .fault          = filemap_fault,
        .page_mkwrite   = btrfs_page_mkwrite,
+       .remap_pages    = generic_file_remap_pages,
 };
 
 static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma)
@@ -1610,11 +1733,333 @@ static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma)
 
        file_accessed(filp);
        vma->vm_ops = &btrfs_file_vm_ops;
-       vma->vm_flags |= VM_CAN_NONLINEAR;
 
        return 0;
 }
 
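+/*
+ * Does the file extent item in @slot describe a hole (disk_bytenr == 0)
+ * directly adjacent to the range [start, end)?
+ */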
+static int hole_mergeable(struct inode *inode, struct extent_buffer *leaf,
+                         int slot, u64 start, u64 end)
+{
+       struct btrfs_file_extent_item *fi;
+       struct btrfs_key key;
+
+       if (slot < 0 || slot >= btrfs_header_nritems(leaf))
+               return 0;
+
+       btrfs_item_key_to_cpu(leaf, &key, slot);
+       if (key.objectid != btrfs_ino(inode) ||
+           key.type != BTRFS_EXTENT_DATA_KEY)
+               return 0;
+
+       fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
+
+       if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
+               return 0;
+
+       if (btrfs_file_extent_disk_bytenr(leaf, fi))
+               return 0;
+
+       if (key.offset == end)
+               return 1;
+       if (key.offset + btrfs_file_extent_num_bytes(leaf, fi) == start)
+               return 1;
+       return 0;
+}
+
+static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode,
+                     struct btrfs_path *path, u64 offset, u64 end)
+{
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+       struct extent_buffer *leaf;
+       struct btrfs_file_extent_item *fi;
+       struct extent_map *hole_em;
+       struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+       struct btrfs_key key;
+       int ret;
+
+       key.objectid = btrfs_ino(inode);
+       key.type = BTRFS_EXTENT_DATA_KEY;
+       key.offset = offset;
+
+       ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
+       if (ret < 0)
+               return ret;
+       BUG_ON(!ret);
+
+       leaf = path->nodes[0];
+       if (hole_mergeable(inode, leaf, path->slots[0] - 1, offset, end)) {
+               u64 num_bytes;
+
+               path->slots[0]--;
+               fi = btrfs_item_ptr(leaf, path->slots[0],
+                                   struct btrfs_file_extent_item);
+               num_bytes = btrfs_file_extent_num_bytes(leaf, fi) +
+                       end - offset;
+               btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
+               btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
+               btrfs_set_file_extent_offset(leaf, fi, 0);
+               btrfs_mark_buffer_dirty(leaf);
+               goto out;
+       }
+
+       if (hole_mergeable(inode, leaf, path->slots[0] + 1, offset, end)) {
+               u64 num_bytes;
+
+               path->slots[0]++;
+               key.offset = offset;
+               btrfs_set_item_key_safe(trans, root, path, &key);
+               fi = btrfs_item_ptr(leaf, path->slots[0],
+                                   struct btrfs_file_extent_item);
+               num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + end -
+                       offset;
+               btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
+               btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
+               btrfs_set_file_extent_offset(leaf, fi, 0);
+               btrfs_mark_buffer_dirty(leaf);
+               goto out;
+       }
+       btrfs_release_path(path);
+
+       ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), offset,
+                                      0, 0, end - offset, 0, end - offset,
+                                      0, 0, 0);
+       if (ret)
+               return ret;
+
+out:
+       btrfs_release_path(path);
+
+       hole_em = alloc_extent_map();
+       if (!hole_em) {
+               btrfs_drop_extent_cache(inode, offset, end - 1, 0);
+               set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
+                       &BTRFS_I(inode)->runtime_flags);
+       } else {
+               hole_em->start = offset;
+               hole_em->len = end - offset;
+               hole_em->orig_start = offset;
+
+               hole_em->block_start = EXTENT_MAP_HOLE;
+               hole_em->block_len = 0;
+               hole_em->orig_block_len = 0;
+               hole_em->bdev = root->fs_info->fs_devices->latest_bdev;
+               hole_em->compress_type = BTRFS_COMPRESS_NONE;
+               hole_em->generation = trans->transid;
+
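+               /*
+                * Keep dropping the cached range until the hole extent map
+                * inserts cleanly; -EEXIST means a racing reader re-added an
+                * overlapping mapping in the meantime.
+                */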
+               do {
+                       btrfs_drop_extent_cache(inode, offset, end - 1, 0);
+                       write_lock(&em_tree->lock);
+                       ret = add_extent_mapping(em_tree, hole_em);
+                       if (!ret)
+                               list_move(&hole_em->list,
+                                         &em_tree->modified_extents);
+                       write_unlock(&em_tree->lock);
+               } while (ret == -EEXIST);
+               free_extent_map(hole_em);
+               if (ret)
+                       set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
+                               &BTRFS_I(inode)->runtime_flags);
+       }
+
+       return 0;
+}
+
+static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
+{
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+       struct extent_state *cached_state = NULL;
+       struct btrfs_path *path;
+       struct btrfs_block_rsv *rsv;
+       struct btrfs_trans_handle *trans;
+       u64 lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize);
+       u64 lockend = round_down(offset + len,
+                                BTRFS_I(inode)->root->sectorsize) - 1;
+       u64 cur_offset = lockstart;
+       u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
+       u64 drop_end;
+       int ret = 0;
+       int err = 0;
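+       /* true when the punched range starts and ends inside one page */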
+       bool same_page = ((offset >> PAGE_CACHE_SHIFT) ==
+                         ((offset + len - 1) >> PAGE_CACHE_SHIFT));
+
+       btrfs_wait_ordered_range(inode, offset, len);
+
+       mutex_lock(&inode->i_mutex);
+       /*
+        * We don't need to truncate any page beyond the end of the file,
+        * because we are sure there is no data there.
+        *
+        * Only zero a partial page here if the range sits inside a single
+        * page and doesn't cover the entire page.
+        */
+       if (same_page && len < PAGE_CACHE_SIZE) {
+               if (offset < round_up(inode->i_size, PAGE_CACHE_SIZE))
+                       ret = btrfs_truncate_page(inode, offset, len, 0);
+               mutex_unlock(&inode->i_mutex);
+               return ret;
+       }
+
+       /* zero back part of the first page */
+       if (offset < round_up(inode->i_size, PAGE_CACHE_SIZE)) {
+               ret = btrfs_truncate_page(inode, offset, 0, 0);
+               if (ret) {
+                       mutex_unlock(&inode->i_mutex);
+                       return ret;
+               }
+       }
+
+       /* zero the front end of the last page */
+       if (offset + len < round_up(inode->i_size, PAGE_CACHE_SIZE)) {
+               ret = btrfs_truncate_page(inode, offset + len, 0, 1);
+               if (ret) {
+                       mutex_unlock(&inode->i_mutex);
+                       return ret;
+               }
+       }
+
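+       /*
+        * After rounding, the range doesn't cover a full block, so the
+        * partial-page zeroing above is all that's needed.
+        */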
+       if (lockend < lockstart) {
+               mutex_unlock(&inode->i_mutex);
+               return 0;
+       }
+
+       while (1) {
+               struct btrfs_ordered_extent *ordered;
+
+               truncate_pagecache_range(inode, lockstart, lockend);
+
+               lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+                                0, &cached_state);
+               ordered = btrfs_lookup_first_ordered_extent(inode, lockend);
+
+               /*
+                * We need to make sure we have no ordered extents in this range
+                * and nobody raced in and read a page in this range, if we did
+                * we need to try again.
+                */
+               if ((!ordered ||
+                   (ordered->file_offset + ordered->len < lockstart ||
+                    ordered->file_offset > lockend)) &&
+                    !test_range_bit(&BTRFS_I(inode)->io_tree, lockstart,
+                                    lockend, EXTENT_UPTODATE, 0,
+                                    cached_state)) {
+                       if (ordered)
+                               btrfs_put_ordered_extent(ordered);
+                       break;
+               }
+               if (ordered)
+                       btrfs_put_ordered_extent(ordered);
+               unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
+                                    lockend, &cached_state, GFP_NOFS);
+               btrfs_wait_ordered_range(inode, lockstart,
+                                        lockend - lockstart + 1);
+       }
+
+       path = btrfs_alloc_path();
+       if (!path) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP);
+       if (!rsv) {
+               ret = -ENOMEM;
+               goto out_free;
+       }
+       rsv->size = btrfs_calc_trunc_metadata_size(root, 1);
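+       /*
+        * failfast makes reservation failures return -ENOSPC right away
+        * instead of flushing, so the drop loop below can end the current
+        * transaction and start a fresh one.
+        */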
+       rsv->failfast = 1;
+
+       /*
+        * 1 - update the inode
+        * 1 - removing the extents in the range
+        * 1 - adding the hole extent
+        */
+       trans = btrfs_start_transaction(root, 3);
+       if (IS_ERR(trans)) {
+               err = PTR_ERR(trans);
+               goto out_free;
+       }
+
+       ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv,
+                                     min_size);
+       BUG_ON(ret);
+       trans->block_rsv = rsv;
+
+       while (cur_offset < lockend) {
+               ret = __btrfs_drop_extents(trans, root, inode, path,
+                                          cur_offset, lockend + 1,
+                                          &drop_end, 1);
+               if (ret != -ENOSPC)
+                       break;
+
+               trans->block_rsv = &root->fs_info->trans_block_rsv;
+
+               ret = fill_holes(trans, inode, path, cur_offset, drop_end);
+               if (ret) {
+                       err = ret;
+                       break;
+               }
+
+               cur_offset = drop_end;
+
+               ret = btrfs_update_inode(trans, root, inode);
+               if (ret) {
+                       err = ret;
+                       break;
+               }
+
+               btrfs_end_transaction(trans, root);
+               btrfs_btree_balance_dirty(root);
+
+               trans = btrfs_start_transaction(root, 3);
+               if (IS_ERR(trans)) {
+                       ret = PTR_ERR(trans);
+                       trans = NULL;
+                       break;
+               }
+
+               ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv,
+                                             rsv, min_size);
+               BUG_ON(ret);    /* shouldn't happen */
+               trans->block_rsv = rsv;
+       }
+
+       if (ret) {
+               err = ret;
+               goto out_trans;
+       }
+
+       trans->block_rsv = &root->fs_info->trans_block_rsv;
+       ret = fill_holes(trans, inode, path, cur_offset, drop_end);
+       if (ret) {
+               err = ret;
+               goto out_trans;
+       }
+
+out_trans:
+       if (!trans)
+               goto out_free;
+
+       inode_inc_iversion(inode);
+       inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+
+       trans->block_rsv = &root->fs_info->trans_block_rsv;
+       ret = btrfs_update_inode(trans, root, inode);
+       btrfs_end_transaction(trans, root);
+       btrfs_btree_balance_dirty(root);
+out_free:
+       btrfs_free_path(path);
+       btrfs_free_block_rsv(root, rsv);
+out:
+       unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+                            &cached_state, GFP_NOFS);
+       mutex_unlock(&inode->i_mutex);
+       if (ret && !err)
+               err = ret;
+       return err;
+}
+
 static long btrfs_fallocate(struct file *file, int mode,
                            loff_t offset, loff_t len)
 {
@@ -1626,22 +2071,25 @@ static long btrfs_fallocate(struct file *file, int mode,
        u64 alloc_end;
        u64 alloc_hint = 0;
        u64 locked_end;
-       u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
        struct extent_map *em;
+       int blocksize = BTRFS_I(inode)->root->sectorsize;
        int ret;
 
-       alloc_start = offset & ~mask;
-       alloc_end =  (offset + len + mask) & ~mask;
+       alloc_start = round_down(offset, blocksize);
+       alloc_end = round_up(offset + len, blocksize);
 
-       /* We only support the FALLOC_FL_KEEP_SIZE mode */
-       if (mode & ~FALLOC_FL_KEEP_SIZE)
+       /* Make sure we aren't being given some crap mode */
+       if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
                return -EOPNOTSUPP;
 
+       if (mode & FALLOC_FL_PUNCH_HOLE)
+               return btrfs_punch_hole(inode, offset, len);
+
        /*
         * Make sure we have enough space before we do the
         * allocation.
         */
-       ret = btrfs_check_data_free_space(inode, len);
+       ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start);
        if (ret)
                return ret;
 
@@ -1709,7 +2157,7 @@ static long btrfs_fallocate(struct file *file, int mode,
                }
                last_byte = min(extent_map_end(em), alloc_end);
                actual_end = min_t(u64, extent_map_end(em), offset + len);
-               last_byte = (last_byte + mask) & ~mask;
+               last_byte = ALIGN(last_byte, blocksize);
 
                if (em->block_start == EXTENT_MAP_HOLE ||
                    (cur_offset >= inode->i_size &&
@@ -1748,7 +2196,7 @@ static long btrfs_fallocate(struct file *file, int mode,
 out:
        mutex_unlock(&inode->i_mutex);
        /* Let go of our reservation. */
-       btrfs_free_reserved_data_space(inode, len);
+       btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
        return ret;
 }
 
@@ -1924,3 +2372,21 @@ const struct file_operations btrfs_file_operations = {
        .compat_ioctl   = btrfs_ioctl,
 #endif
 };
+
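+/*
+ * Slab-cache setup/teardown for the defrag records above, called from the
+ * module init/exit paths (presumably init_btrfs_fs()/exit_btrfs_fs()).
+ */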
+void btrfs_auto_defrag_exit(void)
+{
+       if (btrfs_inode_defrag_cachep)
+               kmem_cache_destroy(btrfs_inode_defrag_cachep);
+}
+
+int btrfs_auto_defrag_init(void)
+{
+       btrfs_inode_defrag_cachep = kmem_cache_create("btrfs_inode_defrag",
+                                       sizeof(struct inode_defrag), 0,
+                                       SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
+                                       NULL);
+       if (!btrfs_inode_defrag_cachep)
+               return -ENOMEM;
+
+       return 0;
+}