Merge branch 'for-4.5/lightnvm' of git://git.kernel.dk/linux-block

[deliverable/linux.git] / fs / hugetlbfs / inode.c
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c

index d8f51ee8126b3282156721d2d2639e24d3aa536e..8bbf7f3e2a27e0669e7f7fd5624a2cb5b50ce0fe 100644 (file)
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -4,11 +4,11 @@
   * Nadia Yvette Chambers, 2002
   *
   * Copyright (C) 2002 Linus Torvalds.
+ * License: GPL
   */
  
  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  
-#include <linux/module.h>
  #include <linux/thread_info.h>
  #include <asm/current.h>
  #include <linux/sched.h>               /* remove ASAP */
@@ -324,11 +324,48 @@ static void remove_huge_page(struct page *page)
         delete_from_page_cache(page);
  }
  
+static void
+hugetlb_vmdelete_list(struct rb_root *root, pgoff_t start, pgoff_t end)
+{
+       struct vm_area_struct *vma;
+
+       /*
+        * end == 0 indicates that the entire range after
+        * start should be unmapped.
+        */
+       vma_interval_tree_foreach(vma, root, start, end ? end : ULONG_MAX) {
+               unsigned long v_offset;
+               unsigned long v_end;
+
+               /*
+                * Can the expression below overflow on 32-bit arches?
+                * No, because the interval tree returns us only those vmas
+                * which overlap the truncated area starting at pgoff,
+                * and no vma on a 32-bit arch can span beyond the 4GB.
+                */
+               if (vma->vm_pgoff < start)
+                       v_offset = (start - vma->vm_pgoff) << PAGE_SHIFT;
+               else
+                       v_offset = 0;
+
+               if (!end)
+                       v_end = vma->vm_end;
+               else {
+                       v_end = ((end - vma->vm_pgoff) << PAGE_SHIFT)
+                                                       + vma->vm_start;
+                       if (v_end > vma->vm_end)
+                               v_end = vma->vm_end;
+               }
+
+               unmap_hugepage_range(vma, vma->vm_start + v_offset, v_end,
+                                                                       NULL);
+       }
+}
  
  /*
   * remove_inode_hugepages handles two distinct cases: truncation and hole
   * punch.  There are subtle differences in operation for each case.
-
+ *
   * truncation is indicated by end of range being LLONG_MAX
   *     In this case, we first scan the range and release found pages.
   *     After releasing pages, hugetlb_unreserve_pages cleans up region/reserv
@@ -379,6 +416,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
  
                 for (i = 0; i < pagevec_count(&pvec); ++i) {
                         struct page *page = pvec.pages[i];
+                       bool rsv_on_error;
                         u32 hash;
  
                         /*
@@ -395,37 +433,43 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
                                                         mapping, next, 0);
                         mutex_lock(&hugetlb_fault_mutex_table[hash]);
  
-                       lock_page(page);
-                       if (likely(!page_mapped(page))) {
-                               bool rsv_on_error = !PagePrivate(page);
-                               /*
-                                * We must free the huge page and remove
-                                * from page cache (remove_huge_page) BEFORE
-                                * removing the region/reserve map
-                                * (hugetlb_unreserve_pages).  In rare out
-                                * of memory conditions, removal of the
-                                * region/reserve map could fail.  Before
-                                * free'ing the page, note PagePrivate which
-                                * is used in case of error.
-                                */
-                               remove_huge_page(page);
-                               freed++;
-                               if (!truncate_op) {
-                                       if (unlikely(hugetlb_unreserve_pages(
-                                                       inode, next,
-                                                       next + 1, 1)))
-                                               hugetlb_fix_reserve_counts(
-                                                       inode, rsv_on_error);
-                               }
-                       } else {
-                               /*
-                                * If page is mapped, it was faulted in after
-                                * being unmapped.  It indicates a race between
-                                * hole punch and page fault.  Do nothing in
-                                * this case.  Getting here in a truncate
-                                * operation is a bug.
-                                */
+                       /*
+                        * If page is mapped, it was faulted in after being
+                        * unmapped in caller.  Unmap (again) now after taking
+                        * the fault mutex.  The mutex will prevent faults
+                        * until we finish removing the page.
+                        *
+                        * This race can only happen in the hole punch case.
+                        * Getting here in a truncate operation is a bug.
+                        */
+                       if (unlikely(page_mapped(page))) {
                                 BUG_ON(truncate_op);
+
+                               i_mmap_lock_write(mapping);
+                               hugetlb_vmdelete_list(&mapping->i_mmap,
+                                       next * pages_per_huge_page(h),
+                                       (next + 1) * pages_per_huge_page(h));
+                               i_mmap_unlock_write(mapping);
+                       }
+
+                       lock_page(page);
+                       /*
+                        * We must free the huge page and remove from page
+                        * cache (remove_huge_page) BEFORE removing the
+                        * region/reserve map (hugetlb_unreserve_pages).  In
+                        * rare out of memory conditions, removal of the
+                        * region/reserve map could fail.  Before free'ing
+                        * the page, note PagePrivate which is used in case
+                        * of error.
+                        */
+                       rsv_on_error = !PagePrivate(page);
+                       remove_huge_page(page);
+                       freed++;
+                       if (!truncate_op) {
+                               if (unlikely(hugetlb_unreserve_pages(inode,
+                                                       next, next + 1, 1)))
+                                       hugetlb_fix_reserve_counts(inode,
+                                                               rsv_on_error);
                         }
  
                         unlock_page(page);
@@ -452,41 +496,6 @@ static void hugetlbfs_evict_inode(struct inode *inode)
         clear_inode(inode);
  }
  
-static inline void
-hugetlb_vmdelete_list(struct rb_root *root, pgoff_t start, pgoff_t end)
-{
-       struct vm_area_struct *vma;
-
-       /*
-        * end == 0 indicates that the entire range after
-        * start should be unmapped.
-        */
-       vma_interval_tree_foreach(vma, root, start, end ? end : ULONG_MAX) {
-               unsigned long v_offset;
-
-               /*
-                * Can the expression below overflow on 32-bit arches?
-                * No, because the interval tree returns us only those vmas
-                * which overlap the truncated area starting at pgoff,
-                * and no vma on a 32-bit arch can span beyond the 4GB.
-                */
-               if (vma->vm_pgoff < start)
-                       v_offset = (start - vma->vm_pgoff) << PAGE_SHIFT;
-               else
-                       v_offset = 0;
-
-               if (end) {
-                       end = ((end - start) << PAGE_SHIFT) +
-                              vma->vm_start + v_offset;
-                       if (end > vma->vm_end)
-                               end = vma->vm_end;
-               } else
-                       end = vma->vm_end;
-
-               unmap_hugepage_range(vma, vma->vm_start + v_offset, end, NULL);
-       }
-}
-
  static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
  {
         pgoff_t pgoff;
@@ -708,7 +717,7 @@ static struct inode *hugetlbfs_get_root(struct super_block *sb,
  /*
   * Hugetlbfs is not reclaimable; therefore its i_mmap_rwsem will never
   * be taken from reclaim -- unlike regular filesystems. This needs an
- * annotation because huge_pmd_share() does an allocation under
+ * annotation because huge_pmd_share() does an allocation under hugetlb's
   * i_mmap_rwsem.
   */
  static struct lock_class_key hugetlbfs_i_mmap_rwsem_key;
@@ -738,7 +747,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
                 /*
                  * The policy is initialized here even if we are creating a
                  * private inode because initialization simply creates an
-                * an empty rb tree and calls spin_lock_init(), later when we
+                * empty rb tree and calls rwlock_init(), later when we
                  * call mpol_free_shared_policy() it will just return because
                  * the rb tree will still be empty.
                  */
@@ -1202,7 +1211,6 @@ static struct file_system_type hugetlbfs_fs_type = {
         .mount          = hugetlbfs_mount,
         .kill_sb        = kill_litter_super,
  };
-MODULE_ALIAS_FS("hugetlbfs");
  
  static struct vfsmount *hugetlbfs_vfsmount[HUGE_MAX_HSTATE];
  
@@ -1322,7 +1330,7 @@ static int __init init_hugetlbfs_fs(void)
         error = -ENOMEM;
         hugetlbfs_inode_cachep = kmem_cache_create("hugetlbfs_inode_cache",
                                         sizeof(struct hugetlbfs_inode_info),
-                                       0, 0, init_once);
+                                       0, SLAB_ACCOUNT, init_once);
         if (hugetlbfs_inode_cachep == NULL)
                 goto out2;
  
@@ -1356,26 +1364,4 @@ static int __init init_hugetlbfs_fs(void)
   out2:
         return error;
  }
-
-static void __exit exit_hugetlbfs_fs(void)
-{
-       struct hstate *h;
-       int i;
-
-
-       /*
-        * Make sure all delayed rcu free inodes are flushed before we
-        * destroy cache.
-        */
-       rcu_barrier();
-       kmem_cache_destroy(hugetlbfs_inode_cachep);
-       i = 0;
-       for_each_hstate(h)
-               kern_unmount(hugetlbfs_vfsmount[i++]);
-       unregister_filesystem(&hugetlbfs_fs_type);
-}
-
-module_init(init_hugetlbfs_fs)
-module_exit(exit_hugetlbfs_fs)
-
-MODULE_LICENSE("GPL");
+fs_initcall(init_hugetlbfs_fs)