Merge branch 'stable/for-linus-4.5' of git://git.kernel.org/pub/scm/linux/kernel...

[deliverable/linux.git] / fs / hugetlbfs / inode.c
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c

index 47789292a582f84ef7dc98be8fb29285bcd466af..e1f465a389d5be1b27f8fd98451312bb94b454b8 100644 (file)
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -141,7 +141,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
  
         vma_len = (loff_t)(vma->vm_end - vma->vm_start);
  
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
         file_accessed(file);
  
         ret = -ENOMEM;
@@ -157,7 +157,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
         if (vma->vm_flags & VM_WRITE && inode->i_size < len)
                 inode->i_size = len;
  out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
  
         return ret;
  }
@@ -324,11 +324,48 @@ static void remove_huge_page(struct page *page)
         delete_from_page_cache(page);
  }
  
+static void
+hugetlb_vmdelete_list(struct rb_root *root, pgoff_t start, pgoff_t end)
+{
+       struct vm_area_struct *vma;
+
+       /*
+        * end == 0 indicates that the entire range after
+        * start should be unmapped.
+        */
+       vma_interval_tree_foreach(vma, root, start, end ? end : ULONG_MAX) {
+               unsigned long v_offset;
+               unsigned long v_end;
+
+               /*
+                * Can the expression below overflow on 32-bit arches?
+                * No, because the interval tree returns us only those vmas
+                * which overlap the truncated area starting at pgoff,
+                * and no vma on a 32-bit arch can span beyond the 4GB.
+                */
+               if (vma->vm_pgoff < start)
+                       v_offset = (start - vma->vm_pgoff) << PAGE_SHIFT;
+               else
+                       v_offset = 0;
+
+               if (!end)
+                       v_end = vma->vm_end;
+               else {
+                       v_end = ((end - vma->vm_pgoff) << PAGE_SHIFT)
+                                                       + vma->vm_start;
+                       if (v_end > vma->vm_end)
+                               v_end = vma->vm_end;
+               }
+
+               unmap_hugepage_range(vma, vma->vm_start + v_offset, v_end,
+                                                                       NULL);
+       }
+}
  
  /*
   * remove_inode_hugepages handles two distinct cases: truncation and hole
   * punch.  There are subtle differences in operation for each case.
-
+ *
   * truncation is indicated by end of range being LLONG_MAX
   *     In this case, we first scan the range and release found pages.
   *     After releasing pages, hugetlb_unreserve_pages cleans up region/reserv
@@ -379,6 +416,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
  
                 for (i = 0; i < pagevec_count(&pvec); ++i) {
                         struct page *page = pvec.pages[i];
+                       bool rsv_on_error;
                         u32 hash;
  
                         /*
@@ -395,37 +433,43 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
                                                         mapping, next, 0);
                         mutex_lock(&hugetlb_fault_mutex_table[hash]);
  
-                       lock_page(page);
-                       if (likely(!page_mapped(page))) {
-                               bool rsv_on_error = !PagePrivate(page);
-                               /*
-                                * We must free the huge page and remove
-                                * from page cache (remove_huge_page) BEFORE
-                                * removing the region/reserve map
-                                * (hugetlb_unreserve_pages).  In rare out
-                                * of memory conditions, removal of the
-                                * region/reserve map could fail.  Before
-                                * free'ing the page, note PagePrivate which
-                                * is used in case of error.
-                                */
-                               remove_huge_page(page);
-                               freed++;
-                               if (!truncate_op) {
-                                       if (unlikely(hugetlb_unreserve_pages(
-                                                       inode, next,
-                                                       next + 1, 1)))
-                                               hugetlb_fix_reserve_counts(
-                                                       inode, rsv_on_error);
-                               }
-                       } else {
-                               /*
-                                * If page is mapped, it was faulted in after
-                                * being unmapped.  It indicates a race between
-                                * hole punch and page fault.  Do nothing in
-                                * this case.  Getting here in a truncate
-                                * operation is a bug.
-                                */
+                       /*
+                        * If page is mapped, it was faulted in after being
+                        * unmapped in caller.  Unmap (again) now after taking
+                        * the fault mutex.  The mutex will prevent faults
+                        * until we finish removing the page.
+                        *
+                        * This race can only happen in the hole punch case.
+                        * Getting here in a truncate operation is a bug.
+                        */
+                       if (unlikely(page_mapped(page))) {
                                 BUG_ON(truncate_op);
+
+                               i_mmap_lock_write(mapping);
+                               hugetlb_vmdelete_list(&mapping->i_mmap,
+                                       next * pages_per_huge_page(h),
+                                       (next + 1) * pages_per_huge_page(h));
+                               i_mmap_unlock_write(mapping);
+                       }
+
+                       lock_page(page);
+                       /*
+                        * We must free the huge page and remove from page
+                        * cache (remove_huge_page) BEFORE removing the
+                        * region/reserve map (hugetlb_unreserve_pages).  In
+                        * rare out of memory conditions, removal of the
+                        * region/reserve map could fail.  Before free'ing
+                        * the page, note PagePrivate which is used in case
+                        * of error.
+                        */
+                       rsv_on_error = !PagePrivate(page);
+                       remove_huge_page(page);
+                       freed++;
+                       if (!truncate_op) {
+                               if (unlikely(hugetlb_unreserve_pages(inode,
+                                                       next, next + 1, 1)))
+                                       hugetlb_fix_reserve_counts(inode,
+                                                               rsv_on_error);
                         }
  
                         unlock_page(page);
@@ -452,41 +496,6 @@ static void hugetlbfs_evict_inode(struct inode *inode)
         clear_inode(inode);
  }
  
-static inline void
-hugetlb_vmdelete_list(struct rb_root *root, pgoff_t start, pgoff_t end)
-{
-       struct vm_area_struct *vma;
-
-       /*
-        * end == 0 indicates that the entire range after
-        * start should be unmapped.
-        */
-       vma_interval_tree_foreach(vma, root, start, end ? end : ULONG_MAX) {
-               unsigned long v_offset;
-
-               /*
-                * Can the expression below overflow on 32-bit arches?
-                * No, because the interval tree returns us only those vmas
-                * which overlap the truncated area starting at pgoff,
-                * and no vma on a 32-bit arch can span beyond the 4GB.
-                */
-               if (vma->vm_pgoff < start)
-                       v_offset = (start - vma->vm_pgoff) << PAGE_SHIFT;
-               else
-                       v_offset = 0;
-
-               if (end) {
-                       end = ((end - start) << PAGE_SHIFT) +
-                              vma->vm_start + v_offset;
-                       if (end > vma->vm_end)
-                               end = vma->vm_end;
-               } else
-                       end = vma->vm_end;
-
-               unmap_hugepage_range(vma, vma->vm_start + v_offset, end, NULL);
-       }
-}
-
  static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
  {
         pgoff_t pgoff;
@@ -521,7 +530,7 @@ static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
         if (hole_end > hole_start) {
                 struct address_space *mapping = inode->i_mapping;
  
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                 i_mmap_lock_write(mapping);
                 if (!RB_EMPTY_ROOT(&mapping->i_mmap))
                         hugetlb_vmdelete_list(&mapping->i_mmap,
@@ -529,7 +538,7 @@ static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
                                                 hole_end  >> PAGE_SHIFT);
                 i_mmap_unlock_write(mapping);
                 remove_inode_hugepages(inode, hole_start, hole_end);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
         }
  
         return 0;
@@ -563,7 +572,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
         start = offset >> hpage_shift;
         end = (offset + len + hpage_size - 1) >> hpage_shift;
  
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
  
         /* We need to check rlimit even when FALLOC_FL_KEEP_SIZE */
         error = inode_newsize_ok(inode, offset + len);
@@ -650,7 +659,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
                 i_size_write(inode, offset + len);
         inode->i_ctime = CURRENT_TIME;
  out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
         return error;
  }
  
@@ -708,7 +717,7 @@ static struct inode *hugetlbfs_get_root(struct super_block *sb,
  /*
   * Hugetlbfs is not reclaimable; therefore its i_mmap_rwsem will never
   * be taken from reclaim -- unlike regular filesystems. This needs an
- * annotation because huge_pmd_share() does an allocation under
+ * annotation because huge_pmd_share() does an allocation under hugetlb's
   * i_mmap_rwsem.
   */
  static struct lock_class_key hugetlbfs_i_mmap_rwsem_key;