diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 070880fe1ff7186dd66680bf3af9dc14d9bf471a..cd1280c487ff946fcbe33bd81b7a28badd180106 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -546,6 +546,28 @@ retry:
        return del;
 }
 
+/*
+ * A rare out of memory error was encountered which prevented removal of
+ * the reserve map region for a page.  The huge page itself was freed
+ * and removed from the page cache.  This routine will adjust the subpool
+ * usage count, and the global reserve count if needed.  By incrementing
+ * these counts, the reserve map entry which could not be deleted will
+ * appear as a "reserved" entry instead of simply dangling with incorrect
+ * counts.
+ */
+void hugetlb_fix_reserve_counts(struct inode *inode, bool restore_reserve)
+{
+       struct hugepage_subpool *spool = subpool_inode(inode);
+       long rsv_adjust;
+
+       rsv_adjust = hugepage_subpool_get_pages(spool, 1);
+       if (restore_reserve && rsv_adjust) {
+               struct hstate *h = hstate_inode(inode);
+
+               hugetlb_acct_memory(h, 1);
+       }
+}
+
 /*
  * Count and return the number of huge pages in the reserve map
  * that intersect with the range [f, t).
@@ -779,8 +801,19 @@ static bool vma_has_reserves(struct vm_area_struct *vma, long chg)
        }
 
        /* Shared mappings always use reserves */
-       if (vma->vm_flags & VM_MAYSHARE)
-               return true;
+       if (vma->vm_flags & VM_MAYSHARE) {
+               /*
+                * We know VM_NORESERVE is not set.  Therefore, there SHOULD
+                * be a region map for all pages.  The only situation where
+                * there is no region map is if a hole was punched via
+                * fallocate.  In this case, there really are no reserves to
+                * use.  This situation is indicated if chg != 0.
+                */
+               if (chg)
+                       return false;
+               else
+                       return true;
+       }
 
        /*
         * Only the process that called mmap() has reserves for
@@ -1694,40 +1727,64 @@ static void vma_end_reservation(struct hstate *h,
        (void)__vma_reservation_common(h, vma, addr, VMA_END_RESV);
 }
 
-static struct page *alloc_huge_page(struct vm_area_struct *vma,
+struct page *alloc_huge_page(struct vm_area_struct *vma,
                                    unsigned long addr, int avoid_reserve)
 {
        struct hugepage_subpool *spool = subpool_vma(vma);
        struct hstate *h = hstate_vma(vma);
        struct page *page;
-       long chg, commit;
+       long map_chg, map_commit;
+       long gbl_chg;
        int ret, idx;
        struct hugetlb_cgroup *h_cg;
 
        idx = hstate_index(h);
        /*
-        * Processes that did not create the mapping will have no
-        * reserves and will not have accounted against subpool
-        * limit. Check that the subpool limit can be made before
-        * satisfying the allocation MAP_NORESERVE mappings may also
-        * need pages and subpool limit allocated allocated if no reserve
-        * mapping overlaps.
+        * Examine the region/reserve map to determine if the process
+        * has a reservation for the page to be allocated.  A return
+        * code of zero indicates a reservation exists (no change).
         */
-       chg = vma_needs_reservation(h, vma, addr);
-       if (chg < 0)
+       map_chg = gbl_chg = vma_needs_reservation(h, vma, addr);
+       if (map_chg < 0)
                return ERR_PTR(-ENOMEM);
-       if (chg || avoid_reserve)
-               if (hugepage_subpool_get_pages(spool, 1) < 0) {
+
+       /*
+        * Processes that did not create the mapping will have no
+        * reserves as indicated by the region/reserve map. Check
+        * that the allocation will not exceed the subpool limit.
+        * Allocations for MAP_NORESERVE mappings also need to be
+        * checked against any subpool limit.
+        */
+       if (map_chg || avoid_reserve) {
+               gbl_chg = hugepage_subpool_get_pages(spool, 1);
+               if (gbl_chg < 0) {
                        vma_end_reservation(h, vma, addr);
                        return ERR_PTR(-ENOSPC);
                }
 
+               /*
+                * Even though there was no reservation in the region/reserve
+                * map, there could be reservations associated with the
+                * subpool that can be used.  This would be indicated if the
+                * return value of hugepage_subpool_get_pages() is zero.
+                * However, if avoid_reserve is specified we still avoid even
+                * the subpool reservations.
+                */
+               if (avoid_reserve)
+                       gbl_chg = 1;
+       }
+
        ret = hugetlb_cgroup_charge_cgroup(idx, pages_per_huge_page(h), &h_cg);
        if (ret)
                goto out_subpool_put;
 
        spin_lock(&hugetlb_lock);
-       page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve, chg);
+       /*
+        * gbl_chg is passed to indicate whether or not a page must be taken
+        * from the global free pool (global change).  gbl_chg == 0 indicates
+        * a reservation exists for the allocation.
+        */
+       page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve, gbl_chg);
        if (!page) {
                spin_unlock(&hugetlb_lock);
                page = alloc_buddy_huge_page(h, NUMA_NO_NODE);
@@ -1743,8 +1800,8 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
 
        set_page_private(page, (unsigned long)spool);
 
-       commit = vma_commit_reservation(h, vma, addr);
-       if (unlikely(chg > commit)) {
+       map_commit = vma_commit_reservation(h, vma, addr);
+       if (unlikely(map_chg > map_commit)) {
                /*
                 * The page was added to the reservation map between
                 * vma_needs_reservation and vma_commit_reservation.
@@ -1764,7 +1821,7 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
 out_uncharge_cgroup:
        hugetlb_cgroup_uncharge_cgroup(idx, pages_per_huge_page(h), h_cg);
 out_subpool_put:
-       if (chg || avoid_reserve)
+       if (map_chg || avoid_reserve)
                hugepage_subpool_put_pages(spool, 1);
        vma_end_reservation(h, vma, addr);
        return ERR_PTR(-ENOSPC);
@@ -3318,6 +3375,23 @@ static bool hugetlbfs_pagecache_present(struct hstate *h,
        return page != NULL;
 }
 
+int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
+                          pgoff_t idx)
+{
+       struct inode *inode = mapping->host;
+       struct hstate *h = hstate_inode(inode);
+       int err = add_to_page_cache(page, mapping, idx, GFP_KERNEL);
+
+       if (err)
+               return err;
+       ClearPagePrivate(page);
+
+       spin_lock(&inode->i_lock);
+       inode->i_blocks += blocks_per_huge_page(h);
+       spin_unlock(&inode->i_lock);
+       return 0;
+}
+
 static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
                           struct address_space *mapping, pgoff_t idx,
                           unsigned long address, pte_t *ptep, unsigned int flags)
@@ -3365,21 +3439,13 @@ retry:
                set_page_huge_active(page);
 
                if (vma->vm_flags & VM_MAYSHARE) {
-                       int err;
-                       struct inode *inode = mapping->host;
-
-                       err = add_to_page_cache(page, mapping, idx, GFP_KERNEL);
+                       int err = huge_add_to_page_cache(page, mapping, idx);
                        if (err) {
                                put_page(page);
                                if (err == -EEXIST)
                                        goto retry;
                                goto out;
                        }
-                       ClearPagePrivate(page);
-
-                       spin_lock(&inode->i_lock);
-                       inode->i_blocks += blocks_per_huge_page(h);
-                       spin_unlock(&inode->i_lock);
                } else {
                        lock_page(page);
                        if (unlikely(anon_vma_prepare(vma))) {
@@ -3909,7 +3975,8 @@ out_err:
        return ret;
 }
 
-void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
+long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
+                                                               long freed)
 {
        struct hstate *h = hstate_inode(inode);
        struct resv_map *resv_map = inode_resv_map(inode);
@@ -3917,8 +3984,17 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
        struct hugepage_subpool *spool = subpool_inode(inode);
        long gbl_reserve;
 
-       if (resv_map)
-               chg = region_del(resv_map, offset, LONG_MAX);
+       if (resv_map) {
+               chg = region_del(resv_map, start, end);
+               /*
+                * region_del() can fail in the rare case where a region
+                * must be split and another region descriptor can not be
+                * allocated.  If end == LONG_MAX, it will not fail.
+                */
+               if (chg < 0)
+                       return chg;
+       }
+
        spin_lock(&inode->i_lock);
        inode->i_blocks -= (blocks_per_huge_page(h) * freed);
        spin_unlock(&inode->i_lock);
@@ -3929,6 +4005,8 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
         */
        gbl_reserve = hugepage_subpool_put_pages(spool, (chg - freed));
        hugetlb_acct_memory(h, -gbl_reserve);
+
+       return 0;
 }
 
 #ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
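
For context on how the interfaces reworked above fit together: hugetlb_unreserve_pages() now returns long so that a caller which has already removed and freed a huge page can detect the rare region_del() failure and compensate by calling hugetlb_fix_reserve_counts(). A minimal caller sketch follows; it is illustrative only, and the helper name, its arguments, and the rsv_on_error flag are assumptions rather than part of this diff.

/*
 * Hypothetical hugetlbfs-side helper (not in this patch): after the huge
 * page at index 'idx' has been removed from the page cache and freed,
 * drop its reserve map entry.  If region_del() could not split the
 * region (descriptor allocation failure), fix up the subpool and global
 * reserve counts instead of leaving them inconsistent.
 */
static void remove_huge_page_reservation(struct inode *inode, pgoff_t idx,
					 bool rsv_on_error)
{
	if (hugetlb_unreserve_pages(inode, idx, idx + 1, 1) < 0)
		hugetlb_fix_reserve_counts(inode, rsv_on_error);
}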