mm, hugetlb: decrement reserve count if VM_NORESERVE alloc page cache
[deliverable/linux.git] / mm / hugetlb.c
index dacf0d2256d9790c669b88867a5c277b7a423991..5b084c7b34c6ba2232d4afcfc8729d3b87b65f5b 100644 (file)
@@ -443,10 +443,23 @@ void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
 }
 
 /* Returns true if the VMA has associated reserve pages */
-static int vma_has_reserves(struct vm_area_struct *vma)
+static int vma_has_reserves(struct vm_area_struct *vma, long chg)
 {
-       if (vma->vm_flags & VM_NORESERVE)
-               return 0;
+       if (vma->vm_flags & VM_NORESERVE) {
+               /*
+                * This address is already reserved by other process(chg == 0),
+                * so, we should decrement reserved count. Without decrementing,
+                * reserve count remains after releasing inode, because this
+                * allocated page will go into page cache and is regarded as
+                * coming from reserved pool in releasing step.  Currently, we
+                * don't have any other solution to deal with this situation
+                * properly, so add work-around here.
+                */
+               if (vma->vm_flags & VM_MAYSHARE && chg == 0)
+                       return 1;
+               else
+                       return 0;
+       }
 
        /* Shared mappings always use reserves */
        if (vma->vm_flags & VM_MAYSHARE)
@@ -520,7 +533,8 @@ static struct page *dequeue_huge_page_node(struct hstate *h, int nid)
 
 static struct page *dequeue_huge_page_vma(struct hstate *h,
                                struct vm_area_struct *vma,
-                               unsigned long address, int avoid_reserve)
+                               unsigned long address, int avoid_reserve,
+                               long chg)
 {
        struct page *page = NULL;
        struct mempolicy *mpol;
@@ -535,7 +549,7 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
         * have no page reserves. This check ensures that reservations are
         * not "stolen". The child may still get SIGKILLed
         */
-       if (!vma_has_reserves(vma) &&
+       if (!vma_has_reserves(vma, chg) &&
                        h->free_huge_pages - h->resv_huge_pages == 0)
                goto err;
 
@@ -553,8 +567,12 @@ retry_cpuset:
                if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask)) {
                        page = dequeue_huge_page_node(h, zone_to_nid(zone));
                        if (page) {
-                               if (!avoid_reserve && vma_has_reserves(vma))
-                                       h->resv_huge_pages--;
+                               if (avoid_reserve)
+                                       break;
+                               if (!vma_has_reserves(vma, chg))
+                                       break;
+
+                               h->resv_huge_pages--;
                                break;
                        }
                }
@@ -1155,7 +1173,7 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
                return ERR_PTR(-ENOSPC);
        }
        spin_lock(&hugetlb_lock);
-       page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve);
+       page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve, chg);
        if (!page) {
                spin_unlock(&hugetlb_lock);
                page = alloc_buddy_huge_page(h, NUMA_NO_NODE);
This page took 0.040591 seconds and 5 git commands to generate.