mm, compaction: more robust check for scanners meeting
[deliverable/linux.git] / mm / huge_memory.c
index 7735f99931fa1e47cea0ad880f0138b7761f2088..71a4822c832b9b63a570a6aba244c1e0ec199e6a 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/swap.h>
 #include <linux/shrinker.h>
 #include <linux/mm_inline.h>
+#include <linux/dax.h>
 #include <linux/kthread.h>
 #include <linux/khugepaged.h>
 #include <linux/freezer.h>
@@ -105,7 +106,7 @@ static struct khugepaged_scan khugepaged_scan = {
 };
 
 
-static int set_recommended_min_free_kbytes(void)
+static void set_recommended_min_free_kbytes(void)
 {
        struct zone *zone;
        int nr_zones = 0;
@@ -140,7 +141,6 @@ static int set_recommended_min_free_kbytes(void)
                min_free_kbytes = recommended_min;
        }
        setup_per_zone_wmarks();
-       return 0;
 }
 
 static int start_stop_khugepaged(void)
@@ -172,12 +172,7 @@ fail:
 static atomic_t huge_zero_refcount;
 struct page *huge_zero_page __read_mostly;
 
-static inline bool is_huge_zero_pmd(pmd_t pmd)
-{
-       return is_huge_zero_page(pmd_page(pmd));
-}
-
-static struct page *get_huge_zero_page(void)
+struct page *get_huge_zero_page(void)
 {
        struct page *zero_page;
 retry:
@@ -717,13 +712,14 @@ static inline pmd_t mk_huge_pmd(struct page *page, pgprot_t prot)
 
 static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
                                        struct vm_area_struct *vma,
-                                       unsigned long haddr, pmd_t *pmd,
+                                       unsigned long address, pmd_t *pmd,
                                        struct page *page, gfp_t gfp,
                                        unsigned int flags)
 {
        struct mem_cgroup *memcg;
        pgtable_t pgtable;
        spinlock_t *ptl;
+       unsigned long haddr = address & HPAGE_PMD_MASK;
 
        VM_BUG_ON_PAGE(!PageCompound(page), page);
 
@@ -765,7 +761,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
                        mem_cgroup_cancel_charge(page, memcg);
                        put_page(page);
                        pte_free(mm, pgtable);
-                       ret = handle_userfault(vma, haddr, flags,
+                       ret = handle_userfault(vma, address, flags,
                                               VM_UFFD_MISSING);
                        VM_BUG_ON(ret & VM_FAULT_FALLBACK);
                        return ret;
@@ -793,16 +789,19 @@ static inline gfp_t alloc_hugepage_gfpmask(int defrag, gfp_t extra_gfp)
 }
 
 /* Caller must hold page table lock. */
-static void set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
+static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
                struct vm_area_struct *vma, unsigned long haddr, pmd_t *pmd,
                struct page *zero_page)
 {
        pmd_t entry;
+       if (!pmd_none(*pmd))
+               return false;
        entry = mk_pmd(zero_page, vma->vm_page_prot);
        entry = pmd_mkhuge(entry);
        pgtable_trans_huge_deposit(mm, pmd, pgtable);
        set_pmd_at(mm, haddr, pmd, entry);
        atomic_long_inc(&mm->nr_ptes);
+       return true;
 }
 
 int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
@@ -841,7 +840,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
                if (pmd_none(*pmd)) {
                        if (userfaultfd_missing(vma)) {
                                spin_unlock(ptl);
-                               ret = handle_userfault(vma, haddr, flags,
+                               ret = handle_userfault(vma, address, flags,
                                                       VM_UFFD_MISSING);
                                VM_BUG_ON(ret & VM_FAULT_FALLBACK);
                        } else {
@@ -865,7 +864,51 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
                count_vm_event(THP_FAULT_FALLBACK);
                return VM_FAULT_FALLBACK;
        }
-       return __do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page, gfp, flags);
+       return __do_huge_pmd_anonymous_page(mm, vma, address, pmd, page, gfp,
+                                           flags);
+}
+
+static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
+               pmd_t *pmd, unsigned long pfn, pgprot_t prot, bool write)
+{
+       struct mm_struct *mm = vma->vm_mm;
+       pmd_t entry;
+       spinlock_t *ptl;
+
+       ptl = pmd_lock(mm, pmd);
+       if (pmd_none(*pmd)) {
+               entry = pmd_mkhuge(pfn_pmd(pfn, prot));
+               if (write) {
+                       entry = pmd_mkyoung(pmd_mkdirty(entry));
+                       entry = maybe_pmd_mkwrite(entry, vma);
+               }
+               set_pmd_at(mm, addr, pmd, entry);
+               update_mmu_cache_pmd(vma, addr, pmd);
+       }
+       spin_unlock(ptl);
+}
+
+int vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
+                       pmd_t *pmd, unsigned long pfn, bool write)
+{
+       pgprot_t pgprot = vma->vm_page_prot;
+       /*
+        * If we had pmd_special, we could avoid all these restrictions,
+        * but we need to be consistent with PTEs and architectures that
+        * can't support a 'special' bit.
+        */
+       BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)));
+       BUG_ON((vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) ==
+                                               (VM_PFNMAP|VM_MIXEDMAP));
+       BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags));
+       BUG_ON((vma->vm_flags & VM_MIXEDMAP) && pfn_valid(pfn));
+
+       if (addr < vma->vm_start || addr >= vma->vm_end)
+               return VM_FAULT_SIGBUS;
+       if (track_pfn_insert(vma, &pgprot, pfn))
+               return VM_FAULT_SIGBUS;
+       insert_pfn_pmd(vma, addr, pmd, pfn, pgprot, write);
+       return VM_FAULT_NOPAGE;
 }
 
 int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
@@ -1412,41 +1455,41 @@ out:
 int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
                 pmd_t *pmd, unsigned long addr)
 {
+       pmd_t orig_pmd;
        spinlock_t *ptl;
-       int ret = 0;
 
-       if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
-               struct page *page;
-               pgtable_t pgtable;
-               pmd_t orig_pmd;
-               /*
-                * For architectures like ppc64 we look at deposited pgtable
-                * when calling pmdp_huge_get_and_clear. So do the
-                * pgtable_trans_huge_withdraw after finishing pmdp related
-                * operations.
-                */
-               orig_pmd = pmdp_huge_get_and_clear_full(tlb->mm, addr, pmd,
-                                                       tlb->fullmm);
-               tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
-               pgtable = pgtable_trans_huge_withdraw(tlb->mm, pmd);
-               if (is_huge_zero_pmd(orig_pmd)) {
-                       atomic_long_dec(&tlb->mm->nr_ptes);
-                       spin_unlock(ptl);
+       if (__pmd_trans_huge_lock(pmd, vma, &ptl) != 1)
+               return 0;
+       /*
+        * On architectures like ppc64, pmdp_huge_get_and_clear looks at
+        * the deposited pgtable, so the pgtable must still be deposited
+        * when it runs. Call pgtable_trans_huge_withdraw only after all
+        * pmdp-related operations have finished.
+        */
+       orig_pmd = pmdp_huge_get_and_clear_full(tlb->mm, addr, pmd,
+                       tlb->fullmm);
+       tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
+       if (vma_is_dax(vma)) {
+               spin_unlock(ptl);
+               if (is_huge_zero_pmd(orig_pmd))
                        put_huge_zero_page();
-               } else {
-                       page = pmd_page(orig_pmd);
-                       page_remove_rmap(page);
-                       VM_BUG_ON_PAGE(page_mapcount(page) < 0, page);
-                       add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
-                       VM_BUG_ON_PAGE(!PageHead(page), page);
-                       atomic_long_dec(&tlb->mm->nr_ptes);
-                       spin_unlock(ptl);
-                       tlb_remove_page(tlb, page);
-               }
-               pte_free(tlb->mm, pgtable);
-               ret = 1;
+       } else if (is_huge_zero_pmd(orig_pmd)) {
+               pte_free(tlb->mm, pgtable_trans_huge_withdraw(tlb->mm, pmd));
+               atomic_long_dec(&tlb->mm->nr_ptes);
+               spin_unlock(ptl);
+               put_huge_zero_page();
+       } else {
+               struct page *page = pmd_page(orig_pmd);
+               page_remove_rmap(page);
+               VM_BUG_ON_PAGE(page_mapcount(page) < 0, page);
+               add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
+               VM_BUG_ON_PAGE(!PageHead(page), page);
+               pte_free(tlb->mm, pgtable_trans_huge_withdraw(tlb->mm, pmd));
+               atomic_long_dec(&tlb->mm->nr_ptes);
+               spin_unlock(ptl);
+               tlb_remove_page(tlb, page);
        }
-       return ret;
+       return 1;
 }
 
 int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
@@ -2158,7 +2201,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
             _pte++, address += PAGE_SIZE) {
                pte_t pteval = *_pte;
                if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) {
-                       if (++none_or_zero <= khugepaged_max_ptes_none)
+                       if (!userfaultfd_armed(vma) &&
+                           ++none_or_zero <= khugepaged_max_ptes_none)
                                continue;
                        else
                                goto out;
@@ -2611,7 +2655,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
             _pte++, _address += PAGE_SIZE) {
                pte_t pteval = *_pte;
                if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) {
-                       if (++none_or_zero <= khugepaged_max_ptes_none)
+                       if (!userfaultfd_armed(vma) &&
+                           ++none_or_zero <= khugepaged_max_ptes_none)
                                continue;
                        else
                                goto out_unmap;
@@ -2907,7 +2952,7 @@ void __split_huge_page_pmd(struct vm_area_struct *vma, unsigned long address,
                pmd_t *pmd)
 {
        spinlock_t *ptl;
-       struct page *page;
+       struct page *page = NULL;
        struct mm_struct *mm = vma->vm_mm;
        unsigned long haddr = address & HPAGE_PMD_MASK;
        unsigned long mmun_start;       /* For mmu_notifiers */
@@ -2920,25 +2965,27 @@ void __split_huge_page_pmd(struct vm_area_struct *vma, unsigned long address,
 again:
        mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
        ptl = pmd_lock(mm, pmd);
-       if (unlikely(!pmd_trans_huge(*pmd))) {
-               spin_unlock(ptl);
-               mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
-               return;
-       }
-       if (is_huge_zero_pmd(*pmd)) {
+       if (unlikely(!pmd_trans_huge(*pmd)))
+               goto unlock;
+       if (vma_is_dax(vma)) {
+               pmd_t _pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd);
+               if (is_huge_zero_pmd(_pmd))
+                       put_huge_zero_page();
+       } else if (is_huge_zero_pmd(*pmd)) {
                __split_huge_zero_page_pmd(vma, haddr, pmd);
-               spin_unlock(ptl);
-               mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
-               return;
+       } else {
+               page = pmd_page(*pmd);
+               VM_BUG_ON_PAGE(!page_count(page), page);
+               get_page(page);
        }
-       page = pmd_page(*pmd);
-       VM_BUG_ON_PAGE(!page_count(page), page);
-       get_page(page);
+ unlock:
        spin_unlock(ptl);
        mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 
-       split_huge_page(page);
+       if (!page)
+               return;
 
+       split_huge_page(page);
        put_page(page);
 
        /*
@@ -2987,7 +3034,7 @@ static void split_huge_page_address(struct mm_struct *mm,
        split_huge_page_pmd_mm(mm, address, pmd);
 }
 
-void __vma_adjust_trans_huge(struct vm_area_struct *vma,
+void vma_adjust_trans_huge(struct vm_area_struct *vma,
                             unsigned long start,
                             unsigned long end,
                             long adjust_next)
This page took 0.041251 seconds and 5 git commands to generate.