mm: move VM_LOCKED check to __mlock_vma_pages_range()
[deliverable/linux.git] / mm / memory.c
index 98b58fecedeffc236a9c7285689fe4720409bd30..15e1f19a3b1003a5aa5e0b618766cfe648b3b81d 100644 (file)
@@ -736,7 +736,7 @@ again:
        dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
        if (!dst_pte)
                return -ENOMEM;
-       src_pte = pte_offset_map_nested(src_pmd, addr);
+       src_pte = pte_offset_map(src_pmd, addr);
        src_ptl = pte_lockptr(src_mm, src_pmd);
        spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
        orig_src_pte = src_pte;
@@ -767,7 +767,7 @@ again:
 
        arch_leave_lazy_mmu_mode();
        spin_unlock(src_ptl);
-       pte_unmap_nested(orig_src_pte);
+       pte_unmap(orig_src_pte);
        add_mm_rss_vec(dst_mm, rss);
        pte_unmap_unlock(orig_dst_pte, dst_ptl);
        cond_resched();
@@ -1310,6 +1310,28 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
                 */
                mark_page_accessed(page);
        }
+       if (flags & FOLL_MLOCK) {
+               /*
+                * The preliminary mapping check is mainly to avoid the
+                * pointless overhead of lock_page on the ZERO_PAGE
+                * which might bounce very badly if there is contention.
+                *
+                * If the page is already locked, we don't need to
+                * handle it now - vmscan will handle it later if and
+                * when it attempts to reclaim the page.
+                */
+               if (page->mapping && trylock_page(page)) {
+                       lru_add_drain();  /* push cached pages to LRU */
+                       /*
+                        * Because we lock page here and migration is
+                        * blocked by the pte's page reference, we need
+                        * only check for file-cache page truncation.
+                        */
+                       if (page->mapping)
+                               mlock_vma_page(page);
+                       unlock_page(page);
+               }
+       }
 unlock:
        pte_unmap_unlock(ptep, ptl);
 out:
@@ -1450,7 +1472,8 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                                        if (ret & VM_FAULT_OOM)
                                                return i ? i : -ENOMEM;
                                        if (ret &
-                                           (VM_FAULT_HWPOISON|VM_FAULT_SIGBUS))
+                                           (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE|
+                                            VM_FAULT_SIGBUS))
                                                return i ? i : -EFAULT;
                                        BUG();
                                }
@@ -1590,7 +1613,7 @@ struct page *get_dump_page(unsigned long addr)
 }
 #endif /* CONFIG_ELF_CORE */
 
-pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr,
+pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr,
                        spinlock_t **ptl)
 {
        pgd_t * pgd = pgd_offset(mm, addr);
@@ -2079,7 +2102,7 @@ static inline void cow_user_page(struct page *dst, struct page *src, unsigned lo
                 * zeroes.
                 */
                if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE))
-                       memset(kaddr, 0, PAGE_SIZE);
+                       clear_page(kaddr);
                kunmap_atomic(kaddr, KM_USER0);
                flush_dcache_page(dst);
        } else
@@ -2107,10 +2130,11 @@ static inline void cow_user_page(struct page *dst, struct page *src, unsigned lo
 static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
                unsigned long address, pte_t *page_table, pmd_t *pmd,
                spinlock_t *ptl, pte_t orig_pte)
+       __releases(ptl)
 {
        struct page *old_page, *new_page;
        pte_t entry;
-       int reuse = 0, ret = 0;
+       int ret = 0;
        int page_mkwrite = 0;
        struct page *dirty_page = NULL;
 
@@ -2147,14 +2171,16 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
                        }
                        page_cache_release(old_page);
                }
-               reuse = reuse_swap_page(old_page);
-               if (reuse)
+               if (reuse_swap_page(old_page)) {
                        /*
                         * The page is all ours.  Move it to our anon_vma so
                         * the rmap code will not search our parent or siblings.
                         * Protected against the rmap code by the page lock.
                         */
                        page_move_anon_rmap(old_page, vma, address);
+                       unlock_page(old_page);
+                       goto reuse;
+               }
                unlock_page(old_page);
        } else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
                                        (VM_WRITE|VM_SHARED))) {
@@ -2218,18 +2244,52 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
                }
                dirty_page = old_page;
                get_page(dirty_page);
-               reuse = 1;
-       }
 
-       if (reuse) {
 reuse:
                flush_cache_page(vma, address, pte_pfn(orig_pte));
                entry = pte_mkyoung(orig_pte);
                entry = maybe_mkwrite(pte_mkdirty(entry), vma);
                if (ptep_set_access_flags(vma, address, page_table, entry,1))
                        update_mmu_cache(vma, address, page_table);
+               pte_unmap_unlock(page_table, ptl);
                ret |= VM_FAULT_WRITE;
-               goto unlock;
+
+               if (!dirty_page)
+                       return ret;
+
+               /*
+                * Yes, Virginia, this is actually required to prevent a race
+                * with clear_page_dirty_for_io() from clearing the page dirty
+                * bit after it clear all dirty ptes, but before a racing
+                * do_wp_page installs a dirty pte.
+                *
+                * do_no_page is protected similarly.
+                */
+               if (!page_mkwrite) {
+                       wait_on_page_locked(dirty_page);
+                       set_page_dirty_balance(dirty_page, page_mkwrite);
+               }
+               put_page(dirty_page);
+               if (page_mkwrite) {
+                       struct address_space *mapping = dirty_page->mapping;
+
+                       set_page_dirty(dirty_page);
+                       unlock_page(dirty_page);
+                       page_cache_release(dirty_page);
+                       if (mapping)    {
+                               /*
+                                * Some device drivers do not set page.mapping
+                                * but still dirty their pages
+                                */
+                               balance_dirty_pages_ratelimited(mapping);
+                       }
+               }
+
+               /* file_update_time outside page_lock */
+               if (vma->vm_file)
+                       file_update_time(vma->vm_file);
+
+               return ret;
        }
 
        /*
@@ -2335,39 +2395,6 @@ gotten:
                page_cache_release(old_page);
 unlock:
        pte_unmap_unlock(page_table, ptl);
-       if (dirty_page) {
-               /*
-                * Yes, Virginia, this is actually required to prevent a race
-                * with clear_page_dirty_for_io() from clearing the page dirty
-                * bit after it clear all dirty ptes, but before a racing
-                * do_wp_page installs a dirty pte.
-                *
-                * do_no_page is protected similarly.
-                */
-               if (!page_mkwrite) {
-                       wait_on_page_locked(dirty_page);
-                       set_page_dirty_balance(dirty_page, page_mkwrite);
-               }
-               put_page(dirty_page);
-               if (page_mkwrite) {
-                       struct address_space *mapping = dirty_page->mapping;
-
-                       set_page_dirty(dirty_page);
-                       unlock_page(dirty_page);
-                       page_cache_release(dirty_page);
-                       if (mapping)    {
-                               /*
-                                * Some device drivers do not set page.mapping
-                                * but still dirty their pages
-                                */
-                               balance_dirty_pages_ratelimited(mapping);
-                       }
-               }
-
-               /* file_update_time outside page_lock */
-               if (vma->vm_file)
-                       file_update_time(vma->vm_file);
-       }
        return ret;
 oom_free_new:
        page_cache_release(new_page);
@@ -2626,6 +2653,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
        struct page *page, *swapcache = NULL;
        swp_entry_t entry;
        pte_t pte;
+       int locked;
        struct mem_cgroup *ptr = NULL;
        int exclusive = 0;
        int ret = 0;
@@ -2676,8 +2704,12 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
                goto out_release;
        }
 
-       lock_page(page);
+       locked = lock_page_or_retry(page, mm, flags);
        delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
+       if (!locked) {
+               ret |= VM_FAULT_RETRY;
+               goto out_release;
+       }
 
        /*
         * Make sure try_to_free_swap or reuse_swap_page or swapoff did not
@@ -2926,7 +2958,8 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
        vmf.page = NULL;
 
        ret = vma->vm_ops->fault(vma, &vmf);
-       if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))
+       if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE |
+                           VM_FAULT_RETRY)))
                return ret;
 
        if (unlikely(PageHWPoison(vmf.page))) {
@@ -3288,7 +3321,12 @@ int make_pages_present(unsigned long addr, unsigned long end)
        vma = find_vma(current->mm, addr);
        if (!vma)
                return -ENOMEM;
-       write = (vma->vm_flags & VM_WRITE) != 0;
+       /*
+        * We want to touch writable mappings with a write fault in order
+        * to break COW, except for shared mappings because these don't COW
+        * and we would not want to dirty them for nothing.
+        */
+       write = (vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE;
        BUG_ON(addr >= end);
        BUG_ON(end > vma->vm_end);
        len = DIV_ROUND_UP(end, PAGE_SIZE) - addr/PAGE_SIZE;
@@ -3343,7 +3381,7 @@ int in_gate_area_no_task(unsigned long addr)
 
 #endif /* __HAVE_ARCH_GATE_AREA */
 
-static int follow_pte(struct mm_struct *mm, unsigned long address,
+static int __follow_pte(struct mm_struct *mm, unsigned long address,
                pte_t **ptepp, spinlock_t **ptlp)
 {
        pgd_t *pgd;
@@ -3380,6 +3418,17 @@ out:
        return -EINVAL;
 }
 
+static inline int follow_pte(struct mm_struct *mm, unsigned long address,
+                            pte_t **ptepp, spinlock_t **ptlp)
+{
+       int res;
+
+       /* (void) is needed to make gcc happy */
+       (void) __cond_lock(*ptlp,
+                          !(res = __follow_pte(mm, address, ptepp, ptlp)));
+       return res;
+}
+
 /**
  * follow_pfn - look up PFN at a user virtual address
  * @vma: memory mapping
This page took 0.030526 seconds and 5 git commands to generate.