#include <linux/khugepaged.h>
#include <linux/freezer.h>
#include <linux/mman.h>
+#include <linux/pagemap.h>
#include <asm/tlb.h>
#include <asm/pgalloc.h>
#include "internal.h"
unsigned long recommended_min;
extern int min_free_kbytes;
- if (!test_bit(TRANSPARENT_HUGEPAGE_FLAG,
- &transparent_hugepage_flags) &&
- !test_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG,
- &transparent_hugepage_flags))
+ if (!khugepaged_enabled())
return 0;
for_each_populated_zone(zone)
ret = err;
}
- if (ret > 0 &&
- (test_bit(TRANSPARENT_HUGEPAGE_FLAG,
- &transparent_hugepage_flags) ||
- test_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG,
- &transparent_hugepage_flags)))
- set_recommended_min_free_kbytes();
-
return ret;
}
static struct kobj_attribute enabled_attr =
start_khugepaged();
- set_recommended_min_free_kbytes();
-
return 0;
out:
hugepage_exit_sysfs(hugepage_kobj);
}
__setup("transparent_hugepage=", setup_transparent_hugepage);
-static void prepare_pmd_huge_pte(pgtable_t pgtable,
- struct mm_struct *mm)
-{
- assert_spin_locked(&mm->page_table_lock);
-
- /* FIFO */
- if (!mm->pmd_huge_pte)
- INIT_LIST_HEAD(&pgtable->lru);
- else
- list_add(&pgtable->lru, &mm->pmd_huge_pte->lru);
- mm->pmd_huge_pte = pgtable;
-}
-
static inline pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
{
if (likely(vma->vm_flags & VM_WRITE))
*/
page_add_new_anon_rmap(page, vma, haddr);
set_pmd_at(mm, haddr, pmd, entry);
- prepare_pmd_huge_pte(pgtable, mm);
+ pgtable_trans_huge_deposit(mm, pgtable);
add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR);
mm->nr_ptes++;
spin_unlock(&mm->page_table_lock);
pmdp_set_wrprotect(src_mm, addr, src_pmd);
pmd = pmd_mkold(pmd_wrprotect(pmd));
set_pmd_at(dst_mm, addr, dst_pmd, pmd);
- prepare_pmd_huge_pte(pgtable, dst_mm);
+ pgtable_trans_huge_deposit(dst_mm, pgtable);
dst_mm->nr_ptes++;
ret = 0;
return ret;
}
-/* no "address" argument so destroys page coloring of some arch */
-pgtable_t get_pmd_huge_pte(struct mm_struct *mm)
-{
- pgtable_t pgtable;
-
- assert_spin_locked(&mm->page_table_lock);
-
- /* FIFO */
- pgtable = mm->pmd_huge_pte;
- if (list_empty(&pgtable->lru))
- mm->pmd_huge_pte = NULL;
- else {
- mm->pmd_huge_pte = list_entry(pgtable->lru.next,
- struct page, lru);
- list_del(&pgtable->lru);
- }
- return pgtable;
-}
-
static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
struct vm_area_struct *vma,
unsigned long address,
pmd_t _pmd;
int ret = 0, i;
struct page **pages;
+ unsigned long mmun_start; /* For mmu_notifiers */
+ unsigned long mmun_end; /* For mmu_notifiers */
pages = kmalloc(sizeof(struct page *) * HPAGE_PMD_NR,
GFP_KERNEL);
cond_resched();
}
+ mmun_start = haddr;
+ mmun_end = haddr + HPAGE_PMD_SIZE;
+ mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
+
spin_lock(&mm->page_table_lock);
if (unlikely(!pmd_same(*pmd, orig_pmd)))
goto out_free_pages;
VM_BUG_ON(!PageHead(page));
- pmdp_clear_flush_notify(vma, haddr, pmd);
+ pmdp_clear_flush(vma, haddr, pmd);
/* leave pmd empty until pte is filled */
- pgtable = get_pmd_huge_pte(mm);
+ pgtable = pgtable_trans_huge_withdraw(mm);
pmd_populate(mm, &_pmd, pgtable);
for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
page_remove_rmap(page);
spin_unlock(&mm->page_table_lock);
+ mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
+
ret |= VM_FAULT_WRITE;
put_page(page);
out_free_pages:
spin_unlock(&mm->page_table_lock);
+ mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
mem_cgroup_uncharge_start();
for (i = 0; i < HPAGE_PMD_NR; i++) {
mem_cgroup_uncharge_page(pages[i]);
int ret = 0;
struct page *page, *new_page;
unsigned long haddr;
+ unsigned long mmun_start; /* For mmu_notifiers */
+ unsigned long mmun_end; /* For mmu_notifiers */
VM_BUG_ON(!vma->anon_vma);
spin_lock(&mm->page_table_lock);
entry = pmd_mkyoung(orig_pmd);
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
if (pmdp_set_access_flags(vma, haddr, pmd, entry, 1))
- update_mmu_cache(vma, address, entry);
+ update_mmu_cache_pmd(vma, address, pmd);
ret |= VM_FAULT_WRITE;
goto out_unlock;
}
copy_user_huge_page(new_page, page, haddr, vma, HPAGE_PMD_NR);
__SetPageUptodate(new_page);
+ mmun_start = haddr;
+ mmun_end = haddr + HPAGE_PMD_SIZE;
+ mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
+
spin_lock(&mm->page_table_lock);
put_page(page);
if (unlikely(!pmd_same(*pmd, orig_pmd))) {
spin_unlock(&mm->page_table_lock);
mem_cgroup_uncharge_page(new_page);
put_page(new_page);
- goto out;
+ goto out_mn;
} else {
pmd_t entry;
VM_BUG_ON(!PageHead(page));
entry = mk_pmd(new_page, vma->vm_page_prot);
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
entry = pmd_mkhuge(entry);
- pmdp_clear_flush_notify(vma, haddr, pmd);
+ pmdp_clear_flush(vma, haddr, pmd);
page_add_new_anon_rmap(new_page, vma, haddr);
set_pmd_at(mm, haddr, pmd, entry);
- update_mmu_cache(vma, address, entry);
+ update_mmu_cache_pmd(vma, address, pmd);
page_remove_rmap(page);
put_page(page);
ret |= VM_FAULT_WRITE;
}
-out_unlock:
spin_unlock(&mm->page_table_lock);
+out_mn:
+ mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
out:
return ret;
+out_unlock:
+ spin_unlock(&mm->page_table_lock);
+ return ret;
}
-struct page *follow_trans_huge_pmd(struct mm_struct *mm,
+struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
unsigned long addr,
pmd_t *pmd,
unsigned int flags)
{
+ struct mm_struct *mm = vma->vm_mm;
struct page *page = NULL;
assert_spin_locked(&mm->page_table_lock);
_pmd = pmd_mkyoung(pmd_mkdirty(*pmd));
set_pmd_at(mm, addr & HPAGE_PMD_MASK, pmd, _pmd);
}
+ if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
+ if (page->mapping && trylock_page(page)) {
+ lru_add_drain();
+ if (page->mapping)
+ mlock_vma_page(page);
+ unlock_page(page);
+ }
+ }
page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT;
VM_BUG_ON(!PageCompound(page));
if (flags & FOLL_GET)
if (__pmd_trans_huge_lock(pmd, vma) == 1) {
struct page *page;
pgtable_t pgtable;
- pgtable = get_pmd_huge_pte(tlb->mm);
- page = pmd_page(*pmd);
- pmd_clear(pmd);
+ pmd_t orig_pmd;
+ pgtable = pgtable_trans_huge_withdraw(tlb->mm);
+ orig_pmd = pmdp_get_and_clear(tlb->mm, addr, pmd);
+ page = pmd_page(orig_pmd);
tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
page_remove_rmap(page);
VM_BUG_ON(page_mapcount(page) < 0);
struct mm_struct *mm = vma->vm_mm;
pmd_t *pmd;
int ret = 0;
+ /* For mmu_notifiers */
+ const unsigned long mmun_start = address;
+ const unsigned long mmun_end = address + HPAGE_PMD_SIZE;
+ mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
spin_lock(&mm->page_table_lock);
pmd = page_check_address_pmd(page, mm, address,
PAGE_CHECK_ADDRESS_PMD_NOTSPLITTING_FLAG);
* and it won't wait on the anon_vma->root->mutex to
* serialize against split_huge_page*.
*/
- pmdp_splitting_flush_notify(vma, address, pmd);
+ pmdp_splitting_flush(vma, address, pmd);
ret = 1;
}
spin_unlock(&mm->page_table_lock);
+ mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
return ret;
}
pmd = page_check_address_pmd(page, mm, address,
PAGE_CHECK_ADDRESS_PMD_SPLITTING_FLAG);
if (pmd) {
- pgtable = get_pmd_huge_pte(mm);
+ pgtable = pgtable_trans_huge_withdraw(mm);
pmd_populate(mm, &_pmd, pgtable);
- for (i = 0, haddr = address; i < HPAGE_PMD_NR;
- i++, haddr += PAGE_SIZE) {
+ haddr = address;
+ for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
pte_t *pte, entry;
BUG_ON(PageCompound(page+i));
entry = mk_pte(page + i, vma->vm_page_prot);
* SMP TLB and finally we write the non-huge version
* of the pmd entry with pmd_populate.
*/
- set_pmd_at(mm, address, pmd, pmd_mknotpresent(*pmd));
- flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+ pmdp_invalidate(vma, address, pmd);
pmd_populate(mm, pmd, pgtable);
ret = 1;
}
struct anon_vma *anon_vma)
{
int mapcount, mapcount2;
+ pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
struct anon_vma_chain *avc;
BUG_ON(!PageHead(page));
BUG_ON(PageTail(page));
mapcount = 0;
- list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
+ anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
struct vm_area_struct *vma = avc->vma;
unsigned long addr = vma_address(page, vma);
BUG_ON(is_vma_temporary_stack(vma));
- if (addr == -EFAULT)
- continue;
mapcount += __split_huge_page_splitting(page, vma, addr);
}
/*
__split_huge_page_refcount(page);
mapcount2 = 0;
- list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
+ anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
struct vm_area_struct *vma = avc->vma;
unsigned long addr = vma_address(page, vma);
BUG_ON(is_vma_temporary_stack(vma));
- if (addr == -EFAULT)
- continue;
mapcount2 += __split_huge_page_map(page, vma, addr);
}
if (mapcount != mapcount2)
int hugepage_madvise(struct vm_area_struct *vma,
unsigned long *vm_flags, int advice)
{
+ struct mm_struct *mm = vma->vm_mm;
+
switch (advice) {
case MADV_HUGEPAGE:
/*
*/
if (*vm_flags & (VM_HUGEPAGE | VM_NO_THP))
return -EINVAL;
+ if (mm->def_flags & VM_NOHUGEPAGE)
+ return -EINVAL;
*vm_flags &= ~VM_NOHUGEPAGE;
*vm_flags |= VM_HUGEPAGE;
/*
return false;
*wait = false;
+ *hpage = NULL;
khugepaged_alloc_sleep();
} else if (*hpage) {
put_page(*hpage);
spinlock_t *ptl;
int isolated;
unsigned long hstart, hend;
+ unsigned long mmun_start; /* For mmu_notifiers */
+ unsigned long mmun_end; /* For mmu_notifiers */
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
pte = pte_offset_map(pmd, address);
ptl = pte_lockptr(mm, pmd);
+ mmun_start = address;
+ mmun_end = address + HPAGE_PMD_SIZE;
+ mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
spin_lock(&mm->page_table_lock); /* probably unnecessary */
/*
* After this gup_fast can't run anymore. This also removes
* huge and small TLB entries for the same virtual address
* to avoid the risk of CPU bugs in that area.
*/
- _pmd = pmdp_clear_flush_notify(vma, address, pmd);
+ _pmd = pmdp_clear_flush(vma, address, pmd);
spin_unlock(&mm->page_table_lock);
+ mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
spin_lock(ptl);
isolated = __collapse_huge_page_isolate(vma, address, pte);
pte_unmap(pte);
__SetPageUptodate(new_page);
pgtable = pmd_pgtable(_pmd);
- VM_BUG_ON(page_count(pgtable) != 1);
- VM_BUG_ON(page_mapcount(pgtable) != 0);
_pmd = mk_pmd(new_page, vma->vm_page_prot);
_pmd = maybe_pmd_mkwrite(pmd_mkdirty(_pmd), vma);
BUG_ON(!pmd_none(*pmd));
page_add_new_anon_rmap(new_page, vma, address);
set_pmd_at(mm, address, pmd, _pmd);
- update_mmu_cache(vma, address, _pmd);
- prepare_pmd_huge_pte(pgtable, mm);
+ update_mmu_cache_pmd(vma, address, pmd);
+ pgtable_trans_huge_deposit(mm, pgtable);
spin_unlock(&mm->page_table_lock);
*hpage = NULL;