* inode->i_mutex (while writing or truncating, not reading or faulting)
* mm->mmap_sem
* page->flags PG_locked (lock_page)
- * mapping->i_mmap_rwsem
- * anon_vma->rwsem
- * mm->page_table_lock or pte_lock
- * zone->lru_lock (in mark_page_accessed, isolate_lru_page)
- * swap_lock (in swap_duplicate, swap_info_get)
- * mmlist_lock (in mmput, drain_mmlist and others)
- * mapping->private_lock (in __set_page_dirty_buffers)
- * mem_cgroup_{begin,end}_page_stat (memcg->move_lock)
- * mapping->tree_lock (widely used)
- * inode->i_lock (in set_page_dirty's __mark_inode_dirty)
- * bdi.wb->list_lock (in set_page_dirty's __mark_inode_dirty)
- * sb_lock (within inode_lock in fs/fs-writeback.c)
- * mapping->tree_lock (widely used, in set_page_dirty,
- * in arch-dependent flush_dcache_mmap_lock,
- * within bdi.wb->list_lock in __sync_single_inode)
+ * hugetlbfs_i_mmap_rwsem_key (in huge_pmd_share)
+ * mapping->i_mmap_rwsem
+ * anon_vma->rwsem
+ * mm->page_table_lock or pte_lock
+ * zone->lru_lock (in mark_page_accessed, isolate_lru_page)
+ * swap_lock (in swap_duplicate, swap_info_get)
+ * mmlist_lock (in mmput, drain_mmlist and others)
+ * mapping->private_lock (in __set_page_dirty_buffers)
+ * mem_cgroup_{begin,end}_page_stat (memcg->move_lock)
+ * mapping->tree_lock (widely used)
+ * inode->i_lock (in set_page_dirty's __mark_inode_dirty)
+ * bdi.wb->list_lock (in set_page_dirty's __mark_inode_dirty)
+ * sb_lock (within inode_lock in fs/fs-writeback.c)
+ * mapping->tree_lock (widely used, in set_page_dirty,
+ * in arch-dependent flush_dcache_mmap_lock,
+ * within bdi.wb->list_lock in __sync_single_inode)
*
* anon_vma->rwsem,mapping->i_mutex (memory_failure, collect_procs_anon)
* ->tasklist_lock
anon_vma_unlock_read(anon_vma);
}
-/*
- * At what user virtual address is page expected in @vma?
- */
-static inline unsigned long
-__vma_address(struct page *page, struct vm_area_struct *vma)
-{
- pgoff_t pgoff = page_to_pgoff(page);
- return vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
-}
-
-inline unsigned long
-vma_address(struct page *page, struct vm_area_struct *vma)
-{
- unsigned long address = __vma_address(page, vma);
-
- /* page should be within @vma mapping range */
- VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
-
- return address;
-}
-
#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
static void percpu_flush_tlb_batch_pages(void *data)
{
return 1;
}
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+/*
+ * Check that @page is mapped at @address into @mm. In contrast to
+ * page_check_address(), this function can handle transparent huge pages.
+ *
+ * On success returns true with pte mapped and locked. For PMD-mapped
+ * transparent huge pages *@ptep is set to NULL.
+ */
+bool page_check_address_transhuge(struct page *page, struct mm_struct *mm,
+ unsigned long address, pmd_t **pmdp,
+ pte_t **ptep, spinlock_t **ptlp)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+ spinlock_t *ptl;
+
+ if (unlikely(PageHuge(page))) {
+ /* when pud is not present, pte will be NULL */
+ pte = huge_pte_offset(mm, address);
+ if (!pte)
+ return false;
+
+ ptl = huge_pte_lockptr(page_hstate(page), mm, pte);
+ pmd = NULL;
+ goto check_pte;
+ }
+
+ pgd = pgd_offset(mm, address);
+ if (!pgd_present(*pgd))
+ return false;
+ pud = pud_offset(pgd, address);
+ if (!pud_present(*pud))
+ return false;
+ pmd = pmd_offset(pud, address);
+
+ if (pmd_trans_huge(*pmd)) {
+ ptl = pmd_lock(mm, pmd);
+ if (!pmd_present(*pmd))
+ goto unlock_pmd;
+ if (unlikely(!pmd_trans_huge(*pmd))) {
+ spin_unlock(ptl);
+ goto map_pte;
+ }
+
+ if (pmd_page(*pmd) != page)
+ goto unlock_pmd;
+
+ pte = NULL;
+ goto found;
+unlock_pmd:
+ spin_unlock(ptl);
+ return false;
+ } else {
+ pmd_t pmde = *pmd;
+
+ barrier();
+ if (!pmd_present(pmde) || pmd_trans_huge(pmde))
+ return false;
+ }
+map_pte:
+ pte = pte_offset_map(pmd, address);
+ if (!pte_present(*pte)) {
+ pte_unmap(pte);
+ return false;
+ }
+
+ ptl = pte_lockptr(mm, pmd);
+check_pte:
+ spin_lock(ptl);
+
+ if (!pte_present(*pte)) {
+ pte_unmap_unlock(pte, ptl);
+ return false;
+ }
+
+ /* THP can be referenced by any subpage */
+ if (pte_pfn(*pte) - page_to_pfn(page) >= hpage_nr_pages(page)) {
+ pte_unmap_unlock(pte, ptl);
+ return false;
+ }
+found:
+ *ptep = pte;
+ *pmdp = pmd;
+ *ptlp = ptl;
+ return true;
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
struct page_referenced_arg {
int mapcount;
int referenced;
unsigned long address, void *arg)
{
struct mm_struct *mm = vma->vm_mm;
+ struct page_referenced_arg *pra = arg;
+ pmd_t *pmd;
+ pte_t *pte;
spinlock_t *ptl;
int referenced = 0;
- struct page_referenced_arg *pra = arg;
- if (unlikely(PageTransHuge(page))) {
- pmd_t *pmd;
+ if (!page_check_address_transhuge(page, mm, address, &pmd, &pte, &ptl))
+ return SWAP_AGAIN;
- /*
- * rmap might return false positives; we must filter
- * these out using page_check_address_pmd().
- */
- pmd = page_check_address_pmd(page, mm, address, &ptl);
- if (!pmd)
- return SWAP_AGAIN;
-
- if (vma->vm_flags & VM_LOCKED) {
- spin_unlock(ptl);
- pra->vm_flags |= VM_LOCKED;
- return SWAP_FAIL; /* To break the loop */
- }
-
- if (pmdp_clear_flush_young_notify(vma, address, pmd))
- referenced++;
+ if (vma->vm_flags & VM_LOCKED) {
+ if (pte)
+ pte_unmap(pte);
spin_unlock(ptl);
- } else {
- pte_t *pte;
-
- /*
- * rmap might return false positives; we must filter
- * these out using page_check_address().
- */
- pte = page_check_address(page, mm, address, &ptl, 0);
- if (!pte)
- return SWAP_AGAIN;
-
- if (vma->vm_flags & VM_LOCKED) {
- pte_unmap_unlock(pte, ptl);
- pra->vm_flags |= VM_LOCKED;
- return SWAP_FAIL; /* To break the loop */
- }
+ pra->vm_flags |= VM_LOCKED;
+ return SWAP_FAIL; /* To break the loop */
+ }
+ if (pte) {
if (ptep_clear_flush_young_notify(vma, address, pte)) {
/*
* Don't treat a reference through a sequentially read
if (likely(!(vma->vm_flags & VM_SEQ_READ)))
referenced++;
}
- pte_unmap_unlock(pte, ptl);
+ pte_unmap(pte);
+ } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
+ if (pmdp_clear_flush_young_notify(vma, address, pmd))
+ referenced++;
+ } else {
+ /* unexpected pmd-mapped page? */
+ WARN_ON_ONCE(1);
}
+ spin_unlock(ptl);
if (referenced)
clear_page_idle(page);
int ret;
int we_locked = 0;
struct page_referenced_arg pra = {
- .mapcount = page_mapcount(page),
+ .mapcount = total_mapcount(page),
.memcg = memcg,
};
struct rmap_walk_control rwc = {
bool compound = flags & RMAP_COMPOUND;
bool first;
- if (PageTransCompound(page)) {
+ if (compound) {
+ atomic_t *mapcount;
VM_BUG_ON_PAGE(!PageLocked(page), page);
- if (compound) {
- atomic_t *mapcount;
-
- VM_BUG_ON_PAGE(!PageTransHuge(page), page);
- mapcount = compound_mapcount_ptr(page);
- first = atomic_inc_and_test(mapcount);
- } else {
- /* Anon THP always mapped first with PMD */
- first = 0;
- VM_BUG_ON_PAGE(!page_mapcount(page), page);
- atomic_inc(&page->_mapcount);
- }
+ VM_BUG_ON_PAGE(!PageTransHuge(page), page);
+ mapcount = compound_mapcount_ptr(page);
+ first = atomic_inc_and_test(mapcount);
} else {
first = atomic_inc_and_test(&page->_mapcount);
}
* disabled.
*/
if (compound) {
- VM_BUG_ON_PAGE(!PageTransHuge(page), page);
__inc_zone_page_state(page,
NR_ANON_TRANSPARENT_HUGEPAGES);
}
nr = HPAGE_PMD_NR;
}
- if (nr)
+ if (unlikely(PageMlocked(page)))
+ clear_page_mlock(page);
+
+ if (nr) {
__mod_zone_page_state(page_zone(page), NR_ANON_PAGES, -nr);
+ deferred_split_huge_page(page);
+ }
}
/**
if (unlikely(PageMlocked(page)))
clear_page_mlock(page);
+ if (PageTransCompound(page))
+ deferred_split_huge_page(compound_head(page));
+
/*
* It would be tidy to reset the PageAnon mapping here,
* but that might overwrite a racing page_add_anon_rmap
*/
}
+struct rmap_private {
+ enum ttu_flags flags;
+ int lazyfreed;
+};
+
/*
* @arg: enum ttu_flags will be passed to this argument
*/
pte_t pteval;
spinlock_t *ptl;
int ret = SWAP_AGAIN;
- enum ttu_flags flags = (enum ttu_flags)arg;
+ struct rmap_private *rp = arg;
+ enum ttu_flags flags = rp->flags;
/* munlock has nothing to gain from examining un-locked vmas */
if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED))
* See handle_pte_fault() ...
*/
VM_BUG_ON_PAGE(!PageSwapCache(page), page);
+
+ if (!PageDirty(page) && (flags & TTU_LZFREE)) {
+ /* It's a freeable page by MADV_FREE */
+ dec_mm_counter(mm, MM_ANONPAGES);
+ rp->lazyfreed++;
+ goto discard;
+ }
+
if (swap_duplicate(entry) < 0) {
set_pte_at(mm, address, pte, pteval);
ret = SWAP_FAIL;
} else
dec_mm_counter(mm, mm_counter_file(page));
+discard:
page_remove_rmap(page, PageHuge(page));
page_cache_release(page);
int try_to_unmap(struct page *page, enum ttu_flags flags)
{
int ret;
+ struct rmap_private rp = {
+ .flags = flags,
+ .lazyfreed = 0,
+ };
+
struct rmap_walk_control rwc = {
.rmap_one = try_to_unmap_one,
- .arg = (void *)flags,
+ .arg = &rp,
.done = page_not_mapped,
.anon_lock = page_lock_anon_vma_read,
};
ret = rmap_walk(page, &rwc);
- if (ret != SWAP_MLOCK && !page_mapped(page))
+ if (ret != SWAP_MLOCK && !page_mapped(page)) {
ret = SWAP_SUCCESS;
+ if (rp.lazyfreed && !PageDirty(page))
+ ret = SWAP_LZFREE;
+ }
return ret;
}
int try_to_munlock(struct page *page)
{
int ret;
+ struct rmap_private rp = {
+ .flags = TTU_MUNLOCK,
+ .lazyfreed = 0,
+ };
+
struct rmap_walk_control rwc = {
.rmap_one = try_to_unmap_one,
- .arg = (void *)TTU_MUNLOCK,
+ .arg = &rp,
.done = page_not_mapped,
.anon_lock = page_lock_anon_vma_read,