When doing cow faults, we cannot directly fill in the PTE as we do for other
faults, because we rely on generic code to do proper accounting of the cowed
page. We also have no page to lock to protect against races with truncate, as
other faults do, yet we need the protection to extend until the moment the
generic code inserts the cowed page into the PTE; by that point we are no
longer protected by the fs-specific i_mmap_sem. So far we relied on
i_mmap_lock for this protection, but that is completely special to cow
faults. To make fault locking more uniform, use the DAX entry lock instead.
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
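To make the locking rule described above concrete, here is a minimal userspace sketch (not kernel code): a pthread mutex stands in for the DAX radix tree entry lock, and the thread names, the pte_installed flag and the timing are illustrative only. It shows the protocol the patch switches to: the fault side locks the entry and keeps it locked until the generic COW code has installed the PTE, so a concurrent truncate blocks on the same lock and never races with a half-finished cow fault.

/* Toy model only; build with: cc -pthread cow_entry_lock_sketch.c */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t entry_lock = PTHREAD_MUTEX_INITIALIZER; /* stand-in for the DAX entry lock */
static int pte_installed;	/* stand-in for "cowed page is in the PTE" */

/* Models ->fault returning VM_FAULT_DAX_LOCKED with the entry still locked,
 * then the generic code finishing the copy, installing the PTE and finally
 * dropping the entry lock (dax_unlock_mapping_entry() in the real code). */
static void *cow_fault_thread(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&entry_lock);	/* fault handler locks the entry */
	printf("fault: entry locked (VM_FAULT_DAX_LOCKED)\n");
	usleep(10000);				/* copy of the page, accounting, ... */
	pte_installed = 1;			/* generic code inserts the cowed page */
	pthread_mutex_unlock(&entry_lock);	/* only now release the entry */
	printf("fault: PTE installed, entry unlocked\n");
	return NULL;
}

/* Models truncate: it must take the same entry lock, so while a cow fault
 * is in flight it waits and only proceeds once the PTE is in place. */
static void *truncate_thread(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&entry_lock);
	printf("truncate: got entry lock, pte_installed=%d\n", pte_installed);
	pthread_mutex_unlock(&entry_lock);
	return NULL;
}

int main(void)
{
	pthread_t fault, trunc;

	pthread_create(&fault, NULL, cow_fault_thread, NULL);
	usleep(1000);		/* let the fault path grab the lock first */
	pthread_create(&trunc, NULL, truncate_thread, NULL);
	pthread_join(fault, NULL);
	pthread_join(trunc, NULL);
	return 0;
}

Before this patch the same window was covered by taking i_mmap_lock for read in the fault handler and dropping it in do_cow_fault(); the sketch only changes which lock plays that role, which is exactly what the diff below does.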
diff --git a/fs/dax.c b/fs/dax.c
-static void unlock_mapping_entry(struct address_space *mapping, pgoff_t index)
+void dax_unlock_mapping_entry(struct address_space *mapping, pgoff_t index)
 		unlock_page(entry);
 		put_page(entry);
 	} else {
-		unlock_mapping_entry(mapping, index);
+		dax_unlock_mapping_entry(mapping, index);
 			goto unlock_entry;
 		if (!radix_tree_exceptional_entry(entry)) {
 			vmf->page = entry;
-		} else {
-			unlock_mapping_entry(mapping, vmf->pgoff);
-			i_mmap_lock_read(mapping);
-			vmf->page = NULL;
+			return VM_FAULT_LOCKED;
 		}
-		return VM_FAULT_LOCKED;
+		vmf->entry = entry;
+		return VM_FAULT_DAX_LOCKED;
 	}
 	if (!buffer_mapped(&bh)) {
diff --git a/include/linux/dax.h b/include/linux/dax.h
 #ifdef CONFIG_FS_DAX
 struct page *read_dax_sector(struct block_device *bdev, sector_t n);
+void dax_unlock_mapping_entry(struct address_space *mapping, pgoff_t index);
 int __dax_zero_page_range(struct block_device *bdev, sector_t sector,
 		unsigned int offset, unsigned int length);
 #else
 {
 	return ERR_PTR(-ENXIO);
 }
+/* Shouldn't ever be called when dax is disabled. */
+static inline void dax_unlock_mapping_entry(struct address_space *mapping,
+		pgoff_t index)
+{
+	BUG();
+}
 static inline int __dax_zero_page_range(struct block_device *bdev,
 		sector_t sector, unsigned int offset, unsigned int length)
 {
diff --git a/include/linux/mm.h b/include/linux/mm.h
 					 * is set (which is also implied by
 					 * VM_FAULT_ERROR).
 					 */
+	void *entry;			/* ->fault handler can alternatively
+					 * return locked DAX entry. In that
+					 * case handler should return
+					 * VM_FAULT_DAX_LOCKED and fill in
+					 * entry here.
+					 */
 	/* for ->map_pages() only */
 	pgoff_t max_pgoff;		/* map pages for offset from pgoff till
 					 * max_pgoff inclusive */
 #define VM_FAULT_LOCKED	0x0200	/* ->fault locked the returned page */
 #define VM_FAULT_RETRY	0x0400	/* ->fault blocked, must retry */
 #define VM_FAULT_FALLBACK 0x0800	/* huge page fault failed, fall back to small */
+#define VM_FAULT_DAX_LOCKED 0x1000 /* ->fault has locked DAX entry */
 #define VM_FAULT_HWPOISON_LARGE_MASK 0xf000 /* encodes hpage index for large hwpoison */
diff --git a/mm/memory.c b/mm/memory.c
 #include <linux/dma-debug.h>
 #include <linux/debugfs.h>
 #include <linux/userfaultfd_k.h>
+#include <linux/dax.h>
 #include <asm/io.h>
 #include <asm/mmu_context.h>
  */
 static int __do_fault(struct vm_area_struct *vma, unsigned long address,
 			pgoff_t pgoff, unsigned int flags,
-			struct page *cow_page, struct page **page)
+			struct page *cow_page, struct page **page,
+			void **entry)
 {
 	struct vm_fault vmf;
 	int ret;
 	ret = vma->vm_ops->fault(vma, &vmf);
 	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
 		return ret;
-	if (!vmf.page)
-		goto out;
+	if (ret & VM_FAULT_DAX_LOCKED) {
+		*entry = vmf.entry;
+		return ret;
+	}
 	if (unlikely(PageHWPoison(vmf.page))) {
 		if (ret & VM_FAULT_LOCKED)
 	else
 		VM_BUG_ON_PAGE(!PageLocked(vmf.page), vmf.page);
- out:
 	*page = vmf.page;
 	return ret;
 }
 		pte_unmap_unlock(pte, ptl);
 	}
-	ret = __do_fault(vma, address, pgoff, flags, NULL, &fault_page);
+	ret = __do_fault(vma, address, pgoff, flags, NULL, &fault_page, NULL);
 	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
 		return ret;
 		pgoff_t pgoff, unsigned int flags, pte_t orig_pte)
 {
 	struct page *fault_page, *new_page;
+	void *fault_entry;
 	struct mem_cgroup *memcg;
 	spinlock_t *ptl;
 	pte_t *pte;
-	ret = __do_fault(vma, address, pgoff, flags, new_page, &fault_page);
+	ret = __do_fault(vma, address, pgoff, flags, new_page, &fault_page,
+			 &fault_entry);
 	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
 		goto uncharge_out;
-	if (fault_page)
+	if (!(ret & VM_FAULT_DAX_LOCKED))
 		copy_user_highpage(new_page, fault_page, address, vma);
 	__SetPageUptodate(new_page);
 	pte = pte_offset_map_lock(mm, pmd, address, &ptl);
 	if (unlikely(!pte_same(*pte, orig_pte))) {
 		pte_unmap_unlock(pte, ptl);
-		if (fault_page) {
+		if (!(ret & VM_FAULT_DAX_LOCKED)) {
 			unlock_page(fault_page);
 			put_page(fault_page);
 		} else {
-			/*
-			 * The fault handler has no page to lock, so it holds
-			 * i_mmap_lock for read to protect against truncate.
-			 */
-			i_mmap_unlock_read(vma->vm_file->f_mapping);
+			dax_unlock_mapping_entry(vma->vm_file->f_mapping,
+						 pgoff);
 	mem_cgroup_commit_charge(new_page, memcg, false, false);
 	lru_cache_add_active_or_unevictable(new_page, vma);
 	pte_unmap_unlock(pte, ptl);
-	if (fault_page) {
+	if (!(ret & VM_FAULT_DAX_LOCKED)) {
 		unlock_page(fault_page);
 		put_page(fault_page);
 	} else {
-		/*
-		 * The fault handler has no page to lock, so it holds
-		 * i_mmap_lock for read to protect against truncate.
-		 */
-		i_mmap_unlock_read(vma->vm_file->f_mapping);
+		dax_unlock_mapping_entry(vma->vm_file->f_mapping, pgoff);
 	}
 	return ret;
 uncharge_out:
 	int dirtied = 0;
 	int ret, tmp;
-	ret = __do_fault(vma, address, pgoff, flags, NULL, &fault_page);
+	ret = __do_fault(vma, address, pgoff, flags, NULL, &fault_page, NULL);
 	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
 		return ret;