[deliverable/linux.git] / include / linux / pagemap.h

#ifndef _LINUX_PAGEMAP_H
#define _LINUX_PAGEMAP_H

/*
 * Copyright 1995 Linus Torvalds
 */
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/list.h>
#include <linux/highmem.h>
#include <linux/compiler.h>
#include <asm/uaccess.h>
#include <linux/gfp.h>
#include <linux/bitops.h>
#include <linux/hardirq.h> /* for in_interrupt() */
#include <linux/hugetlb_inline.h>

/*
 * Bit numbers of the state flags stored in mapping->flags.  They start at
 * __GFP_BITS_SHIFT because the bits below that hold the mapping's page
 * allocation mode flags.
 */
enum mapping_flags {
	/* IO error on async write */
	AS_EIO = __GFP_BITS_SHIFT + 0,
	/* ENOSPC on async write */
	AS_ENOSPC = __GFP_BITS_SHIFT + 1,
	/* under mm_take_all_locks() */
	AS_MM_ALL_LOCKS = __GFP_BITS_SHIFT + 2,
	/* e.g., ramdisk, SHM_LOCK */
	AS_UNEVICTABLE = __GFP_BITS_SHIFT + 3,
	/* final truncate in progress */
	AS_EXITING = __GFP_BITS_SHIFT + 4,
};

static inline void mapping_set_error(struct address_space *mapping, int error)
{
	if (unlikely(error)) {
		if (error == -ENOSPC)
			set_bit(AS_ENOSPC, &mapping->flags);
		else
			set_bit(AS_EIO, &mapping->flags);
	}
}

/* Set the AS_UNEVICTABLE bit in @mapping's flags word. */
static inline void mapping_set_unevictable(struct address_space *mapping)
{
	unsigned long *flags = &mapping->flags;

	set_bit(AS_UNEVICTABLE, flags);
}

/* Clear the AS_UNEVICTABLE bit in @mapping's flags word. */
static inline void mapping_clear_unevictable(struct address_space *mapping)
{
	unsigned long *flags = &mapping->flags;

	clear_bit(AS_UNEVICTABLE, flags);
}

/*
 * Test AS_UNEVICTABLE on @mapping.  A NULL @mapping is accepted and
 * reported as 0.
 */
static inline int mapping_unevictable(struct address_space *mapping)
{
	if (!mapping)
		return 0;

	return test_bit(AS_UNEVICTABLE, &mapping->flags);
}

/* Set the AS_EXITING bit in @mapping's flags word. */
static inline void mapping_set_exiting(struct address_space *mapping)
{
	unsigned long *flags = &mapping->flags;

	set_bit(AS_EXITING, flags);
}

/* Return the result of testing AS_EXITING in @mapping's flags word. */
static inline int mapping_exiting(struct address_space *mapping)
{
	int exiting = test_bit(AS_EXITING, &mapping->flags);

	return exiting;
}

static inline gfp_t mapping_gfp_mask(struct address_space * mapping)
{
	return (__force gfp_t)mapping->flags & __GFP_BITS_MASK;
}

/*
 * Return only those bits of @gfp_mask that are also set in the mapping's
 * own gfp mask.
 */
static inline gfp_t mapping_gfp_constraint(struct address_space *mapping,
		gfp_t gfp_mask)
{
	gfp_t allowed = mapping_gfp_mask(mapping);

	return allowed & gfp_mask;
}

/*
 * Replace the gfp bits of m->flags with @mask, keeping every bit outside
 * __GFP_BITS_MASK as it was.
 *
 * This is a plain read-modify-write, i.e. non-atomic.  Only to be used
 * before the mapping is activated.  Probably needs a barrier...
 */
static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
{
	unsigned long kept = m->flags & ~(__force unsigned long)__GFP_BITS_MASK;

	m->flags = kept | (__force unsigned long)mask;
}

/*
 * Prototype only; the definition is not part of this header.  Takes a
 * pointer to page pointers, a count @nr and a @cold flag -- presumably @nr
 * entries of @pages are processed; confirm against the definition.
 */
void release_pages(struct page **pages, int nr, bool cold);

/*
 * speculatively take a reference to a page.
 * If the page is free (_count == 0), then _count is untouched, and 0
 * is returned. Otherwise, _count is incremented by 1 and 1 is returned.
 *
 * This function must be called inside the same rcu_read_lock() section as has
 * been used to lookup the page in the pagecache radix-tree (or page table):
 * this allows allocators to use a synchronize_rcu() to stabilize _count.
 *
 * Unless an RCU grace period has passed, the count of all pages coming out
 * of the allocator must be considered unstable. page_count may return higher
 * than expected, and put_page must be able to do the right thing when the
 * page has been finished with, no matter what it is subsequently allocated
 * for (because put_page is what is used here to drop an invalid speculative
 * reference).
 *
 * This is the interesting part of the lockless pagecache (and lockless
 * get_user_pages) locking protocol, where the lookup-side (eg. find_get_page)
 * has the following pattern:
 * 1. find page in radix tree
 * 2. conditionally increment refcount
 * 3. check the page is still in pagecache (if no, goto 1)
 *
 * Remove-side that cares about stability of _count (eg. reclaim) has the
 * following (with tree_lock held for write):
 * A. atomically check refcount is correct and set it to 0 (atomic_cmpxchg)
 * B. remove page from pagecache
 * C. free the page
 *
 * There are 2 critical interleavings that matter:
 * - 2 runs before A: in this case, A sees elevated refcount and bails out
 * - A runs before 2: in this case, 2 sees zero refcount and retries;
 *   subsequently, B will complete and 1 will find no page, causing the
 *   lookup to return NULL.
 *
 * It is possible that between 1 and 2, the page is removed then the exact same
 * page is inserted into the same position in pagecache. That's OK: the
 * old find_get_page using tree_lock could equally have run before or after
 * such a re-insertion, depending on order that locks are granted.
 *
 * Lookups racing against pagecache insertion isn't a big problem: either 1
 * will find the page or it will not. Likewise, the old find_get_page could run
 * either before the insertion or afterwards, depending on timing.
 */
static inline int page_cache_get_speculative(struct page *page)
{
	/* Must not be called from interrupt context. */
	VM_BUG_ON(in_interrupt());

#ifndef CONFIG_TINY_RCU
	/*
	 * A zero refcount means the page has been freed or is about to be.
	 * Report failure so that the caller retries its lookup, as the
	 * locking protocol described above this function requires.
	 */
	if (unlikely(!get_page_unless_zero(page)))
		return 0;
#else
# ifdef CONFIG_PREEMPT_COUNT
	VM_BUG_ON(!in_atomic());
# endif
	/*
	 * We rely on rcu_read_lock() having disabled preemption for us.
	 *
	 * Pagecache is not truncated from interrupt context, so a page found
	 * in the radix tree has its refcount pinned while preemption is off.
	 * The "speculative get" that SMP needs is therefore unnecessary and
	 * a plain increment is enough.
	 */
	VM_BUG_ON_PAGE(page_count(page) == 0, page);
	page_ref_inc(page);
#endif

	/* Tail pages are never expected here. */
	VM_BUG_ON_PAGE(PageTail(page), page);

	return 1;
}

/*
 * Same as page_cache_get_speculative(), but takes @count references in one
 * go instead of a single one (the two could just be merged).
 *
 * Returns 1 when the references were taken, 0 when the page's refcount was
 * found to be zero; in that case the count is left untouched and the caller
 * is expected to retry its lookup.
 */
static inline int page_cache_add_speculative(struct page *page, int count)
{
	VM_BUG_ON(in_interrupt());

	/*
	 * Select the non-speculative fast path with the same condition that
	 * page_cache_get_speculative() uses, so both helpers always follow
	 * the same protocol for a given kernel configuration.
	 */
#ifdef CONFIG_TINY_RCU
# ifdef CONFIG_PREEMPT_COUNT
	VM_BUG_ON(!in_atomic());
# endif
	VM_BUG_ON_PAGE(page_count(page) == 0, page);
	page_ref_add(page, count);

#else
	if (unlikely(!page_ref_add_unless(page, count, 0)))
		return 0;
#endif
	/* Only head (or non-compound) pages are expected here. */
	VM_BUG_ON_PAGE(PageCompound(page) && page != compound_head(page), page);

	return 1;
}

/*
 * Page cache page allocator.  Without CONFIG_NUMA it is a thin inline
 * wrapper around alloc_pages(gfp, 0); with CONFIG_NUMA only the prototype
 * is provided here and the definition lives elsewhere.
 */
#ifndef CONFIG_NUMA
static inline struct page *__page_cache_alloc(gfp_t gfp)
{
	return alloc_pages(gfp, 0);
}
#else
extern struct page *__page_cache_alloc(gfp_t gfp);
#endif

static inline struct page *page_cache_alloc(struct address_space *x)
{
	return __page_cache_alloc(mapping_gfp_mask(x));
}

static inline struct page *page_cache_alloc_cold(struct address_space *x)
{
	return __page_cache_alloc(mapping_gfp_mask(x)|__GFP_COLD);
}

static inline struct page *page_cache_alloc_readahead(struct address_space *x)
{
	return __page_cache_alloc(mapping_gfp_mask(x) |
				  __GFP_COLD | __GFP_NORETRY | __GFP_NOWARN);
}

/*
 * Function type of the "filler" callback taken by read_cache_page() and
 * read_cache_pages(); read_mapping_page() casts ->readpage to this type.
 */
typedef int filler_t(void *, struct page *);

/*
 * Hole lookup helpers; prototypes only, defined outside this header.  Both
 * take a mapping, a starting @index and a @max_scan limit and return a page
 * offset.  The exact scan semantics are not visible from here.
 */
pgoff_t page_cache_next_hole(struct address_space *mapping,
			     pgoff_t index, unsigned long max_scan);
pgoff_t page_cache_prev_hole(struct address_space *mapping,
			     pgoff_t index, unsigned long max_scan);

/* Flag bits accepted in the fgp_flags argument of pagecache_get_page(). */
#define FGP_ACCESSED		(1 << 0)
#define FGP_LOCK		(1 << 1)
#define FGP_CREAT		(1 << 2)
#define FGP_WRITE		(1 << 3)
#define FGP_NOFS		(1 << 4)
#define FGP_NOWAIT		(1 << 5)

/*
 * Common lookup routine behind the find_*_page() and grab_cache_page*()
 * inline wrappers in this header.  @fgp_flags is a mask of FGP_* bits and
 * @cache_gfp_mask is the gfp mask the wrappers pass through.  Prototype
 * only; defined outside this header.
 */
struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset,
		int fgp_flags, gfp_t cache_gfp_mask);

/**
 * find_get_page - find and get a page reference
 * @mapping: the address_space to search
 * @offset: the page index
 *
 * Looks up the page cache slot at @mapping & @offset.  A page found there
 * is returned with an increased refcount; if there is none, %NULL is
 * returned.
 */
static inline struct page *find_get_page(struct address_space *mapping,
					pgoff_t offset)
{
	/* Plain lookup: no FGP_* flags and a zero gfp mask. */
	return pagecache_get_page(mapping, offset, 0 /* fgp_flags */,
				  0 /* cache_gfp_mask */);
}

/*
 * Like find_get_page(), but forwards the caller's @fgp_flags (a mask of
 * FGP_* bits) to pagecache_get_page().  The gfp mask passed on is zero.
 */
static inline struct page *find_get_page_flags(struct address_space *mapping,
					pgoff_t offset, int fgp_flags)
{
	return pagecache_get_page(mapping, offset, fgp_flags,
				  0 /* cache_gfp_mask */);
}

/**
 * find_lock_page - locate, pin and lock a pagecache page
 * @mapping: the address_space to search
 * @offset: the page index
 *
 * Looks up the page cache slot at @mapping & @offset.  If there is a
 * page cache page, it is returned locked and with an increased
 * refcount.
 *
 * Otherwise, %NULL is returned.
 *
 * find_lock_page() may sleep.
 */
static inline struct page *find_lock_page(struct address_space *mapping,
					pgoff_t offset)
{
	/* FGP_LOCK asks pagecache_get_page() to return the page locked. */
	return pagecache_get_page(mapping, offset, FGP_LOCK, 0);
}

/**
 * find_or_create_page - locate or add a pagecache page
 * @mapping: the page's address_space
 * @offset: the page's index into the mapping
 * @gfp_mask: page allocation mode
 *
 * Looks up the page cache slot at @mapping & @offset.  If there is a
 * page cache page, it is returned locked and with an increased
 * refcount.
 *
 * If the page is not present, a new page is allocated using @gfp_mask
 * and added to the page cache and the VM's LRU list.  The page is
 * returned locked and with an increased refcount.
 *
 * On memory exhaustion, %NULL is returned.
 *
 * find_or_create_page() may sleep, even if @gfp_mask specifies an
 * atomic allocation!
 */
static inline struct page *find_or_create_page(struct address_space *mapping,
					pgoff_t offset, gfp_t gfp_mask)
{
	return pagecache_get_page(mapping, offset,
					FGP_LOCK|FGP_ACCESSED|FGP_CREAT,
					gfp_mask);
}

/**
 * grab_cache_page_nowait - returns locked page at given index in given cache
 * @mapping: target address_space
 * @index: the page index
 *
 * Variant of grab_cache_page() that does not wait when the page is
 * unavailable.  Meant for speculative data generators that can simply
 * regenerate the data if the page could not be grabbed.  This routine
 * should be safe to call while holding the lock for another page.
 *
 * __GFP_FS is cleared for the page allocation to avoid recursion into the
 * fs and a deadlock against the caller's locked page.
 */
static inline struct page *grab_cache_page_nowait(struct address_space *mapping,
				pgoff_t index)
{
	gfp_t gfp = mapping_gfp_mask(mapping);
	int fgp_flags = FGP_LOCK|FGP_CREAT|FGP_NOFS|FGP_NOWAIT;

	return pagecache_get_page(mapping, index, fgp_flags, gfp);
}

/*
 * Entry-level and batched lookup routines.  Only the prototypes live in
 * this header; consult the definitions for the exact lookup and
 * return-value semantics.
 */
struct page *find_get_entry(struct address_space *mapping, pgoff_t offset);
struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset);
unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
			  unsigned int nr_entries, struct page **entries,
			  pgoff_t *indices);
unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
			unsigned int nr_pages, struct page **pages);
unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start,
			       unsigned int nr_pages, struct page **pages);
unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
			int tag, unsigned int nr_pages, struct page **pages);
unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
			int tag, unsigned int nr_entries,
			struct page **entries, pgoff_t *indices);

/* Prototype only; defined outside this header. */
struct page *grab_cache_page_write_begin(struct address_space *mapping,
			pgoff_t index, unsigned flags);

/*
 * Return the locked page at @index in @mapping, creating it if needed.
 * The mapping's own gfp mask is used for the allocation.
 */
static inline struct page *grab_cache_page(struct address_space *mapping,
								pgoff_t index)
{
	gfp_t gfp = mapping_gfp_mask(mapping);

	return find_or_create_page(mapping, index, gfp);
}

/*
 * Page cache read helpers; prototypes only, defined outside this header.
 * read_cache_page() and read_cache_pages() take a filler_t callback plus an
 * opaque @data pointer, while read_cache_page_gfp() takes a gfp mask.
 */
extern struct page * read_cache_page(struct address_space *mapping,
				pgoff_t index, filler_t *filler, void *data);
extern struct page * read_cache_page_gfp(struct address_space *mapping,
				pgoff_t index, gfp_t gfp_mask);
extern int read_cache_pages(struct address_space *mapping,
		struct list_head *pages, filler_t *filler, void *data);

/*
 * Call read_cache_page() for @index of @mapping, using the mapping's
 * ->readpage operation (cast to filler_t) as the filler and passing @data
 * through unchanged.
 */
static inline struct page *read_mapping_page(struct address_space *mapping,
				pgoff_t index, void *data)
{
	return read_cache_page(mapping, index,
			       (filler_t *)mapping->a_ops->readpage, data);
}

/*
 * Return the page's offset in units of PAGE_SIZE.
 * (TODO: hugepage should have ->index in PAGE_SIZE)
 */
static inline pgoff_t page_to_pgoff(struct page *page)
{
	/* PageHeadHuge pages: scale ->index by the compound order. */
	if (unlikely(PageHeadHuge(page)))
		return page->index << compound_order(page);

	if (unlikely(PageTransTail(page))) {
		/*
		 * ->index is not initialized for tail pages: take the head
		 * page's index and add this page's distance from the head.
		 */
		return compound_head(page)->index +
		       (page - compound_head(page));
	}

	return page->index;
}

/*
 * Return byte-offset into filesystem object for page.
 */
static inline loff_t page_offset(struct page *page)
{
	return ((loff_t)page->index) << PAGE_SHIFT;
}

/*
 * Same computation as page_offset(), but based on page_file_index(page)
 * instead of page->index.
 */
static inline loff_t page_file_offset(struct page *page)
{
	loff_t index = page_file_index(page);

	return index << PAGE_SHIFT;
}

/*
 * Used by linear_page_index() for VMAs that is_vm_hugetlb_page() reports
 * as hugetlb.  Prototype only; defined outside this header.
 */
extern pgoff_t linear_hugepage_index(struct vm_area_struct *vma,
				     unsigned long address);

/*
 * Translate @address inside @vma into a page offset: the number of pages
 * between vma->vm_start and @address, plus vma->vm_pgoff.  Hugetlb VMAs are
 * handed to linear_hugepage_index() instead.
 */
static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
					unsigned long address)
{
	if (unlikely(is_vm_hugetlb_page(vma)))
		return linear_hugepage_index(vma, address);

	return ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
}

/*
 * Out-of-line routines that the lock_page*() inline helpers in this header
 * call when trylock_page() fails, plus unlock_page().  Prototypes only;
 * defined outside this header.
 */
extern void __lock_page(struct page *page);
extern int __lock_page_killable(struct page *page);
extern int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
				unsigned int flags);
extern void unlock_page(struct page *page);

/*
 * Try to set PG_locked on the page's compound head without waiting.
 * Returns 1 if test_and_set_bit_lock() found the bit clear (lock taken),
 * 0 if it was already set.
 */
static inline int trylock_page(struct page *page)
{
	struct page *head = compound_head(page);

	return likely(!test_and_set_bit_lock(PG_locked, &head->flags));
}

/*
 * Lock @page, falling back to __lock_page() when the trylock fails.
 *
 * lock_page may only be called if we have the page's inode pinned.
 */
static inline void lock_page(struct page *page)
{
	might_sleep();

	/* Fast path: the lock was free. */
	if (trylock_page(page))
		return;

	__lock_page(page);
}

/*
 * Like lock_page(), except that the wait can be interrupted by fatal
 * signals.  Returns 0 once the page is locked and -EINTR if the task was
 * killed while waiting.
 */
static inline int lock_page_killable(struct page *page)
{
	might_sleep();

	/* Fast path succeeded: nothing to wait for. */
	if (trylock_page(page))
		return 0;

	return __lock_page_killable(page);
}

/*
 * lock_page_or_retry - Lock the page, unless this would block and the
 * caller indicated that it can handle a retry.
 *
 * Returns 1 when the trylock succeeds; otherwise the result of
 * __lock_page_or_retry() normalized to 0 or 1.  Return value and mmap_sem
 * implications depend on flags; see __lock_page_or_retry().
 */
static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm,
				     unsigned int flags)
{
	might_sleep();

	if (trylock_page(page))
		return 1;

	return __lock_page_or_retry(page, mm, flags) != 0;
}

/*
 * This is exported only for wait_on_page_locked/wait_on_page_writeback,
 * and for filesystems which need to wait on PG_private.
 */
extern void wait_on_page_bit(struct page *page, int bit_nr);

/*
 * Variants of wait_on_page_bit() that return an int; the second one also
 * takes a @timeout.  Prototypes only -- return-value semantics are not
 * visible from this header.
 */
extern int wait_on_page_bit_killable(struct page *page, int bit_nr);
extern int wait_on_page_bit_killable_timeout(struct page *page,
					     int bit_nr, unsigned long timeout);

/*
 * If @page is locked, wait on PG_locked of its compound head through
 * wait_on_page_bit_killable() and return that result; returns 0 right away
 * when the page is not locked.
 */
static inline int wait_on_page_locked_killable(struct page *page)
{
	if (PageLocked(page))
		return wait_on_page_bit_killable(compound_head(page),
						 PG_locked);

	return 0;
}

/*
 * Returns the wait queue head that wake_up_page() uses for @page.
 * Prototype only; defined outside this header.
 */
extern wait_queue_head_t *page_waitqueue(struct page *page);
static inline void wake_up_page(struct page *page, int bit)
{
	__wake_up_bit(page_waitqueue(page), &page->flags, bit);
}

/*
 * Wait for a page to be unlocked.
 *
 * The caller must be "holding" the page, i.e. have an increased
 * "page->count", so that the page cannot go away during the wait.
 */
static inline void wait_on_page_locked(struct page *page)
{
	/* Not locked: nothing to wait for. */
	if (!PageLocked(page))
		return;

	wait_on_page_bit(compound_head(page), PG_locked);
}

/*
 * Wait for a page to complete writeback.  Returns immediately when
 * PageWriteback() is not set.
 */
static inline void wait_on_page_writeback(struct page *page)
{
	if (!PageWriteback(page))
		return;

	wait_on_page_bit(page, PG_writeback);
}

/*
 * Writeback / I/O completion helpers.  Prototypes only; defined outside
 * this header, so their exact semantics are not visible here.
 */
extern void end_page_writeback(struct page *page);
void wait_for_stable_page(struct page *page);

void page_endio(struct page *page, int rw, int err);

/*
 * Add an arbitrary waiter to a page's wait queue
 */
extern void add_page_wait_queue(struct page *page, wait_queue_t *waiter);

/*
 * Fault a userspace page into pagetables by writing to it.
 *
 * Probes the first byte of [@uaddr, @uaddr + @size) and, if the range ends
 * in a different page, the last byte as well.  Returns 0 on success (or
 * when @size is 0) and the non-zero __put_user() result on a fault.
 *
 * This assumes that two userspace pages are always sufficient.
 */
static inline int fault_in_pages_writeable(char __user *uaddr, int size)
{
	char __user *end;
	int ret;

	if (unlikely(size == 0))
		return 0;

	/*
	 * Writing zeroes into userspace here is OK, because we know that if
	 * the zero gets there, we'll be overwriting it.
	 */
	ret = __put_user(0, uaddr);
	if (ret != 0)
		return ret;

	end = uaddr + size - 1;

	/*
	 * If the last byte shares a page with the first one, that page was
	 * just touched; probing it again would only cost a cache miss.
	 */
	if (((unsigned long)uaddr & PAGE_MASK) ==
			((unsigned long)end & PAGE_MASK))
		return 0;

	ret = __put_user(0, end);
	return ret;
}

/*
 * Read-side counterpart of fault_in_pages_writeable(): reads the first byte
 * of [@uaddr, @uaddr + @size) and, if the range ends in a different page,
 * the last byte as well.  Returns 0 on success (or when @size is 0) and the
 * non-zero __get_user() result on a fault.  The bytes read are discarded.
 */
static inline int fault_in_pages_readable(const char __user *uaddr, int size)
{
	const char __user *end;
	volatile char c;
	int ret;

	if (unlikely(size == 0))
		return 0;

	ret = __get_user(c, uaddr);
	if (ret != 0)
		return ret;

	end = uaddr + size - 1;

	/* First and last byte share a page: one probe was enough. */
	if (((unsigned long)uaddr & PAGE_MASK) ==
			((unsigned long)end & PAGE_MASK))
		return 0;

	ret = __get_user(c, end);
	(void)c;
	return ret;
}

/*
 * Multipage variants of the above prefault helpers, useful if more than
 * PAGE_SIZE of data needs to be prefaulted. These are separate from the above
 * functions (which only handle up to PAGE_SIZE) to avoid clobbering the
 * filemap.c hotpaths.
 */

/*
 * Write a zero byte at every PAGE_SIZE step of [@uaddr, @uaddr + @size) and
 * finally at the last byte if it sits in a page the loop did not touch.
 *
 * Returns 0 on success (or when @size is 0), the non-zero __put_user()
 * result on a fault, and -EFAULT when the range is not a valid forward
 * range, i.e. its last byte lies below @uaddr (address wrap-around or a
 * negative @size).
 */
static inline int fault_in_multipages_writeable(char __user *uaddr, int size)
{
	int ret = 0;
	char __user *end = uaddr + size - 1;

	if (unlikely(size == 0))
		return ret;

	/*
	 * A range whose end wrapped below its start cannot be valid.  Without
	 * this check the loop would not run at all and we would report
	 * success without having probed a single byte.
	 */
	if (unlikely(uaddr > end))
		return -EFAULT;

	/*
	 * Writing zeroes into userspace here is OK, because we know that if
	 * the zero gets there, we'll be overwriting it.
	 */
	do {
		ret = __put_user(0, uaddr);
		if (ret != 0)
			return ret;
		uaddr += PAGE_SIZE;
	} while (uaddr <= end);

	/* Check whether the range spilled into the next page. */
	if (((unsigned long)uaddr & PAGE_MASK) ==
			((unsigned long)end & PAGE_MASK))
		ret = __put_user(0, end);

	return ret;
}

/*
 * Read one byte at every PAGE_SIZE step of [@uaddr, @uaddr + @size) and
 * finally the last byte if it sits in a page the loop did not touch.  The
 * bytes read are discarded.
 *
 * Returns 0 on success (or when @size is 0), the non-zero __get_user()
 * result on a fault, and -EFAULT when the range is not a valid forward
 * range, i.e. its last byte lies below @uaddr (address wrap-around or a
 * negative @size).
 */
static inline int fault_in_multipages_readable(const char __user *uaddr,
					       int size)
{
	volatile char c;
	int ret = 0;
	const char __user *end = uaddr + size - 1;

	if (unlikely(size == 0))
		return ret;

	/*
	 * A range whose end wrapped below its start cannot be valid.  Without
	 * this check the loop would not run at all and we would report
	 * success without having probed a single byte.
	 */
	if (unlikely(uaddr > end))
		return -EFAULT;

	do {
		ret = __get_user(c, uaddr);
		if (ret != 0)
			return ret;
		uaddr += PAGE_SIZE;
	} while (uaddr <= end);

	/* Check whether the range spilled into the next page. */
	if (((unsigned long)uaddr & PAGE_MASK) ==
			((unsigned long)end & PAGE_MASK)) {
		ret = __get_user(c, end);
		(void)c;
	}

	return ret;
}

/*
 * Page cache insertion, removal and replacement routines.  Prototypes only;
 * defined outside this header.  add_to_page_cache_locked() is the routine
 * that the inline add_to_page_cache() in this header wraps.
 */
int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
				pgoff_t index, gfp_t gfp_mask);
int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
				pgoff_t index, gfp_t gfp_mask);
extern void delete_from_page_cache(struct page *page);
extern void __delete_from_page_cache(struct page *page, void *shadow);
int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask);

/*
 * Variant of add_to_page_cache_locked() for newly allocated pages: since
 * the page is new, it can be locked with the non-atomic __SetPageLocked().
 *
 * Returns the result of add_to_page_cache_locked().  On a non-zero result
 * the page lock is dropped again before returning.
 */
static inline int add_to_page_cache(struct page *page,
		struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask)
{
	int err;

	__SetPageLocked(page);

	err = add_to_page_cache_locked(page, mapping, offset, gfp_mask);
	if (unlikely(err))
		__ClearPageLocked(page);

	return err;
}

/*
 * Number of PAGE_SIZE pages needed to cover inode->i_size bytes, rounded
 * up.
 *
 * NOTE(review): the rounded-up byte count is converted to unsigned long
 * before the shift, so any bits of i_size that do not fit in unsigned long
 * are dropped first -- confirm this is acceptable on 32-bit builds.
 */
static inline unsigned long dir_pages(struct inode *inode)
{
	unsigned long bytes = (unsigned long)(inode->i_size + PAGE_SIZE - 1);

	return bytes >> PAGE_SHIFT;
}

#endif /* _LINUX_PAGEMAP_H */
Commit	Line	Data
	1	#ifndef _LINUX_PAGEMAP_H
	2	#define _LINUX_PAGEMAP_H
	3
	4	/*
	5	* Copyright 1995 Linus Torvalds
	6	*/
	7	#include <linux/mm.h>
	8	#include <linux/fs.h>
	9	#include <linux/list.h>
	10	#include <linux/highmem.h>
	11	#include <linux/compiler.h>
	12	#include <asm/uaccess.h>
	13	#include <linux/gfp.h>
	14	#include <linux/bitops.h>
	15	#include <linux/hardirq.h> /* for in_interrupt() */
	16	#include <linux/hugetlb_inline.h>
	17
	18	/*
	19	* Bits in mapping->flags. The lower __GFP_BITS_SHIFT bits are the page
	20	* allocation mode flags.
	21	*/
	22	enum mapping_flags {
	23	AS_EIO = __GFP_BITS_SHIFT + 0, /* IO error on async write */
	24	AS_ENOSPC = __GFP_BITS_SHIFT + 1, /* ENOSPC on async write */
	25	AS_MM_ALL_LOCKS = __GFP_BITS_SHIFT + 2, /* under mm_take_all_locks() */
	26	AS_UNEVICTABLE = __GFP_BITS_SHIFT + 3, /* e.g., ramdisk, SHM_LOCK */
	27	AS_EXITING = __GFP_BITS_SHIFT + 4, /* final truncate in progress */
	28	};
	29
	30	static inline void mapping_set_error(struct address_space *mapping, int error)
	31	{
	32	if (unlikely(error)) {
	33	if (error == -ENOSPC)
	34	set_bit(AS_ENOSPC, &mapping->flags);
	35	else
	36	set_bit(AS_EIO, &mapping->flags);
	37	}
	38	}
	39
	40	static inline void mapping_set_unevictable(struct address_space *mapping)
	41	{
	42	set_bit(AS_UNEVICTABLE, &mapping->flags);
	43	}
	44
	45	static inline void mapping_clear_unevictable(struct address_space *mapping)
	46	{
	47	clear_bit(AS_UNEVICTABLE, &mapping->flags);
	48	}
	49
	50	static inline int mapping_unevictable(struct address_space *mapping)
	51	{
	52	if (mapping)
	53	return test_bit(AS_UNEVICTABLE, &mapping->flags);
	54	return !!mapping;
	55	}
	56
	57	static inline void mapping_set_exiting(struct address_space *mapping)
	58	{
	59	set_bit(AS_EXITING, &mapping->flags);
	60	}
	61
	62	static inline int mapping_exiting(struct address_space *mapping)
	63	{
	64	return test_bit(AS_EXITING, &mapping->flags);
	65	}
	66
	67	static inline gfp_t mapping_gfp_mask(struct address_space * mapping)
	68	{
	69	return (__force gfp_t)mapping->flags & __GFP_BITS_MASK;
	70	}
	71
	72	/* Restricts the given gfp_mask to what the mapping allows. */
	73	static inline gfp_t mapping_gfp_constraint(struct address_space *mapping,
	74	gfp_t gfp_mask)
	75	{
	76	return mapping_gfp_mask(mapping) & gfp_mask;
	77	}
	78
	79	/*
	80	* This is non-atomic. Only to be used before the mapping is activated.
	81	* Probably needs a barrier...
	82	*/
	83	static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
	84	{
	85	m->flags = (m->flags & ~(__force unsigned long)__GFP_BITS_MASK) \|
	86	(__force unsigned long)mask;
	87	}
	88
	89	void release_pages(struct page **pages, int nr, bool cold);
	90
	91	/*
	92	* speculatively take a reference to a page.
	93	* If the page is free (_count == 0), then _count is untouched, and 0
	94	* is returned. Otherwise, _count is incremented by 1 and 1 is returned.
	95	*
	96	* This function must be called inside the same rcu_read_lock() section as has
	97	* been used to lookup the page in the pagecache radix-tree (or page table):
	98	* this allows allocators to use a synchronize_rcu() to stabilize _count.
	99	*
	100	* Unless an RCU grace period has passed, the count of all pages coming out
	101	* of the allocator must be considered unstable. page_count may return higher
	102	* than expected, and put_page must be able to do the right thing when the
	103	* page has been finished with, no matter what it is subsequently allocated
	104	* for (because put_page is what is used here to drop an invalid speculative
	105	* reference).
	106	*
	107	* This is the interesting part of the lockless pagecache (and lockless
	108	* get_user_pages) locking protocol, where the lookup-side (eg. find_get_page)
	109	* has the following pattern:
	110	* 1. find page in radix tree
	111	* 2. conditionally increment refcount
	112	* 3. check the page is still in pagecache (if no, goto 1)
	113	*
	114	* Remove-side that cares about stability of _count (eg. reclaim) has the
	115	* following (with tree_lock held for write):
	116	* A. atomically check refcount is correct and set it to 0 (atomic_cmpxchg)
	117	* B. remove page from pagecache
	118	* C. free the page
	119	*
	120	* There are 2 critical interleavings that matter:
	121	* - 2 runs before A: in this case, A sees elevated refcount and bails out
	122	* - A runs before 2: in this case, 2 sees zero refcount and retries;
	123	* subsequently, B will complete and 1 will find no page, causing the
	124	* lookup to return NULL.
	125	*
	126	* It is possible that between 1 and 2, the page is removed then the exact same
	127	* page is inserted into the same position in pagecache. That's OK: the
	128	* old find_get_page using tree_lock could equally have run before or after
	129	* such a re-insertion, depending on order that locks are granted.
	130	*
	131	* Lookups racing against pagecache insertion isn't a big problem: either 1
	132	* will find the page or it will not. Likewise, the old find_get_page could run
	133	* either before the insertion or afterwards, depending on timing.
	134	*/
	135	static inline int page_cache_get_speculative(struct page *page)
	136	{
	137	VM_BUG_ON(in_interrupt());
	138
	139	#ifdef CONFIG_TINY_RCU
	140	# ifdef CONFIG_PREEMPT_COUNT
	141	VM_BUG_ON(!in_atomic());
	142	# endif
	143	/*
	144	* Preempt must be disabled here - we rely on rcu_read_lock doing
	145	* this for us.
	146	*
	147	* Pagecache won't be truncated from interrupt context, so if we have
	148	* found a page in the radix tree here, we have pinned its refcount by
	149	* disabling preempt, and hence no need for the "speculative get" that
	150	* SMP requires.
	151	*/
	152	VM_BUG_ON_PAGE(page_count(page) == 0, page);
	153	page_ref_inc(page);
	154
	155	#else
	156	if (unlikely(!get_page_unless_zero(page))) {
	157	/*
	158	* Either the page has been freed, or will be freed.
	159	* In either case, retry here and the caller should
	160	* do the right thing (see comments above).
	161	*/
	162	return 0;
	163	}
	164	#endif
	165	VM_BUG_ON_PAGE(PageTail(page), page);
	166
	167	return 1;
	168	}
	169
	170	/*
	171	* Same as above, but add instead of inc (could just be merged)
	172	*/
	173	static inline int page_cache_add_speculative(struct page *page, int count)
	174	{
	175	VM_BUG_ON(in_interrupt());
	176
	177	#if !defined(CONFIG_SMP) && defined(CONFIG_TREE_RCU)
	178	# ifdef CONFIG_PREEMPT_COUNT
	179	VM_BUG_ON(!in_atomic());
	180	# endif
	181	VM_BUG_ON_PAGE(page_count(page) == 0, page);
	182	page_ref_add(page, count);
	183
	184	#else
	185	if (unlikely(!page_ref_add_unless(page, count, 0)))
	186	return 0;
	187	#endif
	188	VM_BUG_ON_PAGE(PageCompound(page) && page != compound_head(page), page);
	189
	190	return 1;
	191	}
	192
	193	#ifdef CONFIG_NUMA
	194	extern struct page *__page_cache_alloc(gfp_t gfp);
	195	#else
	196	static inline struct page *__page_cache_alloc(gfp_t gfp)
	197	{
	198	return alloc_pages(gfp, 0);
	199	}
	200	#endif
	201
	202	static inline struct page page_cache_alloc(struct address_space x)
	203	{
	204	return __page_cache_alloc(mapping_gfp_mask(x));
	205	}
	206
	207	static inline struct page page_cache_alloc_cold(struct address_space x)
	208	{
	209	return __page_cache_alloc(mapping_gfp_mask(x)\|__GFP_COLD);
	210	}
	211
	212	static inline struct page page_cache_alloc_readahead(struct address_space x)
	213	{
	214	return __page_cache_alloc(mapping_gfp_mask(x) \|
	215	__GFP_COLD \| __GFP_NORETRY \| __GFP_NOWARN);
	216	}
	217
	218	typedef int filler_t(void , struct page );
	219
	220	pgoff_t page_cache_next_hole(struct address_space *mapping,
	221	pgoff_t index, unsigned long max_scan);
	222	pgoff_t page_cache_prev_hole(struct address_space *mapping,
	223	pgoff_t index, unsigned long max_scan);
	224
	225	#define FGP_ACCESSED 0x00000001
	226	#define FGP_LOCK 0x00000002
	227	#define FGP_CREAT 0x00000004
	228	#define FGP_WRITE 0x00000008
	229	#define FGP_NOFS 0x00000010
	230	#define FGP_NOWAIT 0x00000020
	231
	232	struct page pagecache_get_page(struct address_space mapping, pgoff_t offset,
	233	int fgp_flags, gfp_t cache_gfp_mask);
	234
	235	/**
	236	* find_get_page - find and get a page reference
	237	* @mapping: the address_space to search
	238	* @offset: the page index
	239	*
	240	* Looks up the page cache slot at @mapping & @offset. If there is a
	241	* page cache page, it is returned with an increased refcount.
	242	*
	243	* Otherwise, %NULL is returned.
	244	*/
	245	static inline struct page find_get_page(struct address_space mapping,
	246	pgoff_t offset)
	247	{
	248	return pagecache_get_page(mapping, offset, 0, 0);
	249	}
	250
	251	static inline struct page find_get_page_flags(struct address_space mapping,
	252	pgoff_t offset, int fgp_flags)
	253	{
	254	return pagecache_get_page(mapping, offset, fgp_flags, 0);
	255	}
	256
	257	/**
	258	* find_lock_page - locate, pin and lock a pagecache page
	259	* pagecache_get_page - find and get a page reference
	260	* @mapping: the address_space to search
	261	* @offset: the page index
	262	*
	263	* Looks up the page cache slot at @mapping & @offset. If there is a
	264	* page cache page, it is returned locked and with an increased
	265	* refcount.
	266	*
	267	* Otherwise, %NULL is returned.
	268	*
	269	* find_lock_page() may sleep.
	270	*/
	271	static inline struct page find_lock_page(struct address_space mapping,
	272	pgoff_t offset)
	273	{
	274	return pagecache_get_page(mapping, offset, FGP_LOCK, 0);
	275	}
	276
	277	/**
	278	* find_or_create_page - locate or add a pagecache page
	279	* @mapping: the page's address_space
	280	* @index: the page's index into the mapping
	281	* @gfp_mask: page allocation mode
	282	*
	283	* Looks up the page cache slot at @mapping & @offset. If there is a
	284	* page cache page, it is returned locked and with an increased
	285	* refcount.
	286	*
	287	* If the page is not present, a new page is allocated using @gfp_mask
	288	* and added to the page cache and the VM's LRU list. The page is
	289	* returned locked and with an increased refcount.
	290	*
	291	* On memory exhaustion, %NULL is returned.
	292	*
	293	* find_or_create_page() may sleep, even if @gfp_flags specifies an
	294	* atomic allocation!
	295	*/
	296	static inline struct page find_or_create_page(struct address_space mapping,
	297	pgoff_t offset, gfp_t gfp_mask)
	298	{
	299	return pagecache_get_page(mapping, offset,
	300	FGP_LOCK\|FGP_ACCESSED\|FGP_CREAT,
	301	gfp_mask);
	302	}
	303
	304	/**
	305	* grab_cache_page_nowait - returns locked page at given index in given cache
	306	* @mapping: target address_space
	307	* @index: the page index
	308	*
	309	* Same as grab_cache_page(), but do not wait if the page is unavailable.
	310	* This is intended for speculative data generators, where the data can
	311	* be regenerated if the page couldn't be grabbed. This routine should
	312	* be safe to call while holding the lock for another page.
	313	*
	314	* Clear __GFP_FS when allocating the page to avoid recursion into the fs
	315	* and deadlock against the caller's locked page.
	316	*/
	317	static inline struct page grab_cache_page_nowait(struct address_space mapping,
	318	pgoff_t index)
	319	{
	320	return pagecache_get_page(mapping, index,
	321	FGP_LOCK\|FGP_CREAT\|FGP_NOFS\|FGP_NOWAIT,
	322	mapping_gfp_mask(mapping));
	323	}
	324
	325	struct page find_get_entry(struct address_space mapping, pgoff_t offset);
	326	struct page find_lock_entry(struct address_space mapping, pgoff_t offset);
	327	unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
	328	unsigned int nr_entries, struct page **entries,
	329	pgoff_t *indices);
	330	unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
	331	unsigned int nr_pages, struct page **pages);
	332	unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start,
	333	unsigned int nr_pages, struct page **pages);
	334	unsigned find_get_pages_tag(struct address_space mapping, pgoff_t index,
	335	int tag, unsigned int nr_pages, struct page **pages);
	336	unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
	337	int tag, unsigned int nr_entries,
	338	struct page *entries, pgoff_t indices);
	339
	340	struct page grab_cache_page_write_begin(struct address_space mapping,
	341	pgoff_t index, unsigned flags);
	342
	343	/*
	344	* Returns locked page at given index in given cache, creating it if needed.
	345	*/
	346	static inline struct page grab_cache_page(struct address_space mapping,
	347	pgoff_t index)
	348	{
	349	return find_or_create_page(mapping, index, mapping_gfp_mask(mapping));
	350	}
	351
	352	extern struct page * read_cache_page(struct address_space *mapping,
	353	pgoff_t index, filler_t filler, void data);
	354	extern struct page * read_cache_page_gfp(struct address_space *mapping,
	355	pgoff_t index, gfp_t gfp_mask);
	356	extern int read_cache_pages(struct address_space *mapping,
	357	struct list_head pages, filler_t filler, void *data);
	358
	359	static inline struct page read_mapping_page(struct address_space mapping,
	360	pgoff_t index, void *data)
	361	{
	362	filler_t filler = (filler_t )mapping->a_ops->readpage;
	363	return read_cache_page(mapping, index, filler, data);
	364	}
	365
	366	/*
	367	* Get the offset in PAGE_SIZE.
	368	* (TODO: hugepage should have ->index in PAGE_SIZE)
	369	*/
	370	static inline pgoff_t page_to_pgoff(struct page *page)
	371	{
	372	pgoff_t pgoff;
	373
	374	if (unlikely(PageHeadHuge(page)))
	375	return page->index << compound_order(page);
	376
	377	if (likely(!PageTransTail(page)))
	378	return page->index;
	379
	380	/*
	381	* We don't initialize ->index for tail pages: calculate based on
	382	* head page
	383	*/
	384	pgoff = compound_head(page)->index;
	385	pgoff += page - compound_head(page);
	386	return pgoff;
	387	}
	388
	389	/*
	390	* Return byte-offset into filesystem object for page.
	391	*/
	392	static inline loff_t page_offset(struct page *page)
	393	{
	394	return ((loff_t)page->index) << PAGE_SHIFT;
	395	}
	396
	397	static inline loff_t page_file_offset(struct page *page)
	398	{
	399	return ((loff_t)page_file_index(page)) << PAGE_SHIFT;
	400	}
	401
	402	extern pgoff_t linear_hugepage_index(struct vm_area_struct *vma,
	403	unsigned long address);
	404
	405	static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
	406	unsigned long address)
	407	{
	408	pgoff_t pgoff;
	409	if (unlikely(is_vm_hugetlb_page(vma)))
	410	return linear_hugepage_index(vma, address);
	411	pgoff = (address - vma->vm_start) >> PAGE_SHIFT;
	412	pgoff += vma->vm_pgoff;
	413	return pgoff;
	414	}
	415
	/*
	 * Out-of-line page lock primitives.  The inline wrappers in this header
	 * call the __lock_page*() variants when trylock_page() fails.
	 */
	extern void __lock_page(struct page *page);
	extern int __lock_page_killable(struct page *page);
	extern int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
					unsigned int flags);
	extern void unlock_page(struct page *page);

	422	static inline int trylock_page(struct page *page)
	423	{
	424	page = compound_head(page);
	425	return (likely(!test_and_set_bit_lock(PG_locked, &page->flags)));
	426	}
	427
	/*
	 * lock_page may only be called if we have the page's inode pinned.
	 */
	static inline void lock_page(struct page *page)
	{
		might_sleep();

		/* Fast path: the trylock succeeded, nothing more to do. */
		if (trylock_page(page))
			return;

		__lock_page(page);
	}

	/*
	 * lock_page_killable is like lock_page but can be interrupted by fatal
	 * signals.  It returns 0 if it locked the page and -EINTR if it was
	 * killed while waiting.
	 */
	static inline int lock_page_killable(struct page *page)
	{
		might_sleep();

		/* Fast path: lock taken without waiting. */
		if (trylock_page(page))
			return 0;

		return __lock_page_killable(page);
	}

	/*
	 * lock_page_or_retry - Lock the page, unless this would block and the
	 * caller indicated that it can handle a retry.
	 *
	 * Return value and mmap_sem implications depend on flags; see
	 * __lock_page_or_retry().
	 */
	static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm,
					     unsigned int flags)
	{
		might_sleep();
		/* Only fall back to the slow path when the trylock fails. */
		return trylock_page(page) || __lock_page_or_retry(page, mm, flags);
	}

	/*
	 * Exported only for wait_on_page_locked/wait_on_page_writeback, and for
	 * filesystems which need to wait on PG_private.
	 */
	extern void wait_on_page_bit(struct page *page, int bit_nr);

	extern int wait_on_page_bit_killable(struct page *page, int bit_nr);
	extern int wait_on_page_bit_killable_timeout(struct page *page,
						     int bit_nr,
						     unsigned long timeout);

	475	static inline int wait_on_page_locked_killable(struct page *page)
	476	{
	477	if (!PageLocked(page))
	478	return 0;
	479	return wait_on_page_bit_killable(compound_head(page), PG_locked);
	480	}
	481
	482	extern wait_queue_head_t page_waitqueue(struct page page);
	483	static inline void wake_up_page(struct page *page, int bit)
	484	{
	485	__wake_up_bit(page_waitqueue(page), &page->flags, bit);
	486	}
	487
	488	/*
	489	* Wait for a page to be unlocked.
	490	*
	491	* This must be called with the caller "holding" the page,
	492	* ie with increased "page->count" so that the page won't
	493	* go away during the wait..
	494	*/
	495	static inline void wait_on_page_locked(struct page *page)
	496	{
	497	if (PageLocked(page))
	498	wait_on_page_bit(compound_head(page), PG_locked);
	499	}
	500
	501	/*
	502	* Wait for a page to complete writeback
	503	*/
	504	static inline void wait_on_page_writeback(struct page *page)
	505	{
	506	if (PageWriteback(page))
	507	wait_on_page_bit(page, PG_writeback);
	508	}
	509
	/* Prototypes only; the definitions are not in this header. */
	extern void end_page_writeback(struct page *page);
	void wait_for_stable_page(struct page *page);

	void page_endio(struct page *page, int rw, int err);

	515	/*
	516	* Add an arbitrary waiter to a page's wait queue
	517	*/
	518	extern void add_page_wait_queue(struct page page, wait_queue_t waiter);
	519
	520	/*
	521	* Fault a userspace page into pagetables. Return non-zero on a fault.
	522	*
	523	* This assumes that two userspace pages are always sufficient.
	524	*/
	525	static inline int fault_in_pages_writeable(char __user *uaddr, int size)
	526	{
	527	int ret;
	528
	529	if (unlikely(size == 0))
	530	return 0;
	531
	532	/*
	533	* Writing zeroes into userspace here is OK, because we know that if
	534	* the zero gets there, we'll be overwriting it.
	535	*/
	536	ret = __put_user(0, uaddr);
	537	if (ret == 0) {
	538	char __user *end = uaddr + size - 1;
	539
	540	/*
	541	* If the page was already mapped, this will get a cache miss
	542	* for sure, so try to avoid doing it.
	543	*/
	544	if (((unsigned long)uaddr & PAGE_MASK) !=
	545	((unsigned long)end & PAGE_MASK))
	546	ret = __put_user(0, end);
	547	}
	548	return ret;
	549	}
	550
	551	static inline int fault_in_pages_readable(const char __user *uaddr, int size)
	552	{
	553	volatile char c;
	554	int ret;
	555
	556	if (unlikely(size == 0))
	557	return 0;
	558
	559	ret = __get_user(c, uaddr);
	560	if (ret == 0) {
	561	const char __user *end = uaddr + size - 1;
	562
	563	if (((unsigned long)uaddr & PAGE_MASK) !=
	564	((unsigned long)end & PAGE_MASK)) {
	565	ret = __get_user(c, end);
	566	(void)c;
	567	}
	568	}
	569	return ret;
	570	}
	571
	572	/*
	573	* Multipage variants of the above prefault helpers, useful if more than
	574	* PAGE_SIZE of data needs to be prefaulted. These are separate from the above
	575	* functions (which only handle up to PAGE_SIZE) to avoid clobbering the
	576	* filemap.c hotpaths.
	577	*/
	578	static inline int fault_in_multipages_writeable(char __user *uaddr, int size)
	579	{
	580	int ret = 0;
	581	char __user *end = uaddr + size - 1;
	582
	583	if (unlikely(size == 0))
	584	return ret;
	585
	586	/*
	587	* Writing zeroes into userspace here is OK, because we know that if
	588	* the zero gets there, we'll be overwriting it.
	589	*/
	590	while (uaddr <= end) {
	591	ret = __put_user(0, uaddr);
	592	if (ret != 0)
	593	return ret;
	594	uaddr += PAGE_SIZE;
	595	}
	596
	597	/* Check whether the range spilled into the next page. */
	598	if (((unsigned long)uaddr & PAGE_MASK) ==
	599	((unsigned long)end & PAGE_MASK))
	600	ret = __put_user(0, end);
	601
	602	return ret;
	603	}
	604
	605	static inline int fault_in_multipages_readable(const char __user *uaddr,
	606	int size)
	607	{
	608	volatile char c;
	609	int ret = 0;
	610	const char __user *end = uaddr + size - 1;
	611
	612	if (unlikely(size == 0))
	613	return ret;
	614
	615	while (uaddr <= end) {
	616	ret = __get_user(c, uaddr);
	617	if (ret != 0)
	618	return ret;
	619	uaddr += PAGE_SIZE;
	620	}
	621
	622	/* Check whether the range spilled into the next page. */
	623	if (((unsigned long)uaddr & PAGE_MASK) ==
	624	((unsigned long)end & PAGE_MASK)) {
	625	ret = __get_user(c, end);
	626	(void)c;
	627	}
	628
	629	return ret;
	630	}
	631
	632	int add_to_page_cache_locked(struct page page, struct address_space mapping,
	633	pgoff_t index, gfp_t gfp_mask);
	634	int add_to_page_cache_lru(struct page page, struct address_space mapping,
	635	pgoff_t index, gfp_t gfp_mask);
	636	extern void delete_from_page_cache(struct page *page);
	637	extern void __delete_from_page_cache(struct page page, void shadow);
	638	int replace_page_cache_page(struct page old, struct page new, gfp_t gfp_mask);
	639
	640	/*
	641	* Like add_to_page_cache_locked, but used to add newly allocated pages:
	642	* the page is new, so we can just run __SetPageLocked() against it.
	643	*/
	644	static inline int add_to_page_cache(struct page *page,
	645	struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask)
	646	{
	647	int error;
	648
	649	__SetPageLocked(page);
	650	error = add_to_page_cache_locked(page, mapping, offset, gfp_mask);
	651	if (unlikely(error))
	652	__ClearPageLocked(page);
	653	return error;
	654	}
	655
	656	static inline unsigned long dir_pages(struct inode *inode)
	657	{
	658	return (unsigned long)(inode->i_size + PAGE_SIZE - 1) >>
	659	PAGE_SHIFT;
	660	}
	661
	662	#endif /* _LINUX_PAGEMAP_H */