Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | #ifndef _ASM_GENERIC_PGTABLE_H |
2 | #define _ASM_GENERIC_PGTABLE_H | |
3 | ||
673eae82 RR |
4 | #ifndef __ASSEMBLY__ |
5 | ||
1da177e4 LT |
6 | #ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS |
7 | /* | |
8 | * Sets only the access flags (dirty, accessed, and writable) of a | |
9 | * PTE that is already present. Furthermore, we know the flags always | |
10 | * get set to a "more permissive" setting, which allows most architectures | |
8dab5241 BH |
11 | * to optimize this. We return whether the PTE actually changed, which |
12 | * in turn instructs the caller to do things like update_mmu_cache(). | |
13 | * This used to be done in the caller, but sparc needs minor faults to | |
14 | * force that call on sun4c, so we changed this macro slightly. | |
1da177e4 LT |
15 | */ |
16 | #define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \ | |
8dab5241 BH |
17 | ({ \ |
18 | int __changed = !pte_same(*(__ptep), __entry); \ | |
19 | if (__changed) { \ | |
20 | set_pte_at((__vma)->vm_mm, (__address), __ptep, __entry); \ | |
21 | flush_tlb_page(__vma, __address); \ | |
22 | } \ | |
23 | __changed; \ | |
24 | }) | |
1da177e4 LT |
25 | #endif |
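A hedged usage sketch of the contract described in the comment above: the caller builds the new, more permissive PTE value and only refreshes secondary MMU state when the primary PTE actually changed. The function name and parameters below are hypothetical, and the update_mmu_cache() call assumes the prototype of this era (taking the PTE value), which varies between kernel versions.

```c
/* Hypothetical fault-path caller; not part of this header. */
static void example_set_access_flags(struct vm_area_struct *vma,
				     unsigned long address,
				     pte_t *ptep, pte_t entry, int dirty)
{
	/* Non-zero only if *ptep differed from entry and was rewritten. */
	if (ptep_set_access_flags(vma, address, ptep, entry, dirty))
		update_mmu_cache(vma, address, entry);
}
```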
26 | ||
27 | #ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG | |
28 | #define ptep_test_and_clear_young(__vma, __address, __ptep) \ | |
29 | ({ \ | |
30 | pte_t __pte = *(__ptep); \ | |
31 | int r = 1; \ | |
32 | if (!pte_young(__pte)) \ | |
33 | r = 0; \ | |
34 | else \ | |
35 | set_pte_at((__vma)->vm_mm, (__address), \ | |
36 | (__ptep), pte_mkold(__pte)); \ | |
37 | r; \ | |
38 | }) | |
39 | #endif | |
40 | ||
41 | #ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH | |
42 | #define ptep_clear_flush_young(__vma, __address, __ptep) \ | |
43 | ({ \ | |
44 | int __young; \ | |
45 | __young = ptep_test_and_clear_young(__vma, __address, __ptep); \ | |
46 | if (__young) \ | |
47 | flush_tlb_page(__vma, __address); \ | |
48 | __young; \ | |
49 | }) | |
50 | #endif | |
51 | ||
52 | #ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY | |
53 | #define ptep_test_and_clear_dirty(__vma, __address, __ptep) \ | |
54 | ({ \ | |
55 | pte_t __pte = *__ptep; \ | |
56 | int r = 1; \ | |
57 | if (!pte_dirty(__pte)) \ | |
58 | r = 0; \ | |
59 | else \ | |
60 | set_pte_at((__vma)->vm_mm, (__address), (__ptep), \ | |
61 | pte_mkclean(__pte)); \ | |
62 | r; \ | |
63 | }) | |
64 | #endif | |
65 | ||
66 | #ifndef __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH | |
67 | #define ptep_clear_flush_dirty(__vma, __address, __ptep) \ | |
68 | ({ \ | |
69 | int __dirty; \ | |
70 | __dirty = ptep_test_and_clear_dirty(__vma, __address, __ptep); \ | |
71 | if (__dirty) \ | |
72 | flush_tlb_page(__vma, __address); \ | |
73 | __dirty; \ | |
74 | }) | |
75 | #endif | |
76 | ||
77 | #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR | |
78 | #define ptep_get_and_clear(__mm, __address, __ptep) \ | |
79 | ({ \ | |
80 | pte_t __pte = *(__ptep); \ | |
81 | pte_clear((__mm), (__address), (__ptep)); \ | |
82 | __pte; \ | |
83 | }) | |
84 | #endif | |
85 | ||
a600388d ZA |
86 | #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL |
87 | #define ptep_get_and_clear_full(__mm, __address, __ptep, __full) \ | |
88 | ({ \ | |
89 | pte_t __pte; \ | |
90 | __pte = ptep_get_and_clear((__mm), (__address), (__ptep)); \ | |
91 | __pte; \ | |
92 | }) | |
93 | #endif | |
94 | ||
9888a1ca ZA |
95 | /* |
96 | * Some architectures may be able to avoid expensive synchronization | |
97 | * primitives when modifications are made to PTEs which are already | |
98 | * not present, or while the address space is being torn down. | |
99 | */ | |
100 | #ifndef __HAVE_ARCH_PTE_CLEAR_NOT_PRESENT_FULL | |
101 | #define pte_clear_not_present_full(__mm, __address, __ptep, __full) \ | |
a600388d ZA |
102 | do { \ |
103 | pte_clear((__mm), (__address), (__ptep)); \ | |
104 | } while (0) | |
105 | #endif | |
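A hedged sketch of the kind of caller the comment above has in mind: a teardown path that already knows the PTE holds a non-present entry (for example a swap entry) and has the "full mm" hint available. The function and parameter names are illustrative only.

```c
/* Hypothetical teardown helper; not part of this header. */
static void example_zap_nonpresent_pte(struct mm_struct *mm,
				       unsigned long address,
				       pte_t *ptep, int fullmm)
{
	/*
	 * fullmm is non-zero when the whole address space is being torn
	 * down, letting architectures skip expensive synchronization.
	 */
	pte_clear_not_present_full(mm, address, ptep, fullmm);
}
```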
106 | ||
1da177e4 LT |
107 | #ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH |
108 | #define ptep_clear_flush(__vma, __address, __ptep) \ | |
109 | ({ \ | |
110 | pte_t __pte; \ | |
111 | __pte = ptep_get_and_clear((__vma)->vm_mm, __address, __ptep); \ | |
112 | flush_tlb_page(__vma, __address); \ | |
113 | __pte; \ | |
114 | }) | |
115 | #endif | |
116 | ||
117 | #ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT | |
8c65b4a6 | 118 | struct mm_struct; |
1da177e4 LT |
119 | static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep) |
120 | { | |
121 | pte_t old_pte = *ptep; | |
122 | set_pte_at(mm, address, ptep, pte_wrprotect(old_pte)); | |
123 | } | |
124 | #endif | |
125 | ||
126 | #ifndef __HAVE_ARCH_PTE_SAME | |
127 | #define pte_same(A,B) (pte_val(A) == pte_val(B)) | |
128 | #endif | |
129 | ||
6c210482 MS |
130 | #ifndef __HAVE_ARCH_PAGE_TEST_DIRTY |
131 | #define page_test_dirty(page) (0) | |
132 | #endif | |
133 | ||
134 | #ifndef __HAVE_ARCH_PAGE_CLEAR_DIRTY | |
135 | #define page_clear_dirty(page) do { } while (0) | |
136 | #endif | |
137 | ||
138 | #ifndef __HAVE_ARCH_PAGE_TEST_DIRTY | |
b4955ce3 AK |
139 | #define pte_maybe_dirty(pte) pte_dirty(pte) |
140 | #else | |
141 | #define pte_maybe_dirty(pte) (1) | |
1da177e4 LT |
142 | #endif |
143 | ||
144 | #ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG | |
145 | #define page_test_and_clear_young(page) (0) | |
146 | #endif | |
147 | ||
148 | #ifndef __HAVE_ARCH_PGD_OFFSET_GATE | |
149 | #define pgd_offset_gate(mm, addr) pgd_offset(mm, addr) | |
150 | #endif | |
151 | ||
152 | #ifndef __HAVE_ARCH_LAZY_MMU_PROT_UPDATE | |
153 | #define lazy_mmu_prot_update(pte) do { } while (0) | |
154 | #endif | |
155 | ||
0b0968a3 | 156 | #ifndef __HAVE_ARCH_MOVE_PTE |
8b1f3124 | 157 | #define move_pte(pte, prot, old_addr, new_addr) (pte) |
8b1f3124 NP |
158 | #endif |
159 | ||
6606c3e0 ZA |
160 | /* |
161 | * A facility to provide lazy MMU batching. This allows PTE updates and | |
162 | * page invalidations to be delayed until a call to leave lazy MMU mode | |
163 | * is issued. Some architectures may benefit from doing this, and it is | |
164 | * beneficial for both shadow and direct mode hypervisors, which may batch | |
165 | * the PTE updates that happen during this window. Note that using this | |
166 | * interface requires that read hazards be removed from the code. A read | |
167 | * hazard could arise in the direct mode hypervisor case, since the actual | |
168 | * write to the page tables may not yet have taken place, so reads through | |
169 | * a raw PTE pointer after it has been modified are not guaranteed to be | |
170 | * up to date. This mode can only be entered and left under the protection of | |
171 | * the page table locks for all page tables which may be modified. In the UP | |
172 | * case, this is required so that preemption is disabled, and in the SMP case, | |
173 | * it must synchronize the delayed page table writes properly on other CPUs. | |
174 | */ | |
175 | #ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE | |
176 | #define arch_enter_lazy_mmu_mode() do {} while (0) | |
177 | #define arch_leave_lazy_mmu_mode() do {} while (0) | |
49f19710 | 178 | #define arch_flush_lazy_mmu_mode() do {} while (0) |
6606c3e0 ZA |
179 | #endif |
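A hedged sketch of the pairing the comment above asks for: the lazy window is entered and left under the page table lock, every PTE store inside the window goes through set_pte_at(), and nothing reads the modified PTEs back through a raw pointer before the mode is left. The function and its arguments are hypothetical.

```c
/* Hypothetical batched remap; not part of this header. */
static void example_remap_range(struct mm_struct *mm, pte_t *ptep,
				unsigned long addr, unsigned long end,
				unsigned long pfn, pgprot_t prot,
				spinlock_t *ptl)
{
	spin_lock(ptl);
	arch_enter_lazy_mmu_mode();
	for (; addr < end; addr += PAGE_SIZE, ptep++, pfn++)
		set_pte_at(mm, addr, ptep, pfn_pte(pfn, prot));
	arch_leave_lazy_mmu_mode();	/* delayed PTE writes are issued here */
	spin_unlock(ptl);
}
```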
180 | ||
9226d125 ZA |
181 | /* |
182 | * A facility to provide batching of the reload of page tables with the | |
183 | * actual context switch code for paravirtualized guests. By convention, | |
184 | * only one of the lazy modes (CPU, MMU) should be active at any given | |
185 | * time, entries should never be nested, and entries and exits should | |
186 | * always be paired. This keeps the kernel code easier to maintain and | |
187 | * reason about. | |
188 | */ | |
189 | #ifndef __HAVE_ARCH_ENTER_LAZY_CPU_MODE | |
190 | #define arch_enter_lazy_cpu_mode() do {} while (0) | |
191 | #define arch_leave_lazy_cpu_mode() do {} while (0) | |
49f19710 | 192 | #define arch_flush_lazy_cpu_mode() do {} while (0) |
9226d125 ZA |
193 | #endif |
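A hedged sketch of the convention only, assuming a context-switch path whose state reloads a paravirt hypervisor wants to batch; the per-arch work elided in the middle is not specified here, and the wrapper name is hypothetical.

```c
/* Hypothetical context-switch wrapper; not part of this header. */
static void example_context_switch(struct task_struct *prev,
				   struct task_struct *next)
{
	arch_enter_lazy_cpu_mode();
	/* ... save prev's state, reload next's segments/descriptors ... */
	arch_leave_lazy_cpu_mode();	/* batched state reloads are flushed */
}
```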
194 | ||
1da177e4 | 195 | /* |
8f6c99c1 HD |
196 | * When walking page tables, get the address of the next boundary, |
197 | * or the end address of the range if that comes earlier. Although no | |
198 | * vma end wraps to 0, the rounded-up __boundary may wrap to 0 at any level. | |
1da177e4 LT |
199 | */ |
200 | ||
1da177e4 LT |
201 | #define pgd_addr_end(addr, end) \ |
202 | ({ unsigned long __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK; \ | |
203 | (__boundary - 1 < (end) - 1)? __boundary: (end); \ | |
204 | }) | |
1da177e4 LT |
205 | |
206 | #ifndef pud_addr_end | |
207 | #define pud_addr_end(addr, end) \ | |
208 | ({ unsigned long __boundary = ((addr) + PUD_SIZE) & PUD_MASK; \ | |
209 | (__boundary - 1 < (end) - 1)? __boundary: (end); \ | |
210 | }) | |
211 | #endif | |
212 | ||
213 | #ifndef pmd_addr_end | |
214 | #define pmd_addr_end(addr, end) \ | |
215 | ({ unsigned long __boundary = ((addr) + PMD_SIZE) & PMD_MASK; \ | |
216 | (__boundary - 1 < (end) - 1)? __boundary: (end); \ | |
217 | }) | |
218 | #endif | |
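The `- 1` on both sides of the comparison is what keeps the clamp correct when `__boundary` wraps to 0 at the very top of the address space, as the comment above notes. A hedged worked case for pgd_addr_end(), using assumed 32-bit values (4 MB PGDIR_SIZE, not taken from this file):

```c
/*
 * Assumed values: PGDIR_SIZE = 0x00400000, PGDIR_MASK = 0xffc00000,
 *                 addr = 0xfff00000, end = 0xfffff000.
 *
 *   __boundary = (0xfff00000 + 0x00400000) & 0xffc00000 = 0x00000000 (wrapped)
 *
 *   naive  "__boundary < end"          : 0x0 < 0xfffff000        -> true,
 *                                        would wrongly return the wrapped 0
 *   actual "__boundary - 1 < end - 1"  : 0xffffffff < 0xffffefff -> false,
 *                                        so the macro returns end instead
 */
```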
219 | ||
1da177e4 LT |
220 | /* |
221 | * When walking page tables, we usually want to skip any p?d_none entries, | |
222 | * and any p?d_bad entries - reporting the error before resetting to none. | |
223 | * Do the tests inline, but report and clear the bad entry in mm/memory.c. | |
224 | */ | |
225 | void pgd_clear_bad(pgd_t *); | |
226 | void pud_clear_bad(pud_t *); | |
227 | void pmd_clear_bad(pmd_t *); | |
228 | ||
229 | static inline int pgd_none_or_clear_bad(pgd_t *pgd) | |
230 | { | |
231 | if (pgd_none(*pgd)) | |
232 | return 1; | |
233 | if (unlikely(pgd_bad(*pgd))) { | |
234 | pgd_clear_bad(pgd); | |
235 | return 1; | |
236 | } | |
237 | return 0; | |
238 | } | |
239 | ||
240 | static inline int pud_none_or_clear_bad(pud_t *pud) | |
241 | { | |
242 | if (pud_none(*pud)) | |
243 | return 1; | |
244 | if (unlikely(pud_bad(*pud))) { | |
245 | pud_clear_bad(pud); | |
246 | return 1; | |
247 | } | |
248 | return 0; | |
249 | } | |
250 | ||
251 | static inline int pmd_none_or_clear_bad(pmd_t *pmd) | |
252 | { | |
253 | if (pmd_none(*pmd)) | |
254 | return 1; | |
255 | if (unlikely(pmd_bad(*pmd))) { | |
256 | pmd_clear_bad(pmd); | |
257 | return 1; | |
258 | } | |
259 | return 0; | |
260 | } | |
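These helpers are meant to be used together with the *_addr_end() macros defined above. A hedged, compressed sketch of one level of such a walk, loosely in the style of the walkers in mm/memory.c, with a hypothetical function name and the descent step elided:

```c
/* Hypothetical walker skeleton; not part of this header. */
static void example_walk_page_range(struct mm_struct *mm,
				    unsigned long addr, unsigned long end)
{
	pgd_t *pgd = pgd_offset(mm, addr);
	unsigned long next;

	do {
		next = pgd_addr_end(addr, end);	/* never crosses a pgd entry */
		if (pgd_none_or_clear_bad(pgd))	/* skip empty, clear bad */
			continue;
		/* ... descend and walk the pud range in [addr, next) ... */
	} while (pgd++, addr = next, addr != end);
}
```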
261 | #endif /* !__ASSEMBLY__ */ | |
262 | ||
263 | #endif /* _ASM_GENERIC_PGTABLE_H */ |