/*
 *  Copyright IBM Corp. 2007, 2011
 *  Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/swapops.h>
#include <linux/sysctl.h>
#include <linux/ksm.h>
#include <linux/mman.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
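
/*
 * TLB flush helpers. The convention used below, as far as the code
 * shows: the low 16 bits of mm->context.attach_count hold the number
 * of CPUs the address space is attached to, and each flusher in
 * flight adds 0x10000 to the upper half. If no CPU besides (possibly)
 * the current one has the mm attached, i.e. (count & 0xffff) <= active,
 * the invalidation can be done CPU-local (ptep_flush_direct) or
 * deferred altogether (ptep_flush_lazy).
 */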
static inline pte_t ptep_flush_direct(struct mm_struct *mm,
				      unsigned long addr, pte_t *ptep)
{
	int active, count;
	pte_t old;

	old = *ptep;
	if (unlikely(pte_val(old) & _PAGE_INVALID))
		return old;
	active = (mm == current->active_mm) ? 1 : 0;
	count = atomic_add_return(0x10000, &mm->context.attach_count);
	if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		__ptep_ipte_local(addr, ptep);
	else
		__ptep_ipte(addr, ptep);
	atomic_sub(0x10000, &mm->context.attach_count);
	return old;
}
static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
				    unsigned long addr, pte_t *ptep)
{
	int active, count;
	pte_t old;

	old = *ptep;
	if (unlikely(pte_val(old) & _PAGE_INVALID))
		return old;
	active = (mm == current->active_mm) ? 1 : 0;
	count = atomic_add_return(0x10000, &mm->context.attach_count);
	if ((count & 0xffff) <= active) {
		/* No other CPU is attached: mark the pte invalid in
		 * memory and defer the TLB flush to the next attach. */
		pte_val(*ptep) |= _PAGE_INVALID;
		mm->context.flush_mm = 1;
	} else
		__ptep_ipte(addr, ptep);
	atomic_sub(0x10000, &mm->context.attach_count);
	return old;
}
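
/*
 * PGSTE handling. With CONFIG_PGSTE each page table carries a shadow
 * array of page status table entries at ptep + PTRS_PER_PTE; these
 * hold guest storage key and usage state for KVM. The PCL bit of a
 * pgste serves as a per-entry lock, taken and released by the
 * compare-and-swap loops below.
 */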
static inline pgste_t pgste_get_lock(pte_t *ptep)
{
	unsigned long new = 0;
#ifdef CONFIG_PGSTE
	unsigned long old;

	asm(
		"	lg	%0,%2\n"
		"0:	lgr	%1,%0\n"
		"	nihh	%0,0xff7f\n"	/* clear PCL bit in old */
		"	oihh	%1,0x0080\n"	/* set PCL bit in new */
		"	csg	%0,%1,%2\n"
		"	jl	0b\n"
		: "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE])
		: "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory");
#endif
	return __pgste(new);
}
static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	asm(
		"	nihh	%1,0xff7f\n"	/* clear PCL bit */
		"	stg	%1,%0\n"
		: "=Q" (ptep[PTRS_PER_PTE])
		: "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE])
		: "cc", "memory");
#endif
}
static inline pgste_t pgste_get(pte_t *ptep)
{
	unsigned long pgste = 0;
#ifdef CONFIG_PGSTE
	pgste = *(unsigned long *)(ptep + PTRS_PER_PTE);
#endif
	return __pgste(pgste);
}
static inline void pgste_set(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	*(pgste_t *)(ptep + PTRS_PER_PTE) = pgste;
#endif
}
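
/*
 * Storage key synchronization. While a pte is valid the hardware
 * storage key holds the access key, the fetch protection bit and the
 * referenced/changed bits. When a pte is invalidated the key content
 * is salvaged into the pgste (pgste_update_all); when a pte becomes
 * valid again the key is rebuilt from the pgste (pgste_set_key). The
 * guest view of R/C lives in the GR/GC pgste bits, 48 bits above the
 * _PAGE_REFERENCED/_PAGE_CHANGED key bits; ACC and FP sit 56 bits up.
 */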
static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste,
				       struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
	unsigned long address, bits, skey;

	if (!mm_use_skey(mm) || pte_val(pte) & _PAGE_INVALID)
		return pgste;
	address = pte_val(pte) & PAGE_MASK;
	skey = (unsigned long) page_get_storage_key(address);
	bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
	/* Transfer page changed & referenced bit to guest bits in pgste */
	pgste_val(pgste) |= bits << 48;		/* GR bit & GC bit */
	/* Copy page access key and fetch protection bit to pgste */
	pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
	pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
#endif
	return pgste;
}
static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
				 struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
	unsigned long address;
	unsigned long nkey;

	if (!mm_use_skey(mm) || pte_val(entry) & _PAGE_INVALID)
		return;
	VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID));
	address = pte_val(entry) & PAGE_MASK;
	/*
	 * Set page access key and fetch protection bit from pgste.
	 * The guest C/R information is still in the PGSTE, set real
	 * key C/R to 0.
	 */
	nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
	nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
	page_set_storage_key(address, nkey, 0);
#endif
}
static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
{
#ifdef CONFIG_PGSTE
	if ((pte_val(entry) & _PAGE_PRESENT) &&
	    (pte_val(entry) & _PAGE_WRITE) &&
	    !(pte_val(entry) & _PAGE_INVALID)) {
		if (!MACHINE_HAS_ESOP) {
			/*
			 * Without enhanced suppression-on-protection force
			 * the dirty bit on for all writable ptes.
			 */
			pte_val(entry) |= _PAGE_DIRTY;
			pte_val(entry) &= ~_PAGE_PROTECT;
		}
		if (!(pte_val(entry) & _PAGE_PROTECT))
			/* This pte allows write access, set user-dirty */
			pgste_val(pgste) |= PGSTE_UC_BIT;
	}
#endif
	*ptep = entry;
	return pgste;
}
static inline pgste_t pgste_ipte_notify(struct mm_struct *mm,
					unsigned long addr,
					pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	if (pgste_val(pgste) & PGSTE_IN_BIT) {
		pgste_val(pgste) &= ~PGSTE_IN_BIT;
		ptep_notify(mm, addr, ptep);
	}
#endif
	return pgste;
}
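
/*
 * pte exchange protocol: ptep_xchg_start takes the pgste lock and
 * fires any pending invalidation notification, the flush variant of
 * choice invalidates the old pte, and ptep_xchg_commit moves the
 * storage key between pte and pgste as needed, stores the new pte and
 * drops the lock.
 */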
static inline pgste_t ptep_xchg_start(struct mm_struct *mm,
				      unsigned long addr, pte_t *ptep)
{
	pgste_t pgste = __pgste(0);

	if (mm_has_pgste(mm)) {
		pgste = pgste_get_lock(ptep);
		pgste = pgste_ipte_notify(mm, addr, ptep, pgste);
	}
	return pgste;
}
static inline void ptep_xchg_commit(struct mm_struct *mm,
				    unsigned long addr, pte_t *ptep,
				    pgste_t pgste, pte_t old, pte_t new)
{
	if (mm_has_pgste(mm)) {
		if (pte_val(old) & _PAGE_INVALID)
			pgste_set_key(ptep, pgste, new, mm);
		if (pte_val(new) & _PAGE_INVALID) {
			pgste = pgste_update_all(old, pgste, mm);
			if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) ==
			    _PGSTE_GPS_USAGE_UNUSED)
				pte_val(old) |= _PAGE_UNUSED;
		}
		pgste = pgste_set_pte(ptep, pgste, new);
		pgste_set_unlock(ptep, pgste);
	} else {
		*ptep = new;
	}
}
pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr,
		       pte_t *ptep, pte_t new)
{
	pgste_t pgste;
	pte_t old;

	pgste = ptep_xchg_start(mm, addr, ptep);
	old = ptep_flush_direct(mm, addr, ptep);
	ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
	return old;
}
EXPORT_SYMBOL(ptep_xchg_direct);
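
/*
 * A minimal usage sketch (hypothetical caller, not part of this file):
 * with the page table lock held and ptep pointing at a valid entry,
 *
 *	pte_t old = ptep_xchg_direct(mm, addr, ptep, __pte(_PAGE_INVALID));
 *
 * atomically invalidates the entry, flushes the TLB and returns the
 * previous pte, with guest storage keys kept coherent throughout.
 */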
pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t new)
{
	pgste_t pgste;
	pte_t old;

	pgste = ptep_xchg_start(mm, addr, ptep);
	old = ptep_flush_lazy(mm, addr, ptep);
	ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
	return old;
}
EXPORT_SYMBOL(ptep_xchg_lazy);
pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr,
			     pte_t *ptep)
{
	pgste_t pgste;
	pte_t old;

	pgste = ptep_xchg_start(mm, addr, ptep);
	old = ptep_flush_lazy(mm, addr, ptep);
	if (mm_has_pgste(mm)) {
		pgste = pgste_update_all(old, pgste, mm);
		pgste_set(ptep, pgste);
	}
	return old;
}
EXPORT_SYMBOL(ptep_modify_prot_start);
void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
			     pte_t *ptep, pte_t pte)
{
	pgste_t pgste;

	if (mm_has_pgste(mm)) {
		pgste = pgste_get(ptep);
		pgste_set_key(ptep, pgste, pte, mm);
		pgste = pgste_set_pte(ptep, pgste, pte);
		pgste_set_unlock(ptep, pgste);
	} else {
		*ptep = pte;
	}
}
EXPORT_SYMBOL(ptep_modify_prot_commit);
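
/*
 * Between ptep_modify_prot_start and ptep_modify_prot_commit the pgste
 * lock stays held: the locked pgste is stashed in the shadow slot by
 * pgste_set and picked up again by pgste_get in the commit path, so
 * the caller may compute the new pte in between without losing guest
 * key updates.
 */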
static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
				      unsigned long addr, pmd_t *pmdp)
{
	int active, count;
	pmd_t old;

	old = *pmdp;
	if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
		return old;
	if (!MACHINE_HAS_IDTE) {
		__pmdp_csp(pmdp);
		return old;
	}
	active = (mm == current->active_mm) ? 1 : 0;
	count = atomic_add_return(0x10000, &mm->context.attach_count);
	if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		__pmdp_idte_local(addr, pmdp);
	else
		__pmdp_idte(addr, pmdp);
	atomic_sub(0x10000, &mm->context.attach_count);
	return old;
}
static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
				    unsigned long addr, pmd_t *pmdp)
{
	int active, count;
	pmd_t old;

	old = *pmdp;
	if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
		return old;
	active = (mm == current->active_mm) ? 1 : 0;
	count = atomic_add_return(0x10000, &mm->context.attach_count);
	if ((count & 0xffff) <= active) {
		pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
		mm->context.flush_mm = 1;
	} else if (MACHINE_HAS_IDTE)
		__pmdp_idte(addr, pmdp);
	else
		__pmdp_csp(pmdp);
	atomic_sub(0x10000, &mm->context.attach_count);
	return old;
}
pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr,
		       pmd_t *pmdp, pmd_t new)
{
	pmd_t old;

	old = pmdp_flush_direct(mm, addr, pmdp);
	*pmdp = new;
	return old;
}
EXPORT_SYMBOL(pmdp_xchg_direct);
pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr,
		     pmd_t *pmdp, pmd_t new)
{
	pmd_t old;

	old = pmdp_flush_lazy(mm, addr, pmdp);
	*pmdp = new;
	return old;
}
EXPORT_SYMBOL(pmdp_xchg_lazy);
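
/*
 * Transparent huge page support. A pmd mapping a huge page keeps the
 * preallocated page table it displaced on a deposit list in case the
 * huge mapping has to be split again. s390 threads this list through
 * the deposited page tables themselves: the first two words of each
 * deposited table are reused as a list_head.
 */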
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
				pgtable_t pgtable)
{
	struct list_head *lh = (struct list_head *) pgtable;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	if (!pmd_huge_pte(mm, pmdp))
		INIT_LIST_HEAD(lh);
	else
		list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
	pmd_huge_pte(mm, pmdp) = pgtable;
}
pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
	struct list_head *lh;
	pgtable_t pgtable;
	pte_t *ptep;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	pgtable = pmd_huge_pte(mm, pmdp);
	lh = (struct list_head *) pgtable;
	if (list_empty(lh))
		pmd_huge_pte(mm, pmdp) = NULL;
	else {
		pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
		list_del(lh);
	}
	/* Mark the first two ptes invalid again before reuse */
	ptep = (pte_t *) pgtable;
	pte_val(*ptep) = _PAGE_INVALID;
	ptep++;
	pte_val(*ptep) = _PAGE_INVALID;
	return pgtable;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
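
/*
 * The remaining helpers are only built with CONFIG_PGSTE and are only
 * reached for address spaces that can be attached to a KVM guest
 * (mm_has_pgste); they manipulate guest storage keys and the guest
 * usage state kept in the pgste.
 */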
#ifdef CONFIG_PGSTE
void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t entry)
{
	pgste_t pgste;

	/* the mm_has_pgste() check is done in set_pte_at() */
	pgste = pgste_get_lock(ptep);
	pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
	pgste_set_key(ptep, pgste, entry, mm);
	pgste = pgste_set_pte(ptep, pgste, entry);
	pgste_set_unlock(ptep, pgste);
}
void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	pgste_t pgste;

	pgste = pgste_get_lock(ptep);
	pgste_val(pgste) |= PGSTE_IN_BIT;
	pgste_set_unlock(ptep, pgste);
}
static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
{
	if (!non_swap_entry(entry))
		dec_mm_counter(mm, MM_SWAPENTS);
	else if (is_migration_entry(entry)) {
		struct page *page = migration_entry_to_page(entry);

		dec_mm_counter(mm, mm_counter(page));
	}
	free_swap_and_cache(entry);
}
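
/*
 * The _PGSTE_GPS usage bits track the guest's view of a page as set by
 * collaborative memory management (presumably via the ESSA
 * instruction): a page the guest marked unused, or one that is
 * logically zero, can simply be discarded here instead of being kept
 * in swap.
 */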
void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, int reset)
{
	unsigned long pgstev;
	pgste_t pgste;
	pte_t pte;

	/* Zap unused and logically-zero pages */
	pgste = pgste_get_lock(ptep);
	pgstev = pgste_val(pgste);
	pte = *ptep;
	if (pte_swap(pte) &&
	    ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED ||
	     (pgstev & _PGSTE_GPS_ZERO))) {
		ptep_zap_swap_entry(mm, pte_to_swp_entry(pte));
		pte_clear(mm, addr, ptep);
	}
	if (reset)
		pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;
	pgste_set_unlock(ptep, pgste);
}
void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	unsigned long ptev;
	pgste_t pgste;

	/* Clear storage key */
	pgste = pgste_get_lock(ptep);
	pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT |
			      PGSTE_GR_BIT | PGSTE_GC_BIT);
	ptev = pte_val(*ptep);
	if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))
		page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1);
	pgste_set_unlock(ptep, pgste);
}
/*
 * Test and reset if a guest page is dirty
 */
bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
{
	spinlock_t *ptl;
	pgste_t pgste;
	pte_t *ptep;
	pte_t pte;
	bool dirty;

	ptep = get_locked_pte(mm, addr, &ptl);
	if (unlikely(!ptep))
		return false;

	pgste = pgste_get_lock(ptep);
	dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
	pgste_val(pgste) &= ~PGSTE_UC_BIT;
	pte = *ptep;
	if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
		pgste = pgste_ipte_notify(mm, addr, ptep, pgste);
		__ptep_ipte(addr, ptep);
		if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
			pte_val(pte) |= _PAGE_PROTECT;
		else
			pte_val(pte) |= _PAGE_INVALID;
		*ptep = pte;
	}
	pgste_set_unlock(ptep, pgste);

	spin_unlock(ptl);
	return dirty;
}
EXPORT_SYMBOL_GPL(test_and_clear_guest_dirty);
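
/*
 * Guest storage key access. While the pte is valid the architected
 * key is authoritative for ACC and FP; the guest's referenced and
 * changed bits are folded in from the pgste so callers always see the
 * guest's logical view. "nq", if I read page_set_storage_key()
 * correctly, selects the non-quiescing form of set-storage-key where
 * available.
 */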
int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
			  unsigned char key, bool nq)
{
	unsigned long keyul;
	spinlock_t *ptl;
	pgste_t old, new;
	pte_t *ptep;

	down_read(&mm->mmap_sem);
	ptep = get_locked_pte(mm, addr, &ptl);
	if (unlikely(!ptep)) {
		up_read(&mm->mmap_sem);
		return -EFAULT;
	}

	new = old = pgste_get_lock(ptep);
	pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
			    PGSTE_ACC_BITS | PGSTE_FP_BIT);
	keyul = (unsigned long) key;
	pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
	pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
		unsigned long address, bits, skey;

		address = pte_val(*ptep) & PAGE_MASK;
		skey = (unsigned long) page_get_storage_key(address);
		bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
		skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
		/* Set storage key ACC and FP */
		page_set_storage_key(address, skey, !nq);
		/* Merge host changed & referenced into pgste */
		pgste_val(new) |= bits << 52;
	}
	/* changing the guest storage key is considered a change of the page */
	if ((pgste_val(new) ^ pgste_val(old)) &
	    (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
		pgste_val(new) |= PGSTE_UC_BIT;

	pgste_set_unlock(ptep, new);
	pte_unmap_unlock(ptep, ptl);
	up_read(&mm->mmap_sem);
	return 0;
}
EXPORT_SYMBOL(set_guest_storage_key);
unsigned char get_guest_storage_key(struct mm_struct *mm, unsigned long addr)
{
	unsigned char key;
	spinlock_t *ptl;
	pgste_t pgste;
	pte_t *ptep;

	down_read(&mm->mmap_sem);
	ptep = get_locked_pte(mm, addr, &ptl);
	if (unlikely(!ptep)) {
		up_read(&mm->mmap_sem);
		return 0;
	}
	pgste = pgste_get_lock(ptep);

	if (pte_val(*ptep) & _PAGE_INVALID) {
		key  = (pgste_val(pgste) & PGSTE_ACC_BITS) >> 56;
		key |= (pgste_val(pgste) & PGSTE_FP_BIT) >> 56;
		key |= (pgste_val(pgste) & PGSTE_GR_BIT) >> 48;
		key |= (pgste_val(pgste) & PGSTE_GC_BIT) >> 48;
	} else
		key = page_get_storage_key(pte_val(*ptep) & PAGE_MASK);

	/* Reflect guest's logical view, not physical */
	if (pgste_val(pgste) & PGSTE_GR_BIT)
		key |= _PAGE_REFERENCED;
	if (pgste_val(pgste) & PGSTE_GC_BIT)
		key |= _PAGE_CHANGED;

	pgste_set_unlock(ptep, pgste);
	pte_unmap_unlock(ptep, ptl);
	up_read(&mm->mmap_sem);
	return key;
}
EXPORT_SYMBOL(get_guest_storage_key);
#endif /* CONFIG_PGSTE */