/*
 * Copyright IBM Corp. 2007, 2011
 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/quicklist.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/swapops.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>

#ifndef CONFIG_64BIT
#define ALLOC_ORDER	1
#define FRAG_MASK	0x0f
#else
#define ALLOC_ORDER	2
#define FRAG_MASK	0x03
#endif

unsigned long *crst_table_alloc(struct mm_struct *mm)
{
	struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);

	if (!page)
		return NULL;
	return (unsigned long *) page_to_phys(page);
}

void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
	free_pages((unsigned long) table, ALLOC_ORDER);
}

static void __crst_table_upgrade(void *arg)
{
	struct mm_struct *mm = arg;

	if (current->active_mm == mm) {
		clear_user_asce();
		set_user_asce(mm);
	}
	__tlb_flush_local();
}

int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
{
	unsigned long *table, *pgd;
	unsigned long entry;
	int flush;

	BUG_ON(limit > (1UL << 53));
	flush = 0;
repeat:
	table = crst_table_alloc(mm);
	if (!table)
		return -ENOMEM;
	spin_lock_bh(&mm->page_table_lock);
	if (mm->context.asce_limit < limit) {
		pgd = (unsigned long *) mm->pgd;
		if (mm->context.asce_limit <= (1UL << 31)) {
			entry = _REGION3_ENTRY_EMPTY;
			mm->context.asce_limit = 1UL << 42;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION3;
		} else {
			entry = _REGION2_ENTRY_EMPTY;
			mm->context.asce_limit = 1UL << 53;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION2;
		}
		crst_table_init(table, entry);
		pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
		mm->pgd = (pgd_t *) table;
		mm->task_size = mm->context.asce_limit;
		table = NULL;
		flush = 1;
	}
	spin_unlock_bh(&mm->page_table_lock);
	if (table)
		crst_table_free(mm, table);
	if (mm->context.asce_limit < limit)
		goto repeat;
	if (flush)
		on_each_cpu(__crst_table_upgrade, mm, 0);
	return 0;
}

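/*
 * For illustration only (a sketch, not code from this file): a caller along
 * the lines of the s390 mmap path can grow the address space before placing
 * a mapping above the current limit. The surrounding function name and error
 * handling below are assumptions, not taken from this file:
 *
 *	// grow to the maximum 8 PB address space before mapping high addresses
 *	if (addr >= mm->context.asce_limit)
 *		rc = crst_table_upgrade(mm, 1UL << 53);
 */
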
void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
{
	pgd_t *pgd;

	if (current->active_mm == mm) {
		clear_user_asce();
		__tlb_flush_mm(mm);
	}
	while (mm->context.asce_limit > limit) {
		pgd = mm->pgd;
		switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
		case _REGION_ENTRY_TYPE_R2:
			mm->context.asce_limit = 1UL << 42;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION3;
			break;
		case _REGION_ENTRY_TYPE_R3:
			mm->context.asce_limit = 1UL << 31;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_SEGMENT;
			break;
		default:
			BUG();
		}
		mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
		mm->task_size = mm->context.asce_limit;
		crst_table_free(mm, (unsigned long *) pgd);
	}
	if (current->active_mm == mm)
		set_user_asce(mm);
}

#ifdef CONFIG_PGSTE

/**
 * gmap_alloc - allocate a guest address space
 * @mm: pointer to the parent mm_struct
 *
 * Returns a guest address space structure.
 */
struct gmap *gmap_alloc(struct mm_struct *mm)
{
	struct gmap *gmap;
	struct page *page;
	unsigned long *table;

	gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL);
	if (!gmap)
		goto out;
	INIT_LIST_HEAD(&gmap->crst_list);
	gmap->mm = mm;
	page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
	if (!page)
		goto out_free;
	list_add(&page->lru, &gmap->crst_list);
	table = (unsigned long *) page_to_phys(page);
	crst_table_init(table, _REGION1_ENTRY_EMPTY);
	gmap->table = table;
	gmap->asce = _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH |
		     _ASCE_USER_BITS | __pa(table);
	list_add(&gmap->list, &mm->context.gmap_list);
	return gmap;

out_free:
	kfree(gmap);
out:
	return NULL;
}
EXPORT_SYMBOL_GPL(gmap_alloc);

static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table)
{
	struct gmap_pgtable *mp;
	struct gmap_rmap *rmap;
	struct page *page;

	if (*table & _SEGMENT_ENTRY_INVALID)
		return 0;
	page = pfn_to_page(*table >> PAGE_SHIFT);
	mp = (struct gmap_pgtable *) page->index;
	list_for_each_entry(rmap, &mp->mapper, list) {
		if (rmap->entry != table)
			continue;
		list_del(&rmap->list);
		kfree(rmap);
		break;
	}
	*table = mp->vmaddr | _SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_PROTECT;
	return 1;
}

static void gmap_flush_tlb(struct gmap *gmap)
{
	if (MACHINE_HAS_IDTE)
		__tlb_flush_asce(gmap->mm, (unsigned long) gmap->table |
				 _ASCE_TYPE_REGION1);
	else
		__tlb_flush_global();
}

/**
 * gmap_free - free a guest address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_free(struct gmap *gmap)
{
	struct page *page, *next;
	unsigned long *table;
	int i;

	/* Flush tlb. */
	if (MACHINE_HAS_IDTE)
		__tlb_flush_asce(gmap->mm, (unsigned long) gmap->table |
				 _ASCE_TYPE_REGION1);
	else
		__tlb_flush_global();

	/* Free all segment & region tables. */
	down_read(&gmap->mm->mmap_sem);
	spin_lock(&gmap->mm->page_table_lock);
	list_for_each_entry_safe(page, next, &gmap->crst_list, lru) {
		table = (unsigned long *) page_to_phys(page);
		if ((*table & _REGION_ENTRY_TYPE_MASK) == 0)
			/* Remove gmap rmap structures for segment table. */
			for (i = 0; i < PTRS_PER_PMD; i++, table++)
				gmap_unlink_segment(gmap, table);
		__free_pages(page, ALLOC_ORDER);
	}
	spin_unlock(&gmap->mm->page_table_lock);
	up_read(&gmap->mm->mmap_sem);
	list_del(&gmap->list);
	kfree(gmap);
}
EXPORT_SYMBOL_GPL(gmap_free);

/**
 * gmap_enable - switch primary space to the guest address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_enable(struct gmap *gmap)
{
	S390_lowcore.gmap = (unsigned long) gmap;
}
EXPORT_SYMBOL_GPL(gmap_enable);

/**
 * gmap_disable - switch back to the standard primary address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_disable(struct gmap *gmap)
{
	S390_lowcore.gmap = 0UL;
}
EXPORT_SYMBOL_GPL(gmap_disable);

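/*
 * For illustration only (a sketch, not code from this file): a KVM-style
 * user of the gmap interface typically allocates one guest address space
 * per virtual machine, switches to it around guest execution and frees it
 * on shutdown. The variable names below are assumptions.
 *
 *	struct gmap *g = gmap_alloc(current->mm);
 *	if (!g)
 *		return -ENOMEM;
 *	gmap_enable(g);		// make it the current guest address space
 *	... run the guest, handle guest faults via gmap_fault() ...
 *	gmap_disable(g);
 *	gmap_free(g);
 */
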
/*
 * gmap_alloc_table is assumed to be called with mmap_sem held
 */
static int gmap_alloc_table(struct gmap *gmap,
			    unsigned long *table, unsigned long init)
	__releases(&gmap->mm->page_table_lock)
	__acquires(&gmap->mm->page_table_lock)
{
	struct page *page;
	unsigned long *new;

	/* since we don't free the gmap table until gmap_free we can unlock */
	spin_unlock(&gmap->mm->page_table_lock);
	page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
	spin_lock(&gmap->mm->page_table_lock);
	if (!page)
		return -ENOMEM;
	new = (unsigned long *) page_to_phys(page);
	crst_table_init(new, init);
	if (*table & _REGION_ENTRY_INVALID) {
		list_add(&page->lru, &gmap->crst_list);
		*table = (unsigned long) new | _REGION_ENTRY_LENGTH |
			(*table & _REGION_ENTRY_TYPE_MASK);
	} else
		__free_pages(page, ALLOC_ORDER);
	return 0;
}

/**
 * gmap_unmap_segment - unmap segment from the guest address space
 * @gmap: pointer to the guest address space structure
 * @to: address in the guest address space
 * @len: length of the memory area to unmap
 *
 * Returns 0 if the unmap succeeded, -EINVAL if not.
 */
int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
{
	unsigned long *table;
	unsigned long off;
	int flush;

	if ((to | len) & (PMD_SIZE - 1))
		return -EINVAL;
	if (len == 0 || to + len < to)
		return -EINVAL;

	flush = 0;
	down_read(&gmap->mm->mmap_sem);
	spin_lock(&gmap->mm->page_table_lock);
	for (off = 0; off < len; off += PMD_SIZE) {
		/* Walk the guest addr space page table */
		table = gmap->table + (((to + off) >> 53) & 0x7ff);
		if (*table & _REGION_ENTRY_INVALID)
			goto out;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 42) & 0x7ff);
		if (*table & _REGION_ENTRY_INVALID)
			goto out;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 31) & 0x7ff);
		if (*table & _REGION_ENTRY_INVALID)
			goto out;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 20) & 0x7ff);

		/* Clear segment table entry in guest address space. */
		flush |= gmap_unlink_segment(gmap, table);
		*table = _SEGMENT_ENTRY_INVALID;
	}
out:
	spin_unlock(&gmap->mm->page_table_lock);
	up_read(&gmap->mm->mmap_sem);
	if (flush)
		gmap_flush_tlb(gmap);
	return 0;
}
EXPORT_SYMBOL_GPL(gmap_unmap_segment);

/**
 * gmap_map_segment - map a segment to the guest address space
 * @gmap: pointer to the guest address space structure
 * @from: source address in the parent address space
 * @to: target address in the guest address space
 * @len: length of the memory area to map
 *
 * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not.
 */
int gmap_map_segment(struct gmap *gmap, unsigned long from,
		     unsigned long to, unsigned long len)
{
	unsigned long *table;
	unsigned long off;
	int flush;

	if ((from | to | len) & (PMD_SIZE - 1))
		return -EINVAL;
	if (len == 0 || from + len > TASK_MAX_SIZE ||
	    from + len < from || to + len < to)
		return -EINVAL;

	flush = 0;
	down_read(&gmap->mm->mmap_sem);
	spin_lock(&gmap->mm->page_table_lock);
	for (off = 0; off < len; off += PMD_SIZE) {
		/* Walk the gmap address space page table */
		table = gmap->table + (((to + off) >> 53) & 0x7ff);
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY))
			goto out_unmap;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 42) & 0x7ff);
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY))
			goto out_unmap;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 31) & 0x7ff);
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY))
			goto out_unmap;
		table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 20) & 0x7ff);

		/* Store 'from' address in an invalid segment table entry. */
		flush |= gmap_unlink_segment(gmap, table);
		*table = (from + off) | (_SEGMENT_ENTRY_INVALID |
					 _SEGMENT_ENTRY_PROTECT);
	}
	spin_unlock(&gmap->mm->page_table_lock);
	up_read(&gmap->mm->mmap_sem);
	if (flush)
		gmap_flush_tlb(gmap);
	return 0;

out_unmap:
	spin_unlock(&gmap->mm->page_table_lock);
	up_read(&gmap->mm->mmap_sem);
	gmap_unmap_segment(gmap, to, len);
	return -ENOMEM;
}
EXPORT_SYMBOL_GPL(gmap_map_segment);

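/*
 * For illustration only (a sketch, assumed caller): backing the first 16 MB
 * of guest real memory with a 1 MB aligned area of the parent process.
 * Both addresses and the length must be PMD_SIZE (1 MB) aligned.
 *
 *	// userspace backing at 'uaddr', guest real address 0, length 16 MB
 *	rc = gmap_map_segment(gmap, uaddr, 0, 16UL << 20);
 *	...
 *	// tear the same range down again
 *	rc = gmap_unmap_segment(gmap, 0, 16UL << 20);
 */
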
static unsigned long *gmap_table_walk(unsigned long address, struct gmap *gmap)
{
	unsigned long *table;

	table = gmap->table + ((address >> 53) & 0x7ff);
	if (unlikely(*table & _REGION_ENTRY_INVALID))
		return ERR_PTR(-EFAULT);
	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
	table = table + ((address >> 42) & 0x7ff);
	if (unlikely(*table & _REGION_ENTRY_INVALID))
		return ERR_PTR(-EFAULT);
	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
	table = table + ((address >> 31) & 0x7ff);
	if (unlikely(*table & _REGION_ENTRY_INVALID))
		return ERR_PTR(-EFAULT);
	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
	table = table + ((address >> 20) & 0x7ff);
	return table;
}

/**
 * __gmap_translate - translate a guest address to a user space address
 * @address: guest address
 * @gmap: pointer to guest mapping meta data structure
 *
 * Returns user space address which corresponds to the guest address or
 * -EFAULT if no such mapping exists.
 * This function does not establish potentially missing page table entries.
 * The mmap_sem of the mm that belongs to the address space must be held
 * when this function gets called.
 */
unsigned long __gmap_translate(unsigned long address, struct gmap *gmap)
{
	unsigned long *segment_ptr, vmaddr, segment;
	struct gmap_pgtable *mp;
	struct page *page;

	current->thread.gmap_addr = address;
	segment_ptr = gmap_table_walk(address, gmap);
	if (IS_ERR(segment_ptr))
		return PTR_ERR(segment_ptr);
	/* Convert the gmap address to an mm address. */
	segment = *segment_ptr;
	if (!(segment & _SEGMENT_ENTRY_INVALID)) {
		page = pfn_to_page(segment >> PAGE_SHIFT);
		mp = (struct gmap_pgtable *) page->index;
		return mp->vmaddr | (address & ~PMD_MASK);
	} else if (segment & _SEGMENT_ENTRY_PROTECT) {
		vmaddr = segment & _SEGMENT_ENTRY_ORIGIN;
		return vmaddr | (address & ~PMD_MASK);
	}
	return -EFAULT;
}
EXPORT_SYMBOL_GPL(__gmap_translate);

/**
 * gmap_translate - translate a guest address to a user space address
 * @address: guest address
 * @gmap: pointer to guest mapping meta data structure
 *
 * Returns user space address which corresponds to the guest address or
 * -EFAULT if no such mapping exists.
 * This function does not establish potentially missing page table entries.
 */
unsigned long gmap_translate(unsigned long address, struct gmap *gmap)
{
	unsigned long rc;

	down_read(&gmap->mm->mmap_sem);
	rc = __gmap_translate(address, gmap);
	up_read(&gmap->mm->mmap_sem);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_translate);

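/*
 * For illustration only (sketch): resolving a guest real address to the
 * corresponding user space address of the parent process. gmap_translate()
 * takes mmap_sem itself; __gmap_translate() is for callers that already
 * hold it. The error handling shown is an assumption of a typical caller.
 *
 *	unsigned long uaddr = gmap_translate(gaddr, gmap);
 *	if (IS_ERR_VALUE(uaddr))
 *		return uaddr;	// -EFAULT: no mapping for this guest address
 *	... access the backing page through 'uaddr' ...
 */
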
static int gmap_connect_pgtable(unsigned long address, unsigned long segment,
				unsigned long *segment_ptr, struct gmap *gmap)
{
	unsigned long vmaddr;
	struct vm_area_struct *vma;
	struct gmap_pgtable *mp;
	struct gmap_rmap *rmap;
	struct mm_struct *mm;
	struct page *page;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;

	mm = gmap->mm;
	vmaddr = segment & _SEGMENT_ENTRY_ORIGIN;
	vma = find_vma(mm, vmaddr);
	if (!vma || vma->vm_start > vmaddr)
		return -EFAULT;
	/* Walk the parent mm page table */
	pgd = pgd_offset(mm, vmaddr);
	pud = pud_alloc(mm, pgd, vmaddr);
	if (!pud)
		return -ENOMEM;
	pmd = pmd_alloc(mm, pud, vmaddr);
	if (!pmd)
		return -ENOMEM;
	if (!pmd_present(*pmd) &&
	    __pte_alloc(mm, vma, pmd, vmaddr))
		return -ENOMEM;
	/* large pmds cannot yet be handled */
	if (pmd_large(*pmd))
		return -EFAULT;
	/* pmd now points to a valid segment table entry. */
	rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT);
	if (!rmap)
		return -ENOMEM;
	/* Link gmap segment table entry location to page table. */
	page = pmd_page(*pmd);
	mp = (struct gmap_pgtable *) page->index;
	rmap->gmap = gmap;
	rmap->entry = segment_ptr;
	rmap->vmaddr = address & PMD_MASK;
	spin_lock(&mm->page_table_lock);
	if (*segment_ptr == segment) {
		list_add(&rmap->list, &mp->mapper);
		/* Set gmap segment table entry to page table. */
		*segment_ptr = pmd_val(*pmd) & PAGE_MASK;
		rmap = NULL;
	}
	spin_unlock(&mm->page_table_lock);
	kfree(rmap);
	return 0;
}

static void gmap_disconnect_pgtable(struct mm_struct *mm, unsigned long *table)
{
	struct gmap_rmap *rmap, *next;
	struct gmap_pgtable *mp;
	struct page *page;
	int flush;

	flush = 0;
	spin_lock(&mm->page_table_lock);
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	mp = (struct gmap_pgtable *) page->index;
	list_for_each_entry_safe(rmap, next, &mp->mapper, list) {
		*rmap->entry = mp->vmaddr | (_SEGMENT_ENTRY_INVALID |
					     _SEGMENT_ENTRY_PROTECT);
		list_del(&rmap->list);
		kfree(rmap);
		flush = 1;
	}
	spin_unlock(&mm->page_table_lock);
	if (flush)
		__tlb_flush_global();
}

/*
 * this function is assumed to be called with mmap_sem held
 */
unsigned long __gmap_fault(unsigned long address, struct gmap *gmap)
{
	unsigned long *segment_ptr, segment;
	struct gmap_pgtable *mp;
	struct page *page;
	int rc;

	current->thread.gmap_addr = address;
	segment_ptr = gmap_table_walk(address, gmap);
	if (IS_ERR(segment_ptr))
		return -EFAULT;
	/* Convert the gmap address to an mm address. */
	while (1) {
		segment = *segment_ptr;
		if (!(segment & _SEGMENT_ENTRY_INVALID)) {
			/* Page table is present */
			page = pfn_to_page(segment >> PAGE_SHIFT);
			mp = (struct gmap_pgtable *) page->index;
			return mp->vmaddr | (address & ~PMD_MASK);
		}
		if (!(segment & _SEGMENT_ENTRY_PROTECT))
			/* Nothing mapped in the gmap address space. */
			break;
		rc = gmap_connect_pgtable(address, segment, segment_ptr, gmap);
		if (rc)
			return rc;
	}
	return -EFAULT;
}

unsigned long gmap_fault(unsigned long address, struct gmap *gmap)
{
	unsigned long rc;

	down_read(&gmap->mm->mmap_sem);
	rc = __gmap_fault(address, gmap);
	up_read(&gmap->mm->mmap_sem);

	return rc;
}
EXPORT_SYMBOL_GPL(gmap_fault);

static void gmap_zap_swap_entry(swp_entry_t entry, struct mm_struct *mm)
{
	if (!non_swap_entry(entry))
		dec_mm_counter(mm, MM_SWAPENTS);
	else if (is_migration_entry(entry)) {
		struct page *page = migration_entry_to_page(entry);

		if (PageAnon(page))
			dec_mm_counter(mm, MM_ANONPAGES);
		else
			dec_mm_counter(mm, MM_FILEPAGES);
	}
	free_swap_and_cache(entry);
}

/*
 * The mm->mmap_sem lock must be held
 */
static void gmap_zap_unused(struct mm_struct *mm, unsigned long address)
{
	unsigned long ptev, pgstev;
	spinlock_t *ptl;
	pgste_t pgste;
	pte_t *ptep, pte;

	ptep = get_locked_pte(mm, address, &ptl);
	if (unlikely(!ptep))
		return;
	pte = *ptep;
	if (!pte_swap(pte))
		goto out_pte;
	/* Zap unused and logically-zero pages */
	pgste = pgste_get_lock(ptep);
	pgstev = pgste_val(pgste);
	ptev = pte_val(pte);
	if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) ||
	    ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID))) {
		gmap_zap_swap_entry(pte_to_swp_entry(pte), mm);
		pte_clear(mm, address, ptep);
	}
	pgste_set_unlock(ptep, pgste);
out_pte:
	pte_unmap_unlock(*ptep, ptl);
}

/*
 * this function is assumed to be called with mmap_sem held
 */
void __gmap_zap(unsigned long address, struct gmap *gmap)
{
	unsigned long *table, *segment_ptr;
	unsigned long segment, pgstev, ptev;
	struct gmap_pgtable *mp;
	struct page *page;

	segment_ptr = gmap_table_walk(address, gmap);
	if (IS_ERR(segment_ptr))
		return;
	segment = *segment_ptr;
	if (segment & _SEGMENT_ENTRY_INVALID)
		return;
	page = pfn_to_page(segment >> PAGE_SHIFT);
	mp = (struct gmap_pgtable *) page->index;
	address = mp->vmaddr | (address & ~PMD_MASK);
	/* Page table is present */
	table = (unsigned long *)(segment & _SEGMENT_ENTRY_ORIGIN);
	table = table + ((address >> 12) & 0xff);
	pgstev = table[PTRS_PER_PTE];
	ptev = table[0];
	/* quick check, checked again with locks held */
	if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) ||
	    ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID)))
		gmap_zap_unused(gmap->mm, address);
}
EXPORT_SYMBOL_GPL(__gmap_zap);

void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap)
{
	unsigned long *table, address, size;
	struct vm_area_struct *vma;
	struct gmap_pgtable *mp;
	struct page *page;

	down_read(&gmap->mm->mmap_sem);
	address = from;
	while (address < to) {
		/* Walk the gmap address space page table */
		table = gmap->table + ((address >> 53) & 0x7ff);
		if (unlikely(*table & _REGION_ENTRY_INVALID)) {
			address = (address + PMD_SIZE) & PMD_MASK;
			continue;
		}
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + ((address >> 42) & 0x7ff);
		if (unlikely(*table & _REGION_ENTRY_INVALID)) {
			address = (address + PMD_SIZE) & PMD_MASK;
			continue;
		}
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + ((address >> 31) & 0x7ff);
		if (unlikely(*table & _REGION_ENTRY_INVALID)) {
			address = (address + PMD_SIZE) & PMD_MASK;
			continue;
		}
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + ((address >> 20) & 0x7ff);
		if (unlikely(*table & _SEGMENT_ENTRY_INVALID)) {
			address = (address + PMD_SIZE) & PMD_MASK;
			continue;
		}
		page = pfn_to_page(*table >> PAGE_SHIFT);
		mp = (struct gmap_pgtable *) page->index;
		vma = find_vma(gmap->mm, mp->vmaddr);
		size = min(to - address, PMD_SIZE - (address & ~PMD_MASK));
		zap_page_range(vma, mp->vmaddr | (address & ~PMD_MASK),
			       size, NULL);
		address = (address + PMD_SIZE) & PMD_MASK;
	}
	up_read(&gmap->mm->mmap_sem);
}
EXPORT_SYMBOL_GPL(gmap_discard);

static LIST_HEAD(gmap_notifier_list);
static DEFINE_SPINLOCK(gmap_notifier_lock);

/**
 * gmap_register_ipte_notifier - register a pte invalidation callback
 * @nb: pointer to the gmap notifier block
 */
void gmap_register_ipte_notifier(struct gmap_notifier *nb)
{
	spin_lock(&gmap_notifier_lock);
	list_add(&nb->list, &gmap_notifier_list);
	spin_unlock(&gmap_notifier_lock);
}
EXPORT_SYMBOL_GPL(gmap_register_ipte_notifier);

/**
 * gmap_unregister_ipte_notifier - remove a pte invalidation callback
 * @nb: pointer to the gmap notifier block
 */
void gmap_unregister_ipte_notifier(struct gmap_notifier *nb)
{
	spin_lock(&gmap_notifier_lock);
	list_del_init(&nb->list);
	spin_unlock(&gmap_notifier_lock);
}
EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier);

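/*
 * For illustration only (sketch): a consumer such as KVM registers one
 * gmap_notifier whose callback is invoked with the guest address of every
 * pte that was marked with gmap_ipte_notify() and is now being invalidated.
 * The callback and variable names below are assumptions.
 *
 *	static void sketch_ipte_cb(struct gmap *gmap, unsigned long gaddr)
 *	{
 *		// kick the vcpu and/or re-arm the notification for 'gaddr'
 *	}
 *
 *	static struct gmap_notifier sketch_notifier = {
 *		.notifier_call = sketch_ipte_cb,
 *	};
 *
 *	gmap_register_ipte_notifier(&sketch_notifier);
 *	...
 *	gmap_unregister_ipte_notifier(&sketch_notifier);
 */
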
/**
 * gmap_ipte_notify - mark a range of ptes for invalidation notification
 * @gmap: pointer to guest mapping meta data structure
 * @start: virtual address in the guest address space
 * @len: size of area
 *
 * Returns 0 if for each page in the given range a gmap mapping exists and
 * the invalidation notification could be set. If the gmap mapping is missing
 * for one or more pages -EFAULT is returned. If no memory could be allocated
 * -ENOMEM is returned. This function establishes missing page table entries.
 */
int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len)
{
	unsigned long addr;
	spinlock_t *ptl;
	pte_t *ptep, entry;
	pgste_t pgste;
	int rc = 0;

	if ((start & ~PAGE_MASK) || (len & ~PAGE_MASK))
		return -EINVAL;
	down_read(&gmap->mm->mmap_sem);
	while (len) {
		/* Convert gmap address and connect the page tables */
		addr = __gmap_fault(start, gmap);
		if (IS_ERR_VALUE(addr)) {
			rc = addr;
			break;
		}
		/* Get the page mapped */
		if (fixup_user_fault(current, gmap->mm, addr, FAULT_FLAG_WRITE)) {
			rc = -EFAULT;
			break;
		}
		/* Walk the process page table, lock and get pte pointer */
		ptep = get_locked_pte(gmap->mm, addr, &ptl);
		if (unlikely(!ptep))
			continue;
		/* Set notification bit in the pgste of the pte */
		entry = *ptep;
		if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) {
			pgste = pgste_get_lock(ptep);
			pgste_val(pgste) |= PGSTE_IN_BIT;
			pgste_set_unlock(ptep, pgste);
			start += PAGE_SIZE;
			len -= PAGE_SIZE;
		}
		spin_unlock(ptl);
	}
	up_read(&gmap->mm->mmap_sem);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_ipte_notify);

/**
 * gmap_do_ipte_notify - call all invalidation callbacks for a specific pte.
 * @mm: pointer to the process mm_struct
 * @pte: pointer to the page table entry
 *
 * This function is assumed to be called with the page table lock held
 * for the pte to notify.
 */
void gmap_do_ipte_notify(struct mm_struct *mm, pte_t *pte)
{
	unsigned long segment_offset;
	struct gmap_notifier *nb;
	struct gmap_pgtable *mp;
	struct gmap_rmap *rmap;
	struct page *page;

	segment_offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
	segment_offset = segment_offset * (4096 / sizeof(pte_t));
	page = pfn_to_page(__pa(pte) >> PAGE_SHIFT);
	mp = (struct gmap_pgtable *) page->index;
	spin_lock(&gmap_notifier_lock);
	list_for_each_entry(rmap, &mp->mapper, list) {
		list_for_each_entry(nb, &gmap_notifier_list, list)
			nb->notifier_call(rmap->gmap,
					  rmap->vmaddr + segment_offset);
	}
	spin_unlock(&gmap_notifier_lock);
}
EXPORT_SYMBOL_GPL(gmap_do_ipte_notify);

static inline int page_table_with_pgste(struct page *page)
{
	return atomic_read(&page->_mapcount) == 0;
}

static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
						    unsigned long vmaddr)
{
	struct page *page;
	unsigned long *table;
	struct gmap_pgtable *mp;

	page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
	if (!page)
		return NULL;
	mp = kmalloc(sizeof(*mp), GFP_KERNEL|__GFP_REPEAT);
	if (!mp) {
		__free_page(page);
		return NULL;
	}
	if (!pgtable_page_ctor(page)) {
		kfree(mp);
		__free_page(page);
		return NULL;
	}
	mp->vmaddr = vmaddr & PMD_MASK;
	INIT_LIST_HEAD(&mp->mapper);
	page->index = (unsigned long) mp;
	atomic_set(&page->_mapcount, 0);
	table = (unsigned long *) page_to_phys(page);
	clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
	clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
	return table;
}

static inline void page_table_free_pgste(unsigned long *table)
{
	struct page *page;
	struct gmap_pgtable *mp;

	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	mp = (struct gmap_pgtable *) page->index;
	BUG_ON(!list_empty(&mp->mapper));
	pgtable_page_dtor(page);
	atomic_set(&page->_mapcount, -1);
	kfree(mp);
	__free_page(page);
}

static inline unsigned long page_table_reset_pte(struct mm_struct *mm, pmd_t *pmd,
			unsigned long addr, unsigned long end, bool init_skey)
{
	pte_t *start_pte, *pte;
	spinlock_t *ptl;
	pgste_t pgste;

	start_pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
	pte = start_pte;
	do {
		pgste = pgste_get_lock(pte);
		pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;
		if (init_skey) {
			unsigned long address;

			pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT |
					      PGSTE_GR_BIT | PGSTE_GC_BIT);

			/* skip invalid and not writable pages */
			if (pte_val(*pte) & _PAGE_INVALID ||
			    !(pte_val(*pte) & _PAGE_WRITE)) {
				pgste_set_unlock(pte, pgste);
				continue;
			}

			address = pte_val(*pte) & PAGE_MASK;
			page_set_storage_key(address, PAGE_DEFAULT_KEY, 1);
		}
		pgste_set_unlock(pte, pgste);
	} while (pte++, addr += PAGE_SIZE, addr != end);
	pte_unmap_unlock(start_pte, ptl);

	return addr;
}

static inline unsigned long page_table_reset_pmd(struct mm_struct *mm, pud_t *pud,
			unsigned long addr, unsigned long end, bool init_skey)
{
	unsigned long next;
	pmd_t *pmd;

	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (pmd_none_or_clear_bad(pmd))
			continue;
		next = page_table_reset_pte(mm, pmd, addr, next, init_skey);
	} while (pmd++, addr = next, addr != end);

	return addr;
}

static inline unsigned long page_table_reset_pud(struct mm_struct *mm, pgd_t *pgd,
			unsigned long addr, unsigned long end, bool init_skey)
{
	unsigned long next;
	pud_t *pud;

	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (pud_none_or_clear_bad(pud))
			continue;
		next = page_table_reset_pmd(mm, pud, addr, next, init_skey);
	} while (pud++, addr = next, addr != end);

	return addr;
}

void page_table_reset_pgste(struct mm_struct *mm, unsigned long start,
			    unsigned long end, bool init_skey)
{
	unsigned long addr, next;
	pgd_t *pgd;

	down_write(&mm->mmap_sem);
	if (init_skey && mm_use_skey(mm))
		goto out_up;
	addr = start;
	pgd = pgd_offset(mm, addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		next = page_table_reset_pud(mm, pgd, addr, next, init_skey);
	} while (pgd++, addr = next, addr != end);
	if (init_skey)
		current->mm->context.use_skey = 1;
out_up:
	up_write(&mm->mmap_sem);
}
EXPORT_SYMBOL(page_table_reset_pgste);

int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
			  unsigned long key, bool nq)
{
	spinlock_t *ptl;
	pgste_t old, new;
	pte_t *ptep;

	down_read(&mm->mmap_sem);
	ptep = get_locked_pte(current->mm, addr, &ptl);
	if (unlikely(!ptep)) {
		up_read(&mm->mmap_sem);
		return -EFAULT;
	}

	new = old = pgste_get_lock(ptep);
	pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
			    PGSTE_ACC_BITS | PGSTE_FP_BIT);
	pgste_val(new) |= (key & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
	pgste_val(new) |= (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
		unsigned long address, bits, skey;

		address = pte_val(*ptep) & PAGE_MASK;
		skey = (unsigned long) page_get_storage_key(address);
		bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
		skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
		/* Set storage key ACC and FP */
		page_set_storage_key(address, skey, !nq);
		/* Merge host changed & referenced into pgste */
		pgste_val(new) |= bits << 52;
	}
	/* changing the guest storage key is considered a change of the page */
	if ((pgste_val(new) ^ pgste_val(old)) &
	    (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
		pgste_val(new) |= PGSTE_UC_BIT;

	pgste_set_unlock(ptep, new);
	pte_unmap_unlock(*ptep, ptl);
	up_read(&mm->mmap_sem);
	return 0;
}
EXPORT_SYMBOL(set_guest_storage_key);

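/*
 * For illustration only (sketch): setting the storage key of one guest page,
 * e.g. from a KVM ioctl path. 'key' carries the access control bits, fetch
 * protection and reference/change bits in the architected format; 'nq'
 * selects the non-quiescing SSKE variant. Names are assumptions.
 *
 *	rc = set_guest_storage_key(current->mm, useraddr, key, false);
 *	if (rc)
 *		return rc;	// -EFAULT if the page table entry is missing
 */
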
#else /* CONFIG_PGSTE */

static inline int page_table_with_pgste(struct page *page)
{
	return 0;
}

static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
						    unsigned long vmaddr)
{
	return NULL;
}

void page_table_reset_pgste(struct mm_struct *mm, unsigned long start,
			    unsigned long end, bool init_skey)
{
}

static inline void page_table_free_pgste(unsigned long *table)
{
}

static inline void gmap_disconnect_pgtable(struct mm_struct *mm,
					   unsigned long *table)
{
}

#endif /* CONFIG_PGSTE */

static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
{
	unsigned int old, new;

	do {
		old = atomic_read(v);
		new = old ^ bits;
	} while (atomic_cmpxchg(v, old, new) != old);
	return new;
}

/*
 * page table entry allocation/free routines.
 */
unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr)
{
	unsigned long *uninitialized_var(table);
	struct page *uninitialized_var(page);
	unsigned int mask, bit;

	if (mm_has_pgste(mm))
		return page_table_alloc_pgste(mm, vmaddr);
	/* Allocate fragments of a 4K page as 1K/2K page table */
	spin_lock_bh(&mm->context.list_lock);
	mask = FRAG_MASK;
	if (!list_empty(&mm->context.pgtable_list)) {
		page = list_first_entry(&mm->context.pgtable_list,
					struct page, lru);
		table = (unsigned long *) page_to_phys(page);
		mask = atomic_read(&page->_mapcount);
		mask = mask | (mask >> 4);
	}
	if ((mask & FRAG_MASK) == FRAG_MASK) {
		spin_unlock_bh(&mm->context.list_lock);
		page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
		if (!page)
			return NULL;
		if (!pgtable_page_ctor(page)) {
			__free_page(page);
			return NULL;
		}
		atomic_set(&page->_mapcount, 1);
		table = (unsigned long *) page_to_phys(page);
		clear_table(table, _PAGE_INVALID, PAGE_SIZE);
		spin_lock_bh(&mm->context.list_lock);
		list_add(&page->lru, &mm->context.pgtable_list);
	} else {
		for (bit = 1; mask & bit; bit <<= 1)
			table += PTRS_PER_PTE;
		mask = atomic_xor_bits(&page->_mapcount, bit);
		if ((mask & FRAG_MASK) == FRAG_MASK)
			list_del(&page->lru);
	}
	spin_unlock_bh(&mm->context.list_lock);
	return table;
}

void page_table_free(struct mm_struct *mm, unsigned long *table)
{
	struct page *page;
	unsigned int bit, mask;

	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	if (page_table_with_pgste(page)) {
		gmap_disconnect_pgtable(mm, table);
		return page_table_free_pgste(table);
	}
	/* Free 1K/2K page table fragment of a 4K page */
	bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
	spin_lock_bh(&mm->context.list_lock);
	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
		list_del(&page->lru);
	mask = atomic_xor_bits(&page->_mapcount, bit);
	if (mask & FRAG_MASK)
		list_add(&page->lru, &mm->context.pgtable_list);
	spin_unlock_bh(&mm->context.list_lock);
	if (mask == 0) {
		pgtable_page_dtor(page);
		atomic_set(&page->_mapcount, -1);
		__free_page(page);
	}
}

static void __page_table_free_rcu(void *table, unsigned bit)
{
	struct page *page;

	if (bit == FRAG_MASK)
		return page_table_free_pgste(table);
	/* Free 1K/2K page table fragment of a 4K page */
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	if (atomic_xor_bits(&page->_mapcount, bit) == 0) {
		pgtable_page_dtor(page);
		atomic_set(&page->_mapcount, -1);
		__free_page(page);
	}
}

void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
{
	struct mm_struct *mm;
	struct page *page;
	unsigned int bit, mask;

	mm = tlb->mm;
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	if (page_table_with_pgste(page)) {
		gmap_disconnect_pgtable(mm, table);
		table = (unsigned long *) (__pa(table) | FRAG_MASK);
		tlb_remove_table(tlb, table);
		return;
	}
	bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
	spin_lock_bh(&mm->context.list_lock);
	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
		list_del(&page->lru);
	mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4));
	if (mask & FRAG_MASK)
		list_add_tail(&page->lru, &mm->context.pgtable_list);
	spin_unlock_bh(&mm->context.list_lock);
	table = (unsigned long *) (__pa(table) | (bit << 4));
	tlb_remove_table(tlb, table);
}

static void __tlb_remove_table(void *_table)
{
	const unsigned long mask = (FRAG_MASK << 4) | FRAG_MASK;
	void *table = (void *)((unsigned long) _table & ~mask);
	unsigned type = (unsigned long) _table & mask;

	if (type)
		__page_table_free_rcu(table, type);
	else
		free_pages((unsigned long) table, ALLOC_ORDER);
}

static void tlb_remove_table_smp_sync(void *arg)
{
	/* Simply deliver the interrupt */
}

static void tlb_remove_table_one(void *table)
{
	/*
	 * This isn't an RCU grace period and hence the page-tables cannot be
	 * assumed to be actually RCU-freed.
	 *
	 * It is however sufficient for software page-table walkers that rely
	 * on IRQ disabling. See the comment near struct mmu_table_batch.
	 */
	smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
	__tlb_remove_table(table);
}

static void tlb_remove_table_rcu(struct rcu_head *head)
{
	struct mmu_table_batch *batch;
	int i;

	batch = container_of(head, struct mmu_table_batch, rcu);

	for (i = 0; i < batch->nr; i++)
		__tlb_remove_table(batch->tables[i]);

	free_page((unsigned long)batch);
}

void tlb_table_flush(struct mmu_gather *tlb)
{
	struct mmu_table_batch **batch = &tlb->batch;

	if (*batch) {
		call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
		*batch = NULL;
	}
}

void tlb_remove_table(struct mmu_gather *tlb, void *table)
{
	struct mmu_table_batch **batch = &tlb->batch;

	tlb->mm->context.flush_mm = 1;
	if (*batch == NULL) {
		*batch = (struct mmu_table_batch *)
			__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
		if (*batch == NULL) {
			__tlb_flush_mm_lazy(tlb->mm);
			tlb_remove_table_one(table);
			return;
		}
		(*batch)->nr = 0;
	}
	(*batch)->tables[(*batch)->nr++] = table;
	if ((*batch)->nr == MAX_TABLE_BATCH)
		tlb_flush_mmu(tlb);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline void thp_split_vma(struct vm_area_struct *vma)
{
	unsigned long addr;

	for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE)
		follow_page(vma, addr, FOLL_SPLIT);
}

static inline void thp_split_mm(struct mm_struct *mm)
{
	struct vm_area_struct *vma;

	for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
		thp_split_vma(vma);
		vma->vm_flags &= ~VM_HUGEPAGE;
		vma->vm_flags |= VM_NOHUGEPAGE;
	}
	mm->def_flags |= VM_NOHUGEPAGE;
}
#else
static inline void thp_split_mm(struct mm_struct *mm)
{
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

static unsigned long page_table_realloc_pmd(struct mmu_gather *tlb,
				struct mm_struct *mm, pud_t *pud,
				unsigned long addr, unsigned long end)
{
	unsigned long next, *table, *new;
	struct page *page;
	spinlock_t *ptl;
	pmd_t *pmd;

	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
again:
		if (pmd_none_or_clear_bad(pmd))
			continue;
		table = (unsigned long *) pmd_deref(*pmd);
		page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
		if (page_table_with_pgste(page))
			continue;
		/* Allocate new page table with pgstes */
		new = page_table_alloc_pgste(mm, addr);
		if (!new)
			return -ENOMEM;

		ptl = pmd_lock(mm, pmd);
		if (likely((unsigned long *) pmd_deref(*pmd) == table)) {
			/* Nuke pmd entry pointing to the "short" page table */
			pmdp_flush_lazy(mm, addr, pmd);
			pmd_clear(pmd);
			/* Copy ptes from old table to new table */
			memcpy(new, table, PAGE_SIZE/2);
			clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
			/* Establish new table */
			pmd_populate(mm, pmd, (pte_t *) new);
			/* Free old table with rcu, there might be a walker! */
			page_table_free_rcu(tlb, table);
			new = NULL;
		}
		spin_unlock(ptl);
		if (new) {
			page_table_free_pgste(new);
			goto again;
		}
	} while (pmd++, addr = next, addr != end);

	return addr;
}

static unsigned long page_table_realloc_pud(struct mmu_gather *tlb,
				struct mm_struct *mm, pgd_t *pgd,
				unsigned long addr, unsigned long end)
{
	unsigned long next;
	pud_t *pud;

	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (pud_none_or_clear_bad(pud))
			continue;
		next = page_table_realloc_pmd(tlb, mm, pud, addr, next);
		if (unlikely(IS_ERR_VALUE(next)))
			return next;
	} while (pud++, addr = next, addr != end);

	return addr;
}

static unsigned long page_table_realloc(struct mmu_gather *tlb, struct mm_struct *mm,
					unsigned long addr, unsigned long end)
{
	unsigned long next;
	pgd_t *pgd;

	pgd = pgd_offset(mm, addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		next = page_table_realloc_pud(tlb, mm, pgd, addr, next);
		if (unlikely(IS_ERR_VALUE(next)))
			return next;
	} while (pgd++, addr = next, addr != end);

	return 0;
}

/*
 * switch on pgstes for its userspace process (for kvm)
 */
int s390_enable_sie(void)
{
	struct task_struct *tsk = current;
	struct mm_struct *mm = tsk->mm;
	struct mmu_gather tlb;

	/* Do we have pgstes? if yes, we are done */
	if (mm_has_pgste(tsk->mm))
		return 0;

	down_write(&mm->mmap_sem);
	/* split thp mappings and disable thp for future mappings */
	thp_split_mm(mm);
	/* Reallocate the page tables with pgstes */
	tlb_gather_mmu(&tlb, mm, 0, TASK_SIZE);
	if (!page_table_realloc(&tlb, mm, 0, TASK_SIZE))
		mm->context.has_pgste = 1;
	tlb_finish_mmu(&tlb, 0, TASK_SIZE);
	up_write(&mm->mmap_sem);
	return mm->context.has_pgste ? 0 : -ENOMEM;
}
EXPORT_SYMBOL_GPL(s390_enable_sie);

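/*
 * For illustration only (sketch): a KVM-like caller enables SIE once per
 * process before any guest mappings are set up. The surrounding error
 * handling and the gmap_alloc() follow-up are assumptions about a typical
 * caller, not code from this file.
 *
 *	rc = s390_enable_sie();		// reallocate page tables with pgstes
 *	if (rc)
 *		return rc;		// -ENOMEM: could not convert the mm
 *	gmap = gmap_alloc(current->mm);
 *	...
 */
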
/*
 * Enable storage key handling from now on and initialize the storage
 * keys with the default key.
 */
void s390_enable_skey(void)
{
	page_table_reset_pgste(current->mm, 0, TASK_SIZE, true);
}
EXPORT_SYMBOL_GPL(s390_enable_skey);

/*
 * Test and reset if a guest page is dirty
 */
bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *gmap)
{
	pte_t *pte;
	spinlock_t *ptl;
	bool dirty = false;

	pte = get_locked_pte(gmap->mm, address, &ptl);
	if (unlikely(!pte))
		return false;
	if (ptep_test_and_clear_user_dirty(gmap->mm, address, pte))
		dirty = true;
	spin_unlock(ptl);
	return dirty;
}
EXPORT_SYMBOL_GPL(gmap_test_and_clear_dirty);

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address,
			   pmd_t *pmdp)
{
	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
	/* No need to flush TLB
	 * On s390 reference bits are in storage key and never in TLB */
	return pmdp_test_and_clear_young(vma, address, pmdp);
}

int pmdp_set_access_flags(struct vm_area_struct *vma,
			  unsigned long address, pmd_t *pmdp,
			  pmd_t entry, int dirty)
{
	VM_BUG_ON(address & ~HPAGE_PMD_MASK);

	entry = pmd_mkyoung(entry);
	if (dirty)
		entry = pmd_mkdirty(entry);
	if (pmd_same(*pmdp, entry))
		return 0;
	pmdp_invalidate(vma, address, pmdp);
	set_pmd_at(vma->vm_mm, address, pmdp, entry);
	return 1;
}

static void pmdp_splitting_flush_sync(void *arg)
{
	/* Simply deliver the interrupt */
}

void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
			  pmd_t *pmdp)
{
	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
	if (!test_and_set_bit(_SEGMENT_ENTRY_SPLIT_BIT,
			      (unsigned long *) pmdp)) {
		/* need to serialize against gup-fast (IRQ disabled) */
		smp_call_function(pmdp_splitting_flush_sync, NULL, 1);
	}
}

void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
				pgtable_t pgtable)
{
	struct list_head *lh = (struct list_head *) pgtable;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	if (!pmd_huge_pte(mm, pmdp))
		INIT_LIST_HEAD(lh);
	else
		list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
	pmd_huge_pte(mm, pmdp) = pgtable;
}

pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
	struct list_head *lh;
	pgtable_t pgtable;
	pte_t *ptep;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	pgtable = pmd_huge_pte(mm, pmdp);
	lh = (struct list_head *) pgtable;
	if (list_empty(lh))
		pmd_huge_pte(mm, pmdp) = NULL;
	else {
		pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
		list_del(lh);
	}
	ptep = (pte_t *) pgtable;
	pte_val(*ptep) = _PAGE_INVALID;
	ptep++;
	pte_val(*ptep) = _PAGE_INVALID;
	return pgtable;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */