/*
 * KVM guest address space mapping code
 *
 * Copyright IBM Corp. 2007, 2016
 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/swapops.h>
#include <linux/ksm.h>
#include <linux/mman.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/gmap.h>
#include <asm/tlb.h>

/**
 * gmap_alloc - allocate a guest address space
 * @mm: pointer to the parent mm_struct
 * @limit: maximum address of the gmap address space
 *
 * Returns a guest address space structure, or NULL if out of memory.
 */
struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit)
{
	struct gmap *gmap;
	struct page *page;
	unsigned long *table;
	unsigned long etype, atype;

	if (limit < (1UL << 31)) {
		limit = (1UL << 31) - 1;
		atype = _ASCE_TYPE_SEGMENT;
		etype = _SEGMENT_ENTRY_EMPTY;
	} else if (limit < (1UL << 42)) {
		limit = (1UL << 42) - 1;
		atype = _ASCE_TYPE_REGION3;
		etype = _REGION3_ENTRY_EMPTY;
	} else if (limit < (1UL << 53)) {
		limit = (1UL << 53) - 1;
		atype = _ASCE_TYPE_REGION2;
		etype = _REGION2_ENTRY_EMPTY;
	} else {
		limit = -1UL;
		atype = _ASCE_TYPE_REGION1;
		etype = _REGION1_ENTRY_EMPTY;
	}
	gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL);
	if (!gmap)
		goto out;
	INIT_LIST_HEAD(&gmap->crst_list);
	INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL);
	INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC);
	spin_lock_init(&gmap->guest_table_lock);
	gmap->mm = mm;
	page = alloc_pages(GFP_KERNEL, 2);
	if (!page)
		goto out_free;
	page->index = 0;
	list_add(&page->lru, &gmap->crst_list);
	table = (unsigned long *) page_to_phys(page);
	crst_table_init(table, etype);
	gmap->table = table;
	gmap->asce = atype | _ASCE_TABLE_LENGTH |
		_ASCE_USER_BITS | __pa(table);
	gmap->asce_end = limit;
	down_write(&mm->mmap_sem);
	list_add(&gmap->list, &mm->context.gmap_list);
	up_write(&mm->mmap_sem);
	return gmap;

out_free:
	kfree(gmap);
out:
	return NULL;
}
EXPORT_SYMBOL_GPL(gmap_alloc);
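
/*
 * Example (illustrative sketch only, not part of the original file): a
 * typical caller such as a hypervisor creates a gmap for the current
 * process, installs it for guest execution and tears it down again.
 * The 4 GB limit below is an arbitrary assumption.
 *
 *	struct gmap *gmap;
 *
 *	gmap = gmap_alloc(current->mm, (1UL << 32) - 1);
 *	if (!gmap)
 *		return -ENOMEM;
 *	gmap_enable(gmap);	// run with the guest address space
 *	...
 *	gmap_disable(gmap);
 *	gmap_free(gmap);
 */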
84 | ||
85 | static void gmap_flush_tlb(struct gmap *gmap) | |
86 | { | |
87 | if (MACHINE_HAS_IDTE) | |
88 | __tlb_flush_asce(gmap->mm, gmap->asce); | |
89 | else | |
90 | __tlb_flush_global(); | |
91 | } | |
92 | ||
93 | static void gmap_radix_tree_free(struct radix_tree_root *root) | |
94 | { | |
95 | struct radix_tree_iter iter; | |
96 | unsigned long indices[16]; | |
97 | unsigned long index; | |
98 | void **slot; | |
99 | int i, nr; | |
100 | ||
101 | /* A radix tree is freed by deleting all of its entries */ | |
102 | index = 0; | |
103 | do { | |
104 | nr = 0; | |
105 | radix_tree_for_each_slot(slot, root, &iter, index) { | |
106 | indices[nr] = iter.index; | |
107 | if (++nr == 16) | |
108 | break; | |
109 | } | |
110 | for (i = 0; i < nr; i++) { | |
111 | index = indices[i]; | |
112 | radix_tree_delete(root, index); | |
113 | } | |
114 | } while (nr > 0); | |
115 | } | |
116 | ||
117 | /** | |
118 | * gmap_free - free a guest address space | |
119 | * @gmap: pointer to the guest address space structure | |
120 | */ | |
121 | void gmap_free(struct gmap *gmap) | |
122 | { | |
123 | struct page *page, *next; | |
124 | ||
125 | /* Flush tlb. */ | |
126 | if (MACHINE_HAS_IDTE) | |
127 | __tlb_flush_asce(gmap->mm, gmap->asce); | |
128 | else | |
129 | __tlb_flush_global(); | |
130 | ||
131 | /* Free all segment & region tables. */ | |
132 | list_for_each_entry_safe(page, next, &gmap->crst_list, lru) | |
133 | __free_pages(page, 2); | |
134 | gmap_radix_tree_free(&gmap->guest_to_host); | |
135 | gmap_radix_tree_free(&gmap->host_to_guest); | |
136 | down_write(&gmap->mm->mmap_sem); | |
137 | list_del(&gmap->list); | |
138 | up_write(&gmap->mm->mmap_sem); | |
139 | kfree(gmap); | |
140 | } | |
141 | EXPORT_SYMBOL_GPL(gmap_free); | |
142 | ||
143 | /** | |
144 | * gmap_enable - switch primary space to the guest address space | |
145 | * @gmap: pointer to the guest address space structure | |
146 | */ | |
147 | void gmap_enable(struct gmap *gmap) | |
148 | { | |
149 | S390_lowcore.gmap = (unsigned long) gmap; | |
150 | } | |
151 | EXPORT_SYMBOL_GPL(gmap_enable); | |
152 | ||
153 | /** | |
154 | * gmap_disable - switch back to the standard primary address space | |
155 | * @gmap: pointer to the guest address space structure | |
156 | */ | |
157 | void gmap_disable(struct gmap *gmap) | |
158 | { | |
159 | S390_lowcore.gmap = 0UL; | |
160 | } | |
161 | EXPORT_SYMBOL_GPL(gmap_disable); | |
162 | ||
163 | /* | |
164 | * gmap_alloc_table is assumed to be called with mmap_sem held | |
165 | */ | |
166 | static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, | |
167 | unsigned long init, unsigned long gaddr) | |
168 | { | |
169 | struct page *page; | |
170 | unsigned long *new; | |
171 | ||
172 | /* since we dont free the gmap table until gmap_free we can unlock */ | |
173 | page = alloc_pages(GFP_KERNEL, 2); | |
174 | if (!page) | |
175 | return -ENOMEM; | |
176 | new = (unsigned long *) page_to_phys(page); | |
177 | crst_table_init(new, init); | |
178 | spin_lock(&gmap->mm->page_table_lock); | |
179 | if (*table & _REGION_ENTRY_INVALID) { | |
180 | list_add(&page->lru, &gmap->crst_list); | |
181 | *table = (unsigned long) new | _REGION_ENTRY_LENGTH | | |
182 | (*table & _REGION_ENTRY_TYPE_MASK); | |
183 | page->index = gaddr; | |
184 | page = NULL; | |
185 | } | |
186 | spin_unlock(&gmap->mm->page_table_lock); | |
187 | if (page) | |
188 | __free_pages(page, 2); | |
189 | return 0; | |
190 | } | |
191 | ||
192 | /** | |
193 | * __gmap_segment_gaddr - find virtual address from segment pointer | |
194 | * @entry: pointer to a segment table entry in the guest address space | |
195 | * | |
196 | * Returns the virtual address in the guest address space for the segment | |
197 | */ | |
198 | static unsigned long __gmap_segment_gaddr(unsigned long *entry) | |
199 | { | |
200 | struct page *page; | |
201 | unsigned long offset, mask; | |
202 | ||
203 | offset = (unsigned long) entry / sizeof(unsigned long); | |
204 | offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE; | |
205 | mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1); | |
206 | page = virt_to_page((void *)((unsigned long) entry & mask)); | |
207 | return page->index + offset; | |
208 | } | |
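
/*
 * Worked example of the arithmetic above (the entry address is an
 * arbitrary assumption): with 8-byte entries and PTRS_PER_PMD == 2048,
 * a segment table spans 16 KB. For an entry at kernel address
 * 0x12340a08 the table origin is 0x12340000 (address masked down to
 * 16 KB), the entry index is 0xa08 / 8 = 0x141, and with PMD_SIZE of
 * 1 MB the offset is 0x141 << 20 = 0x14100000. Added to page->index,
 * the guest address stored by gmap_alloc_table(), this yields the
 * guest address the entry maps.
 */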
209 | ||
210 | /** | |
211 | * __gmap_unlink_by_vmaddr - unlink a single segment via a host address | |
212 | * @gmap: pointer to the guest address space structure | |
213 | * @vmaddr: address in the host process address space | |
214 | * | |
215 | * Returns 1 if a TLB flush is required | |
216 | */ | |
217 | static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr) | |
218 | { | |
219 | unsigned long *entry; | |
220 | int flush = 0; | |
221 | ||
222 | spin_lock(&gmap->guest_table_lock); | |
223 | entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT); | |
224 | if (entry) { | |
225 | flush = (*entry != _SEGMENT_ENTRY_INVALID); | |
226 | *entry = _SEGMENT_ENTRY_INVALID; | |
227 | } | |
228 | spin_unlock(&gmap->guest_table_lock); | |
229 | return flush; | |
230 | } | |
231 | ||
232 | /** | |
233 | * __gmap_unmap_by_gaddr - unmap a single segment via a guest address | |
234 | * @gmap: pointer to the guest address space structure | |
235 | * @gaddr: address in the guest address space | |
236 | * | |
237 | * Returns 1 if a TLB flush is required | |
238 | */ | |
239 | static int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr) | |
240 | { | |
241 | unsigned long vmaddr; | |
242 | ||
243 | vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host, | |
244 | gaddr >> PMD_SHIFT); | |
245 | return vmaddr ? __gmap_unlink_by_vmaddr(gmap, vmaddr) : 0; | |
246 | } | |
247 | ||
248 | /** | |
249 | * gmap_unmap_segment - unmap segment from the guest address space | |
250 | * @gmap: pointer to the guest address space structure | |
251 | * @to: address in the guest address space | |
252 | * @len: length of the memory area to unmap | |
253 | * | |
254 | * Returns 0 if the unmap succeeded, -EINVAL if not. | |
255 | */ | |
256 | int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) | |
257 | { | |
258 | unsigned long off; | |
259 | int flush; | |
260 | ||
261 | if ((to | len) & (PMD_SIZE - 1)) | |
262 | return -EINVAL; | |
263 | if (len == 0 || to + len < to) | |
264 | return -EINVAL; | |
265 | ||
266 | flush = 0; | |
267 | down_write(&gmap->mm->mmap_sem); | |
268 | for (off = 0; off < len; off += PMD_SIZE) | |
269 | flush |= __gmap_unmap_by_gaddr(gmap, to + off); | |
270 | up_write(&gmap->mm->mmap_sem); | |
271 | if (flush) | |
272 | gmap_flush_tlb(gmap); | |
273 | return 0; | |
274 | } | |
275 | EXPORT_SYMBOL_GPL(gmap_unmap_segment); | |
276 | ||
277 | /** | |
278 | * gmap_map_segment - map a segment to the guest address space | |
279 | * @gmap: pointer to the guest address space structure | |
280 | * @from: source address in the parent address space | |
281 | * @to: target address in the guest address space | |
282 | * @len: length of the memory area to map | |
283 | * | |
284 | * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not. | |
285 | */ | |
286 | int gmap_map_segment(struct gmap *gmap, unsigned long from, | |
287 | unsigned long to, unsigned long len) | |
288 | { | |
289 | unsigned long off; | |
290 | int flush; | |
291 | ||
292 | if ((from | to | len) & (PMD_SIZE - 1)) | |
293 | return -EINVAL; | |
294 | if (len == 0 || from + len < from || to + len < to || | |
9c650d09 | 295 | from + len - 1 > TASK_MAX_SIZE || to + len - 1 > gmap->asce_end) |
1e133ab2 MS |
296 | return -EINVAL; |
297 | ||
298 | flush = 0; | |
299 | down_write(&gmap->mm->mmap_sem); | |
300 | for (off = 0; off < len; off += PMD_SIZE) { | |
301 | /* Remove old translation */ | |
302 | flush |= __gmap_unmap_by_gaddr(gmap, to + off); | |
303 | /* Store new translation */ | |
304 | if (radix_tree_insert(&gmap->guest_to_host, | |
305 | (to + off) >> PMD_SHIFT, | |
306 | (void *) from + off)) | |
307 | break; | |
308 | } | |
309 | up_write(&gmap->mm->mmap_sem); | |
310 | if (flush) | |
311 | gmap_flush_tlb(gmap); | |
312 | if (off >= len) | |
313 | return 0; | |
314 | gmap_unmap_segment(gmap, to, len); | |
315 | return -ENOMEM; | |
316 | } | |
317 | EXPORT_SYMBOL_GPL(gmap_map_segment); | |
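
/*
 * Example (illustrative sketch, the addresses are arbitrary
 * assumptions): map 16 MB of the parent address space, starting at
 * host address 0x20000000, to guest address 0. All three values must
 * be aligned to PMD_SIZE (1 MB segments on s390).
 *
 *	if (gmap_map_segment(gmap, 0x20000000UL, 0x0UL, 0x1000000UL))
 *		... bail out, the mapping failed ...
 *	...
 *	gmap_unmap_segment(gmap, 0x0UL, 0x1000000UL);
 */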
318 | ||
319 | /** | |
320 | * __gmap_translate - translate a guest address to a user space address | |
321 | * @gmap: pointer to guest mapping meta data structure | |
322 | * @gaddr: guest address | |
323 | * | |
324 | * Returns user space address which corresponds to the guest address or | |
325 | * -EFAULT if no such mapping exists. | |
326 | * This function does not establish potentially missing page table entries. | |
327 | * The mmap_sem of the mm that belongs to the address space must be held | |
328 | * when this function gets called. | |
329 | */ | |
330 | unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr) | |
331 | { | |
332 | unsigned long vmaddr; | |
333 | ||
334 | vmaddr = (unsigned long) | |
335 | radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT); | |
336 | return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT; | |
337 | } | |
338 | EXPORT_SYMBOL_GPL(__gmap_translate); | |
339 | ||
340 | /** | |
341 | * gmap_translate - translate a guest address to a user space address | |
342 | * @gmap: pointer to guest mapping meta data structure | |
343 | * @gaddr: guest address | |
344 | * | |
345 | * Returns user space address which corresponds to the guest address or | |
346 | * -EFAULT if no such mapping exists. | |
347 | * This function does not establish potentially missing page table entries. | |
348 | */ | |
349 | unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr) | |
350 | { | |
351 | unsigned long rc; | |
352 | ||
353 | down_read(&gmap->mm->mmap_sem); | |
354 | rc = __gmap_translate(gmap, gaddr); | |
355 | up_read(&gmap->mm->mmap_sem); | |
356 | return rc; | |
357 | } | |
358 | EXPORT_SYMBOL_GPL(gmap_translate); | |
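
/*
 * Example (illustrative sketch): the return value doubles as an error
 * code, so callers should test it with IS_ERR_VALUE() before using it
 * as an address.
 *
 *	unsigned long vmaddr;
 *
 *	vmaddr = gmap_translate(gmap, gaddr);
 *	if (IS_ERR_VALUE(vmaddr))
 *		return (int) vmaddr;	// -EFAULT, no mapping
 *	...
 */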
359 | ||
360 | /** | |
361 | * gmap_unlink - disconnect a page table from the gmap shadow tables | |
362 | * @gmap: pointer to guest mapping meta data structure | |
363 | * @table: pointer to the host page table | |
364 | * @vmaddr: vm address associated with the host page table | |
365 | */ | |
366 | void gmap_unlink(struct mm_struct *mm, unsigned long *table, | |
367 | unsigned long vmaddr) | |
368 | { | |
369 | struct gmap *gmap; | |
370 | int flush; | |
371 | ||
372 | list_for_each_entry(gmap, &mm->context.gmap_list, list) { | |
373 | flush = __gmap_unlink_by_vmaddr(gmap, vmaddr); | |
374 | if (flush) | |
375 | gmap_flush_tlb(gmap); | |
376 | } | |
377 | } | |
378 | ||
379 | /** | |
380 | * gmap_link - set up shadow page tables to connect a host to a guest address | |
381 | * @gmap: pointer to guest mapping meta data structure | |
382 | * @gaddr: guest address | |
383 | * @vmaddr: vm address | |
384 | * | |
385 | * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT | |
386 | * if the vm address is already mapped to a different guest segment. | |
387 | * The mmap_sem of the mm that belongs to the address space must be held | |
388 | * when this function gets called. | |
389 | */ | |
390 | int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) | |
391 | { | |
392 | struct mm_struct *mm; | |
393 | unsigned long *table; | |
394 | spinlock_t *ptl; | |
395 | pgd_t *pgd; | |
396 | pud_t *pud; | |
397 | pmd_t *pmd; | |
398 | int rc; | |
399 | ||
400 | /* Create higher level tables in the gmap page table */ | |
401 | table = gmap->table; | |
402 | if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) { | |
403 | table += (gaddr >> 53) & 0x7ff; | |
404 | if ((*table & _REGION_ENTRY_INVALID) && | |
405 | gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY, | |
406 | gaddr & 0xffe0000000000000UL)) | |
407 | return -ENOMEM; | |
408 | table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); | |
409 | } | |
410 | if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) { | |
411 | table += (gaddr >> 42) & 0x7ff; | |
412 | if ((*table & _REGION_ENTRY_INVALID) && | |
413 | gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY, | |
414 | gaddr & 0xfffffc0000000000UL)) | |
415 | return -ENOMEM; | |
416 | table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); | |
417 | } | |
418 | if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) { | |
419 | table += (gaddr >> 31) & 0x7ff; | |
420 | if ((*table & _REGION_ENTRY_INVALID) && | |
421 | gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY, | |
422 | gaddr & 0xffffffff80000000UL)) | |
423 | return -ENOMEM; | |
424 | table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); | |
425 | } | |
426 | table += (gaddr >> 20) & 0x7ff; | |
427 | /* Walk the parent mm page table */ | |
428 | mm = gmap->mm; | |
429 | pgd = pgd_offset(mm, vmaddr); | |
430 | VM_BUG_ON(pgd_none(*pgd)); | |
431 | pud = pud_offset(pgd, vmaddr); | |
432 | VM_BUG_ON(pud_none(*pud)); | |
433 | pmd = pmd_offset(pud, vmaddr); | |
434 | VM_BUG_ON(pmd_none(*pmd)); | |
435 | /* large pmds cannot yet be handled */ | |
436 | if (pmd_large(*pmd)) | |
437 | return -EFAULT; | |
438 | /* Link gmap segment table entry location to page table. */ | |
439 | rc = radix_tree_preload(GFP_KERNEL); | |
440 | if (rc) | |
441 | return rc; | |
442 | ptl = pmd_lock(mm, pmd); | |
443 | spin_lock(&gmap->guest_table_lock); | |
444 | if (*table == _SEGMENT_ENTRY_INVALID) { | |
445 | rc = radix_tree_insert(&gmap->host_to_guest, | |
446 | vmaddr >> PMD_SHIFT, table); | |
447 | if (!rc) | |
448 | *table = pmd_val(*pmd); | |
449 | } else | |
450 | rc = 0; | |
451 | spin_unlock(&gmap->guest_table_lock); | |
452 | spin_unlock(ptl); | |
453 | radix_tree_preload_end(); | |
454 | return rc; | |
455 | } | |
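
/*
 * Worked example of the table walk above (a sketch derived from the
 * shifts in the code, assuming the maximal region-first setup): a
 * guest address decomposes into four 11-bit table indices plus the
 * 20-bit byte offset within the 1 MB segment:
 *
 *	bits 63..53	region-first table index  (gaddr >> 53)
 *	bits 52..42	region-second table index (gaddr >> 42)
 *	bits 41..31	region-third table index  (gaddr >> 31)
 *	bits 30..20	segment table index	  (gaddr >> 20)
 *
 * Each level is allocated on demand by gmap_alloc_table() and the walk
 * stops at the segment table entry, which is then linked to the host
 * pmd of @vmaddr.
 */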
456 | ||
457 | /** | |
458 | * gmap_fault - resolve a fault on a guest address | |
459 | * @gmap: pointer to guest mapping meta data structure | |
460 | * @gaddr: guest address | |
461 | * @fault_flags: flags to pass down to handle_mm_fault() | |
462 | * | |
463 | * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT | |
464 | * if the vm address is already mapped to a different guest segment. | |
465 | */ | |
466 | int gmap_fault(struct gmap *gmap, unsigned long gaddr, | |
467 | unsigned int fault_flags) | |
468 | { | |
469 | unsigned long vmaddr; | |
470 | int rc; | |
471 | bool unlocked; | |
472 | ||
473 | down_read(&gmap->mm->mmap_sem); | |
474 | ||
475 | retry: | |
476 | unlocked = false; | |
477 | vmaddr = __gmap_translate(gmap, gaddr); | |
478 | if (IS_ERR_VALUE(vmaddr)) { | |
479 | rc = vmaddr; | |
480 | goto out_up; | |
481 | } | |
482 | if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags, | |
483 | &unlocked)) { | |
484 | rc = -EFAULT; | |
485 | goto out_up; | |
486 | } | |
487 | /* | |
488 | * In the case that fixup_user_fault unlocked the mmap_sem during | |
489 | * faultin redo __gmap_translate to not race with a map/unmap_segment. | |
490 | */ | |
491 | if (unlocked) | |
492 | goto retry; | |
493 | ||
494 | rc = __gmap_link(gmap, gaddr, vmaddr); | |
495 | out_up: | |
496 | up_read(&gmap->mm->mmap_sem); | |
497 | return rc; | |
498 | } | |
499 | EXPORT_SYMBOL_GPL(gmap_fault); | |
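
/*
 * Example (illustrative sketch): resolve a guest write fault, e.g.
 * from a hypervisor's fault handler, where gaddr is the faulting
 * guest address taken from the machine state.
 *
 *	rc = gmap_fault(gmap, gaddr, FAULT_FLAG_WRITE);
 *	if (rc)
 *		... inject an addressing exception into the guest ...
 */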
500 | ||
501 | /* | |
502 | * this function is assumed to be called with mmap_sem held | |
503 | */ | |
504 | void __gmap_zap(struct gmap *gmap, unsigned long gaddr) | |
505 | { | |
506 | unsigned long vmaddr; | |
507 | spinlock_t *ptl; | |
508 | pte_t *ptep; | |
509 | ||
510 | /* Find the vm address for the guest address */ | |
511 | vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host, | |
512 | gaddr >> PMD_SHIFT); | |
513 | if (vmaddr) { | |
514 | vmaddr |= gaddr & ~PMD_MASK; | |
515 | /* Get pointer to the page table entry */ | |
516 | ptep = get_locked_pte(gmap->mm, vmaddr, &ptl); | |
517 | if (likely(ptep)) | |
518 | ptep_zap_unused(gmap->mm, vmaddr, ptep, 0); | |
519 | pte_unmap_unlock(ptep, ptl); | |
520 | } | |
521 | } | |
522 | EXPORT_SYMBOL_GPL(__gmap_zap); | |
523 | ||
524 | void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to) | |
525 | { | |
526 | unsigned long gaddr, vmaddr, size; | |
527 | struct vm_area_struct *vma; | |
528 | ||
529 | down_read(&gmap->mm->mmap_sem); | |
530 | for (gaddr = from; gaddr < to; | |
531 | gaddr = (gaddr + PMD_SIZE) & PMD_MASK) { | |
532 | /* Find the vm address for the guest address */ | |
533 | vmaddr = (unsigned long) | |
534 | radix_tree_lookup(&gmap->guest_to_host, | |
535 | gaddr >> PMD_SHIFT); | |
536 | if (!vmaddr) | |
537 | continue; | |
538 | vmaddr |= gaddr & ~PMD_MASK; | |
539 | /* Find vma in the parent mm */ | |
540 | vma = find_vma(gmap->mm, vmaddr); | |
541 | size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK)); | |
542 | zap_page_range(vma, vmaddr, size, NULL); | |
543 | } | |
544 | up_read(&gmap->mm->mmap_sem); | |
545 | } | |
546 | EXPORT_SYMBOL_GPL(gmap_discard); | |
547 | ||
548 | static LIST_HEAD(gmap_notifier_list); | |
549 | static DEFINE_SPINLOCK(gmap_notifier_lock); | |
550 | ||
551 | /** | |
552 | * gmap_register_ipte_notifier - register a pte invalidation callback | |
553 | * @nb: pointer to the gmap notifier block | |
554 | */ | |
555 | void gmap_register_ipte_notifier(struct gmap_notifier *nb) | |
556 | { | |
557 | spin_lock(&gmap_notifier_lock); | |
558 | list_add(&nb->list, &gmap_notifier_list); | |
559 | spin_unlock(&gmap_notifier_lock); | |
560 | } | |
561 | EXPORT_SYMBOL_GPL(gmap_register_ipte_notifier); | |
562 | ||
563 | /** | |
564 | * gmap_unregister_ipte_notifier - remove a pte invalidation callback | |
565 | * @nb: pointer to the gmap notifier block | |
566 | */ | |
567 | void gmap_unregister_ipte_notifier(struct gmap_notifier *nb) | |
568 | { | |
569 | spin_lock(&gmap_notifier_lock); | |
570 | list_del_init(&nb->list); | |
571 | spin_unlock(&gmap_notifier_lock); | |
572 | } | |
573 | EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier); | |
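
/*
 * Example (illustrative sketch, the "my_" names are assumptions): a
 * user of the notification machinery supplies a callback that is
 * invoked for each invalidated range and registers it at setup time.
 *
 *	static void my_gmap_notifier(struct gmap *gmap, unsigned long start,
 *				     unsigned long end)
 *	{
 *		... react to the invalidation of [start, end] ...
 *	}
 *
 *	static struct gmap_notifier my_notifier = {
 *		.notifier_call = my_gmap_notifier,
 *	};
 *
 *	gmap_register_ipte_notifier(&my_notifier);
 */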
574 | ||
414d3b07 MS |
575 | /** |
576 | * gmap_call_notifier - call all registered invalidation callbacks | |
577 | * @gmap: pointer to guest mapping meta data structure | |
578 | * @start: start virtual address in the guest address space | |
579 | * @end: end virtual address in the guest address space | |
580 | */ | |
581 | static void gmap_call_notifier(struct gmap *gmap, unsigned long start, | |
582 | unsigned long end) | |
583 | { | |
584 | struct gmap_notifier *nb; | |
585 | ||
586 | list_for_each_entry(nb, &gmap_notifier_list, list) | |
587 | nb->notifier_call(gmap, start, end); | |
588 | } | |
589 | ||
1e133ab2 MS |
/**
 * gmap_ipte_notify - mark a range of ptes for invalidation notification
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @len: size of area
 *
 * Returns 0 if for each page in the given range a gmap mapping exists and
 * the invalidation notification could be set. If the gmap mapping is missing
 * for one or more pages -EFAULT is returned. If no memory could be allocated
 * -ENOMEM is returned. This function establishes missing page table entries.
 */
int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len)
{
	unsigned long addr;
	spinlock_t *ptl;
	pte_t *ptep;
	bool unlocked;
	int rc = 0;

	if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK))
		return -EINVAL;
	down_read(&gmap->mm->mmap_sem);
	while (len) {
		unlocked = false;
		/* Convert gmap address and connect the page tables */
		addr = __gmap_translate(gmap, gaddr);
		if (IS_ERR_VALUE(addr)) {
			rc = addr;
			break;
		}
		/* Get the page mapped */
		if (fixup_user_fault(current, gmap->mm, addr, FAULT_FLAG_WRITE,
				     &unlocked)) {
			rc = -EFAULT;
			break;
		}
		/* mmap_sem got unlocked while faulting, retry the translation */
		if (unlocked)
			continue;
		rc = __gmap_link(gmap, gaddr, addr);
		if (rc)
			break;
		/* Walk the process page table, lock and get pte pointer */
		ptep = get_locked_pte(gmap->mm, addr, &ptl);
		VM_BUG_ON(!ptep);
		/* Set notification bit in the pgste of the pte */
		if ((pte_val(*ptep) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) {
			ptep_set_notify(gmap->mm, addr, ptep);
			gaddr += PAGE_SIZE;
			len -= PAGE_SIZE;
		}
		pte_unmap_unlock(ptep, ptl);
	}
	up_read(&gmap->mm->mmap_sem);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_ipte_notify);
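
/*
 * Example (illustrative sketch): after registering a notifier as shown
 * further above, mark a single guest page so that the next
 * invalidation of its pte triggers the callback.
 *
 *	rc = gmap_ipte_notify(gmap, gaddr & PAGE_MASK, PAGE_SIZE);
 *	if (rc)
 *		... no mapping (-EFAULT) or out of memory (-ENOMEM) ...
 */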
647 | ||
648 | /** | |
649 | * ptep_notify - call all invalidation callbacks for a specific pte. | |
650 | * @mm: pointer to the process mm_struct | |
651 | * @addr: virtual address in the process address space | |
652 | * @pte: pointer to the page table entry | |
653 | * | |
654 | * This function is assumed to be called with the page table lock held | |
655 | * for the pte to notify. | |
656 | */ | |
657 | void ptep_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte) | |
658 | { | |
659 | unsigned long offset, gaddr; | |
660 | unsigned long *table; | |
1e133ab2 MS |
661 | struct gmap *gmap; |
662 | ||
663 | offset = ((unsigned long) pte) & (255 * sizeof(pte_t)); | |
664 | offset = offset * (4096 / sizeof(pte_t)); | |
665 | spin_lock(&gmap_notifier_lock); | |
666 | list_for_each_entry(gmap, &mm->context.gmap_list, list) { | |
667 | table = radix_tree_lookup(&gmap->host_to_guest, | |
668 | vmaddr >> PMD_SHIFT); | |
669 | if (!table) | |
670 | continue; | |
671 | gaddr = __gmap_segment_gaddr(table) + offset; | |
414d3b07 | 672 | gmap_call_notifier(gmap, gaddr, gaddr + PAGE_SIZE - 1); |
1e133ab2 MS |
673 | } |
674 | spin_unlock(&gmap_notifier_lock); | |
675 | } | |
676 | EXPORT_SYMBOL_GPL(ptep_notify); | |
677 | ||
678 | static inline void thp_split_mm(struct mm_struct *mm) | |
679 | { | |
680 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | |
681 | struct vm_area_struct *vma; | |
682 | unsigned long addr; | |
683 | ||
684 | for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) { | |
685 | for (addr = vma->vm_start; | |
686 | addr < vma->vm_end; | |
687 | addr += PAGE_SIZE) | |
688 | follow_page(vma, addr, FOLL_SPLIT); | |
689 | vma->vm_flags &= ~VM_HUGEPAGE; | |
690 | vma->vm_flags |= VM_NOHUGEPAGE; | |
691 | } | |
692 | mm->def_flags |= VM_NOHUGEPAGE; | |
693 | #endif | |
694 | } | |
695 | ||
696 | /* | |
697 | * switch on pgstes for its userspace process (for kvm) | |
698 | */ | |
699 | int s390_enable_sie(void) | |
700 | { | |
701 | struct mm_struct *mm = current->mm; | |
702 | ||
703 | /* Do we have pgstes? if yes, we are done */ | |
704 | if (mm_has_pgste(mm)) | |
705 | return 0; | |
706 | /* Fail if the page tables are 2K */ | |
707 | if (!mm_alloc_pgste(mm)) | |
708 | return -EINVAL; | |
709 | down_write(&mm->mmap_sem); | |
710 | mm->context.has_pgste = 1; | |
711 | /* split thp mappings and disable thp for future mappings */ | |
712 | thp_split_mm(mm); | |
713 | up_write(&mm->mmap_sem); | |
714 | return 0; | |
715 | } | |
716 | EXPORT_SYMBOL_GPL(s390_enable_sie); | |
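
/*
 * Example (illustrative sketch): a hypervisor calls this once per
 * process before creating a guest, typically from its init path.
 *
 *	if (s390_enable_sie())
 *		return -EINVAL;	// 2K page tables, no room for pgstes
 */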
717 | ||
718 | /* | |
719 | * Enable storage key handling from now on and initialize the storage | |
720 | * keys with the default key. | |
721 | */ | |
722 | static int __s390_enable_skey(pte_t *pte, unsigned long addr, | |
723 | unsigned long next, struct mm_walk *walk) | |
724 | { | |
725 | /* | |
726 | * Remove all zero page mappings, | |
727 | * after establishing a policy to forbid zero page mappings | |
728 | * following faults for that page will get fresh anonymous pages | |
729 | */ | |
730 | if (is_zero_pfn(pte_pfn(*pte))) | |
731 | ptep_xchg_direct(walk->mm, addr, pte, __pte(_PAGE_INVALID)); | |
732 | /* Clear storage key */ | |
733 | ptep_zap_key(walk->mm, addr, pte); | |
734 | return 0; | |
735 | } | |
736 | ||
int s390_enable_skey(void)
{
	struct mm_walk walk = { .pte_entry = __s390_enable_skey };
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	int rc = 0;

	down_write(&mm->mmap_sem);
	if (mm_use_skey(mm))
		goto out_up;

	mm->context.use_skey = 1;
	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (ksm_madvise(vma, vma->vm_start, vma->vm_end,
				MADV_UNMERGEABLE, &vma->vm_flags)) {
			mm->context.use_skey = 0;
			rc = -ENOMEM;
			goto out_up;
		}
	}
	mm->def_flags &= ~VM_MERGEABLE;

	walk.mm = mm;
	walk_page_range(0, TASK_SIZE, &walk);

out_up:
	up_write(&mm->mmap_sem);
	return rc;
}
EXPORT_SYMBOL_GPL(s390_enable_skey);
767 | ||
768 | /* | |
769 | * Reset CMMA state, make all pages stable again. | |
770 | */ | |
771 | static int __s390_reset_cmma(pte_t *pte, unsigned long addr, | |
772 | unsigned long next, struct mm_walk *walk) | |
773 | { | |
774 | ptep_zap_unused(walk->mm, addr, pte, 1); | |
775 | return 0; | |
776 | } | |
777 | ||
778 | void s390_reset_cmma(struct mm_struct *mm) | |
779 | { | |
780 | struct mm_walk walk = { .pte_entry = __s390_reset_cmma }; | |
781 | ||
782 | down_write(&mm->mmap_sem); | |
783 | walk.mm = mm; | |
784 | walk_page_range(0, TASK_SIZE, &walk); | |
785 | up_write(&mm->mmap_sem); | |
786 | } | |
787 | EXPORT_SYMBOL_GPL(s390_reset_cmma); |