Commit | Line | Data |
---|---|---|
749cf76c CD |
1 | /* |
2 | * Copyright (C) 2012 - Virtual Open Systems and Columbia University | |
3 | * Author: Christoffer Dall <c.dall@virtualopensystems.com> | |
4 | * | |
5 | * This program is free software; you can redistribute it and/or modify | |
6 | * it under the terms of the GNU General Public License, version 2, as | |
7 | * published by the Free Software Foundation. | |
8 | * | |
9 | * This program is distributed in the hope that it will be useful, | |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | * GNU General Public License for more details. | |
13 | * | |
14 | * You should have received a copy of the GNU General Public License | |
15 | * along with this program; if not, write to the Free Software | |
16 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | |
17 | */ | |
342cd0ab CD |
18 | |
19 | #include <linux/mman.h> | |
20 | #include <linux/kvm_host.h> | |
21 | #include <linux/io.h> | |
22 | #include <asm/idmap.h> | |
23 | #include <asm/pgalloc.h> | |
94f8e641 | 24 | #include <asm/cacheflush.h> |
342cd0ab CD |
25 | #include <asm/kvm_arm.h> |
26 | #include <asm/kvm_mmu.h> | |
d5d8184d | 27 | #include <asm/kvm_asm.h> |
94f8e641 | 28 | #include <asm/kvm_emulate.h> |
342cd0ab | 29 | #include <asm/mach/map.h> |
d5d8184d CD |
30 | #include <trace/events/kvm.h> |
31 | ||
32 | #include "trace.h" | |
342cd0ab CD |
33 | |
34 | extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[]; | |
35 | ||
36 | static DEFINE_MUTEX(kvm_hyp_pgd_mutex); | |
37 | ||
d5d8184d CD |
/* Invalidate the stage-2 TLB entries for this VM via a HYP call. */
static void kvm_tlb_flush_vmid(struct kvm *kvm)
{
	kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);
}
42 | ||
342cd0ab CD |
/*
 * Write a pte and clean the cache for it so that a table walker which
 * does not snoop the data cache observes the update.
 */
static void kvm_set_pte(pte_t *pte, pte_t new_pte)
{
	pte_val(*pte) = new_pte;
	/*
	 * flush_pmd_entry just takes a void pointer and cleans the necessary
	 * cache entries, so we can reuse the function for ptes.
	 */
	flush_pmd_entry(pte);
}
52 | ||
d5d8184d CD |
53 | static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, |
54 | int min, int max) | |
55 | { | |
56 | void *page; | |
57 | ||
58 | BUG_ON(max > KVM_NR_MEM_OBJS); | |
59 | if (cache->nobjs >= min) | |
60 | return 0; | |
61 | while (cache->nobjs < max) { | |
62 | page = (void *)__get_free_page(PGALLOC_GFP); | |
63 | if (!page) | |
64 | return -ENOMEM; | |
65 | cache->objects[cache->nobjs++] = page; | |
66 | } | |
67 | return 0; | |
68 | } | |
69 | ||
70 | static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc) | |
71 | { | |
72 | while (mc->nobjs) | |
73 | free_page((unsigned long)mc->objects[--mc->nobjs]); | |
74 | } | |
75 | ||
76 | static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc) | |
77 | { | |
78 | void *p; | |
79 | ||
80 | BUG_ON(!mc || !mc->nobjs); | |
81 | p = mc->objects[--mc->nobjs]; | |
82 | return p; | |
83 | } | |
84 | ||
342cd0ab CD |
/*
 * Free all level-3 (pte) tables hanging off one level-2 (pmd) table.
 * Walks all PTRS_PER_PMD entries; @addr is only used to locate the
 * kernel mapping of each pte table via pte_offset_kernel().
 */
static void free_ptes(pmd_t *pmd, unsigned long addr)
{
	pte_t *pte;
	unsigned int i;

	for (i = 0; i < PTRS_PER_PMD; i++, addr += PMD_SIZE) {
		/* Only table entries point at a freeable pte page. */
		if (!pmd_none(*pmd) && pmd_table(*pmd)) {
			pte = pte_offset_kernel(pmd, addr);
			pte_free_kernel(NULL, pte);
		}
		pmd++;
	}
}
98 | ||
/**
 * free_hyp_pmds - free a Hyp-mode level-2 tables and child level-3 tables
 *
 * Assumes this is a page table used strictly in Hyp-mode and therefore contains
 * only mappings in the kernel memory area, which is above PAGE_OFFSET.
 */
void free_hyp_pmds(void)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	unsigned long addr;

	mutex_lock(&kvm_hyp_pgd_mutex);
	/*
	 * Walk from PAGE_OFFSET to the top of the address space: the loop
	 * deliberately terminates when addr wraps around to 0.
	 */
	for (addr = PAGE_OFFSET; addr != 0; addr += PGDIR_SIZE) {
		pgd = hyp_pgd + pgd_index(addr);
		pud = pud_offset(pgd, addr);

		if (pud_none(*pud))
			continue;
		BUG_ON(pud_bad(*pud));

		pmd = pmd_offset(pud, addr);
		/* Free the child level-3 tables before the level-2 table. */
		free_ptes(pmd, addr);
		pmd_free(NULL, pmd);
		pud_clear(pud);
	}
	mutex_unlock(&kvm_hyp_pgd_mutex);
}
128 | ||
/*
 * Install level-3 (pte) entries mapping the kernel VA range
 * [start, end) into HYP at the same virtual address, with PAGE_HYP
 * attributes.  The pte table under @pmd must already be allocated.
 */
static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start,
				    unsigned long end)
{
	pte_t *pte;
	unsigned long addr;
	struct page *page;

	for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) {
		pte = pte_offset_kernel(pmd, addr);
		/* Only directly-mapped (lowmem) kernel addresses qualify. */
		BUG_ON(!virt_addr_valid(addr));
		page = virt_to_page(addr);
		kvm_set_pte(pte, mk_pte(page, PAGE_HYP));
	}
}
143 | ||
/*
 * Install level-3 entries mapping the HYP VA range [start, end) to the
 * physical frames starting at *pfn_base, with device (PAGE_HYP_DEVICE)
 * attributes.  *pfn_base is advanced past the mapped frames so a caller
 * iterating over several pmds continues where the last call stopped.
 */
static void create_hyp_io_pte_mappings(pmd_t *pmd, unsigned long start,
				       unsigned long end,
				       unsigned long *pfn_base)
{
	pte_t *pte;
	unsigned long addr;

	for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) {
		pte = pte_offset_kernel(pmd, addr);
		/* An I/O mapping must not target a RAM page. */
		BUG_ON(pfn_valid(*pfn_base));
		kvm_set_pte(pte, pfn_pte(*pfn_base, PAGE_HYP_DEVICE));
		(*pfn_base)++;
	}
}
158 | ||
/*
 * Populate the pmd entries under @pud for [start, end), allocating pte
 * tables on demand, then fill in the pte mappings.  Returns 0 on
 * success or -ENOMEM if a pte table cannot be allocated.
 */
static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
				   unsigned long end, unsigned long *pfn_base)
{
	pmd_t *pmd;
	pte_t *pte;
	unsigned long addr, next;

	for (addr = start; addr < end; addr = next) {
		pmd = pmd_offset(pud, addr);

		/* HYP tables built here never use section mappings. */
		BUG_ON(pmd_sect(*pmd));

		if (pmd_none(*pmd)) {
			pte = pte_alloc_one_kernel(NULL, addr);
			if (!pte) {
				kvm_err("Cannot allocate Hyp pte\n");
				return -ENOMEM;
			}
			pmd_populate_kernel(NULL, pmd, pte);
		}

		next = pmd_addr_end(addr, end);

		/*
		 * If pfn_base is NULL, we map kernel pages into HYP with the
		 * virtual address. Otherwise, this is considered an I/O
		 * mapping and we map the physical region starting at
		 * *pfn_base to [start, end[.
		 */
		if (!pfn_base)
			create_hyp_pte_mappings(pmd, addr, next);
		else
			create_hyp_io_pte_mappings(pmd, addr, next, pfn_base);
	}

	return 0;
}
196 | ||
/*
 * Walk the HYP pgd for [from, to), allocating intermediate pmd tables
 * as needed, and install either kernel-VA mappings (pfn_base == NULL)
 * or an I/O mapping starting at *pfn_base.  All updates to the HYP
 * page tables are serialized by kvm_hyp_pgd_mutex.
 *
 * Returns 0 on success, -EINVAL for user-space addresses, or -ENOMEM.
 */
static int __create_hyp_mappings(void *from, void *to, unsigned long *pfn_base)
{
	unsigned long start = (unsigned long)from;
	unsigned long end = (unsigned long)to;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	unsigned long addr, next;
	int err = 0;

	BUG_ON(start > end);
	/* Only kernel-space addresses may be mapped into HYP this way. */
	if (start < PAGE_OFFSET)
		return -EINVAL;

	mutex_lock(&kvm_hyp_pgd_mutex);
	for (addr = start; addr < end; addr = next) {
		pgd = hyp_pgd + pgd_index(addr);
		pud = pud_offset(pgd, addr);

		if (pud_none_or_clear_bad(pud)) {
			pmd = pmd_alloc_one(NULL, addr);
			if (!pmd) {
				kvm_err("Cannot allocate Hyp pmd\n");
				err = -ENOMEM;
				goto out;
			}
			pud_populate(NULL, pud, pmd);
		}

		next = pgd_addr_end(addr, end);
		err = create_hyp_pmd_mappings(pud, addr, next, pfn_base);
		if (err)
			goto out;
	}
out:
	mutex_unlock(&kvm_hyp_pgd_mutex);
	return err;
}
235 | ||
/**
 * create_hyp_mappings - map a kernel virtual address range in Hyp mode
 * @from:	The virtual kernel start address of the range
 * @to:		The virtual kernel end address of the range (exclusive)
 *
 * The same virtual address as the kernel virtual address is also used in
 * Hyp-mode mapping to the same underlying physical pages.
 *
 * Note: Wrapping around zero in the "to" address is not supported.
 *
 * Returns 0 on success or a negative error code from
 * __create_hyp_mappings().
 */
int create_hyp_mappings(void *from, void *to)
{
	/* NULL pfn_base selects the kernel-VA mapping path. */
	return __create_hyp_mappings(from, to, NULL);
}
250 | ||
/**
 * create_hyp_io_mappings - map a physical IO range in Hyp mode
 * @from:	The virtual HYP start address of the range
 * @to:		The virtual HYP end address of the range (exclusive)
 * @addr:	The physical start address which gets mapped
 *
 * Returns 0 on success or a negative error code from
 * __create_hyp_mappings().
 */
int create_hyp_io_mappings(void *from, void *to, phys_addr_t addr)
{
	/* Passed by pointer: the walker advances the pfn as it maps. */
	unsigned long pfn = __phys_to_pfn(addr);
	return __create_hyp_mappings(from, to, &pfn);
}
262 | ||
d5d8184d CD |
/**
 * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
 * @kvm:	The KVM struct pointer for the VM.
 *
 * Allocates the 1st level table only of size defined by S2_PGD_ORDER (can
 * support either full 40-bit input addresses or limited to 32-bit input
 * addresses). Clears the allocated pages.
 *
 * Note we don't need locking here as this is only called when the VM is
 * created, which can only be done once.
 *
 * Returns 0 on success, -EINVAL if already allocated, -ENOMEM on
 * allocation failure.
 */
int kvm_alloc_stage2_pgd(struct kvm *kvm)
{
	pgd_t *pgd;

	if (kvm->arch.pgd != NULL) {
		kvm_err("kvm_arch already initialized?\n");
		return -EINVAL;
	}

	pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, S2_PGD_ORDER);
	if (!pgd)
		return -ENOMEM;

	/* stage-2 pgd must be aligned to its size */
	VM_BUG_ON((unsigned long)pgd & (S2_PGD_SIZE - 1));

	/* Clean the zeroed table so the (non-snooping) walker sees zeros. */
	memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t));
	clean_dcache_area(pgd, PTRS_PER_S2_PGD * sizeof(pgd_t));
	kvm->arch.pgd = pgd;

	return 0;
}
296 | ||
/*
 * Clear a stage-2 pud entry, free the pmd table it pointed to, and drop
 * the reference the entry held on the pud page.
 */
static void clear_pud_entry(pud_t *pud)
{
	pmd_t *pmd_table = pmd_offset(pud, 0);
	pud_clear(pud);
	pmd_free(NULL, pmd_table);
	put_page(virt_to_page(pud));
}
304 | ||
/*
 * Clear a stage-2 pmd entry, free the pte table it pointed to, and drop
 * the reference the entry held on the pmd page.
 */
static void clear_pmd_entry(pmd_t *pmd)
{
	pte_t *pte_table = pte_offset_kernel(pmd, 0);
	pmd_clear(pmd);
	pte_free_kernel(NULL, pte_table);
	put_page(virt_to_page(pmd));
}
312 | ||
313 | static bool pmd_empty(pmd_t *pmd) | |
314 | { | |
315 | struct page *pmd_page = virt_to_page(pmd); | |
316 | return page_count(pmd_page) == 1; | |
317 | } | |
318 | ||
/*
 * Zap a single stage-2 pte if present and drop the reference it held on
 * the pte table page; not-present entries are left untouched.
 */
static void clear_pte_entry(pte_t *pte)
{
	if (pte_present(*pte)) {
		kvm_set_pte(pte, __pte(0));
		put_page(virt_to_page(pte));
	}
}
326 | ||
327 | static bool pte_empty(pte_t *pte) | |
328 | { | |
329 | struct page *pte_page = virt_to_page(pte); | |
330 | return page_count(pte_page) == 1; | |
331 | } | |
332 | ||
333 | /** | |
334 | * unmap_stage2_range -- Clear stage2 page table entries to unmap a range | |
335 | * @kvm: The VM pointer | |
336 | * @start: The intermediate physical base address of the range to unmap | |
337 | * @size: The size of the area to unmap | |
338 | * | |
339 | * Clear a range of stage-2 mappings, lowering the various ref-counts. Must | |
340 | * be called while holding mmu_lock (unless for freeing the stage2 pgd before | |
341 | * destroying the VM), otherwise another faulting VCPU may come in and mess | |
342 | * with things behind our backs. | |
343 | */ | |
344 | static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) | |
345 | { | |
346 | pgd_t *pgd; | |
347 | pud_t *pud; | |
348 | pmd_t *pmd; | |
349 | pte_t *pte; | |
350 | phys_addr_t addr = start, end = start + size; | |
351 | u64 range; | |
352 | ||
353 | while (addr < end) { | |
354 | pgd = kvm->arch.pgd + pgd_index(addr); | |
355 | pud = pud_offset(pgd, addr); | |
356 | if (pud_none(*pud)) { | |
357 | addr += PUD_SIZE; | |
358 | continue; | |
359 | } | |
360 | ||
361 | pmd = pmd_offset(pud, addr); | |
362 | if (pmd_none(*pmd)) { | |
363 | addr += PMD_SIZE; | |
364 | continue; | |
365 | } | |
366 | ||
367 | pte = pte_offset_kernel(pmd, addr); | |
368 | clear_pte_entry(pte); | |
369 | range = PAGE_SIZE; | |
370 | ||
371 | /* If we emptied the pte, walk back up the ladder */ | |
372 | if (pte_empty(pte)) { | |
373 | clear_pmd_entry(pmd); | |
374 | range = PMD_SIZE; | |
375 | if (pmd_empty(pmd)) { | |
376 | clear_pud_entry(pud); | |
377 | range = PUD_SIZE; | |
378 | } | |
379 | } | |
380 | ||
381 | addr += range; | |
382 | } | |
383 | } | |
384 | ||
/**
 * kvm_free_stage2_pgd - free all stage-2 tables
 * @kvm:	The KVM struct pointer for the VM.
 *
 * Walks the level-1 page table pointed to by kvm->arch.pgd and frees all
 * underlying level-2 and level-3 tables before freeing the actual level-1 table
 * and setting the struct pointer to NULL.
 *
 * Note we don't need locking here as this is only called when the VM is
 * destroyed, which can only be done once.
 */
void kvm_free_stage2_pgd(struct kvm *kvm)
{
	if (kvm->arch.pgd == NULL)
		return;

	/* Unmapping the whole IPA space drops all child tables. */
	unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
	free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER);
	kvm->arch.pgd = NULL;
}
405 | ||
406 | ||
/*
 * Install @new_pte in the stage-2 tables of @kvm at IPA @addr,
 * allocating intermediate tables from @cache as needed.
 *
 * @cache may be NULL (calls from kvm_set_spte_hva): then a missing
 * intermediate table makes the update a silent no-op.  With @iomap set,
 * hitting an already-present pte returns -EFAULT instead of replacing
 * it.  Each newly used table entry takes a reference on its table page
 * (consumed by unmap_stage2_range when the entry is cleared).
 *
 * NOTE(review): most callers in this file take kvm->mmu_lock around
 * this; confirm the kvm_set_spte_hva path is similarly serialized.
 */
static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
			  phys_addr_t addr, const pte_t *new_pte, bool iomap)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte, old_pte;

	/* Create 2nd stage page table mapping - Level 1 */
	pgd = kvm->arch.pgd + pgd_index(addr);
	pud = pud_offset(pgd, addr);
	if (pud_none(*pud)) {
		if (!cache)
			return 0; /* ignore calls from kvm_set_spte_hva */
		pmd = mmu_memory_cache_alloc(cache);
		pud_populate(NULL, pud, pmd);
		pmd += pmd_index(addr);
		get_page(virt_to_page(pud));
	} else
		pmd = pmd_offset(pud, addr);

	/* Create 2nd stage page table mapping - Level 2 */
	if (pmd_none(*pmd)) {
		if (!cache)
			return 0; /* ignore calls from kvm_set_spte_hva */
		pte = mmu_memory_cache_alloc(cache);
		/* Clean the new table so the hardware walker sees zeros. */
		clean_pte_table(pte);
		pmd_populate_kernel(NULL, pmd, pte);
		pte += pte_index(addr);
		get_page(virt_to_page(pmd));
	} else
		pte = pte_offset_kernel(pmd, addr);

	if (iomap && pte_present(*pte))
		return -EFAULT;

	/* Create 2nd stage page table mapping - Level 3 */
	old_pte = *pte;
	kvm_set_pte(pte, *new_pte);
	if (pte_present(old_pte))
		/* Replacing a live mapping: stale TLB entries must go. */
		kvm_tlb_flush_vmid(kvm);
	else
		/* Fresh mapping: account a reference on the pte table. */
		get_page(virt_to_page(pte));

	return 0;
}
453 | ||
/**
 * kvm_phys_addr_ioremap - map a device range to guest IPA
 *
 * @kvm:	The KVM pointer
 * @guest_ipa:	The IPA at which to insert the mapping
 * @pa:		The physical address of the device
 * @size:	The size of the mapping
 *
 * Returns 0 on success or a negative error from cache top-up or
 * stage2_set_pte() (e.g. -EFAULT if a pte is already present).
 */
int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
			  phys_addr_t pa, unsigned long size)
{
	phys_addr_t addr, end;
	int ret = 0;
	unsigned long pfn;
	struct kvm_mmu_memory_cache cache = { 0, };

	/* Round the end of the range up to a page boundary. */
	end = (guest_ipa + size + PAGE_SIZE - 1) & PAGE_MASK;
	pfn = __phys_to_pfn(pa);

	for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) {
		pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE | L_PTE_S2_RDWR);

		/* Two table pages may be needed (pmd + pte) per mapping. */
		ret = mmu_topup_memory_cache(&cache, 2, 2);
		if (ret)
			goto out;
		spin_lock(&kvm->mmu_lock);
		ret = stage2_set_pte(kvm, &cache, addr, &pte, true);
		spin_unlock(&kvm->mmu_lock);
		if (ret)
			goto out;

		pfn++;
	}

out:
	mmu_free_memory_cache(&cache);
	return ret;
}
492 | ||
94f8e641 CD |
/*
 * Make sure the icache cannot hold stale instructions for a page that
 * is about to become visible to the guest at @gfn.
 */
static void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn)
{
	/*
	 * If we are going to insert an instruction page and the icache is
	 * either VIPT or PIPT, there is a potential problem where the host
	 * (or another VM) may have used the same page as this guest, and we
	 * read incorrect data from the icache.  If we're using a PIPT cache,
	 * we can invalidate just that page, but if we are using a VIPT cache
	 * we need to invalidate the entire icache - damn shame - as written
	 * in the ARM ARM (DDI 0406C.b - Page B3-1393).
	 *
	 * VIVT caches are tagged using both the ASID and the VMID and doesn't
	 * need any kind of flushing (DDI 0406C.b - Page B3-1392).
	 */
	if (icache_is_pipt()) {
		unsigned long hva = gfn_to_hva(kvm, gfn);
		__cpuc_coherent_user_range(hva, hva + PAGE_SIZE);
	} else if (!icache_is_vivt_asid_tagged()) {
		/* any kind of VIPT cache */
		__flush_icache_all();
	}
}
515 | ||
/*
 * Handle a stage-2 fault on memslot-backed memory: pin the backing page
 * via gfn_to_pfn_prot(), ensure icache coherency, and install a stage-2
 * pte (writable if the host mapping allows it).
 *
 * Returns 0 on success or a negative error.  Losing the race with an
 * mmu-notifier invalidation silently drops the update and returns 0,
 * so the guest simply re-faults.
 */
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
			  gfn_t gfn, struct kvm_memory_slot *memslot,
			  unsigned long fault_status)
{
	pte_t new_pte;
	pfn_t pfn;
	int ret;
	bool write_fault, writable;
	unsigned long mmu_seq;
	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;

	write_fault = kvm_is_write_fault(vcpu->arch.hsr);
	if (fault_status == FSC_PERM && !write_fault) {
		kvm_err("Unexpected L2 read permission error\n");
		return -EFAULT;
	}

	/* We need minimum second+third level pages */
	ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS);
	if (ret)
		return ret;

	mmu_seq = vcpu->kvm->mmu_notifier_seq;
	/*
	 * Ensure the read of mmu_notifier_seq happens before we call
	 * gfn_to_pfn_prot (which calls get_user_pages), so that we don't risk
	 * the page we just got a reference to gets unmapped before we have a
	 * chance to grab the mmu_lock, which ensure that if the page gets
	 * unmapped afterwards, the call to kvm_unmap_hva will take it away
	 * from us again properly. This smp_rmb() interacts with the smp_wmb()
	 * in kvm_mmu_notifier_invalidate_<page|range_end>.
	 */
	smp_rmb();

	pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write_fault, &writable);
	if (is_error_pfn(pfn))
		return -EFAULT;

	new_pte = pfn_pte(pfn, PAGE_S2);
	coherent_icache_guest_page(vcpu->kvm, gfn);

	spin_lock(&vcpu->kvm->mmu_lock);
	/* The page was invalidated while we were pinning it: retry. */
	if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
		goto out_unlock;
	if (writable) {
		pte_val(new_pte) |= L_PTE_S2_RDWR;
		kvm_set_pfn_dirty(pfn);
	}
	/* NOTE(review): stage2_set_pte()'s return value is ignored here. */
	stage2_set_pte(vcpu->kvm, memcache, fault_ipa, &new_pte, false);

out_unlock:
	spin_unlock(&vcpu->kvm->mmu_lock);
	kvm_release_pfn_clean(pfn);
	return 0;
}
571 | ||
/**
 * kvm_handle_guest_abort - handles all 2nd stage aborts
 * @vcpu:	the VCPU pointer
 * @run:	the kvm_run structure
 *
 * Any abort that gets to the host is almost guaranteed to be caused by a
 * missing second stage translation table entry, which can mean that either the
 * guest simply needs more memory and we must allocate an appropriate page or it
 * can mean that the guest tried to access I/O memory, which is emulated by user
 * space. The distinction is based on the IPA causing the fault and whether this
 * memory region has been registered as standard RAM by user space.
 *
 * Returns a negative error code on failure, otherwise a non-negative
 * value; presumably 1 resumes the guest and 0 exits to user space —
 * confirm against the VCPU run loop.
 */
int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
	unsigned long hsr_ec;
	unsigned long fault_status;
	phys_addr_t fault_ipa;
	struct kvm_memory_slot *memslot;
	bool is_iabt;
	gfn_t gfn;
	int ret, idx;

	hsr_ec = vcpu->arch.hsr >> HSR_EC_SHIFT;
	is_iabt = (hsr_ec == HSR_EC_IABT);
	/* HPFAR holds the faulting IPA in bits [39:12] >> 4; shift back. */
	fault_ipa = ((phys_addr_t)vcpu->arch.hpfar & HPFAR_MASK) << 8;

	trace_kvm_guest_fault(*vcpu_pc(vcpu), vcpu->arch.hsr,
			      vcpu->arch.hxfar, fault_ipa);

	/* Check the stage-2 fault is trans. fault or write fault */
	fault_status = (vcpu->arch.hsr & HSR_FSC_TYPE);
	if (fault_status != FSC_FAULT && fault_status != FSC_PERM) {
		kvm_err("Unsupported fault status: EC=%#lx DFCS=%#lx\n",
			hsr_ec, fault_status);
		return -EFAULT;
	}

	/* Memslot lookups below require the srcu read lock. */
	idx = srcu_read_lock(&vcpu->kvm->srcu);

	gfn = fault_ipa >> PAGE_SHIFT;
	if (!kvm_is_visible_gfn(vcpu->kvm, gfn)) {
		if (is_iabt) {
			/* Prefetch Abort on I/O address */
			kvm_inject_pabt(vcpu, vcpu->arch.hxfar);
			ret = 1;
			goto out_unlock;
		}

		if (fault_status != FSC_FAULT) {
			kvm_err("Unsupported fault status on io memory: %#lx\n",
				fault_status);
			ret = -EFAULT;
			goto out_unlock;
		}

		/* MMIO emulation is not implemented at this point. */
		kvm_pr_unimpl("I/O address abort...");
		ret = 0;
		goto out_unlock;
	}

	memslot = gfn_to_memslot(vcpu->kvm, gfn);
	if (!memslot->user_alloc) {
		kvm_err("non user-alloc memslots not supported\n");
		ret = -EINVAL;
		goto out_unlock;
	}

	ret = user_mem_abort(vcpu, fault_ipa, gfn, memslot, fault_status);
	if (ret == 0)
		ret = 1;
out_unlock:
	srcu_read_unlock(&vcpu->kvm->srcu, idx);
	return ret;
}
646 | ||
d5d8184d CD |
/*
 * Invoke @handler once per guest physical page whose backing host VA
 * intersects [start, end), across all memslots.  @data is passed
 * through to the handler unchanged.
 */
static void handle_hva_to_gpa(struct kvm *kvm,
			      unsigned long start,
			      unsigned long end,
			      void (*handler)(struct kvm *kvm,
					      gpa_t gpa, void *data),
			      void *data)
{
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;

	slots = kvm_memslots(kvm);

	/* we only care about the pages that the guest sees */
	kvm_for_each_memslot(memslot, slots) {
		unsigned long hva_start, hva_end;
		gfn_t gfn, gfn_end;

		/* Clip [start, end) to this memslot's host VA range. */
		hva_start = max(start, memslot->userspace_addr);
		hva_end = min(end, memslot->userspace_addr +
					(memslot->npages << PAGE_SHIFT));
		if (hva_start >= hva_end)
			continue;

		/*
		 * {gfn(page) | page intersects with [hva_start, hva_end)} =
		 * {gfn_start, gfn_start+1, ..., gfn_end-1}.
		 */
		gfn = hva_to_gfn_memslot(hva_start, memslot);
		gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);

		for (; gfn < gfn_end; ++gfn) {
			gpa_t gpa = gfn << PAGE_SHIFT;
			handler(kvm, gpa, data);
		}
	}
}
683 | ||
/* Per-page handler: unmap one stage-2 page and flush the VM's TLB. */
static void kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
{
	unmap_stage2_range(kvm, gpa, PAGE_SIZE);
	kvm_tlb_flush_vmid(kvm);
}
689 | ||
/*
 * MMU-notifier callback: unmap the stage-2 translations for the single
 * host page containing @hva.  No-op (returns 0) if the VM has no
 * stage-2 tables yet.
 */
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
{
	unsigned long end = hva + PAGE_SIZE;

	if (!kvm->arch.pgd)
		return 0;

	trace_kvm_unmap_hva(hva);
	handle_hva_to_gpa(kvm, hva, end, &kvm_unmap_hva_handler, NULL);
	return 0;
}
701 | ||
/*
 * MMU-notifier callback: unmap the stage-2 translations for every host
 * page in [start, end).  No-op (returns 0) without stage-2 tables.
 */
int kvm_unmap_hva_range(struct kvm *kvm,
			unsigned long start, unsigned long end)
{
	if (!kvm->arch.pgd)
		return 0;

	trace_kvm_unmap_hva_range(start, end);
	handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL);
	return 0;
}
712 | ||
/* Per-page handler: install the pte passed via @data at @gpa. */
static void kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data)
{
	pte_t *pte = (pte_t *)data;

	/* NULL cache: missing intermediate tables make this a no-op. */
	stage2_set_pte(kvm, NULL, gpa, pte, false);
}
719 | ||
720 | ||
/*
 * MMU-notifier callback: the host pte for @hva changed; update the
 * corresponding stage-2 mapping(s) to point at the new pfn.  The new
 * stage-2 pte is built with plain PAGE_S2 (no S2 write bit added here).
 */
void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
{
	unsigned long end = hva + PAGE_SIZE;
	pte_t stage2_pte;

	if (!kvm->arch.pgd)
		return;

	trace_kvm_set_spte_hva(hva);
	stage2_pte = pfn_pte(pte_pfn(pte), PAGE_S2);
	handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &stage2_pte);
}
733 | ||
/* Release the pages preallocated for this VCPU's stage-2 fault path. */
void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
{
	mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
}
738 | ||
342cd0ab CD |
/* Return the physical address of the HYP pgd (used to program HTTBR). */
phys_addr_t kvm_mmu_get_httbr(void)
{
	VM_BUG_ON(!virt_addr_valid(hyp_pgd));
	return virt_to_phys(hyp_pgd);
}
744 | ||
/*
 * Verify the HYP pgd (allocated elsewhere) exists before KVM starts
 * using it.  Returns 0 on success, -ENOMEM if it was never allocated.
 */
int kvm_mmu_init(void)
{
	if (!hyp_pgd) {
		kvm_err("Hyp mode PGD not allocated\n");
		return -ENOMEM;
	}

	return 0;
}
754 | ||
/**
 * kvm_clear_idmap - remove all idmaps from the hyp pgd
 *
 * Free the underlying pmds for all pgds in range and clear the pgds (but
 * don't free them) afterwards.
 */
void kvm_clear_hyp_idmap(void)
{
	unsigned long addr, end;
	unsigned long next;
	pgd_t *pgd = hyp_pgd;
	pud_t *pud;
	pmd_t *pmd;

	/* The idmap covers the physical addresses of the HYP idmap text. */
	addr = virt_to_phys(__hyp_idmap_text_start);
	end = virt_to_phys(__hyp_idmap_text_end);

	pgd += pgd_index(addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		pud = pud_offset(pgd, addr);
		pmd = pmd_offset(pud, addr);

		pud_clear(pud);
		clean_pmd_entry(pmd);
		/*
		 * pmd_offset() may point into the middle of the pmd page;
		 * mask down to the page start before freeing it.
		 */
		pmd_free(NULL, (pmd_t *)((unsigned long)pmd & PAGE_MASK));
	} while (pgd++, addr = next, addr < end);
}
784 | } |