Commit | Line | Data |
---|---|---|
6aa8b732 AK |
1 | /* |
2 | * Kernel-based Virtual Machine driver for Linux | |
3 | * | |
4 | * This module enables machines with Intel VT-x extensions to run virtual | |
5 | * machines without emulation or binary translation. | |
6 | * | |
7 | * MMU support | |
8 | * | |
9 | * Copyright (C) 2006 Qumranet, Inc. | |
10 | * | |
11 | * Authors: | |
12 | * Yaniv Kamay <yaniv@qumranet.com> | |
13 | * Avi Kivity <avi@qumranet.com> | |
14 | * | |
15 | * This work is licensed under the terms of the GNU GPL, version 2. See | |
16 | * the COPYING file in the top-level directory. | |
17 | * | |
18 | */ | |
e495606d AK |
19 | |
20 | #include "vmx.h" | |
21 | #include "kvm.h" | |
22 | ||
6aa8b732 AK |
23 | #include <linux/types.h> |
24 | #include <linux/string.h> | |
6aa8b732 AK |
25 | #include <linux/mm.h> |
26 | #include <linux/highmem.h> | |
27 | #include <linux/module.h> | |
28 | ||
e495606d AK |
29 | #include <asm/page.h> |
30 | #include <asm/cmpxchg.h> | |
6aa8b732 | 31 | |
37a7d8b0 AK |
/*
 * Compile-time debug switches.  Both are forced off here; change an
 * #undef into a #define to enable the corresponding facility.
 */
#undef MMU_DEBUG
#undef AUDIT

/* Without AUDIT the audit hook is an empty stub. */
#ifndef AUDIT
static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg) {}
#else
static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg);
#endif

/*
 * Debug printing helpers.  Without MMU_DEBUG they expand to nothing and
 * their arguments are never evaluated; with it they print only while
 * 'dbg' is nonzero.
 */
#ifndef MMU_DEBUG

#define pgprintk(x...) do { } while (0)
#define rmap_printk(x...) do { } while (0)

#else

#define pgprintk(x...) do { if (dbg) printk(x); } while (0)
#define rmap_printk(x...) do { if (dbg) printk(x); } while (0)

#endif

#if defined(MMU_DEBUG) || defined(AUDIT)
static int dbg = 1;
#endif
6aa8b732 | 57 | |
d6c69ee9 YD |
/*
 * ASSERT(x): with MMU_DEBUG, log a warning (file, line and the failed
 * expression text) when x is false; otherwise expands to nothing.
 *
 * Both variants are wrapped in do { } while (0) so that "ASSERT(x);"
 * is always exactly one statement and cannot capture a following
 * 'else' when used inside an unbraced if.
 */
#ifndef MMU_DEBUG
#define ASSERT(x) do { } while (0)
#else
#define ASSERT(x)							\
	do {								\
		if (!(x)) {						\
			printk(KERN_WARNING "assertion failed %s:%d: %s\n", \
			       __FILE__, __LINE__, #x);			\
		}							\
	} while (0)
#endif
6aa8b732 | 67 | |
cea0f0e7 AK |
/* Index bits per level and entries per table for each table format. */
#define PT64_PT_BITS		9
#define PT64_ENT_PER_PAGE	(1 << PT64_PT_BITS)
#define PT32_PT_BITS		10
#define PT32_ENT_PER_PAGE	(1 << PT32_PT_BITS)

/* Bit positions referenced by the masks below. */
#define PT_WRITABLE_SHIFT	1
#define PT_PAT_SHIFT		7
#define PT_DIR_PAT_SHIFT	12

/* Page table entry bit masks (all 64-bit wide). */
#define PT_PRESENT_MASK		(1ULL << 0)
#define PT_WRITABLE_MASK	(1ULL << PT_WRITABLE_SHIFT)
#define PT_USER_MASK		(1ULL << 2)
#define PT_PWT_MASK		(1ULL << 3)
#define PT_PCD_MASK		(1ULL << 4)
#define PT_ACCESSED_MASK	(1ULL << 5)
#define PT_DIRTY_MASK		(1ULL << 6)
#define PT_PAGE_SIZE_MASK	(1ULL << 7)
#define PT_PAT_MASK		(1ULL << PT_PAT_SHIFT)
#define PT_GLOBAL_MASK		(1ULL << 8)
#define PT64_NX_MASK		(1ULL << 63)

#define PT_DIR_PAT_MASK		(1ULL << PT_DIR_PAT_SHIFT)

/* PSE36 field of a 32-bit directory entry: 4 bits starting at bit 13. */
#define PT32_DIR_PSE36_SIZE	4
#define PT32_DIR_PSE36_SHIFT	13
#define PT32_DIR_PSE36_MASK \
	(((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT)

/* First bit of each run of entry bits named "available" by this file. */
#define PT_FIRST_AVAIL_BITS_SHIFT	9
#define PT64_SECOND_AVAIL_BITS_SHIFT	52

/* Marker stored in the first available bit; tested by is_io_pte(). */
#define PT_SHADOW_IO_MARK	(1ULL << PT_FIRST_AVAIL_BITS_SHIFT)

/* True unless x is the INVALID_PAGE sentinel. */
#define VALID_PAGE(x)		((x) != INVALID_PAGE)
102 | ||
/*
 * Per-level helpers for walking 64-bit format page tables.  Levels are
 * counted from 1; each level resolves PT64_LEVEL_BITS address bits above
 * PAGE_SHIFT.
 *
 * The 'level' argument is fully parenthesized so that expressions such
 * as "a | b" or "c ? x : y" may be passed safely.
 */
#define PT64_LEVEL_BITS 9

#define PT64_LEVEL_SHIFT(level) \
		(PAGE_SHIFT + ((level) - 1) * PT64_LEVEL_BITS)

#define PT64_LEVEL_MASK(level) \
		(((1ULL << PT64_LEVEL_BITS) - 1) << PT64_LEVEL_SHIFT(level))

#define PT64_INDEX(address, level)\
	(((address) >> PT64_LEVEL_SHIFT(level)) & ((1 << PT64_LEVEL_BITS) - 1))


/* Same helpers for 32-bit format page tables (10 index bits per level). */
#define PT32_LEVEL_BITS 10

#define PT32_LEVEL_SHIFT(level) \
		(PAGE_SHIFT + ((level) - 1) * PT32_LEVEL_BITS)

#define PT32_LEVEL_MASK(level) \
		(((1ULL << PT32_LEVEL_BITS) - 1) << PT32_LEVEL_SHIFT(level))

#define PT32_INDEX(address, level)\
	(((address) >> PT32_LEVEL_SHIFT(level)) & ((1 << PT32_LEVEL_BITS) - 1))
125 | ||
126 | ||
/*
 * Address field of a 64-bit format entry: bits below 52 with the
 * in-page offset bits cleared.  The DIR variant additionally clears one
 * level's worth of index bits.
 */
#define PT64_BASE_ADDR_MASK	(((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))
#define PT64_DIR_BASE_ADDR_MASK \
	(PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + PT64_LEVEL_BITS)) - 1))

/* Same masks for 32-bit format entries. */
#define PT32_BASE_ADDR_MASK	PAGE_MASK
#define PT32_DIR_BASE_ADDR_MASK \
	(PAGE_MASK & ~((1ULL << (PAGE_SHIFT + PT32_LEVEL_BITS)) - 1))


/* Page fault error code bits. */
#define PFERR_PRESENT_MASK	(1U << 0)
#define PFERR_WRITE_MASK	(1U << 1)
#define PFERR_USER_MASK		(1U << 2)
#define PFERR_FETCH_MASK	(1U << 4)

/* Root level of each paging layout. */
#define PT64_ROOT_LEVEL		4
#define PT32_ROOT_LEVEL		2
#define PT32E_ROOT_LEVEL	3

/* Named levels used when walking tables. */
#define PT_DIRECTORY_LEVEL	2
#define PT_PAGE_TABLE_LEVEL	1
147 | ||
cd4a4e53 AK |
148 | #define RMAP_EXT 4 |
149 | ||
150 | struct kvm_rmap_desc { | |
151 | u64 *shadow_ptes[RMAP_EXT]; | |
152 | struct kvm_rmap_desc *more; | |
153 | }; | |
154 | ||
b5a33a75 AK |
/*
 * Slab caches handed to mmu_topup_memory_cache() to refill the per-vcpu
 * object caches for pte chains, rmap descriptors and shadow page headers.
 */
static struct kmem_cache *pte_chain_cache;
static struct kmem_cache *rmap_desc_cache;
static struct kmem_cache *mmu_page_header_cache;
b5a33a75 | 158 | |
6aa8b732 AK |
159 | static int is_write_protection(struct kvm_vcpu *vcpu) |
160 | { | |
707d92fa | 161 | return vcpu->cr0 & X86_CR0_WP; |
6aa8b732 AK |
162 | } |
163 | ||
/* PSE36 is reported as always available: unconditionally returns 1. */
static int is_cpuid_PSE36(void)
{
	const int supported = 1;

	return supported;
}
168 | ||
73b1087e AK |
169 | static int is_nx(struct kvm_vcpu *vcpu) |
170 | { | |
171 | return vcpu->shadow_efer & EFER_NX; | |
172 | } | |
173 | ||
6aa8b732 AK |
174 | static int is_present_pte(unsigned long pte) |
175 | { | |
176 | return pte & PT_PRESENT_MASK; | |
177 | } | |
178 | ||
179 | static int is_writeble_pte(unsigned long pte) | |
180 | { | |
181 | return pte & PT_WRITABLE_MASK; | |
182 | } | |
183 | ||
184 | static int is_io_pte(unsigned long pte) | |
185 | { | |
186 | return pte & PT_SHADOW_IO_MARK; | |
187 | } | |
188 | ||
cd4a4e53 AK |
189 | static int is_rmap_pte(u64 pte) |
190 | { | |
191 | return (pte & (PT_WRITABLE_MASK | PT_PRESENT_MASK)) | |
192 | == (PT_WRITABLE_MASK | PT_PRESENT_MASK); | |
193 | } | |
194 | ||
e663ee64 AK |
195 | static void set_shadow_pte(u64 *sptep, u64 spte) |
196 | { | |
197 | #ifdef CONFIG_X86_64 | |
198 | set_64bit((unsigned long *)sptep, spte); | |
199 | #else | |
200 | set_64bit((unsigned long long *)sptep, spte); | |
201 | #endif | |
202 | } | |
203 | ||
e2dec939 | 204 | static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, |
2e3e5882 | 205 | struct kmem_cache *base_cache, int min) |
714b93da AK |
206 | { |
207 | void *obj; | |
208 | ||
209 | if (cache->nobjs >= min) | |
e2dec939 | 210 | return 0; |
714b93da | 211 | while (cache->nobjs < ARRAY_SIZE(cache->objects)) { |
2e3e5882 | 212 | obj = kmem_cache_zalloc(base_cache, GFP_KERNEL); |
714b93da | 213 | if (!obj) |
e2dec939 | 214 | return -ENOMEM; |
714b93da AK |
215 | cache->objects[cache->nobjs++] = obj; |
216 | } | |
e2dec939 | 217 | return 0; |
714b93da AK |
218 | } |
219 | ||
220 | static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc) | |
221 | { | |
222 | while (mc->nobjs) | |
223 | kfree(mc->objects[--mc->nobjs]); | |
224 | } | |
225 | ||
c1158e63 | 226 | static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache, |
2e3e5882 | 227 | int min) |
c1158e63 AK |
228 | { |
229 | struct page *page; | |
230 | ||
231 | if (cache->nobjs >= min) | |
232 | return 0; | |
233 | while (cache->nobjs < ARRAY_SIZE(cache->objects)) { | |
2e3e5882 | 234 | page = alloc_page(GFP_KERNEL); |
c1158e63 AK |
235 | if (!page) |
236 | return -ENOMEM; | |
237 | set_page_private(page, 0); | |
238 | cache->objects[cache->nobjs++] = page_address(page); | |
239 | } | |
240 | return 0; | |
241 | } | |
242 | ||
243 | static void mmu_free_memory_cache_page(struct kvm_mmu_memory_cache *mc) | |
244 | { | |
245 | while (mc->nobjs) | |
c4d198d5 | 246 | free_page((unsigned long)mc->objects[--mc->nobjs]); |
c1158e63 AK |
247 | } |
248 | ||
2e3e5882 | 249 | static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu) |
714b93da | 250 | { |
e2dec939 AK |
251 | int r; |
252 | ||
2e3e5882 | 253 | kvm_mmu_free_some_pages(vcpu); |
e2dec939 | 254 | r = mmu_topup_memory_cache(&vcpu->mmu_pte_chain_cache, |
2e3e5882 | 255 | pte_chain_cache, 4); |
e2dec939 AK |
256 | if (r) |
257 | goto out; | |
258 | r = mmu_topup_memory_cache(&vcpu->mmu_rmap_desc_cache, | |
2e3e5882 | 259 | rmap_desc_cache, 1); |
d3d25b04 AK |
260 | if (r) |
261 | goto out; | |
2e3e5882 | 262 | r = mmu_topup_memory_cache_page(&vcpu->mmu_page_cache, 4); |
d3d25b04 AK |
263 | if (r) |
264 | goto out; | |
265 | r = mmu_topup_memory_cache(&vcpu->mmu_page_header_cache, | |
2e3e5882 | 266 | mmu_page_header_cache, 4); |
e2dec939 AK |
267 | out: |
268 | return r; | |
714b93da AK |
269 | } |
270 | ||
271 | static void mmu_free_memory_caches(struct kvm_vcpu *vcpu) | |
272 | { | |
273 | mmu_free_memory_cache(&vcpu->mmu_pte_chain_cache); | |
274 | mmu_free_memory_cache(&vcpu->mmu_rmap_desc_cache); | |
c1158e63 | 275 | mmu_free_memory_cache_page(&vcpu->mmu_page_cache); |
d3d25b04 | 276 | mmu_free_memory_cache(&vcpu->mmu_page_header_cache); |
714b93da AK |
277 | } |
278 | ||
279 | static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc, | |
280 | size_t size) | |
281 | { | |
282 | void *p; | |
283 | ||
284 | BUG_ON(!mc->nobjs); | |
285 | p = mc->objects[--mc->nobjs]; | |
286 | memset(p, 0, size); | |
287 | return p; | |
288 | } | |
289 | ||
714b93da AK |
290 | static struct kvm_pte_chain *mmu_alloc_pte_chain(struct kvm_vcpu *vcpu) |
291 | { | |
292 | return mmu_memory_cache_alloc(&vcpu->mmu_pte_chain_cache, | |
293 | sizeof(struct kvm_pte_chain)); | |
294 | } | |
295 | ||
/* Release a pte chain; it is handed straight to kfree(). */
static void mmu_free_pte_chain(struct kvm_pte_chain *pc)
{
	struct kvm_pte_chain *chain = pc;

	kfree(chain);
}
300 | ||
301 | static struct kvm_rmap_desc *mmu_alloc_rmap_desc(struct kvm_vcpu *vcpu) | |
302 | { | |
303 | return mmu_memory_cache_alloc(&vcpu->mmu_rmap_desc_cache, | |
304 | sizeof(struct kvm_rmap_desc)); | |
305 | } | |
306 | ||
/* Release an rmap descriptor; it is handed straight to kfree(). */
static void mmu_free_rmap_desc(struct kvm_rmap_desc *rd)
{
	struct kvm_rmap_desc *desc = rd;

	kfree(desc);
}
311 | ||
cd4a4e53 AK |
312 | /* |
313 | * Reverse mapping data structures: | |
314 | * | |
315 | * If page->private bit zero is zero, then page->private points to the | |
316 | * shadow page table entry that points to page_address(page). | |
317 | * | |
318 | * If page->private bit zero is one, then (page->private & ~1) points | |
319 | * to a struct kvm_rmap_desc containing more mappings. | |
320 | */ | |
714b93da | 321 | static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte) |
cd4a4e53 AK |
322 | { |
323 | struct page *page; | |
324 | struct kvm_rmap_desc *desc; | |
325 | int i; | |
326 | ||
327 | if (!is_rmap_pte(*spte)) | |
328 | return; | |
329 | page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT); | |
5972e953 | 330 | if (!page_private(page)) { |
cd4a4e53 | 331 | rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte); |
5972e953 MR |
332 | set_page_private(page,(unsigned long)spte); |
333 | } else if (!(page_private(page) & 1)) { | |
cd4a4e53 | 334 | rmap_printk("rmap_add: %p %llx 1->many\n", spte, *spte); |
714b93da | 335 | desc = mmu_alloc_rmap_desc(vcpu); |
5972e953 | 336 | desc->shadow_ptes[0] = (u64 *)page_private(page); |
cd4a4e53 | 337 | desc->shadow_ptes[1] = spte; |
5972e953 | 338 | set_page_private(page,(unsigned long)desc | 1); |
cd4a4e53 AK |
339 | } else { |
340 | rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte); | |
5972e953 | 341 | desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul); |
cd4a4e53 AK |
342 | while (desc->shadow_ptes[RMAP_EXT-1] && desc->more) |
343 | desc = desc->more; | |
344 | if (desc->shadow_ptes[RMAP_EXT-1]) { | |
714b93da | 345 | desc->more = mmu_alloc_rmap_desc(vcpu); |
cd4a4e53 AK |
346 | desc = desc->more; |
347 | } | |
348 | for (i = 0; desc->shadow_ptes[i]; ++i) | |
349 | ; | |
350 | desc->shadow_ptes[i] = spte; | |
351 | } | |
352 | } | |
353 | ||
90cb0529 | 354 | static void rmap_desc_remove_entry(struct page *page, |
cd4a4e53 AK |
355 | struct kvm_rmap_desc *desc, |
356 | int i, | |
357 | struct kvm_rmap_desc *prev_desc) | |
358 | { | |
359 | int j; | |
360 | ||
361 | for (j = RMAP_EXT - 1; !desc->shadow_ptes[j] && j > i; --j) | |
362 | ; | |
363 | desc->shadow_ptes[i] = desc->shadow_ptes[j]; | |
11718b4d | 364 | desc->shadow_ptes[j] = NULL; |
cd4a4e53 AK |
365 | if (j != 0) |
366 | return; | |
367 | if (!prev_desc && !desc->more) | |
5972e953 | 368 | set_page_private(page,(unsigned long)desc->shadow_ptes[0]); |
cd4a4e53 AK |
369 | else |
370 | if (prev_desc) | |
371 | prev_desc->more = desc->more; | |
372 | else | |
5972e953 | 373 | set_page_private(page,(unsigned long)desc->more | 1); |
90cb0529 | 374 | mmu_free_rmap_desc(desc); |
cd4a4e53 AK |
375 | } |
376 | ||
90cb0529 | 377 | static void rmap_remove(u64 *spte) |
cd4a4e53 AK |
378 | { |
379 | struct page *page; | |
380 | struct kvm_rmap_desc *desc; | |
381 | struct kvm_rmap_desc *prev_desc; | |
382 | int i; | |
383 | ||
384 | if (!is_rmap_pte(*spte)) | |
385 | return; | |
386 | page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT); | |
5972e953 | 387 | if (!page_private(page)) { |
cd4a4e53 AK |
388 | printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte); |
389 | BUG(); | |
5972e953 | 390 | } else if (!(page_private(page) & 1)) { |
cd4a4e53 | 391 | rmap_printk("rmap_remove: %p %llx 1->0\n", spte, *spte); |
5972e953 | 392 | if ((u64 *)page_private(page) != spte) { |
cd4a4e53 AK |
393 | printk(KERN_ERR "rmap_remove: %p %llx 1->BUG\n", |
394 | spte, *spte); | |
395 | BUG(); | |
396 | } | |
5972e953 | 397 | set_page_private(page,0); |
cd4a4e53 AK |
398 | } else { |
399 | rmap_printk("rmap_remove: %p %llx many->many\n", spte, *spte); | |
5972e953 | 400 | desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul); |
cd4a4e53 AK |
401 | prev_desc = NULL; |
402 | while (desc) { | |
403 | for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; ++i) | |
404 | if (desc->shadow_ptes[i] == spte) { | |
90cb0529 | 405 | rmap_desc_remove_entry(page, |
714b93da | 406 | desc, i, |
cd4a4e53 AK |
407 | prev_desc); |
408 | return; | |
409 | } | |
410 | prev_desc = desc; | |
411 | desc = desc->more; | |
412 | } | |
413 | BUG(); | |
414 | } | |
415 | } | |
416 | ||
714b93da | 417 | static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn) |
374cbac0 | 418 | { |
714b93da | 419 | struct kvm *kvm = vcpu->kvm; |
374cbac0 | 420 | struct page *page; |
374cbac0 AK |
421 | struct kvm_rmap_desc *desc; |
422 | u64 *spte; | |
423 | ||
954bbbc2 AK |
424 | page = gfn_to_page(kvm, gfn); |
425 | BUG_ON(!page); | |
374cbac0 | 426 | |
5972e953 MR |
427 | while (page_private(page)) { |
428 | if (!(page_private(page) & 1)) | |
429 | spte = (u64 *)page_private(page); | |
374cbac0 | 430 | else { |
5972e953 | 431 | desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul); |
374cbac0 AK |
432 | spte = desc->shadow_ptes[0]; |
433 | } | |
434 | BUG_ON(!spte); | |
27aba766 AK |
435 | BUG_ON((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT |
436 | != page_to_pfn(page)); | |
374cbac0 AK |
437 | BUG_ON(!(*spte & PT_PRESENT_MASK)); |
438 | BUG_ON(!(*spte & PT_WRITABLE_MASK)); | |
439 | rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); | |
90cb0529 | 440 | rmap_remove(spte); |
e663ee64 | 441 | set_shadow_pte(spte, *spte & ~PT_WRITABLE_MASK); |
88a97f0b | 442 | kvm_flush_remote_tlbs(vcpu->kvm); |
374cbac0 AK |
443 | } |
444 | } | |
445 | ||
#ifdef MMU_DEBUG
/*
 * Debug check: returns 1 when every u64 in the page at @spt is zero.
 * On the first non-zero entry its address and value are logged and 0 is
 * returned.
 */
static int is_empty_shadow_page(u64 *spt)
{
	unsigned long idx;

	for (idx = 0; idx < PAGE_SIZE / sizeof(u64); idx++) {
		if (spt[idx] == 0)
			continue;
		printk(KERN_ERR "%s: %p %llx\n", __FUNCTION__,
		       &spt[idx], spt[idx]);
		return 0;
	}
	return 1;
}
#endif
6aa8b732 | 461 | |
90cb0529 | 462 | static void kvm_mmu_free_page(struct kvm *kvm, |
4b02d6da | 463 | struct kvm_mmu_page *page_head) |
260746c0 | 464 | { |
47ad8e68 | 465 | ASSERT(is_empty_shadow_page(page_head->spt)); |
d3d25b04 | 466 | list_del(&page_head->link); |
c1158e63 | 467 | __free_page(virt_to_page(page_head->spt)); |
90cb0529 AK |
468 | kfree(page_head); |
469 | ++kvm->n_free_mmu_pages; | |
260746c0 AK |
470 | } |
471 | ||
cea0f0e7 AK |
472 | static unsigned kvm_page_table_hashfn(gfn_t gfn) |
473 | { | |
474 | return gfn; | |
475 | } | |
476 | ||
25c0de2c AK |
477 | static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, |
478 | u64 *parent_pte) | |
6aa8b732 AK |
479 | { |
480 | struct kvm_mmu_page *page; | |
481 | ||
d3d25b04 | 482 | if (!vcpu->kvm->n_free_mmu_pages) |
25c0de2c | 483 | return NULL; |
6aa8b732 | 484 | |
d3d25b04 AK |
485 | page = mmu_memory_cache_alloc(&vcpu->mmu_page_header_cache, |
486 | sizeof *page); | |
487 | page->spt = mmu_memory_cache_alloc(&vcpu->mmu_page_cache, PAGE_SIZE); | |
488 | set_page_private(virt_to_page(page->spt), (unsigned long)page); | |
489 | list_add(&page->link, &vcpu->kvm->active_mmu_pages); | |
47ad8e68 | 490 | ASSERT(is_empty_shadow_page(page->spt)); |
6aa8b732 | 491 | page->slot_bitmap = 0; |
cea0f0e7 | 492 | page->multimapped = 0; |
6aa8b732 | 493 | page->parent_pte = parent_pte; |
ebeace86 | 494 | --vcpu->kvm->n_free_mmu_pages; |
25c0de2c | 495 | return page; |
6aa8b732 AK |
496 | } |
497 | ||
714b93da AK |
498 | static void mmu_page_add_parent_pte(struct kvm_vcpu *vcpu, |
499 | struct kvm_mmu_page *page, u64 *parent_pte) | |
cea0f0e7 AK |
500 | { |
501 | struct kvm_pte_chain *pte_chain; | |
502 | struct hlist_node *node; | |
503 | int i; | |
504 | ||
505 | if (!parent_pte) | |
506 | return; | |
507 | if (!page->multimapped) { | |
508 | u64 *old = page->parent_pte; | |
509 | ||
510 | if (!old) { | |
511 | page->parent_pte = parent_pte; | |
512 | return; | |
513 | } | |
514 | page->multimapped = 1; | |
714b93da | 515 | pte_chain = mmu_alloc_pte_chain(vcpu); |
cea0f0e7 AK |
516 | INIT_HLIST_HEAD(&page->parent_ptes); |
517 | hlist_add_head(&pte_chain->link, &page->parent_ptes); | |
518 | pte_chain->parent_ptes[0] = old; | |
519 | } | |
520 | hlist_for_each_entry(pte_chain, node, &page->parent_ptes, link) { | |
521 | if (pte_chain->parent_ptes[NR_PTE_CHAIN_ENTRIES-1]) | |
522 | continue; | |
523 | for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) | |
524 | if (!pte_chain->parent_ptes[i]) { | |
525 | pte_chain->parent_ptes[i] = parent_pte; | |
526 | return; | |
527 | } | |
528 | } | |
714b93da | 529 | pte_chain = mmu_alloc_pte_chain(vcpu); |
cea0f0e7 AK |
530 | BUG_ON(!pte_chain); |
531 | hlist_add_head(&pte_chain->link, &page->parent_ptes); | |
532 | pte_chain->parent_ptes[0] = parent_pte; | |
533 | } | |
534 | ||
90cb0529 | 535 | static void mmu_page_remove_parent_pte(struct kvm_mmu_page *page, |
cea0f0e7 AK |
536 | u64 *parent_pte) |
537 | { | |
538 | struct kvm_pte_chain *pte_chain; | |
539 | struct hlist_node *node; | |
540 | int i; | |
541 | ||
542 | if (!page->multimapped) { | |
543 | BUG_ON(page->parent_pte != parent_pte); | |
544 | page->parent_pte = NULL; | |
545 | return; | |
546 | } | |
547 | hlist_for_each_entry(pte_chain, node, &page->parent_ptes, link) | |
548 | for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) { | |
549 | if (!pte_chain->parent_ptes[i]) | |
550 | break; | |
551 | if (pte_chain->parent_ptes[i] != parent_pte) | |
552 | continue; | |
697fe2e2 AK |
553 | while (i + 1 < NR_PTE_CHAIN_ENTRIES |
554 | && pte_chain->parent_ptes[i + 1]) { | |
cea0f0e7 AK |
555 | pte_chain->parent_ptes[i] |
556 | = pte_chain->parent_ptes[i + 1]; | |
557 | ++i; | |
558 | } | |
559 | pte_chain->parent_ptes[i] = NULL; | |
697fe2e2 AK |
560 | if (i == 0) { |
561 | hlist_del(&pte_chain->link); | |
90cb0529 | 562 | mmu_free_pte_chain(pte_chain); |
697fe2e2 AK |
563 | if (hlist_empty(&page->parent_ptes)) { |
564 | page->multimapped = 0; | |
565 | page->parent_pte = NULL; | |
566 | } | |
567 | } | |
cea0f0e7 AK |
568 | return; |
569 | } | |
570 | BUG(); | |
571 | } | |
572 | ||
573 | static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm_vcpu *vcpu, | |
574 | gfn_t gfn) | |
575 | { | |
576 | unsigned index; | |
577 | struct hlist_head *bucket; | |
578 | struct kvm_mmu_page *page; | |
579 | struct hlist_node *node; | |
580 | ||
581 | pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn); | |
582 | index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES; | |
583 | bucket = &vcpu->kvm->mmu_page_hash[index]; | |
584 | hlist_for_each_entry(page, node, bucket, hash_link) | |
585 | if (page->gfn == gfn && !page->role.metaphysical) { | |
586 | pgprintk("%s: found role %x\n", | |
587 | __FUNCTION__, page->role.word); | |
588 | return page; | |
589 | } | |
590 | return NULL; | |
591 | } | |
592 | ||
593 | static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |
594 | gfn_t gfn, | |
595 | gva_t gaddr, | |
596 | unsigned level, | |
597 | int metaphysical, | |
d28c6cfb | 598 | unsigned hugepage_access, |
cea0f0e7 AK |
599 | u64 *parent_pte) |
600 | { | |
601 | union kvm_mmu_page_role role; | |
602 | unsigned index; | |
603 | unsigned quadrant; | |
604 | struct hlist_head *bucket; | |
605 | struct kvm_mmu_page *page; | |
606 | struct hlist_node *node; | |
607 | ||
608 | role.word = 0; | |
609 | role.glevels = vcpu->mmu.root_level; | |
610 | role.level = level; | |
611 | role.metaphysical = metaphysical; | |
d28c6cfb | 612 | role.hugepage_access = hugepage_access; |
cea0f0e7 AK |
613 | if (vcpu->mmu.root_level <= PT32_ROOT_LEVEL) { |
614 | quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); | |
615 | quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1; | |
616 | role.quadrant = quadrant; | |
617 | } | |
618 | pgprintk("%s: looking gfn %lx role %x\n", __FUNCTION__, | |
619 | gfn, role.word); | |
620 | index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES; | |
621 | bucket = &vcpu->kvm->mmu_page_hash[index]; | |
622 | hlist_for_each_entry(page, node, bucket, hash_link) | |
623 | if (page->gfn == gfn && page->role.word == role.word) { | |
714b93da | 624 | mmu_page_add_parent_pte(vcpu, page, parent_pte); |
cea0f0e7 AK |
625 | pgprintk("%s: found\n", __FUNCTION__); |
626 | return page; | |
627 | } | |
628 | page = kvm_mmu_alloc_page(vcpu, parent_pte); | |
629 | if (!page) | |
630 | return page; | |
631 | pgprintk("%s: adding gfn %lx role %x\n", __FUNCTION__, gfn, role.word); | |
632 | page->gfn = gfn; | |
633 | page->role = role; | |
634 | hlist_add_head(&page->hash_link, bucket); | |
374cbac0 | 635 | if (!metaphysical) |
714b93da | 636 | rmap_write_protect(vcpu, gfn); |
cea0f0e7 AK |
637 | return page; |
638 | } | |
639 | ||
90cb0529 | 640 | static void kvm_mmu_page_unlink_children(struct kvm *kvm, |
a436036b AK |
641 | struct kvm_mmu_page *page) |
642 | { | |
697fe2e2 AK |
643 | unsigned i; |
644 | u64 *pt; | |
645 | u64 ent; | |
646 | ||
47ad8e68 | 647 | pt = page->spt; |
697fe2e2 AK |
648 | |
649 | if (page->role.level == PT_PAGE_TABLE_LEVEL) { | |
650 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { | |
651 | if (pt[i] & PT_PRESENT_MASK) | |
90cb0529 | 652 | rmap_remove(&pt[i]); |
697fe2e2 AK |
653 | pt[i] = 0; |
654 | } | |
90cb0529 | 655 | kvm_flush_remote_tlbs(kvm); |
697fe2e2 AK |
656 | return; |
657 | } | |
658 | ||
659 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { | |
660 | ent = pt[i]; | |
661 | ||
662 | pt[i] = 0; | |
663 | if (!(ent & PT_PRESENT_MASK)) | |
664 | continue; | |
665 | ent &= PT64_BASE_ADDR_MASK; | |
90cb0529 | 666 | mmu_page_remove_parent_pte(page_header(ent), &pt[i]); |
697fe2e2 | 667 | } |
90cb0529 | 668 | kvm_flush_remote_tlbs(kvm); |
a436036b AK |
669 | } |
670 | ||
90cb0529 | 671 | static void kvm_mmu_put_page(struct kvm_mmu_page *page, |
cea0f0e7 AK |
672 | u64 *parent_pte) |
673 | { | |
90cb0529 | 674 | mmu_page_remove_parent_pte(page, parent_pte); |
a436036b AK |
675 | } |
676 | ||
90cb0529 | 677 | static void kvm_mmu_zap_page(struct kvm *kvm, |
a436036b AK |
678 | struct kvm_mmu_page *page) |
679 | { | |
680 | u64 *parent_pte; | |
681 | ||
682 | while (page->multimapped || page->parent_pte) { | |
683 | if (!page->multimapped) | |
684 | parent_pte = page->parent_pte; | |
685 | else { | |
686 | struct kvm_pte_chain *chain; | |
687 | ||
688 | chain = container_of(page->parent_ptes.first, | |
689 | struct kvm_pte_chain, link); | |
690 | parent_pte = chain->parent_ptes[0]; | |
691 | } | |
697fe2e2 | 692 | BUG_ON(!parent_pte); |
90cb0529 | 693 | kvm_mmu_put_page(page, parent_pte); |
e663ee64 | 694 | set_shadow_pte(parent_pte, 0); |
a436036b | 695 | } |
90cb0529 | 696 | kvm_mmu_page_unlink_children(kvm, page); |
3bb65a22 AK |
697 | if (!page->root_count) { |
698 | hlist_del(&page->hash_link); | |
90cb0529 | 699 | kvm_mmu_free_page(kvm, page); |
36868f7b | 700 | } else |
90cb0529 | 701 | list_move(&page->link, &kvm->active_mmu_pages); |
a436036b AK |
702 | } |
703 | ||
704 | static int kvm_mmu_unprotect_page(struct kvm_vcpu *vcpu, gfn_t gfn) | |
705 | { | |
706 | unsigned index; | |
707 | struct hlist_head *bucket; | |
708 | struct kvm_mmu_page *page; | |
709 | struct hlist_node *node, *n; | |
710 | int r; | |
711 | ||
712 | pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn); | |
713 | r = 0; | |
714 | index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES; | |
715 | bucket = &vcpu->kvm->mmu_page_hash[index]; | |
716 | hlist_for_each_entry_safe(page, node, n, bucket, hash_link) | |
717 | if (page->gfn == gfn && !page->role.metaphysical) { | |
697fe2e2 AK |
718 | pgprintk("%s: gfn %lx role %x\n", __FUNCTION__, gfn, |
719 | page->role.word); | |
90cb0529 | 720 | kvm_mmu_zap_page(vcpu->kvm, page); |
a436036b AK |
721 | r = 1; |
722 | } | |
723 | return r; | |
cea0f0e7 AK |
724 | } |
725 | ||
97a0a01e AK |
726 | static void mmu_unshadow(struct kvm_vcpu *vcpu, gfn_t gfn) |
727 | { | |
728 | struct kvm_mmu_page *page; | |
729 | ||
730 | while ((page = kvm_mmu_lookup_page(vcpu, gfn)) != NULL) { | |
731 | pgprintk("%s: zap %lx %x\n", | |
732 | __FUNCTION__, gfn, page->role.word); | |
90cb0529 | 733 | kvm_mmu_zap_page(vcpu->kvm, page); |
97a0a01e AK |
734 | } |
735 | } | |
736 | ||
6aa8b732 AK |
737 | static void page_header_update_slot(struct kvm *kvm, void *pte, gpa_t gpa) |
738 | { | |
739 | int slot = memslot_id(kvm, gfn_to_memslot(kvm, gpa >> PAGE_SHIFT)); | |
740 | struct kvm_mmu_page *page_head = page_header(__pa(pte)); | |
741 | ||
742 | __set_bit(slot, &page_head->slot_bitmap); | |
743 | } | |
744 | ||
745 | hpa_t safe_gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa) | |
746 | { | |
747 | hpa_t hpa = gpa_to_hpa(vcpu, gpa); | |
748 | ||
749 | return is_error_hpa(hpa) ? bad_page_address | (gpa & ~PAGE_MASK): hpa; | |
750 | } | |
751 | ||
752 | hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa) | |
753 | { | |
6aa8b732 AK |
754 | struct page *page; |
755 | ||
756 | ASSERT((gpa & HPA_ERR_MASK) == 0); | |
954bbbc2 AK |
757 | page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); |
758 | if (!page) | |
6aa8b732 | 759 | return gpa | HPA_ERR_MASK; |
6aa8b732 AK |
760 | return ((hpa_t)page_to_pfn(page) << PAGE_SHIFT) |
761 | | (gpa & (PAGE_SIZE-1)); | |
762 | } | |
763 | ||
764 | hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva) | |
765 | { | |
766 | gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva); | |
767 | ||
768 | if (gpa == UNMAPPED_GVA) | |
769 | return UNMAPPED_GVA; | |
770 | return gpa_to_hpa(vcpu, gpa); | |
771 | } | |
772 | ||
039576c0 AK |
773 | struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva) |
774 | { | |
775 | gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva); | |
776 | ||
777 | if (gpa == UNMAPPED_GVA) | |
778 | return NULL; | |
779 | return pfn_to_page(gpa_to_hpa(vcpu, gpa) >> PAGE_SHIFT); | |
780 | } | |
781 | ||
6aa8b732 AK |
/* new_cr3 callback for the nonpaging context: intentionally does nothing. */
static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
{
	return;
}
785 | ||
786 | static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p) | |
787 | { | |
788 | int level = PT32E_ROOT_LEVEL; | |
789 | hpa_t table_addr = vcpu->mmu.root_hpa; | |
790 | ||
791 | for (; ; level--) { | |
792 | u32 index = PT64_INDEX(v, level); | |
793 | u64 *table; | |
cea0f0e7 | 794 | u64 pte; |
6aa8b732 AK |
795 | |
796 | ASSERT(VALID_PAGE(table_addr)); | |
797 | table = __va(table_addr); | |
798 | ||
799 | if (level == 1) { | |
cea0f0e7 AK |
800 | pte = table[index]; |
801 | if (is_present_pte(pte) && is_writeble_pte(pte)) | |
802 | return 0; | |
6aa8b732 AK |
803 | mark_page_dirty(vcpu->kvm, v >> PAGE_SHIFT); |
804 | page_header_update_slot(vcpu->kvm, table, v); | |
805 | table[index] = p | PT_PRESENT_MASK | PT_WRITABLE_MASK | | |
806 | PT_USER_MASK; | |
714b93da | 807 | rmap_add(vcpu, &table[index]); |
6aa8b732 AK |
808 | return 0; |
809 | } | |
810 | ||
811 | if (table[index] == 0) { | |
25c0de2c | 812 | struct kvm_mmu_page *new_table; |
cea0f0e7 | 813 | gfn_t pseudo_gfn; |
6aa8b732 | 814 | |
cea0f0e7 AK |
815 | pseudo_gfn = (v & PT64_DIR_BASE_ADDR_MASK) |
816 | >> PAGE_SHIFT; | |
817 | new_table = kvm_mmu_get_page(vcpu, pseudo_gfn, | |
818 | v, level - 1, | |
d28c6cfb | 819 | 1, 0, &table[index]); |
25c0de2c | 820 | if (!new_table) { |
6aa8b732 AK |
821 | pgprintk("nonpaging_map: ENOMEM\n"); |
822 | return -ENOMEM; | |
823 | } | |
824 | ||
47ad8e68 | 825 | table[index] = __pa(new_table->spt) | PT_PRESENT_MASK |
25c0de2c | 826 | | PT_WRITABLE_MASK | PT_USER_MASK; |
6aa8b732 AK |
827 | } |
828 | table_addr = table[index] & PT64_BASE_ADDR_MASK; | |
829 | } | |
830 | } | |
831 | ||
17ac10ad AK |
832 | static void mmu_free_roots(struct kvm_vcpu *vcpu) |
833 | { | |
834 | int i; | |
3bb65a22 | 835 | struct kvm_mmu_page *page; |
17ac10ad | 836 | |
7b53aa56 AK |
837 | if (!VALID_PAGE(vcpu->mmu.root_hpa)) |
838 | return; | |
17ac10ad AK |
839 | #ifdef CONFIG_X86_64 |
840 | if (vcpu->mmu.shadow_root_level == PT64_ROOT_LEVEL) { | |
841 | hpa_t root = vcpu->mmu.root_hpa; | |
842 | ||
3bb65a22 AK |
843 | page = page_header(root); |
844 | --page->root_count; | |
17ac10ad AK |
845 | vcpu->mmu.root_hpa = INVALID_PAGE; |
846 | return; | |
847 | } | |
848 | #endif | |
849 | for (i = 0; i < 4; ++i) { | |
850 | hpa_t root = vcpu->mmu.pae_root[i]; | |
851 | ||
417726a3 | 852 | if (root) { |
417726a3 AK |
853 | root &= PT64_BASE_ADDR_MASK; |
854 | page = page_header(root); | |
855 | --page->root_count; | |
856 | } | |
17ac10ad AK |
857 | vcpu->mmu.pae_root[i] = INVALID_PAGE; |
858 | } | |
859 | vcpu->mmu.root_hpa = INVALID_PAGE; | |
860 | } | |
861 | ||
862 | static void mmu_alloc_roots(struct kvm_vcpu *vcpu) | |
863 | { | |
864 | int i; | |
cea0f0e7 | 865 | gfn_t root_gfn; |
3bb65a22 AK |
866 | struct kvm_mmu_page *page; |
867 | ||
cea0f0e7 | 868 | root_gfn = vcpu->cr3 >> PAGE_SHIFT; |
17ac10ad AK |
869 | |
870 | #ifdef CONFIG_X86_64 | |
871 | if (vcpu->mmu.shadow_root_level == PT64_ROOT_LEVEL) { | |
872 | hpa_t root = vcpu->mmu.root_hpa; | |
873 | ||
874 | ASSERT(!VALID_PAGE(root)); | |
68a99f6d | 875 | page = kvm_mmu_get_page(vcpu, root_gfn, 0, |
d28c6cfb | 876 | PT64_ROOT_LEVEL, 0, 0, NULL); |
47ad8e68 | 877 | root = __pa(page->spt); |
3bb65a22 | 878 | ++page->root_count; |
17ac10ad AK |
879 | vcpu->mmu.root_hpa = root; |
880 | return; | |
881 | } | |
882 | #endif | |
883 | for (i = 0; i < 4; ++i) { | |
884 | hpa_t root = vcpu->mmu.pae_root[i]; | |
885 | ||
886 | ASSERT(!VALID_PAGE(root)); | |
417726a3 AK |
887 | if (vcpu->mmu.root_level == PT32E_ROOT_LEVEL) { |
888 | if (!is_present_pte(vcpu->pdptrs[i])) { | |
889 | vcpu->mmu.pae_root[i] = 0; | |
890 | continue; | |
891 | } | |
cea0f0e7 | 892 | root_gfn = vcpu->pdptrs[i] >> PAGE_SHIFT; |
417726a3 | 893 | } else if (vcpu->mmu.root_level == 0) |
cea0f0e7 | 894 | root_gfn = 0; |
68a99f6d | 895 | page = kvm_mmu_get_page(vcpu, root_gfn, i << 30, |
cea0f0e7 | 896 | PT32_ROOT_LEVEL, !is_paging(vcpu), |
d28c6cfb | 897 | 0, NULL); |
47ad8e68 | 898 | root = __pa(page->spt); |
3bb65a22 | 899 | ++page->root_count; |
17ac10ad AK |
900 | vcpu->mmu.pae_root[i] = root | PT_PRESENT_MASK; |
901 | } | |
902 | vcpu->mmu.root_hpa = __pa(vcpu->mmu.pae_root); | |
903 | } | |
904 | ||
6aa8b732 AK |
905 | static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr) |
906 | { | |
907 | return vaddr; | |
908 | } | |
909 | ||
910 | static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, | |
911 | u32 error_code) | |
912 | { | |
6aa8b732 | 913 | gpa_t addr = gva; |
ebeace86 | 914 | hpa_t paddr; |
e2dec939 | 915 | int r; |
6aa8b732 | 916 | |
e2dec939 AK |
917 | r = mmu_topup_memory_caches(vcpu); |
918 | if (r) | |
919 | return r; | |
714b93da | 920 | |
6aa8b732 AK |
921 | ASSERT(vcpu); |
922 | ASSERT(VALID_PAGE(vcpu->mmu.root_hpa)); | |
923 | ||
6aa8b732 | 924 | |
ebeace86 | 925 | paddr = gpa_to_hpa(vcpu , addr & PT64_BASE_ADDR_MASK); |
6aa8b732 | 926 | |
ebeace86 AK |
927 | if (is_error_hpa(paddr)) |
928 | return 1; | |
6aa8b732 | 929 | |
ebeace86 | 930 | return nonpaging_map(vcpu, addr & PAGE_MASK, paddr); |
6aa8b732 AK |
931 | } |
932 | ||
6aa8b732 AK |
/* ->free hook of the non-paging context: release the shadow roots. */
static void nonpaging_free(struct kvm_vcpu *vcpu)
{
	mmu_free_roots(vcpu);
}
937 | ||
938 | static int nonpaging_init_context(struct kvm_vcpu *vcpu) | |
939 | { | |
940 | struct kvm_mmu *context = &vcpu->mmu; | |
941 | ||
942 | context->new_cr3 = nonpaging_new_cr3; | |
943 | context->page_fault = nonpaging_page_fault; | |
6aa8b732 AK |
944 | context->gva_to_gpa = nonpaging_gva_to_gpa; |
945 | context->free = nonpaging_free; | |
cea0f0e7 | 946 | context->root_level = 0; |
6aa8b732 | 947 | context->shadow_root_level = PT32E_ROOT_LEVEL; |
17c3ba9d | 948 | context->root_hpa = INVALID_PAGE; |
6aa8b732 AK |
949 | return 0; |
950 | } | |
951 | ||
6aa8b732 AK |
952 | static void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) |
953 | { | |
1165f5fe | 954 | ++vcpu->stat.tlb_flush; |
cbdd1bea | 955 | kvm_x86_ops->tlb_flush(vcpu); |
6aa8b732 AK |
956 | } |
957 | ||
958 | static void paging_new_cr3(struct kvm_vcpu *vcpu) | |
959 | { | |
374cbac0 | 960 | pgprintk("%s: cr3 %lx\n", __FUNCTION__, vcpu->cr3); |
cea0f0e7 | 961 | mmu_free_roots(vcpu); |
6aa8b732 AK |
962 | } |
963 | ||
6aa8b732 AK |
964 | static void inject_page_fault(struct kvm_vcpu *vcpu, |
965 | u64 addr, | |
966 | u32 err_code) | |
967 | { | |
cbdd1bea | 968 | kvm_x86_ops->inject_page_fault(vcpu, addr, err_code); |
6aa8b732 AK |
969 | } |
970 | ||
6aa8b732 AK |
/* ->free hook of the paging contexts; identical to the non-paging one. */
static void paging_free(struct kvm_vcpu *vcpu)
{
	nonpaging_free(vcpu);
}
975 | ||
976 | #define PTTYPE 64 | |
977 | #include "paging_tmpl.h" | |
978 | #undef PTTYPE | |
979 | ||
980 | #define PTTYPE 32 | |
981 | #include "paging_tmpl.h" | |
982 | #undef PTTYPE | |
983 | ||
17ac10ad | 984 | static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level) |
6aa8b732 AK |
985 | { |
986 | struct kvm_mmu *context = &vcpu->mmu; | |
987 | ||
988 | ASSERT(is_pae(vcpu)); | |
989 | context->new_cr3 = paging_new_cr3; | |
990 | context->page_fault = paging64_page_fault; | |
6aa8b732 AK |
991 | context->gva_to_gpa = paging64_gva_to_gpa; |
992 | context->free = paging_free; | |
17ac10ad AK |
993 | context->root_level = level; |
994 | context->shadow_root_level = level; | |
17c3ba9d | 995 | context->root_hpa = INVALID_PAGE; |
6aa8b732 AK |
996 | return 0; |
997 | } | |
998 | ||
17ac10ad AK |
999 | static int paging64_init_context(struct kvm_vcpu *vcpu) |
1000 | { | |
1001 | return paging64_init_context_common(vcpu, PT64_ROOT_LEVEL); | |
1002 | } | |
1003 | ||
6aa8b732 AK |
1004 | static int paging32_init_context(struct kvm_vcpu *vcpu) |
1005 | { | |
1006 | struct kvm_mmu *context = &vcpu->mmu; | |
1007 | ||
1008 | context->new_cr3 = paging_new_cr3; | |
1009 | context->page_fault = paging32_page_fault; | |
6aa8b732 AK |
1010 | context->gva_to_gpa = paging32_gva_to_gpa; |
1011 | context->free = paging_free; | |
1012 | context->root_level = PT32_ROOT_LEVEL; | |
1013 | context->shadow_root_level = PT32E_ROOT_LEVEL; | |
17c3ba9d | 1014 | context->root_hpa = INVALID_PAGE; |
6aa8b732 AK |
1015 | return 0; |
1016 | } | |
1017 | ||
1018 | static int paging32E_init_context(struct kvm_vcpu *vcpu) | |
1019 | { | |
17ac10ad | 1020 | return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL); |
6aa8b732 AK |
1021 | } |
1022 | ||
1023 | static int init_kvm_mmu(struct kvm_vcpu *vcpu) | |
1024 | { | |
1025 | ASSERT(vcpu); | |
1026 | ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa)); | |
1027 | ||
1028 | if (!is_paging(vcpu)) | |
1029 | return nonpaging_init_context(vcpu); | |
a9058ecd | 1030 | else if (is_long_mode(vcpu)) |
6aa8b732 AK |
1031 | return paging64_init_context(vcpu); |
1032 | else if (is_pae(vcpu)) | |
1033 | return paging32E_init_context(vcpu); | |
1034 | else | |
1035 | return paging32_init_context(vcpu); | |
1036 | } | |
1037 | ||
1038 | static void destroy_kvm_mmu(struct kvm_vcpu *vcpu) | |
1039 | { | |
1040 | ASSERT(vcpu); | |
1041 | if (VALID_PAGE(vcpu->mmu.root_hpa)) { | |
1042 | vcpu->mmu.free(vcpu); | |
1043 | vcpu->mmu.root_hpa = INVALID_PAGE; | |
1044 | } | |
1045 | } | |
1046 | ||
/*
 * Destroy the vcpu's current MMU context and build a fresh one for the
 * vcpu's current mode.
 *
 * Returns the result of init_kvm_mmu().
 */
int kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
{
	int ret;

	destroy_kvm_mmu(vcpu);
	ret = init_kvm_mmu(vcpu);

	return ret;
}
EXPORT_SYMBOL_GPL(kvm_mmu_reset_context);
17c3ba9d AK |
1053 | |
1054 | int kvm_mmu_load(struct kvm_vcpu *vcpu) | |
6aa8b732 | 1055 | { |
714b93da AK |
1056 | int r; |
1057 | ||
11ec2804 | 1058 | mutex_lock(&vcpu->kvm->lock); |
e2dec939 | 1059 | r = mmu_topup_memory_caches(vcpu); |
17c3ba9d AK |
1060 | if (r) |
1061 | goto out; | |
1062 | mmu_alloc_roots(vcpu); | |
cbdd1bea | 1063 | kvm_x86_ops->set_cr3(vcpu, vcpu->mmu.root_hpa); |
17c3ba9d | 1064 | kvm_mmu_flush_tlb(vcpu); |
714b93da | 1065 | out: |
11ec2804 | 1066 | mutex_unlock(&vcpu->kvm->lock); |
714b93da | 1067 | return r; |
6aa8b732 | 1068 | } |
17c3ba9d AK |
1069 | EXPORT_SYMBOL_GPL(kvm_mmu_load); |
1070 | ||
/* Counterpart of kvm_mmu_load(): release the vcpu's shadow roots. */
void kvm_mmu_unload(struct kvm_vcpu *vcpu)
{
	mmu_free_roots(vcpu);
}
6aa8b732 | 1075 | |
09072daf | 1076 | static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu, |
ac1b714e AK |
1077 | struct kvm_mmu_page *page, |
1078 | u64 *spte) | |
1079 | { | |
1080 | u64 pte; | |
1081 | struct kvm_mmu_page *child; | |
1082 | ||
1083 | pte = *spte; | |
1084 | if (is_present_pte(pte)) { | |
1085 | if (page->role.level == PT_PAGE_TABLE_LEVEL) | |
90cb0529 | 1086 | rmap_remove(spte); |
ac1b714e AK |
1087 | else { |
1088 | child = page_header(pte & PT64_BASE_ADDR_MASK); | |
90cb0529 | 1089 | mmu_page_remove_parent_pte(child, spte); |
ac1b714e AK |
1090 | } |
1091 | } | |
7f2145ad | 1092 | set_shadow_pte(spte, 0); |
d9e368d6 | 1093 | kvm_flush_remote_tlbs(vcpu->kvm); |
ac1b714e AK |
1094 | } |
1095 | ||
0028425f AK |
1096 | static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, |
1097 | struct kvm_mmu_page *page, | |
1098 | u64 *spte, | |
1099 | const void *new, int bytes) | |
1100 | { | |
1101 | if (page->role.level != PT_PAGE_TABLE_LEVEL) | |
1102 | return; | |
1103 | ||
1104 | if (page->role.glevels == PT32_ROOT_LEVEL) | |
1105 | paging32_update_pte(vcpu, page, spte, new, bytes); | |
1106 | else | |
1107 | paging64_update_pte(vcpu, page, spte, new, bytes); | |
1108 | } | |
1109 | ||
09072daf | 1110 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, |
fe551881 | 1111 | const u8 *new, int bytes) |
da4a00f0 | 1112 | { |
9b7a0325 AK |
1113 | gfn_t gfn = gpa >> PAGE_SHIFT; |
1114 | struct kvm_mmu_page *page; | |
0e7bc4b9 | 1115 | struct hlist_node *node, *n; |
9b7a0325 AK |
1116 | struct hlist_head *bucket; |
1117 | unsigned index; | |
1118 | u64 *spte; | |
9b7a0325 | 1119 | unsigned offset = offset_in_page(gpa); |
0e7bc4b9 | 1120 | unsigned pte_size; |
9b7a0325 | 1121 | unsigned page_offset; |
0e7bc4b9 | 1122 | unsigned misaligned; |
fce0657f | 1123 | unsigned quadrant; |
9b7a0325 | 1124 | int level; |
86a5ba02 | 1125 | int flooded = 0; |
ac1b714e | 1126 | int npte; |
9b7a0325 | 1127 | |
da4a00f0 | 1128 | pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes); |
86a5ba02 AK |
1129 | if (gfn == vcpu->last_pt_write_gfn) { |
1130 | ++vcpu->last_pt_write_count; | |
1131 | if (vcpu->last_pt_write_count >= 3) | |
1132 | flooded = 1; | |
1133 | } else { | |
1134 | vcpu->last_pt_write_gfn = gfn; | |
1135 | vcpu->last_pt_write_count = 1; | |
1136 | } | |
9b7a0325 AK |
1137 | index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES; |
1138 | bucket = &vcpu->kvm->mmu_page_hash[index]; | |
0e7bc4b9 | 1139 | hlist_for_each_entry_safe(page, node, n, bucket, hash_link) { |
9b7a0325 AK |
1140 | if (page->gfn != gfn || page->role.metaphysical) |
1141 | continue; | |
0e7bc4b9 AK |
1142 | pte_size = page->role.glevels == PT32_ROOT_LEVEL ? 4 : 8; |
1143 | misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); | |
e925c5ba | 1144 | misaligned |= bytes < 4; |
86a5ba02 | 1145 | if (misaligned || flooded) { |
0e7bc4b9 AK |
1146 | /* |
1147 | * Misaligned accesses are too much trouble to fix | |
1148 | * up; also, they usually indicate a page is not used | |
1149 | * as a page table. | |
86a5ba02 AK |
1150 | * |
1151 | * If we're seeing too many writes to a page, | |
1152 | * it may no longer be a page table, or we may be | |
1153 | * forking, in which case it is better to unmap the | |
1154 | * page. | |
0e7bc4b9 AK |
1155 | */ |
1156 | pgprintk("misaligned: gpa %llx bytes %d role %x\n", | |
1157 | gpa, bytes, page->role.word); | |
90cb0529 | 1158 | kvm_mmu_zap_page(vcpu->kvm, page); |
0e7bc4b9 AK |
1159 | continue; |
1160 | } | |
9b7a0325 AK |
1161 | page_offset = offset; |
1162 | level = page->role.level; | |
ac1b714e | 1163 | npte = 1; |
9b7a0325 | 1164 | if (page->role.glevels == PT32_ROOT_LEVEL) { |
ac1b714e AK |
1165 | page_offset <<= 1; /* 32->64 */ |
1166 | /* | |
1167 | * A 32-bit pde maps 4MB while the shadow pdes map | |
1168 | * only 2MB. So we need to double the offset again | |
1169 | * and zap two pdes instead of one. | |
1170 | */ | |
1171 | if (level == PT32_ROOT_LEVEL) { | |
6b8d0f9b | 1172 | page_offset &= ~7; /* kill rounding error */ |
ac1b714e AK |
1173 | page_offset <<= 1; |
1174 | npte = 2; | |
1175 | } | |
fce0657f | 1176 | quadrant = page_offset >> PAGE_SHIFT; |
9b7a0325 | 1177 | page_offset &= ~PAGE_MASK; |
fce0657f AK |
1178 | if (quadrant != page->role.quadrant) |
1179 | continue; | |
9b7a0325 | 1180 | } |
47ad8e68 | 1181 | spte = &page->spt[page_offset / sizeof(*spte)]; |
ac1b714e | 1182 | while (npte--) { |
09072daf | 1183 | mmu_pte_write_zap_pte(vcpu, page, spte); |
0028425f | 1184 | mmu_pte_write_new_pte(vcpu, page, spte, new, bytes); |
ac1b714e | 1185 | ++spte; |
9b7a0325 | 1186 | } |
9b7a0325 | 1187 | } |
da4a00f0 AK |
1188 | } |
1189 | ||
a436036b AK |
1190 | int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) |
1191 | { | |
1192 | gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva); | |
1193 | ||
1194 | return kvm_mmu_unprotect_page(vcpu, gpa >> PAGE_SHIFT); | |
1195 | } | |
1196 | ||
22d95b12 | 1197 | void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) |
ebeace86 AK |
1198 | { |
1199 | while (vcpu->kvm->n_free_mmu_pages < KVM_REFILL_PAGES) { | |
1200 | struct kvm_mmu_page *page; | |
1201 | ||
1202 | page = container_of(vcpu->kvm->active_mmu_pages.prev, | |
1203 | struct kvm_mmu_page, link); | |
90cb0529 | 1204 | kvm_mmu_zap_page(vcpu->kvm, page); |
ebeace86 AK |
1205 | } |
1206 | } | |
ebeace86 | 1207 | |
6aa8b732 AK |
1208 | static void free_mmu_pages(struct kvm_vcpu *vcpu) |
1209 | { | |
f51234c2 | 1210 | struct kvm_mmu_page *page; |
6aa8b732 | 1211 | |
f51234c2 AK |
1212 | while (!list_empty(&vcpu->kvm->active_mmu_pages)) { |
1213 | page = container_of(vcpu->kvm->active_mmu_pages.next, | |
1214 | struct kvm_mmu_page, link); | |
90cb0529 | 1215 | kvm_mmu_zap_page(vcpu->kvm, page); |
f51234c2 | 1216 | } |
17ac10ad | 1217 | free_page((unsigned long)vcpu->mmu.pae_root); |
6aa8b732 AK |
1218 | } |
1219 | ||
1220 | static int alloc_mmu_pages(struct kvm_vcpu *vcpu) | |
1221 | { | |
17ac10ad | 1222 | struct page *page; |
6aa8b732 AK |
1223 | int i; |
1224 | ||
1225 | ASSERT(vcpu); | |
1226 | ||
d3d25b04 | 1227 | vcpu->kvm->n_free_mmu_pages = KVM_NUM_MMU_PAGES; |
17ac10ad AK |
1228 | |
1229 | /* | |
1230 | * When emulating 32-bit mode, cr3 is only 32 bits even on x86_64. | |
1231 | * Therefore we need to allocate shadow page tables in the first | |
1232 | * 4GB of memory, which happens to fit the DMA32 zone. | |
1233 | */ | |
1234 | page = alloc_page(GFP_KERNEL | __GFP_DMA32); | |
1235 | if (!page) | |
1236 | goto error_1; | |
1237 | vcpu->mmu.pae_root = page_address(page); | |
1238 | for (i = 0; i < 4; ++i) | |
1239 | vcpu->mmu.pae_root[i] = INVALID_PAGE; | |
1240 | ||
6aa8b732 AK |
1241 | return 0; |
1242 | ||
1243 | error_1: | |
1244 | free_mmu_pages(vcpu); | |
1245 | return -ENOMEM; | |
1246 | } | |
1247 | ||
8018c27b | 1248 | int kvm_mmu_create(struct kvm_vcpu *vcpu) |
6aa8b732 | 1249 | { |
6aa8b732 AK |
1250 | ASSERT(vcpu); |
1251 | ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa)); | |
6aa8b732 | 1252 | |
8018c27b IM |
1253 | return alloc_mmu_pages(vcpu); |
1254 | } | |
6aa8b732 | 1255 | |
8018c27b IM |
1256 | int kvm_mmu_setup(struct kvm_vcpu *vcpu) |
1257 | { | |
1258 | ASSERT(vcpu); | |
1259 | ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa)); | |
2c264957 | 1260 | |
8018c27b | 1261 | return init_kvm_mmu(vcpu); |
6aa8b732 AK |
1262 | } |
1263 | ||
/*
 * Full MMU teardown for a vcpu, in order: drop the MMU context, free the
 * shadow pages and pae_root, then release the MMU memory caches.
 */
void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
{
	ASSERT(vcpu);

	destroy_kvm_mmu(vcpu);		/* context and roots */
	free_mmu_pages(vcpu);		/* shadow pages, pae_root */
	mmu_free_memory_caches(vcpu);	/* preallocated caches */
}
1272 | ||
90cb0529 | 1273 | void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) |
6aa8b732 AK |
1274 | { |
1275 | struct kvm_mmu_page *page; | |
1276 | ||
1277 | list_for_each_entry(page, &kvm->active_mmu_pages, link) { | |
1278 | int i; | |
1279 | u64 *pt; | |
1280 | ||
1281 | if (!test_bit(slot, &page->slot_bitmap)) | |
1282 | continue; | |
1283 | ||
47ad8e68 | 1284 | pt = page->spt; |
6aa8b732 AK |
1285 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) |
1286 | /* avoid RMW */ | |
cd4a4e53 | 1287 | if (pt[i] & PT_WRITABLE_MASK) { |
90cb0529 | 1288 | rmap_remove(&pt[i]); |
6aa8b732 | 1289 | pt[i] &= ~PT_WRITABLE_MASK; |
cd4a4e53 | 1290 | } |
6aa8b732 AK |
1291 | } |
1292 | } | |
37a7d8b0 | 1293 | |
90cb0529 | 1294 | void kvm_mmu_zap_all(struct kvm *kvm) |
e0fa826f | 1295 | { |
90cb0529 | 1296 | struct kvm_mmu_page *page, *node; |
e0fa826f | 1297 | |
90cb0529 AK |
1298 | list_for_each_entry_safe(page, node, &kvm->active_mmu_pages, link) |
1299 | kvm_mmu_zap_page(kvm, page); | |
e0fa826f | 1300 | |
90cb0529 | 1301 | kvm_flush_remote_tlbs(kvm); |
e0fa826f DL |
1302 | } |
1303 | ||
b5a33a75 AK |
1304 | void kvm_mmu_module_exit(void) |
1305 | { | |
1306 | if (pte_chain_cache) | |
1307 | kmem_cache_destroy(pte_chain_cache); | |
1308 | if (rmap_desc_cache) | |
1309 | kmem_cache_destroy(rmap_desc_cache); | |
d3d25b04 AK |
1310 | if (mmu_page_header_cache) |
1311 | kmem_cache_destroy(mmu_page_header_cache); | |
b5a33a75 AK |
1312 | } |
1313 | ||
1314 | int kvm_mmu_module_init(void) | |
1315 | { | |
1316 | pte_chain_cache = kmem_cache_create("kvm_pte_chain", | |
1317 | sizeof(struct kvm_pte_chain), | |
20c2df83 | 1318 | 0, 0, NULL); |
b5a33a75 AK |
1319 | if (!pte_chain_cache) |
1320 | goto nomem; | |
1321 | rmap_desc_cache = kmem_cache_create("kvm_rmap_desc", | |
1322 | sizeof(struct kvm_rmap_desc), | |
20c2df83 | 1323 | 0, 0, NULL); |
b5a33a75 AK |
1324 | if (!rmap_desc_cache) |
1325 | goto nomem; | |
1326 | ||
d3d25b04 AK |
1327 | mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header", |
1328 | sizeof(struct kvm_mmu_page), | |
20c2df83 | 1329 | 0, 0, NULL); |
d3d25b04 AK |
1330 | if (!mmu_page_header_cache) |
1331 | goto nomem; | |
1332 | ||
b5a33a75 AK |
1333 | return 0; |
1334 | ||
1335 | nomem: | |
1336 | kvm_mmu_module_exit(); | |
1337 | return -ENOMEM; | |
1338 | } | |
1339 | ||
37a7d8b0 AK |
1340 | #ifdef AUDIT |
1341 | ||
1342 | static const char *audit_msg; | |
1343 | ||
1344 | static gva_t canonicalize(gva_t gva) | |
1345 | { | |
1346 | #ifdef CONFIG_X86_64 | |
1347 | gva = (long long)(gva << 16) >> 16; | |
1348 | #endif | |
1349 | return gva; | |
1350 | } | |
1351 | ||
1352 | static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte, | |
1353 | gva_t va, int level) | |
1354 | { | |
1355 | u64 *pt = __va(page_pte & PT64_BASE_ADDR_MASK); | |
1356 | int i; | |
1357 | gva_t va_delta = 1ul << (PAGE_SHIFT + 9 * (level - 1)); | |
1358 | ||
1359 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i, va += va_delta) { | |
1360 | u64 ent = pt[i]; | |
1361 | ||
2807696c | 1362 | if (!(ent & PT_PRESENT_MASK)) |
37a7d8b0 AK |
1363 | continue; |
1364 | ||
1365 | va = canonicalize(va); | |
1366 | if (level > 1) | |
1367 | audit_mappings_page(vcpu, ent, va, level - 1); | |
1368 | else { | |
1369 | gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, va); | |
1370 | hpa_t hpa = gpa_to_hpa(vcpu, gpa); | |
1371 | ||
1372 | if ((ent & PT_PRESENT_MASK) | |
1373 | && (ent & PT64_BASE_ADDR_MASK) != hpa) | |
1374 | printk(KERN_ERR "audit error: (%s) levels %d" | |
1375 | " gva %lx gpa %llx hpa %llx ent %llx\n", | |
1376 | audit_msg, vcpu->mmu.root_level, | |
1377 | va, gpa, hpa, ent); | |
1378 | } | |
1379 | } | |
1380 | } | |
1381 | ||
1382 | static void audit_mappings(struct kvm_vcpu *vcpu) | |
1383 | { | |
1ea252af | 1384 | unsigned i; |
37a7d8b0 AK |
1385 | |
1386 | if (vcpu->mmu.root_level == 4) | |
1387 | audit_mappings_page(vcpu, vcpu->mmu.root_hpa, 0, 4); | |
1388 | else | |
1389 | for (i = 0; i < 4; ++i) | |
1390 | if (vcpu->mmu.pae_root[i] & PT_PRESENT_MASK) | |
1391 | audit_mappings_page(vcpu, | |
1392 | vcpu->mmu.pae_root[i], | |
1393 | i << 30, | |
1394 | 2); | |
1395 | } | |
1396 | ||
1397 | static int count_rmaps(struct kvm_vcpu *vcpu) | |
1398 | { | |
1399 | int nmaps = 0; | |
1400 | int i, j, k; | |
1401 | ||
1402 | for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { | |
1403 | struct kvm_memory_slot *m = &vcpu->kvm->memslots[i]; | |
1404 | struct kvm_rmap_desc *d; | |
1405 | ||
1406 | for (j = 0; j < m->npages; ++j) { | |
1407 | struct page *page = m->phys_mem[j]; | |
1408 | ||
1409 | if (!page->private) | |
1410 | continue; | |
1411 | if (!(page->private & 1)) { | |
1412 | ++nmaps; | |
1413 | continue; | |
1414 | } | |
1415 | d = (struct kvm_rmap_desc *)(page->private & ~1ul); | |
1416 | while (d) { | |
1417 | for (k = 0; k < RMAP_EXT; ++k) | |
1418 | if (d->shadow_ptes[k]) | |
1419 | ++nmaps; | |
1420 | else | |
1421 | break; | |
1422 | d = d->more; | |
1423 | } | |
1424 | } | |
1425 | } | |
1426 | return nmaps; | |
1427 | } | |
1428 | ||
1429 | static int count_writable_mappings(struct kvm_vcpu *vcpu) | |
1430 | { | |
1431 | int nmaps = 0; | |
1432 | struct kvm_mmu_page *page; | |
1433 | int i; | |
1434 | ||
1435 | list_for_each_entry(page, &vcpu->kvm->active_mmu_pages, link) { | |
47ad8e68 | 1436 | u64 *pt = page->spt; |
37a7d8b0 AK |
1437 | |
1438 | if (page->role.level != PT_PAGE_TABLE_LEVEL) | |
1439 | continue; | |
1440 | ||
1441 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { | |
1442 | u64 ent = pt[i]; | |
1443 | ||
1444 | if (!(ent & PT_PRESENT_MASK)) | |
1445 | continue; | |
1446 | if (!(ent & PT_WRITABLE_MASK)) | |
1447 | continue; | |
1448 | ++nmaps; | |
1449 | } | |
1450 | } | |
1451 | return nmaps; | |
1452 | } | |
1453 | ||
1454 | static void audit_rmap(struct kvm_vcpu *vcpu) | |
1455 | { | |
1456 | int n_rmap = count_rmaps(vcpu); | |
1457 | int n_actual = count_writable_mappings(vcpu); | |
1458 | ||
1459 | if (n_rmap != n_actual) | |
1460 | printk(KERN_ERR "%s: (%s) rmap %d actual %d\n", | |
1461 | __FUNCTION__, audit_msg, n_rmap, n_actual); | |
1462 | } | |
1463 | ||
1464 | static void audit_write_protection(struct kvm_vcpu *vcpu) | |
1465 | { | |
1466 | struct kvm_mmu_page *page; | |
1467 | ||
1468 | list_for_each_entry(page, &vcpu->kvm->active_mmu_pages, link) { | |
1469 | hfn_t hfn; | |
1470 | struct page *pg; | |
1471 | ||
1472 | if (page->role.metaphysical) | |
1473 | continue; | |
1474 | ||
1475 | hfn = gpa_to_hpa(vcpu, (gpa_t)page->gfn << PAGE_SHIFT) | |
1476 | >> PAGE_SHIFT; | |
1477 | pg = pfn_to_page(hfn); | |
1478 | if (pg->private) | |
1479 | printk(KERN_ERR "%s: (%s) shadow page has writable" | |
1480 | " mappings: gfn %lx role %x\n", | |
1481 | __FUNCTION__, audit_msg, page->gfn, | |
1482 | page->role.word); | |
1483 | } | |
1484 | } | |
1485 | ||
1486 | static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg) | |
1487 | { | |
1488 | int olddbg = dbg; | |
1489 | ||
1490 | dbg = 0; | |
1491 | audit_msg = msg; | |
1492 | audit_rmap(vcpu); | |
1493 | audit_write_protection(vcpu); | |
1494 | audit_mappings(vcpu); | |
1495 | dbg = olddbg; | |
1496 | } | |
1497 | ||
1498 | #endif |