#include "vmx.h"
#include "kvm.h"
+#include "x86.h"
#include <linux/types.h>
#include <linux/string.h>
#define PT32_DIR_PSE36_SIZE 4
#define PT32_DIR_PSE36_SHIFT 13
-#define PT32_DIR_PSE36_MASK (((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT)
+#define PT32_DIR_PSE36_MASK \
+ (((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT)
#define PT_FIRST_AVAIL_BITS_SHIFT 9
#define PT64_LEVEL_BITS 9
#define PT64_LEVEL_SHIFT(level) \
- ( PAGE_SHIFT + (level - 1) * PT64_LEVEL_BITS )
+ (PAGE_SHIFT + (level - 1) * PT64_LEVEL_BITS)
#define PT64_LEVEL_MASK(level) \
(((1ULL << PT64_LEVEL_BITS) - 1) << PT64_LEVEL_SHIFT(level))
#define PT32_LEVEL_BITS 10
#define PT32_LEVEL_SHIFT(level) \
- ( PAGE_SHIFT + (level - 1) * PT32_LEVEL_BITS )
+ (PAGE_SHIFT + (level - 1) * PT32_LEVEL_BITS)
#define PT32_LEVEL_MASK(level) \
(((1ULL << PT32_LEVEL_BITS) - 1) << PT32_LEVEL_SHIFT(level))
return pte & PT_WRITABLE_MASK;
}
+static int is_dirty_pte(unsigned long pte)
+{
+ return pte & PT_DIRTY_MASK;
+}
+
static int is_io_pte(unsigned long pte)
{
return pte & PT_SHADOW_IO_MARK;
static int is_rmap_pte(u64 pte)
{
- return (pte & (PT_WRITABLE_MASK | PT_PRESENT_MASK))
- == (PT_WRITABLE_MASK | PT_PRESENT_MASK);
+ return pte != shadow_trap_nonpresent_pte
+ && pte != shadow_notrap_nonpresent_pte;
}
static void set_shadow_pte(u64 *sptep, u64 spte)
struct kvm_rmap_desc *desc;
struct kvm_rmap_desc *prev_desc;
struct kvm_mmu_page *page;
+ struct page *release_page;
unsigned long *rmapp;
int i;
if (!is_rmap_pte(*spte))
return;
page = page_header(__pa(spte));
+ release_page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
+ if (is_writeble_pte(*spte))
+ kvm_release_page_dirty(release_page);
+ else
+ kvm_release_page_clean(release_page);
rmapp = gfn_to_rmap(kvm, page->gfns[spte - page->spt]);
if (!*rmapp) {
printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
}
}
-static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
+static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte)
{
struct kvm_rmap_desc *desc;
+ struct kvm_rmap_desc *prev_desc;
+ u64 *prev_spte;
+ int i;
+
+ if (!*rmapp)
+ return NULL;
+ else if (!(*rmapp & 1)) {
+ if (!spte)
+ return (u64 *)*rmapp;
+ return NULL;
+ }
+ desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
+ prev_desc = NULL;
+ prev_spte = NULL;
+ while (desc) {
+ for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; ++i) {
+ if (prev_spte == spte)
+ return desc->shadow_ptes[i];
+ prev_spte = desc->shadow_ptes[i];
+ }
+ desc = desc->more;
+ }
+ return NULL;
+}
+
+static void rmap_write_protect(struct kvm *kvm, u64 gfn)
+{
unsigned long *rmapp;
u64 *spte;
- gfn = unalias_gfn(vcpu->kvm, gfn);
- rmapp = gfn_to_rmap(vcpu->kvm, gfn);
+ gfn = unalias_gfn(kvm, gfn);
+ rmapp = gfn_to_rmap(kvm, gfn);
- while (*rmapp) {
- if (!(*rmapp & 1))
- spte = (u64 *)*rmapp;
- else {
- desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
- spte = desc->shadow_ptes[0];
- }
+ spte = rmap_next(kvm, rmapp, NULL);
+ while (spte) {
BUG_ON(!spte);
BUG_ON(!(*spte & PT_PRESENT_MASK));
- BUG_ON(!(*spte & PT_WRITABLE_MASK));
rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
- rmap_remove(vcpu->kvm, spte);
- set_shadow_pte(spte, *spte & ~PT_WRITABLE_MASK);
- kvm_flush_remote_tlbs(vcpu->kvm);
+ if (is_writeble_pte(*spte))
+ set_shadow_pte(spte, *spte & ~PT_WRITABLE_MASK);
+ kvm_flush_remote_tlbs(kvm);
+ spte = rmap_next(kvm, rmapp, spte);
}
}
BUG();
}
-static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm_vcpu *vcpu,
+static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm,
gfn_t gfn)
{
unsigned index;
pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn);
index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
- bucket = &vcpu->kvm->mmu_page_hash[index];
+ bucket = &kvm->mmu_page_hash[index];
hlist_for_each_entry(page, node, bucket, hash_link)
if (page->gfn == gfn && !page->role.metaphysical) {
pgprintk("%s: found role %x\n",
hlist_add_head(&page->hash_link, bucket);
vcpu->mmu.prefetch_page(vcpu, page);
if (!metaphysical)
- rmap_write_protect(vcpu, gfn);
+ rmap_write_protect(vcpu->kvm, gfn);
return page;
}
{
u64 *parent_pte;
+ ++kvm->stat.mmu_shadow_zapped;
while (page->multimapped || page->parent_pte) {
if (!page->multimapped)
parent_pte = page->parent_pte;
kvm->n_alloc_mmu_pages = kvm_nr_mmu_pages;
}
-static int kvm_mmu_unprotect_page(struct kvm_vcpu *vcpu, gfn_t gfn)
+static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
{
unsigned index;
struct hlist_head *bucket;
pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn);
r = 0;
index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
- bucket = &vcpu->kvm->mmu_page_hash[index];
+ bucket = &kvm->mmu_page_hash[index];
hlist_for_each_entry_safe(page, node, n, bucket, hash_link)
if (page->gfn == gfn && !page->role.metaphysical) {
pgprintk("%s: gfn %lx role %x\n", __FUNCTION__, gfn,
page->role.word);
- kvm_mmu_zap_page(vcpu->kvm, page);
+ kvm_mmu_zap_page(kvm, page);
r = 1;
}
return r;
}
-static void mmu_unshadow(struct kvm_vcpu *vcpu, gfn_t gfn)
+static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
{
struct kvm_mmu_page *page;
- while ((page = kvm_mmu_lookup_page(vcpu, gfn)) != NULL) {
+ while ((page = kvm_mmu_lookup_page(kvm, gfn)) != NULL) {
pgprintk("%s: zap %lx %x\n",
__FUNCTION__, gfn, page->role.word);
- kvm_mmu_zap_page(vcpu->kvm, page);
+ kvm_mmu_zap_page(kvm, page);
}
}
__set_bit(slot, &page_head->slot_bitmap);
}
-hpa_t safe_gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa)
-{
- hpa_t hpa = gpa_to_hpa(vcpu, gpa);
-
- return is_error_hpa(hpa) ? bad_page_address | (gpa & ~PAGE_MASK): hpa;
-}
-
-hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa)
+hpa_t gpa_to_hpa(struct kvm *kvm, gpa_t gpa)
{
struct page *page;
+ hpa_t hpa;
ASSERT((gpa & HPA_ERR_MASK) == 0);
- page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
- if (!page)
- return gpa | HPA_ERR_MASK;
- return ((hpa_t)page_to_pfn(page) << PAGE_SHIFT)
- | (gpa & (PAGE_SIZE-1));
+ page = gfn_to_page(kvm, gpa >> PAGE_SHIFT);
+ hpa = ((hpa_t)page_to_pfn(page) << PAGE_SHIFT) | (gpa & (PAGE_SIZE-1));
+ if (is_error_page(page))
+ return hpa | HPA_ERR_MASK;
+ return hpa;
}
hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva)
if (gpa == UNMAPPED_GVA)
return UNMAPPED_GVA;
- return gpa_to_hpa(vcpu, gpa);
+ return gpa_to_hpa(vcpu->kvm, gpa);
}
struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
if (gpa == UNMAPPED_GVA)
return NULL;
- return pfn_to_page(gpa_to_hpa(vcpu, gpa) >> PAGE_SHIFT);
+ return pfn_to_page(gpa_to_hpa(vcpu->kvm, gpa) >> PAGE_SHIFT);
}
static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
{
int level = PT32E_ROOT_LEVEL;
hpa_t table_addr = vcpu->mmu.root_hpa;
+ struct page *page;
+ page = pfn_to_page(p >> PAGE_SHIFT);
for (; ; level--) {
u32 index = PT64_INDEX(v, level);
u64 *table;
table = __va(table_addr);
if (level == 1) {
+ int was_rmapped;
+
pte = table[index];
- if (is_shadow_present_pte(pte) && is_writeble_pte(pte))
+ was_rmapped = is_rmap_pte(pte);
+ if (is_shadow_present_pte(pte) && is_writeble_pte(pte)) {
+ kvm_release_page_clean(page);
return 0;
+ }
mark_page_dirty(vcpu->kvm, v >> PAGE_SHIFT);
page_header_update_slot(vcpu->kvm, table, v);
table[index] = p | PT_PRESENT_MASK | PT_WRITABLE_MASK |
PT_USER_MASK;
- rmap_add(vcpu, &table[index], v >> PAGE_SHIFT);
+ if (!was_rmapped)
+ rmap_add(vcpu, &table[index], v >> PAGE_SHIFT);
+ else
+ kvm_release_page_clean(page);
+
return 0;
}
>> PAGE_SHIFT;
new_table = kvm_mmu_get_page(vcpu, pseudo_gfn,
v, level - 1,
- 1, 0, &table[index]);
+ 1, 3, &table[index]);
if (!new_table) {
pgprintk("nonpaging_map: ENOMEM\n");
+ kvm_release_page_clean(page);
return -ENOMEM;
}
ASSERT(VALID_PAGE(vcpu->mmu.root_hpa));
- paddr = gpa_to_hpa(vcpu , addr & PT64_BASE_ADDR_MASK);
+ paddr = gpa_to_hpa(vcpu->kvm, addr & PT64_BASE_ADDR_MASK);
- if (is_error_hpa(paddr))
+ if (is_error_hpa(paddr)) {
+ kvm_release_page_clean(pfn_to_page((paddr & PT64_BASE_ADDR_MASK)
+ >> PAGE_SHIFT));
return 1;
+ }
return nonpaging_map(vcpu, addr & PAGE_MASK, paddr);
}
const void *new, int bytes,
int offset_in_pte)
{
- if (page->role.level != PT_PAGE_TABLE_LEVEL)
+ if (page->role.level != PT_PAGE_TABLE_LEVEL) {
+ ++vcpu->kvm->stat.mmu_pde_zapped;
return;
+ }
+ ++vcpu->kvm->stat.mmu_pte_updated;
if (page->role.glevels == PT32_ROOT_LEVEL)
paging32_update_pte(vcpu, page, spte, new, bytes,
offset_in_pte);
int npte;
pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes);
+ ++vcpu->kvm->stat.mmu_pte_write;
kvm_mmu_audit(vcpu, "pre pte write");
if (gfn == vcpu->last_pt_write_gfn
&& !last_updated_pte_accessed(vcpu)) {
pgprintk("misaligned: gpa %llx bytes %d role %x\n",
gpa, bytes, page->role.word);
kvm_mmu_zap_page(vcpu->kvm, page);
+ ++vcpu->kvm->stat.mmu_flooded;
continue;
}
page_offset = offset;
{
gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva);
- return kvm_mmu_unprotect_page(vcpu, gpa >> PAGE_SHIFT);
+ return kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
}
void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
page = container_of(vcpu->kvm->active_mmu_pages.prev,
struct kvm_mmu_page, link);
kvm_mmu_zap_page(vcpu->kvm, page);
+ ++vcpu->kvm->stat.mmu_recycled;
+ }
+}
+
+int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
+{
+ int r;
+ enum emulation_result er;
+
+ mutex_lock(&vcpu->kvm->lock);
+ r = vcpu->mmu.page_fault(vcpu, cr2, error_code);
+ if (r < 0)
+ goto out;
+
+ if (!r) {
+ r = 1;
+ goto out;
+ }
+
+ r = mmu_topup_memory_caches(vcpu);
+ if (r)
+ goto out;
+
+ er = emulate_instruction(vcpu, vcpu->run, cr2, error_code, 0);
+ mutex_unlock(&vcpu->kvm->lock);
+
+ switch (er) {
+ case EMULATE_DONE:
+ return 1;
+ case EMULATE_DO_MMIO:
+ ++vcpu->stat.mmio_exits;
+ return 0;
+ case EMULATE_FAIL:
+ kvm_report_emulation_failure(vcpu, "pagetable");
+ return 1;
+ default:
+ BUG();
}
+out:
+ mutex_unlock(&vcpu->kvm->lock);
+ return r;
}
+EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
static void free_mmu_pages(struct kvm_vcpu *vcpu)
{
pt = page->spt;
for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
/* avoid RMW */
- if (pt[i] & PT_WRITABLE_MASK) {
- rmap_remove(kvm, &pt[i]);
+ if (pt[i] & PT_WRITABLE_MASK)
pt[i] &= ~PT_WRITABLE_MASK;
- }
}
}
return -ENOMEM;
}
+/*
+ * Caculate mmu pages needed for kvm.
+ */
+unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
+{
+ int i;
+ unsigned int nr_mmu_pages;
+ unsigned int nr_pages = 0;
+
+ for (i = 0; i < kvm->nmemslots; i++)
+ nr_pages += kvm->memslots[i].npages;
+
+ nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000;
+ nr_mmu_pages = max(nr_mmu_pages,
+ (unsigned int) KVM_MIN_ALLOC_MMU_PAGES);
+
+ return nr_mmu_pages;
+}
+
#ifdef AUDIT
static const char *audit_msg;
} else {
gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, va);
hpa_t hpa = gpa_to_hpa(vcpu, gpa);
+ struct page *page;
if (is_shadow_present_pte(ent)
&& (ent & PT64_BASE_ADDR_MASK) != hpa)
printk(KERN_ERR "xx audit error: (%s) levels %d"
" gva %lx gpa %llx hpa %llx ent %llx %d\n",
audit_msg, vcpu->mmu.root_level,
- va, gpa, hpa, ent, is_shadow_present_pte(ent));
+ va, gpa, hpa, ent,
+ is_shadow_present_pte(ent));
else if (ent == shadow_notrap_nonpresent_pte
&& !is_error_hpa(hpa))
printk(KERN_ERR "audit: (%s) notrap shadow,"
" valid guest gva %lx\n", audit_msg, va);
+ page = pfn_to_page((gpa & PT64_BASE_ADDR_MASK)
+ >> PAGE_SHIFT);
+ kvm_release_page_clean(page);
}
}