KVM: Add statistic for remote tlb flushes
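Besides the remote TLB flush counter named in the title, the hunks below increment several new MMU statistics (mmu_shadow_zapped, mmu_pte_write, mmu_pte_updated, mmu_pde_zapped, mmu_flooded, mmu_recycled). A minimal sketch of the per-VM counter fields this implies follows; the struct name, field types, and the remote_tlb_flush field itself are assumptions inferred from the ++kvm->stat.* sites — the actual definition lives in the newly included x86.h, which is not part of this file's diff.

        struct kvm_vm_stat {                    /* hypothetical layout, inferred from this diff */
                u32 mmu_shadow_zapped;          /* shadow pages destroyed in kvm_mmu_zap_page() */
                u32 mmu_pte_write;              /* guest page-table write traps */
                u32 mmu_pte_updated;            /* leaf sptes updated in place */
                u32 mmu_pde_zapped;             /* non-leaf entries dropped instead of updated */
                u32 mmu_flooded;                /* pages zapped on misaligned/flooding writes */
                u32 mmu_recycled;               /* pages reclaimed by __kvm_mmu_free_some_pages() */
                u32 remote_tlb_flush;           /* assumed counter behind the commit title */
        };
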
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index ece0aa4e4c9fcb9a3172b873cae8c1877636f8a5..101cd5377a89716377021baebe68733b14e5ca85 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -19,6 +19,7 @@
 
 #include "vmx.h"
 #include "kvm.h"
+#include "x86.h"
 
 #include <linux/types.h>
 #include <linux/string.h>
@@ -199,6 +200,11 @@ static int is_writeble_pte(unsigned long pte)
        return pte & PT_WRITABLE_MASK;
 }
 
+static int is_dirty_pte(unsigned long pte)
+{
+       return pte & PT_DIRTY_MASK;
+}
+
 static int is_io_pte(unsigned long pte)
 {
        return pte & PT_SHADOW_IO_MARK;
@@ -206,8 +212,8 @@ static int is_io_pte(unsigned long pte)
 
 static int is_rmap_pte(u64 pte)
 {
-       return (pte & (PT_WRITABLE_MASK | PT_PRESENT_MASK))
-               == (PT_WRITABLE_MASK | PT_PRESENT_MASK);
+       return pte != shadow_trap_nonpresent_pte
+               && pte != shadow_notrap_nonpresent_pte;
 }
 
 static void set_shadow_pte(u64 *sptep, u64 spte)
@@ -414,12 +420,18 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
        struct kvm_rmap_desc *desc;
        struct kvm_rmap_desc *prev_desc;
        struct kvm_mmu_page *page;
+       struct page *release_page;
        unsigned long *rmapp;
        int i;
 
        if (!is_rmap_pte(*spte))
                return;
        page = page_header(__pa(spte));
+       release_page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
+       if (is_writeble_pte(*spte))
+               kvm_release_page_dirty(release_page);
+       else
+               kvm_release_page_clean(release_page);
        rmapp = gfn_to_rmap(kvm, page->gfns[spte - page->spt]);
        if (!*rmapp) {
                printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
@@ -451,29 +463,51 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
        }
 }
 
-static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
+static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte)
 {
        struct kvm_rmap_desc *desc;
+       struct kvm_rmap_desc *prev_desc;
+       u64 *prev_spte;
+       int i;
+
+       if (!*rmapp)
+               return NULL;
+       else if (!(*rmapp & 1)) {
+               if (!spte)
+                       return (u64 *)*rmapp;
+               return NULL;
+       }
+       desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
+       prev_desc = NULL;
+       prev_spte = NULL;
+       while (desc) {
+               for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; ++i) {
+                       if (prev_spte == spte)
+                               return desc->shadow_ptes[i];
+                       prev_spte = desc->shadow_ptes[i];
+               }
+               desc = desc->more;
+       }
+       return NULL;
+}
+
+static void rmap_write_protect(struct kvm *kvm, u64 gfn)
+{
        unsigned long *rmapp;
        u64 *spte;
 
-       gfn = unalias_gfn(vcpu->kvm, gfn);
-       rmapp = gfn_to_rmap(vcpu->kvm, gfn);
+       gfn = unalias_gfn(kvm, gfn);
+       rmapp = gfn_to_rmap(kvm, gfn);
 
-       while (*rmapp) {
-               if (!(*rmapp & 1))
-                       spte = (u64 *)*rmapp;
-               else {
-                       desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
-                       spte = desc->shadow_ptes[0];
-               }
+       spte = rmap_next(kvm, rmapp, NULL);
+       while (spte) {
                BUG_ON(!spte);
                BUG_ON(!(*spte & PT_PRESENT_MASK));
-               BUG_ON(!(*spte & PT_WRITABLE_MASK));
                rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
-               rmap_remove(vcpu->kvm, spte);
-               set_shadow_pte(spte, *spte & ~PT_WRITABLE_MASK);
-               kvm_flush_remote_tlbs(vcpu->kvm);
+               if (is_writeble_pte(*spte))
+                       set_shadow_pte(spte, *spte & ~PT_WRITABLE_MASK);
+               kvm_flush_remote_tlbs(kvm);
+               spte = rmap_next(kvm, rmapp, spte);
        }
 }
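For reference, this is the iteration pattern rmap_write_protect() now uses: entries are no longer removed while write-protecting, so the loop advances by handing the previous spte back to rmap_next(). A sketch only; rmapp is the rmap slot for the gfn as returned by gfn_to_rmap().

        u64 *spte = rmap_next(kvm, rmapp, NULL);        /* first spte mapping the gfn */
        while (spte) {
                /* ... inspect or modify *spte (here: clear PT_WRITABLE_MASK) ... */
                spte = rmap_next(kvm, rmapp, spte);     /* successor of the previous spte */
        }
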
 
@@ -606,7 +640,7 @@ static void mmu_page_remove_parent_pte(struct kvm_mmu_page *page,
        BUG();
 }
 
-static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm_vcpu *vcpu,
+static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm,
                                                gfn_t gfn)
 {
        unsigned index;
@@ -616,7 +650,7 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm_vcpu *vcpu,
 
        pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn);
        index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
-       bucket = &vcpu->kvm->mmu_page_hash[index];
+       bucket = &kvm->mmu_page_hash[index];
        hlist_for_each_entry(page, node, bucket, hash_link)
                if (page->gfn == gfn && !page->role.metaphysical) {
                        pgprintk("%s: found role %x\n",
@@ -670,7 +704,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
        hlist_add_head(&page->hash_link, bucket);
        vcpu->mmu.prefetch_page(vcpu, page);
        if (!metaphysical)
-               rmap_write_protect(vcpu, gfn);
+               rmap_write_protect(vcpu->kvm, gfn);
        return page;
 }
 
@@ -725,6 +759,7 @@ static void kvm_mmu_zap_page(struct kvm *kvm,
 {
        u64 *parent_pte;
 
+       ++kvm->stat.mmu_shadow_zapped;
        while (page->multimapped || page->parent_pte) {
                if (!page->multimapped)
                        parent_pte = page->parent_pte;
@@ -782,7 +817,7 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages)
        kvm->n_alloc_mmu_pages = kvm_nr_mmu_pages;
 }
 
-static int kvm_mmu_unprotect_page(struct kvm_vcpu *vcpu, gfn_t gfn)
+static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
 {
        unsigned index;
        struct hlist_head *bucket;
@@ -793,25 +828,25 @@ static int kvm_mmu_unprotect_page(struct kvm_vcpu *vcpu, gfn_t gfn)
        pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn);
        r = 0;
        index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
-       bucket = &vcpu->kvm->mmu_page_hash[index];
+       bucket = &kvm->mmu_page_hash[index];
        hlist_for_each_entry_safe(page, node, n, bucket, hash_link)
                if (page->gfn == gfn && !page->role.metaphysical) {
                        pgprintk("%s: gfn %lx role %x\n", __FUNCTION__, gfn,
                                 page->role.word);
-                       kvm_mmu_zap_page(vcpu->kvm, page);
+                       kvm_mmu_zap_page(kvm, page);
                        r = 1;
                }
        return r;
 }
 
-static void mmu_unshadow(struct kvm_vcpu *vcpu, gfn_t gfn)
+static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
 {
        struct kvm_mmu_page *page;
 
-       while ((page = kvm_mmu_lookup_page(vcpu, gfn)) != NULL) {
+       while ((page = kvm_mmu_lookup_page(kvm, gfn)) != NULL) {
                pgprintk("%s: zap %lx %x\n",
                         __FUNCTION__, gfn, page->role.word);
-               kvm_mmu_zap_page(vcpu->kvm, page);
+               kvm_mmu_zap_page(kvm, page);
        }
 }
 
@@ -823,23 +858,17 @@ static void page_header_update_slot(struct kvm *kvm, void *pte, gpa_t gpa)
        __set_bit(slot, &page_head->slot_bitmap);
 }
 
-hpa_t safe_gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa)
-{
-       hpa_t hpa = gpa_to_hpa(vcpu, gpa);
-
-       return is_error_hpa(hpa) ? bad_page_address | (gpa & ~PAGE_MASK): hpa;
-}
-
-hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa)
+hpa_t gpa_to_hpa(struct kvm *kvm, gpa_t gpa)
 {
        struct page *page;
+       hpa_t hpa;
 
        ASSERT((gpa & HPA_ERR_MASK) == 0);
-       page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
-       if (!page)
-               return gpa | HPA_ERR_MASK;
-       return ((hpa_t)page_to_pfn(page) << PAGE_SHIFT)
-               | (gpa & (PAGE_SIZE-1));
+       page = gfn_to_page(kvm, gpa >> PAGE_SHIFT);
+       hpa = ((hpa_t)page_to_pfn(page) << PAGE_SHIFT) | (gpa & (PAGE_SIZE-1));
+       if (is_error_page(page))
+               return hpa | HPA_ERR_MASK;
+       return hpa;
 }
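Note the changed error convention in gpa_to_hpa(): gfn_to_page() appears to never return NULL here, handing back a distinguished error page instead, so an hpa is always computed and merely tagged with HPA_ERR_MASK on failure. A caller sketch mirroring nonpaging_page_fault() further down — the reference taken on the error page still has to be dropped:

        hpa_t paddr = gpa_to_hpa(kvm, gpa);
        if (is_error_hpa(paddr)) {
                /* even the error page carries a reference that must be released */
                kvm_release_page_clean(pfn_to_page((paddr & PT64_BASE_ADDR_MASK)
                                       >> PAGE_SHIFT));
                return 1;
        }
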
 
 hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva)
@@ -848,7 +877,7 @@ hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva)
 
        if (gpa == UNMAPPED_GVA)
                return UNMAPPED_GVA;
-       return gpa_to_hpa(vcpu, gpa);
+       return gpa_to_hpa(vcpu->kvm, gpa);
 }
 
 struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
@@ -857,7 +886,7 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
 
        if (gpa == UNMAPPED_GVA)
                return NULL;
-       return pfn_to_page(gpa_to_hpa(vcpu, gpa) >> PAGE_SHIFT);
+       return pfn_to_page(gpa_to_hpa(vcpu->kvm, gpa) >> PAGE_SHIFT);
 }
 
 static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
@@ -868,7 +897,9 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p)
 {
        int level = PT32E_ROOT_LEVEL;
        hpa_t table_addr = vcpu->mmu.root_hpa;
+       struct page *page;
 
+       page = pfn_to_page(p >> PAGE_SHIFT);
        for (; ; level--) {
                u32 index = PT64_INDEX(v, level);
                u64 *table;
@@ -878,14 +909,23 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p)
                table = __va(table_addr);
 
                if (level == 1) {
+                       int was_rmapped;
+
                        pte = table[index];
-                       if (is_shadow_present_pte(pte) && is_writeble_pte(pte))
+                       was_rmapped = is_rmap_pte(pte);
+                       if (is_shadow_present_pte(pte) && is_writeble_pte(pte)) {
+                               kvm_release_page_clean(page);
                                return 0;
+                       }
                        mark_page_dirty(vcpu->kvm, v >> PAGE_SHIFT);
                        page_header_update_slot(vcpu->kvm, table, v);
                        table[index] = p | PT_PRESENT_MASK | PT_WRITABLE_MASK |
                                                                PT_USER_MASK;
-                       rmap_add(vcpu, &table[index], v >> PAGE_SHIFT);
+                       if (!was_rmapped)
+                               rmap_add(vcpu, &table[index], v >> PAGE_SHIFT);
+                       else
+                               kvm_release_page_clean(page);
+
                        return 0;
                }
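A short summary of the page-reference bookkeeping the additions above implement; the working assumption is one reference held per rmapped spte.

        /*
         * gpa_to_hpa()/gfn_to_page() take a reference on the backing page.
         * If the fault installs a new spte, rmap_add() keeps that reference
         * for the lifetime of the mapping and rmap_remove() later drops it
         * (dirty if the spte was writable, clean otherwise).  On every path
         * that does not create a new rmap entry -- already-writable spte,
         * already-rmapped spte, or -ENOMEM -- the reference is released
         * immediately with kvm_release_page_clean().
         */
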
 
@@ -897,9 +937,10 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p)
                                >> PAGE_SHIFT;
                        new_table = kvm_mmu_get_page(vcpu, pseudo_gfn,
                                                     v, level - 1,
-                                                    1, 0, &table[index]);
+                                                    1, 3, &table[index]);
                        if (!new_table) {
                                pgprintk("nonpaging_map: ENOMEM\n");
+                               kvm_release_page_clean(page);
                                return -ENOMEM;
                        }
 
@@ -1012,10 +1053,13 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
        ASSERT(VALID_PAGE(vcpu->mmu.root_hpa));
 
 
-       paddr = gpa_to_hpa(vcpu , addr & PT64_BASE_ADDR_MASK);
+       paddr = gpa_to_hpa(vcpu->kvm, addr & PT64_BASE_ADDR_MASK);
 
-       if (is_error_hpa(paddr))
+       if (is_error_hpa(paddr)) {
+               kvm_release_page_clean(pfn_to_page((paddr & PT64_BASE_ADDR_MASK)
+                                      >> PAGE_SHIFT));
                return 1;
+       }
 
        return nonpaging_map(vcpu, addr & PAGE_MASK, paddr);
 }
@@ -1192,9 +1236,12 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
                                  const void *new, int bytes,
                                  int offset_in_pte)
 {
-       if (page->role.level != PT_PAGE_TABLE_LEVEL)
+       if (page->role.level != PT_PAGE_TABLE_LEVEL) {
+               ++vcpu->kvm->stat.mmu_pde_zapped;
                return;
+       }
 
+       ++vcpu->kvm->stat.mmu_pte_updated;
        if (page->role.glevels == PT32_ROOT_LEVEL)
                paging32_update_pte(vcpu, page, spte, new, bytes,
                                    offset_in_pte);
@@ -1229,6 +1276,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
        int npte;
 
        pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes);
+       ++vcpu->kvm->stat.mmu_pte_write;
        kvm_mmu_audit(vcpu, "pre pte write");
        if (gfn == vcpu->last_pt_write_gfn
            && !last_updated_pte_accessed(vcpu)) {
@@ -1262,6 +1310,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
                        pgprintk("misaligned: gpa %llx bytes %d role %x\n",
                                 gpa, bytes, page->role.word);
                        kvm_mmu_zap_page(vcpu->kvm, page);
+                       ++vcpu->kvm->stat.mmu_flooded;
                        continue;
                }
                page_offset = offset;
@@ -1299,7 +1348,7 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
 {
        gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva);
 
-       return kvm_mmu_unprotect_page(vcpu, gpa >> PAGE_SHIFT);
+       return kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
 }
 
 void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
@@ -1310,8 +1359,49 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
                page = container_of(vcpu->kvm->active_mmu_pages.prev,
                                    struct kvm_mmu_page, link);
                kvm_mmu_zap_page(vcpu->kvm, page);
+               ++vcpu->kvm->stat.mmu_recycled;
+       }
+}
+
+int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
+{
+       int r;
+       enum emulation_result er;
+
+       mutex_lock(&vcpu->kvm->lock);
+       r = vcpu->mmu.page_fault(vcpu, cr2, error_code);
+       if (r < 0)
+               goto out;
+
+       if (!r) {
+               r = 1;
+               goto out;
+       }
+
+       r = mmu_topup_memory_caches(vcpu);
+       if (r)
+               goto out;
+
+       er = emulate_instruction(vcpu, vcpu->run, cr2, error_code, 0);
+       mutex_unlock(&vcpu->kvm->lock);
+
+       switch (er) {
+       case EMULATE_DONE:
+               return 1;
+       case EMULATE_DO_MMIO:
+               ++vcpu->stat.mmio_exits;
+               return 0;
+       case EMULATE_FAIL:
+               kvm_report_emulation_failure(vcpu, "pagetable");
+               return 1;
+       default:
+               BUG();
        }
+out:
+       mutex_unlock(&vcpu->kvm->lock);
+       return r;
 }
+EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
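An illustrative caller for the new kvm_mmu_page_fault() entry point (hypothetical exit-handler code, not part of this patch; r, cr2 and error_code are assumed locals of that handler). The return-value convention follows the function body above:

        r = kvm_mmu_page_fault(vcpu, cr2, error_code);
        if (r < 0)
                return r;       /* hard error: propagate to the caller */
        if (r == 0)
                return 0;       /* exit to userspace, e.g. for MMIO emulation */
        /* r == 1: the fault was fixed up or the instruction was emulated --
         * resume guest execution */
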
 
 static void free_mmu_pages(struct kvm_vcpu *vcpu)
 {
@@ -1394,10 +1484,8 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
                pt = page->spt;
                for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
                        /* avoid RMW */
-                       if (pt[i] & PT_WRITABLE_MASK) {
-                               rmap_remove(kvm, &pt[i]);
+                       if (pt[i] & PT_WRITABLE_MASK)
                                pt[i] &= ~PT_WRITABLE_MASK;
-                       }
        }
 }
 
@@ -1447,6 +1535,25 @@ nomem:
        return -ENOMEM;
 }
 
+/*
+ * Calculate mmu pages needed for kvm.
+ */
+unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
+{
+       int i;
+       unsigned int nr_mmu_pages;
+       unsigned int  nr_pages = 0;
+
+       for (i = 0; i < kvm->nmemslots; i++)
+               nr_pages += kvm->memslots[i].npages;
+
+       nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000;
+       nr_mmu_pages = max(nr_mmu_pages,
+                       (unsigned int) KVM_MIN_ALLOC_MMU_PAGES);
+
+       return nr_mmu_pages;
+}
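A worked example, assuming KVM_PERMILLE_MMU_PAGES is 20 and KVM_MIN_ALLOC_MMU_PAGES is 64 (both defined elsewhere; values not shown in this diff): a guest with 1 GiB of memory spans 262144 4 KiB pages, so the calculation yields 262144 * 20 / 1000 = 5242 shadow pages, well above the 64-page floor.

        unsigned int nr_pages = 262144;                    /* 1 GiB guest, 4 KiB pages */
        unsigned int nr_mmu_pages = nr_pages * 20 / 1000;  /* = 5242 */
        if (nr_mmu_pages < 64)                             /* assumed minimum */
                nr_mmu_pages = 64;                         /* not reached in this example */
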
+
 #ifdef AUDIT
 
 static const char *audit_msg;
@@ -1484,6 +1591,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
                } else {
                        gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, va);
                        hpa_t hpa = gpa_to_hpa(vcpu, gpa);
+                       struct page *page;
 
                        if (is_shadow_present_pte(ent)
                            && (ent & PT64_BASE_ADDR_MASK) != hpa)
@@ -1496,6 +1604,9 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
                                 && !is_error_hpa(hpa))
                                printk(KERN_ERR "audit: (%s) notrap shadow,"
                                       " valid guest gva %lx\n", audit_msg, va);
+                       page = pfn_to_page((gpa & PT64_BASE_ADDR_MASK)
+                                          >> PAGE_SHIFT);
+                       kvm_release_page_clean(page);
 
                }
        }