arm/arm64: KVM: map MMIO regions at creation time
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 16e7994bf3473d4c6ec7c32a8571299c02ab7808..6038027ab1d6f2fb2cb79fad5192275de6bcde4f 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -528,11 +528,10 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
                return -EINVAL;
        }
 
-       pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, S2_PGD_ORDER);
+       pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, S2_PGD_ORDER);
        if (!pgd)
                return -ENOMEM;
 
-       memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t));
        kvm_clean_pgd(pgd);
        kvm->arch.pgd = pgd;
 
@@ -675,7 +674,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
  * @size:      The size of the mapping
  */
 int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
-                         phys_addr_t pa, unsigned long size)
+                         phys_addr_t pa, unsigned long size, bool writable)
 {
        phys_addr_t addr, end;
        int ret = 0;
@@ -688,6 +687,9 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
        for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) {
                pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE);
 
+               if (writable)
+                       kvm_set_s2pte_writable(&pte);
+
                ret = mmu_topup_memory_cache(&cache, 2, 2);
                if (ret)
                        goto out;
@@ -746,22 +748,29 @@ static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap)
        return false;
 }
 
+static bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
+{
+       if (kvm_vcpu_trap_is_iabt(vcpu))
+               return false;
+
+       return kvm_vcpu_dabt_iswrite(vcpu);
+}
+
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
-                         struct kvm_memory_slot *memslot,
+                         struct kvm_memory_slot *memslot, unsigned long hva,
                          unsigned long fault_status)
 {
        int ret;
        bool write_fault, writable, hugetlb = false, force_pte = false;
        unsigned long mmu_seq;
        gfn_t gfn = fault_ipa >> PAGE_SHIFT;
-       unsigned long hva = gfn_to_hva(vcpu->kvm, gfn);
        struct kvm *kvm = vcpu->kvm;
        struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
        struct vm_area_struct *vma;
        pfn_t pfn;
        pgprot_t mem_type = PAGE_S2;
 
-       write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu));
+       write_fault = kvm_is_write_fault(vcpu);
        if (fault_status == FSC_PERM && !write_fault) {
                kvm_err("Unexpected L2 read permission error\n");
                return -EFAULT;
@@ -770,6 +779,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
        /* Let's check if we will get back a huge page backed by hugetlbfs */
        down_read(&current->mm->mmap_sem);
        vma = find_vma_intersection(current->mm, hva, hva + 1);
+       if (unlikely(!vma)) {
+               kvm_err("Failed to find VMA for hva 0x%lx\n", hva);
+               up_read(&current->mm->mmap_sem);
+               return -EFAULT;
+       }
+
        if (is_vm_hugetlb_page(vma)) {
                hugetlb = true;
                gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
@@ -863,7 +878,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
        unsigned long fault_status;
        phys_addr_t fault_ipa;
        struct kvm_memory_slot *memslot;
-       bool is_iabt;
+       unsigned long hva;
+       bool is_iabt, write_fault, writable;
        gfn_t gfn;
        int ret, idx;
 
@@ -874,17 +890,22 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
                              kvm_vcpu_get_hfar(vcpu), fault_ipa);
 
        /* Check the stage-2 fault is trans. fault or write fault */
-       fault_status = kvm_vcpu_trap_get_fault(vcpu);
+       fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
        if (fault_status != FSC_FAULT && fault_status != FSC_PERM) {
-               kvm_err("Unsupported fault status: EC=%#x DFCS=%#lx\n",
-                       kvm_vcpu_trap_get_class(vcpu), fault_status);
+               kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
+                       kvm_vcpu_trap_get_class(vcpu),
+                       (unsigned long)kvm_vcpu_trap_get_fault(vcpu),
+                       (unsigned long)kvm_vcpu_get_hsr(vcpu));
                return -EFAULT;
        }
 
        idx = srcu_read_lock(&vcpu->kvm->srcu);
 
        gfn = fault_ipa >> PAGE_SHIFT;
-       if (!kvm_is_visible_gfn(vcpu->kvm, gfn)) {
+       memslot = gfn_to_memslot(vcpu->kvm, gfn);
+       hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
+       write_fault = kvm_is_write_fault(vcpu);
+       if (kvm_is_error_hva(hva) || (write_fault && !writable)) {
                if (is_iabt) {
                        /* Prefetch Abort on I/O address */
                        kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
@@ -892,13 +913,6 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
                        goto out_unlock;
                }
 
-               if (fault_status != FSC_FAULT) {
-                       kvm_err("Unsupported fault status on io memory: %#lx\n",
-                               fault_status);
-                       ret = -EFAULT;
-                       goto out_unlock;
-               }
-
                /*
                 * The IPA is reported as [MAX:12], so we need to
                 * complement it with the bottom 12 bits from the
@@ -910,9 +924,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
                goto out_unlock;
        }
 
-       memslot = gfn_to_memslot(vcpu->kvm, gfn);
-
-       ret = user_mem_abort(vcpu, fault_ipa, memslot, fault_status);
+       ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
        if (ret == 0)
                ret = 1;
 out_unlock:
@@ -1122,13 +1134,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
                                   const struct kvm_memory_slot *old,
                                   enum kvm_mr_change change)
 {
-       gpa_t gpa = old->base_gfn << PAGE_SHIFT;
-       phys_addr_t size = old->npages << PAGE_SHIFT;
-       if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) {
-               spin_lock(&kvm->mmu_lock);
-               unmap_stage2_range(kvm, gpa, size);
-               spin_unlock(&kvm->mmu_lock);
-       }
 }
 
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
@@ -1136,7 +1141,69 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
                                   struct kvm_userspace_memory_region *mem,
                                   enum kvm_mr_change change)
 {
-       return 0;
+       hva_t hva = mem->userspace_addr;
+       hva_t reg_end = hva + mem->memory_size;
+       bool writable = !(mem->flags & KVM_MEM_READONLY);
+       int ret = 0;
+
+       if (change != KVM_MR_CREATE && change != KVM_MR_MOVE)
+               return 0;
+
+       /*
+        * A memory region could potentially cover multiple VMAs, and any holes
+        * between them, so iterate over all of them to find out if we can map
+        * any of them right now.
+        *
+        *     +--------------------------------------------+
+        * +---------------+----------------+   +----------------+
+        * |   : VMA 1     |      VMA 2     |   |    VMA 3  :    |
+        * +---------------+----------------+   +----------------+
+        *     |               memory region                |
+        *     +--------------------------------------------+
+        */
+       do {
+               struct vm_area_struct *vma = find_vma(current->mm, hva);
+               hva_t vm_start, vm_end;
+
+               if (!vma || vma->vm_start >= reg_end)
+                       break;
+
+               /*
+                * Mapping a read-only VMA is only allowed if the
+                * memory region is configured as read-only.
+                */
+               if (writable && !(vma->vm_flags & VM_WRITE)) {
+                       ret = -EPERM;
+                       break;
+               }
+
+               /*
+                * Take the intersection of this VMA with the memory region
+                */
+               vm_start = max(hva, vma->vm_start);
+               vm_end = min(reg_end, vma->vm_end);
+
+               if (vma->vm_flags & VM_PFNMAP) {
+                       gpa_t gpa = mem->guest_phys_addr +
+                                   (vm_start - mem->userspace_addr);
+                       phys_addr_t pa = (vma->vm_pgoff << PAGE_SHIFT) +
+                                        vm_start - vma->vm_start;
+
+                       ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
+                                                   vm_end - vm_start,
+                                                   writable);
+                       if (ret)
+                               break;
+               }
+               hva = vm_end;
+       } while (hva < reg_end);
+
+       if (ret) {
+               spin_lock(&kvm->mmu_lock);
+               unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size);
+               spin_unlock(&kvm->mmu_lock);
+       }
+       return ret;
 }
 
 void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
@@ -1161,4 +1228,10 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm)
 void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
                                   struct kvm_memory_slot *slot)
 {
+       gpa_t gpa = slot->base_gfn << PAGE_SHIFT;
+       phys_addr_t size = slot->npages << PAGE_SHIFT;
+
+       spin_lock(&kvm->mmu_lock);
+       unmap_stage2_range(kvm, gpa, size);
+       spin_unlock(&kvm->mmu_lock);
 }
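
Usage note (not part of the patch): with this change, kvm_arch_prepare_memory_region() walks the VMAs backing a new or moved memslot and establishes stage-2 mappings for any VM_PFNMAP ranges up front via kvm_phys_addr_ioremap(), honouring KVM_MEM_READONLY, instead of relying on user_mem_abort() to fault them in. A minimal userspace sketch of the kind of registration that exercises this path follows; the BAR path, slot number and vm_fd handling are assumptions, while KVM_SET_USER_MEMORY_REGION and KVM_MEM_READONLY come from <linux/kvm.h>.

        /*
         * Hypothetical userspace sketch: map a PCI BAR (a VM_PFNMAP VMA)
         * into the guest.  With this patch the stage-2 mappings are
         * created at ioctl time rather than lazily on guest faults.
         */
        #include <fcntl.h>
        #include <sys/ioctl.h>
        #include <sys/mman.h>
        #include <linux/kvm.h>

        static int map_device_bar(int vm_fd, __u64 guest_phys, size_t size,
                                  const char *bar_path, int read_only)
        {
                int fd = open(bar_path, O_RDWR | O_SYNC);
                if (fd < 0)
                        return -1;

                /* mmap() of a PCI resource file yields a VM_PFNMAP mapping. */
                void *hva = mmap(NULL, size,
                                 PROT_READ | (read_only ? 0 : PROT_WRITE),
                                 MAP_SHARED, fd, 0);
                if (hva == MAP_FAILED)
                        return -1;

                struct kvm_userspace_memory_region region = {
                        .slot            = 1,           /* assumed free slot */
                        .flags           = read_only ? KVM_MEM_READONLY : 0,
                        .guest_phys_addr = guest_phys,
                        .memory_size     = size,
                        .userspace_addr  = (unsigned long)hva,
                };

                /* kvm_arch_prepare_memory_region() now ioremaps the region here. */
                return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
        }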