X-Git-Url: http://drtracing.org/?a=blobdiff_plain;f=arch%2Fx86%2Fkvm%2Fmmu.c;h=6bdfbc23ecaa8fc779085076bc08f340fc704513;hb=36ca7e0a577aa61f648953886958e50398732d63;hp=c512f095cdac82b9e2ba258ae052a9a4199dc13c;hpb=e3b1f64e04f57198ca0ab1216d824a5547be8446;p=deliverable%2Flinux.git diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index c512f095cdac..6bdfbc23ecaa 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -632,12 +632,12 @@ static void walk_shadow_page_lockless_begin(struct kvm_vcpu *vcpu) * kvm_flush_remote_tlbs() IPI to all active vcpus. */ local_irq_disable(); - vcpu->mode = READING_SHADOW_PAGE_TABLES; + /* * Make sure a following spte read is not reordered ahead of the write * to vcpu->mode. */ - smp_mb(); + smp_store_mb(vcpu->mode, READING_SHADOW_PAGE_TABLES); } static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu) @@ -647,8 +647,7 @@ static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu) * reads to sptes. If it does, kvm_commit_zap_page() can see us * OUTSIDE_GUEST_MODE and proceed to free the shadow page table. */ - smp_mb(); - vcpu->mode = OUTSIDE_GUEST_MODE; + smp_store_release(&vcpu->mode, OUTSIDE_GUEST_MODE); local_irq_enable(); } @@ -2390,14 +2389,13 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm, return; /* - * wmb: make sure everyone sees our modifications to the page tables - * rmb: make sure we see changes to vcpu->mode - */ - smp_mb(); - - /* - * Wait for all vcpus to exit guest mode and/or lockless shadow - * page table walks. + * We need to make sure everyone sees our modifications to + * the page tables and see changes to vcpu->mode here. The barrier + * in the kvm_flush_remote_tlbs() achieves this. This pairs + * with vcpu_enter_guest and walk_shadow_page_lockless_begin/end. + * + * In addition, kvm_flush_remote_tlbs waits for all vcpus to exit + * guest mode and/or lockless shadow page table walks. */ kvm_flush_remote_tlbs(kvm); @@ -3923,6 +3921,81 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu, } } +/* +* PKU is an additional mechanism by which the paging controls access to +* user-mode addresses based on the value in the PKRU register. Protection +* key violations are reported through a bit in the page fault error code. +* Unlike other bits of the error code, the PK bit is not known at the +* call site of e.g. gva_to_gpa; it must be computed directly in +* permission_fault based on two bits of PKRU, on some machine state (CR4, +* CR0, EFER, CPL), and on other bits of the error code and the page tables. +* +* In particular the following conditions come from the error code, the +* page tables and the machine state: +* - PK is always zero unless CR4.PKE=1 and EFER.LMA=1 +* - PK is always zero if RSVD=1 (reserved bit set) or F=1 (instruction fetch) +* - PK is always zero if U=0 in the page tables +* - PKRU.WD is ignored if CR0.WP=0 and the access is a supervisor access. +* +* The PKRU bitmask caches the result of these four conditions. The error +* code (minus the P bit) and the page table's U bit form an index into the +* PKRU bitmask. Two bits of the PKRU bitmask are then extracted and ANDed +* with the two bits of the PKRU register corresponding to the protection key. +* For the first three conditions above the bits will be 00, thus masking +* away both AD and WD. For all reads or if the last condition holds, WD +* only will be masked away. +*/ +static void update_pkru_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, + bool ept) +{ + unsigned bit; + bool wp; + + if (ept) { + mmu->pkru_mask = 0; + return; + } + + /* PKEY is enabled only if CR4.PKE and EFER.LMA are both set. */ + if (!kvm_read_cr4_bits(vcpu, X86_CR4_PKE) || !is_long_mode(vcpu)) { + mmu->pkru_mask = 0; + return; + } + + wp = is_write_protection(vcpu); + + for (bit = 0; bit < ARRAY_SIZE(mmu->permissions); ++bit) { + unsigned pfec, pkey_bits; + bool check_pkey, check_write, ff, uf, wf, pte_user; + + pfec = bit << 1; + ff = pfec & PFERR_FETCH_MASK; + uf = pfec & PFERR_USER_MASK; + wf = pfec & PFERR_WRITE_MASK; + + /* PFEC.RSVD is replaced by ACC_USER_MASK. */ + pte_user = pfec & PFERR_RSVD_MASK; + + /* + * Only need to check the access which is not an + * instruction fetch and is to a user page. + */ + check_pkey = (!ff && pte_user); + /* + * write access is controlled by PKRU if it is a + * user access or CR0.WP = 1. + */ + check_write = check_pkey && wf && (uf || wp); + + /* PKRU.AD stops both read and write access. */ + pkey_bits = !!check_pkey; + /* PKRU.WD stops write access. */ + pkey_bits |= (!!check_write) << 1; + + mmu->pkru_mask |= (pkey_bits & 3) << pfec; + } +} + static void update_last_nonleaf_level(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu) { unsigned root_level = mmu->root_level; @@ -3941,6 +4014,7 @@ static void paging64_init_context_common(struct kvm_vcpu *vcpu, reset_rsvds_bits_mask(vcpu, context); update_permission_bitmask(vcpu, context, false); + update_pkru_bitmask(vcpu, context, false); update_last_nonleaf_level(vcpu, context); MMU_WARN_ON(!is_pae(vcpu)); @@ -3968,6 +4042,7 @@ static void paging32_init_context(struct kvm_vcpu *vcpu, reset_rsvds_bits_mask(vcpu, context); update_permission_bitmask(vcpu, context, false); + update_pkru_bitmask(vcpu, context, false); update_last_nonleaf_level(vcpu, context); context->page_fault = paging32_page_fault; @@ -4026,6 +4101,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) } update_permission_bitmask(vcpu, context, false); + update_pkru_bitmask(vcpu, context, false); update_last_nonleaf_level(vcpu, context); reset_tdp_shadow_zero_bits_mask(vcpu, context); } @@ -4078,6 +4154,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly) context->direct_map = false; update_permission_bitmask(vcpu, context, true); + update_pkru_bitmask(vcpu, context, true); reset_rsvds_bits_mask_ept(vcpu, context, execonly); reset_ept_shadow_zero_bits_mask(vcpu, context, execonly); } @@ -4132,6 +4209,7 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu) } update_permission_bitmask(vcpu, g_context, false); + update_pkru_bitmask(vcpu, g_context, false); update_last_nonleaf_level(vcpu, g_context); }