arch/x86/kvm/paging_tmpl.h

   1 /*
   2  * Kernel-based Virtual Machine driver for Linux
   3  *
   4  * This module enables machines with Intel VT-x extensions to run virtual
   5  * machines without emulation or binary translation.
   6  *
   7  * MMU support
   8  *
   9  * Copyright (C) 2006 Qumranet, Inc.
  10  *
  11  * Authors:
  12  *   Yaniv Kamay  <yaniv@qumranet.com>
  13  *   Avi Kivity   <avi@qumranet.com>
  14  *
  15  * This work is licensed under the terms of the GNU GPL, version 2.  See
  16  * the COPYING file in the top-level directory.
  17  *
  18  */
  19
  20 /*
  21  * We need the mmu code to access both 32-bit and 64-bit guest ptes,
  22  * so the code in this file is compiled twice, once per pte size.
  23  */
  24
  25 #if PTTYPE == 64
  26         #define pt_element_t u64
  27         #define guest_walker guest_walker64
  28         #define FNAME(name) paging##64_##name
  29         #define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
  30         #define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl)
  31         #define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl)
  32         #define PT_INDEX(addr, level) PT64_INDEX(addr, level)
  33         #define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level)
  34         #define PT_LEVEL_BITS PT64_LEVEL_BITS
  35         #ifdef CONFIG_X86_64
  36         #define PT_MAX_FULL_LEVELS 4
  37         #define CMPXCHG cmpxchg
  38         #else
  39         #define CMPXCHG cmpxchg64
  40         #define PT_MAX_FULL_LEVELS 2
  41         #endif
  42 #elif PTTYPE == 32
  43         #define pt_element_t u32
  44         #define guest_walker guest_walker32
  45         #define FNAME(name) paging##32_##name
  46         #define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK
  47         #define PT_LVL_ADDR_MASK(lvl) PT32_LVL_ADDR_MASK(lvl)
  48         #define PT_LVL_OFFSET_MASK(lvl) PT32_LVL_OFFSET_MASK(lvl)
  49         #define PT_INDEX(addr, level) PT32_INDEX(addr, level)
  50         #define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level)
  51         #define PT_LEVEL_BITS PT32_LEVEL_BITS
  52         #define PT_MAX_FULL_LEVELS 2
  53         #define CMPXCHG cmpxchg
  54 #else
  55         #error Invalid PTTYPE value
  56 #endif
  57
  58 #define gpte_to_gfn_lvl FNAME(gpte_to_gfn_lvl)
  59 #define gpte_to_gfn(pte) gpte_to_gfn_lvl((pte), PT_PAGE_TABLE_LEVEL)
  60
  61 /*
  62  * The guest_walker structure emulates the behavior of the hardware page
  63  * table walker.
  64  */
  65 struct guest_walker {
  66         int level;
  67         gfn_t table_gfn[PT_MAX_FULL_LEVELS];
  68         pt_element_t ptes[PT_MAX_FULL_LEVELS];
  69         gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
  70         unsigned pt_access;
  71         unsigned pte_access;
  72         gfn_t gfn;
  73         u32 error_code;
  74 };
  75
  76 static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl)
  77 {
  78         return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT;
  79 }
  80
  81 static bool FNAME(cmpxchg_gpte)(struct kvm *kvm,
  82                          gfn_t table_gfn, unsigned index,
  83                          pt_element_t orig_pte, pt_element_t new_pte)
  84 {
  85         pt_element_t ret;
  86         pt_element_t *table;
  87         struct page *page;
  88
  89         page = gfn_to_page(kvm, table_gfn);
  90
  91         table = kmap_atomic(page, KM_USER0);
  92         ret = CMPXCHG(&table[index], orig_pte, new_pte);
  93         kunmap_atomic(table, KM_USER0);
  94
  95         kvm_release_page_dirty(page);
  96
  97         return (ret != orig_pte);
  98 }
  99
 100 static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte)
 101 {
 102         unsigned access;
 103
 104         access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
 105 #if PTTYPE == 64
 106         if (is_nx(vcpu))
 107                 access &= ~(gpte >> PT64_NX_SHIFT);
 108 #endif
 109         return access;
 110 }
 111
 112 /*
 113  * Fetch a guest pte for a guest virtual address
 114  */
 115 static int FNAME(walk_addr)(struct guest_walker *walker,
 116                             struct kvm_vcpu *vcpu, gva_t addr,
 117                             int write_fault, int user_fault, int fetch_fault)
 118 {
 119         pt_element_t pte;
 120         gfn_t table_gfn;
 121         unsigned index, pt_access, pte_access;
 122         gpa_t pte_gpa;
 123         int rsvd_fault = 0;
 124
 125         trace_kvm_mmu_pagetable_walk(addr, write_fault, user_fault,
 126                                      fetch_fault);
 127 walk:
 128         walker->level = vcpu->arch.mmu.root_level;
 129         pte = vcpu->arch.cr3;
 130 #if PTTYPE == 64
 131         if (!is_long_mode(vcpu)) {
 132                 pte = kvm_pdptr_read(vcpu, (addr >> 30) & 3);
 133                 trace_kvm_mmu_paging_element(pte, walker->level);
 134                 if (!is_present_gpte(pte))
 135                         goto not_present;
 136                 --walker->level;
 137         }
 138 #endif
 139         ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
 140                (vcpu->arch.cr3 & CR3_NONPAE_RESERVED_BITS) == 0);
 141
 142         pt_access = ACC_ALL;
 143
 144         for (;;) {
 145                 index = PT_INDEX(addr, walker->level);
 146
 147                 table_gfn = gpte_to_gfn(pte);
 148                 pte_gpa = gfn_to_gpa(table_gfn);
 149                 pte_gpa += index * sizeof(pt_element_t);
 150                 walker->table_gfn[walker->level - 1] = table_gfn;
 151                 walker->pte_gpa[walker->level - 1] = pte_gpa;
 152
 153                 kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte));
 154                 trace_kvm_mmu_paging_element(pte, walker->level);
 155
 156                 if (!is_present_gpte(pte))
 157                         goto not_present;
 158
 159                 rsvd_fault = is_rsvd_bits_set(vcpu, pte, walker->level);
 160                 if (rsvd_fault)
 161                         goto access_error;
 162
 163                 if (write_fault && !is_writeble_pte(pte))
 164                         if (user_fault || is_write_protection(vcpu))
 165                                 goto access_error;
 166
 167                 if (user_fault && !(pte & PT_USER_MASK))
 168                         goto access_error;
 169
 170 #if PTTYPE == 64
 171                 if (fetch_fault && is_nx(vcpu) && (pte & PT64_NX_MASK))
 172                         goto access_error;
 173 #endif
 174
 175                 if (!(pte & PT_ACCESSED_MASK)) {
 176                         trace_kvm_mmu_set_accessed_bit(table_gfn, index,
 177                                                        sizeof(pte));
 178                         mark_page_dirty(vcpu->kvm, table_gfn);
 179                         if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn,
 180                             index, pte, pte|PT_ACCESSED_MASK))
 181                                 goto walk;
 182                         pte |= PT_ACCESSED_MASK;
 183                 }
 184
 185                 pte_access = pt_access & FNAME(gpte_access)(vcpu, pte);
 186
 187                 walker->ptes[walker->level - 1] = pte;
 188
 189                 if ((walker->level == PT_PAGE_TABLE_LEVEL) ||
 190                     ((walker->level == PT_DIRECTORY_LEVEL) &&
 191                                 (pte & PT_PAGE_SIZE_MASK)  &&
 192                                 (PTTYPE == 64 || is_pse(vcpu))) ||
 193                     ((walker->level == PT_PDPE_LEVEL) &&
 194                                 (pte & PT_PAGE_SIZE_MASK)  &&
 195                                 is_long_mode(vcpu))) {
 196                         int lvl = walker->level;
 197
 198                         walker->gfn = gpte_to_gfn_lvl(pte, lvl);
 199                         walker->gfn += (addr & PT_LVL_OFFSET_MASK(lvl))
 200                                         >> PAGE_SHIFT;
 201
 202                         if (PTTYPE == 32 &&
 203                             walker->level == PT_DIRECTORY_LEVEL &&
 204                             is_cpuid_PSE36())
 205                                 walker->gfn += pse36_gfn_delta(pte);
 206
 207                         break;
 208                 }
 209
 210                 pt_access = pte_access;
 211                 --walker->level;
 212         }
 213
 214         if (write_fault && !is_dirty_gpte(pte)) {
 215                 bool ret;
 216
 217                 trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
 218                 mark_page_dirty(vcpu->kvm, table_gfn);
 219                 ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte,
 220                             pte|PT_DIRTY_MASK);
 221                 if (ret)
 222                         goto walk;
 223                 pte |= PT_DIRTY_MASK;
 224                 walker->ptes[walker->level - 1] = pte;
 225         }
 226
 227         walker->pt_access = pt_access;
 228         walker->pte_access = pte_access;
 229         pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
 230                  __func__, (u64)pte, pt_access, pte_access);
 231         return 1;
 232
 233 not_present:
 234         walker->error_code = 0;
 235         goto err;
 236
 237 access_error:
 238         walker->error_code = PFERR_PRESENT_MASK;
 239
 240 err:
 241         if (write_fault)
 242                 walker->error_code |= PFERR_WRITE_MASK;
 243         if (user_fault)
 244                 walker->error_code |= PFERR_USER_MASK;
 245         if (fetch_fault)
 246                 walker->error_code |= PFERR_FETCH_MASK;
 247         if (rsvd_fault)
 248                 walker->error_code |= PFERR_RSVD_MASK;
 249         trace_kvm_mmu_walker_error(walker->error_code);
 250         return 0;
 251 }
 252
 253 static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
 254                               u64 *spte, const void *pte)
 255 {
 256         pt_element_t gpte;
 257         unsigned pte_access;
 258         pfn_t pfn;
 259
 260         gpte = *(const pt_element_t *)pte;
 261         if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) {
 262                 if (!is_present_gpte(gpte))
 263                         __set_spte(spte, shadow_notrap_nonpresent_pte);
 264                 return;
 265         }
 266         pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
 267         pte_access = page->role.access & FNAME(gpte_access)(vcpu, gpte);
 268         if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn)
 269                 return;
 270         pfn = vcpu->arch.update_pte.pfn;
 271         if (is_error_pfn(pfn))
 272                 return;
 273         if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq))
 274                 return;
 275         kvm_get_pfn(pfn);
 276         mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
 277                      gpte & PT_DIRTY_MASK, NULL, PT_PAGE_TABLE_LEVEL,
 278                      gpte_to_gfn(gpte), pfn, true);
 279 }
 280
 281 /*
 282  * Fetch a shadow pte for a specific level in the paging hierarchy.
 283  */
 284 static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 285                          struct guest_walker *gw,
 286                          int user_fault, int write_fault, int hlevel,
 287                          int *ptwrite, pfn_t pfn)
 288 {
 289         unsigned access = gw->pt_access;
 290         struct kvm_mmu_page *shadow_page;
 291         u64 spte, *sptep = NULL;
 292         int direct;
 293         gfn_t table_gfn;
 294         int r;
 295         int level;
 296         pt_element_t curr_pte;
 297         struct kvm_shadow_walk_iterator iterator;
 298
 299         if (!is_present_gpte(gw->ptes[gw->level - 1]))
 300                 return NULL;
 301
 302         for_each_shadow_entry(vcpu, addr, iterator) {
 303                 level = iterator.level;
 304                 sptep = iterator.sptep;
 305                 if (iterator.level == hlevel) {
 306                         mmu_set_spte(vcpu, sptep, access,
 307                                      gw->pte_access & access,
 308                                      user_fault, write_fault,
 309                                      gw->ptes[gw->level-1] & PT_DIRTY_MASK,
 310                                      ptwrite, level,
 311                                      gw->gfn, pfn, false);
 312                         break;
 313                 }
 314
 315                 if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep))
 316                         continue;
 317
 318                 if (is_large_pte(*sptep)) {
 319                         rmap_remove(vcpu->kvm, sptep);
 320                         __set_spte(sptep, shadow_trap_nonpresent_pte);
 321                         kvm_flush_remote_tlbs(vcpu->kvm);
 322                 }
 323
 324                 if (level <= gw->level) {
 325                         int delta = level - gw->level + 1;
 326                         direct = 1;
 327                         if (!is_dirty_gpte(gw->ptes[level - delta]))
 328                                 access &= ~ACC_WRITE_MASK;
 329                         table_gfn = gpte_to_gfn(gw->ptes[level - delta]);
 330                         /* advance table_gfn when emulating 1gb pages with 4k */
 331                         if (delta == 0)
 332                                 table_gfn += PT_INDEX(addr, level);
 333                 } else {
 334                         direct = 0;
 335                         table_gfn = gw->table_gfn[level - 2];
 336                 }
 337                 shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
 338                                                direct, access, sptep);
 339                 if (!direct) {
 340                         r = kvm_read_guest_atomic(vcpu->kvm,
 341                                                   gw->pte_gpa[level - 2],
 342                                                   &curr_pte, sizeof(curr_pte));
 343                         if (r || curr_pte != gw->ptes[level - 2]) {
 344                                 kvm_mmu_put_page(shadow_page, sptep);
 345                                 kvm_release_pfn_clean(pfn);
 346                                 sptep = NULL;
 347                                 break;
 348                         }
 349                 }
 350
 351                 spte = __pa(shadow_page->spt)
 352                         | PT_PRESENT_MASK | PT_ACCESSED_MASK
 353                         | PT_WRITABLE_MASK | PT_USER_MASK;
 354                 *sptep = spte;
 355         }
 356
 357         return sptep;
 358 }
 359
 360 /*
 361  * Page fault handler.  There are several causes for a page fault:
 362  *   - there is no shadow pte for the guest pte
 363  *   - write access through a shadow pte marked read only so that we can set
 364  *     the dirty bit
 365  *   - write access to a shadow pte marked read only so we can update the page
 366  *     dirty bitmap, when userspace requests it
 367  *   - mmio access; in this case we will never install a present shadow pte
 368  *   - normal guest page fault due to the guest pte marked not present, not
 369  *     writable, or not executable
 370  *
 371  *  Returns: 1 if we need to emulate the instruction, 0 otherwise, or
 372  *           a negative value on error.
 373  */
 374 static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 375                                u32 error_code)
 376 {
 377         int write_fault = error_code & PFERR_WRITE_MASK;
 378         int user_fault = error_code & PFERR_USER_MASK;
 379         int fetch_fault = error_code & PFERR_FETCH_MASK;
 380         struct guest_walker walker;
 381         u64 *sptep;
 382         int write_pt = 0;
 383         int r;
 384         pfn_t pfn;
 385         int level = PT_PAGE_TABLE_LEVEL;
 386         unsigned long mmu_seq;
 387
 388         pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
 389         kvm_mmu_audit(vcpu, "pre page fault");
 390
 391         r = mmu_topup_memory_caches(vcpu);
 392         if (r)
 393                 return r;
 394
 395         /*
 396          * Look up the guest pte for the faulting address.
 397          */
 398         r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault,
 399                              fetch_fault);
 400
 401         /*
 402          * The page is not mapped by the guest.  Let the guest handle it.
 403          */
 404         if (!r) {
 405                 pgprintk("%s: guest page fault\n", __func__);
 406                 inject_page_fault(vcpu, addr, walker.error_code);
 407                 vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
 408                 return 0;
 409         }
 410
 411         if (walker.level >= PT_DIRECTORY_LEVEL) {
 412                 level = min(walker.level, mapping_level(vcpu, walker.gfn));
 413                 walker.gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE(level) - 1);
 414         }
 415
 416         mmu_seq = vcpu->kvm->mmu_notifier_seq;
 417         smp_rmb();
 418         pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);
 419
 420         /* mmio */
 421         if (is_error_pfn(pfn)) {
 422                 pgprintk("gfn %lx is mmio\n", walker.gfn);
 423                 kvm_release_pfn_clean(pfn);
 424                 return 1;
 425         }
 426
 427         spin_lock(&vcpu->kvm->mmu_lock);
 428         if (mmu_notifier_retry(vcpu, mmu_seq))
 429                 goto out_unlock;
 430         kvm_mmu_free_some_pages(vcpu);
 431         sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
 432                              level, &write_pt, pfn);
 433         pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__,
 434                  sptep, *sptep, write_pt);
 435
 436         if (!write_pt)
 437                 vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
 438
 439         ++vcpu->stat.pf_fixed;
 440         kvm_mmu_audit(vcpu, "post page fault (fixed)");
 441         spin_unlock(&vcpu->kvm->mmu_lock);
 442
 443         return write_pt;
 444
 445 out_unlock:
 446         spin_unlock(&vcpu->kvm->mmu_lock);
 447         kvm_release_pfn_clean(pfn);
 448         return 0;
 449 }
 450
 451 static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 452 {
 453         struct kvm_shadow_walk_iterator iterator;
 454         pt_element_t gpte;
 455         gpa_t pte_gpa = -1;
 456         int level;
 457         u64 *sptep;
 458         int need_flush = 0;
 459
 460         spin_lock(&vcpu->kvm->mmu_lock);
 461
 462         for_each_shadow_entry(vcpu, gva, iterator) {
 463                 level = iterator.level;
 464                 sptep = iterator.sptep;
 465
 466                 /* FIXME: properly handle invlpg on large guest pages */
 467                 if (level == PT_PAGE_TABLE_LEVEL  ||
 468                     ((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) ||
 469                     ((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) {
 470                         struct kvm_mmu_page *sp = page_header(__pa(sptep));
 471
 472                         pte_gpa = (sp->gfn << PAGE_SHIFT);
 473                         pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);
 474
 475                         if (is_shadow_present_pte(*sptep)) {
 476                                 rmap_remove(vcpu->kvm, sptep);
 477                                 if (is_large_pte(*sptep))
 478                                         --vcpu->kvm->stat.lpages;
 479                                 need_flush = 1;
 480                         }
 481                         __set_spte(sptep, shadow_trap_nonpresent_pte);
 482                         break;
 483                 }
 484
 485                 if (!is_shadow_present_pte(*sptep))
 486                         break;
 487         }
 488
 489         if (need_flush)
 490                 kvm_flush_remote_tlbs(vcpu->kvm);
 491         spin_unlock(&vcpu->kvm->mmu_lock);
 492
 493         if (pte_gpa == -1)
 494                 return;
 495         if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte,
 496                                   sizeof(pt_element_t)))
 497                 return;
 498         if (is_present_gpte(gpte) && (gpte & PT_ACCESSED_MASK)) {
 499                 if (mmu_topup_memory_caches(vcpu))
 500                         return;
 501                 kvm_mmu_pte_write(vcpu, pte_gpa, (const u8 *)&gpte,
 502                                   sizeof(pt_element_t), 0);
 503         }
 504 }
 505
 506 static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
 507 {
 508         struct guest_walker walker;
 509         gpa_t gpa = UNMAPPED_GVA;
 510         int r;
 511
 512         r = FNAME(walk_addr)(&walker, vcpu, vaddr, 0, 0, 0);
 513
 514         if (r) {
 515                 gpa = gfn_to_gpa(walker.gfn);
 516                 gpa |= vaddr & ~PAGE_MASK;
 517         }
 518
 519         return gpa;
 520 }
 521
 522 static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
 523                                  struct kvm_mmu_page *sp)
 524 {
 525         int i, j, offset, r;
 526         pt_element_t pt[256 / sizeof(pt_element_t)];
 527         gpa_t pte_gpa;
 528
 529         if (sp->role.direct
 530             || (PTTYPE == 32 && sp->role.level > PT_PAGE_TABLE_LEVEL)) {
 531                 nonpaging_prefetch_page(vcpu, sp);
 532                 return;
 533         }
 534
 535         pte_gpa = gfn_to_gpa(sp->gfn);
 536         if (PTTYPE == 32) {
 537                 offset = sp->role.quadrant << PT64_LEVEL_BITS;
 538                 pte_gpa += offset * sizeof(pt_element_t);
 539         }
 540
 541         for (i = 0; i < PT64_ENT_PER_PAGE; i += ARRAY_SIZE(pt)) {
 542                 r = kvm_read_guest_atomic(vcpu->kvm, pte_gpa, pt, sizeof pt);
 543                 pte_gpa += ARRAY_SIZE(pt) * sizeof(pt_element_t);
 544                 for (j = 0; j < ARRAY_SIZE(pt); ++j)
 545                         if (r || is_present_gpte(pt[j]))
 546                                 sp->spt[i+j] = shadow_trap_nonpresent_pte;
 547                         else
 548                                 sp->spt[i+j] = shadow_notrap_nonpresent_pte;
 549         }
 550 }
 551
 552 /*
 553  * Using the cached information from sp->gfns is safe because:
 554  * - The spte has a reference to the struct page, so the pfn for a given gfn
 555  *   can't change unless all sptes pointing to it are nuked first.
 556  * - Alias changes zap the entire shadow cache.
 557  */
 558 static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 559 {
 560         int i, offset, nr_present;
 561
 562         offset = nr_present = 0;
 563
 564         if (PTTYPE == 32)
 565                 offset = sp->role.quadrant << PT64_LEVEL_BITS;
 566
 567         for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
 568                 unsigned pte_access;
 569                 pt_element_t gpte;
 570                 gpa_t pte_gpa;
 571                 gfn_t gfn = sp->gfns[i];
 572
 573                 if (!is_shadow_present_pte(sp->spt[i]))
 574                         continue;
 575
 576                 pte_gpa = gfn_to_gpa(sp->gfn);
 577                 pte_gpa += (i+offset) * sizeof(pt_element_t);
 578
 579                 if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte,
 580                                           sizeof(pt_element_t)))
 581                         return -EINVAL;
 582
 583                 if (gpte_to_gfn(gpte) != gfn || !is_present_gpte(gpte) ||
 584                     !(gpte & PT_ACCESSED_MASK)) {
 585                         u64 nonpresent;
 586
 587                         rmap_remove(vcpu->kvm, &sp->spt[i]);
 588                         if (is_present_gpte(gpte))
 589                                 nonpresent = shadow_trap_nonpresent_pte;
 590                         else
 591                                 nonpresent = shadow_notrap_nonpresent_pte;
 592                         __set_spte(&sp->spt[i], nonpresent);
 593                         continue;
 594                 }
 595
 596                 nr_present++;
 597                 pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
 598                 set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
 599                          is_dirty_gpte(gpte), PT_PAGE_TABLE_LEVEL, gfn,
 600                          spte_to_pfn(sp->spt[i]), true, false);
 601         }
 602
 603         return !nr_present;
 604 }
 605
 606 #undef pt_element_t
 607 #undef guest_walker
 608 #undef FNAME
 609 #undef PT_BASE_ADDR_MASK
 610 #undef PT_INDEX
 611 #undef PT_LEVEL_MASK
 612 #undef PT_LVL_ADDR_MASK
 613 #undef PT_LVL_OFFSET_MASK
 614 #undef PT_LEVEL_BITS
 615 #undef PT_MAX_FULL_LEVELS
 616 #undef gpte_to_gfn
 617 #undef gpte_to_gfn_lvl
 618 #undef CMPXCHG