arch/powerpc/mm/tlb_nohash.c

   1 /*
   2  * This file contains the routines for TLB flushing.
   3  * On machines where the MMU does not use a hash table to store virtual to
    4  * physical translations (ie, SW loaded TLBs or Book3E compliant processors,
   5  * this does -not- include 603 however which shares the implementation with
   6  * hash based processors)
   7  *
   8  *  -- BenH
   9  *
  10  * Copyright 2008,2009 Ben Herrenschmidt <benh@kernel.crashing.org>
  11  *                     IBM Corp.
  12  *
  13  *  Derived from arch/ppc/mm/init.c:
  14  *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
  15  *
  16  *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
  17  *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
  18  *    Copyright (C) 1996 Paul Mackerras
  19  *
  20  *  Derived from "arch/i386/mm/init.c"
  21  *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
  22  *
  23  *  This program is free software; you can redistribute it and/or
  24  *  modify it under the terms of the GNU General Public License
  25  *  as published by the Free Software Foundation; either version
  26  *  2 of the License, or (at your option) any later version.
  27  *
  28  */
  29
  30 #include <linux/kernel.h>
  31 #include <linux/mm.h>
  32 #include <linux/init.h>
  33 #include <linux/highmem.h>
  34 #include <linux/pagemap.h>
  35 #include <linux/preempt.h>
  36 #include <linux/spinlock.h>
  37 #include <linux/memblock.h>
  38
  39 #include <asm/tlbflush.h>
  40 #include <asm/tlb.h>
  41 #include <asm/code-patching.h>
  42
  43 #include "mmu_decl.h"
  44
  45 #ifdef CONFIG_PPC_BOOK3E
  46 struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
  47         [MMU_PAGE_4K] = {
  48                 .shift  = 12,
  49                 .enc    = BOOK3E_PAGESZ_4K,
  50         },
  51         [MMU_PAGE_16K] = {
  52                 .shift  = 14,
  53                 .enc    = BOOK3E_PAGESZ_16K,
  54         },
  55         [MMU_PAGE_64K] = {
  56                 .shift  = 16,
  57                 .enc    = BOOK3E_PAGESZ_64K,
  58         },
  59         [MMU_PAGE_1M] = {
  60                 .shift  = 20,
  61                 .enc    = BOOK3E_PAGESZ_1M,
  62         },
  63         [MMU_PAGE_16M] = {
  64                 .shift  = 24,
  65                 .enc    = BOOK3E_PAGESZ_16M,
  66         },
  67         [MMU_PAGE_256M] = {
  68                 .shift  = 28,
  69                 .enc    = BOOK3E_PAGESZ_256M,
  70         },
  71         [MMU_PAGE_1G] = {
  72                 .shift  = 30,
  73                 .enc    = BOOK3E_PAGESZ_1GB,
  74         },
  75 };
  76 static inline int mmu_get_tsize(int psize)
  77 {
  78         return mmu_psize_defs[psize].enc;
  79 }
  80 #else
  81 static inline int mmu_get_tsize(int psize)
  82 {
  83         /* This isn't used on !Book3E for now */
  84         return 0;
  85 }
  86 #endif
  87
  88 /* The variables below are currently only used on 64-bit Book3E
  89  * though this will probably be made common with other nohash
  90  * implementations at some point
  91  */
  92 #ifdef CONFIG_PPC64
  93
  94 int mmu_linear_psize;           /* Page size used for the linear mapping */
  95 int mmu_pte_psize;              /* Page size used for PTE pages */
  96 int mmu_vmemmap_psize;          /* Page size used for the virtual mem map */
  97 int book3e_htw_enabled;         /* Is HW tablewalk enabled ? */
  98 unsigned long linear_map_top;   /* Top of linear mapping */
  99
 100 #endif /* CONFIG_PPC64 */
 101
 102 /*
 103  * Base TLB flushing operations:
 104  *
 105  *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
 106  *  - flush_tlb_page(vma, vmaddr) flushes one page
 107  *  - flush_tlb_range(vma, start, end) flushes a range of pages
 108  *  - flush_tlb_kernel_range(start, end) flushes kernel pages
 109  *
 110  *  - local_* variants of page and mm only apply to the current
 111  *    processor
 112  */
 113
 114 /*
 115  * These are the base non-SMP variants of page and mm flushing
 116  */
 117 void local_flush_tlb_mm(struct mm_struct *mm)
 118 {
 119         unsigned int pid;
 120
 121         preempt_disable();
 122         pid = mm->context.id;
 123         if (pid != MMU_NO_CONTEXT)
 124                 _tlbil_pid(pid);
 125         preempt_enable();
 126 }
 127 EXPORT_SYMBOL(local_flush_tlb_mm);
 128
 129 void __local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
 130                             int tsize, int ind)
 131 {
 132         unsigned int pid;
 133
 134         preempt_disable();
 135         pid = mm ? mm->context.id : 0;
 136         if (pid != MMU_NO_CONTEXT)
 137                 _tlbil_va(vmaddr, pid, tsize, ind);
 138         preempt_enable();
 139 }
 140
 141 void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
 142 {
 143         __local_flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr,
 144                                mmu_get_tsize(mmu_virtual_psize), 0);
 145 }
 146 EXPORT_SYMBOL(local_flush_tlb_page);
 147
 148 /*
 149  * And here are the SMP non-local implementations
 150  */
 151 #ifdef CONFIG_SMP
 152
 153 static DEFINE_RAW_SPINLOCK(tlbivax_lock);
 154
 155 static int mm_is_core_local(struct mm_struct *mm)
 156 {
 157         return cpumask_subset(mm_cpumask(mm),
 158                               topology_thread_cpumask(smp_processor_id()));
 159 }
 160
 161 struct tlb_flush_param {
 162         unsigned long addr;
 163         unsigned int pid;
 164         unsigned int tsize;
 165         unsigned int ind;
 166 };
 167
 168 static void do_flush_tlb_mm_ipi(void *param)
 169 {
 170         struct tlb_flush_param *p = param;
 171
 172         _tlbil_pid(p ? p->pid : 0);
 173 }
 174
 175 static void do_flush_tlb_page_ipi(void *param)
 176 {
 177         struct tlb_flush_param *p = param;
 178
 179         _tlbil_va(p->addr, p->pid, p->tsize, p->ind);
 180 }
 181
 182
 183 /* Note on invalidations and PID:
 184  *
 185  * We snapshot the PID with preempt disabled. At this point, it can still
 186  * change either because:
 187  * - our context is being stolen (PID -> NO_CONTEXT) on another CPU
  188  * - we are invalidating some target that isn't currently running here
 189  *   and is concurrently acquiring a new PID on another CPU
 190  * - some other CPU is re-acquiring a lost PID for this mm
 191  * etc...
 192  *
 193  * However, this shouldn't be a problem as we only guarantee
 194  * invalidation of TLB entries present prior to this call, so we
 195  * don't care about the PID changing, and invalidating a stale PID
 196  * is generally harmless.
 197  */
 198
 199 void flush_tlb_mm(struct mm_struct *mm)
 200 {
 201         unsigned int pid;
 202
 203         preempt_disable();
 204         pid = mm->context.id;
 205         if (unlikely(pid == MMU_NO_CONTEXT))
 206                 goto no_context;
 207         if (!mm_is_core_local(mm)) {
 208                 struct tlb_flush_param p = { .pid = pid };
 209                 /* Ignores smp_processor_id() even if set. */
 210                 smp_call_function_many(mm_cpumask(mm),
 211                                        do_flush_tlb_mm_ipi, &p, 1);
 212         }
 213         _tlbil_pid(pid);
 214  no_context:
 215         preempt_enable();
 216 }
 217 EXPORT_SYMBOL(flush_tlb_mm);
 218
 219 void __flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
 220                       int tsize, int ind)
 221 {
 222         struct cpumask *cpu_mask;
 223         unsigned int pid;
 224
 225         preempt_disable();
 226         pid = mm ? mm->context.id : 0;
 227         if (unlikely(pid == MMU_NO_CONTEXT))
 228                 goto bail;
 229         cpu_mask = mm_cpumask(mm);
 230         if (!mm_is_core_local(mm)) {
 231                 /* If broadcast tlbivax is supported, use it */
 232                 if (mmu_has_feature(MMU_FTR_USE_TLBIVAX_BCAST)) {
 233                         int lock = mmu_has_feature(MMU_FTR_LOCK_BCAST_INVAL);
 234                         if (lock)
 235                                 raw_spin_lock(&tlbivax_lock);
 236                         _tlbivax_bcast(vmaddr, pid, tsize, ind);
 237                         if (lock)
 238                                 raw_spin_unlock(&tlbivax_lock);
 239                         goto bail;
 240                 } else {
 241                         struct tlb_flush_param p = {
 242                                 .pid = pid,
 243                                 .addr = vmaddr,
 244                                 .tsize = tsize,
 245                                 .ind = ind,
 246                         };
 247                         /* Ignores smp_processor_id() even if set in cpu_mask */
 248                         smp_call_function_many(cpu_mask,
 249                                                do_flush_tlb_page_ipi, &p, 1);
 250                 }
 251         }
 252         _tlbil_va(vmaddr, pid, tsize, ind);
 253  bail:
 254         preempt_enable();
 255 }
 256
 257 void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
 258 {
 259         __flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr,
 260                          mmu_get_tsize(mmu_virtual_psize), 0);
 261 }
 262 EXPORT_SYMBOL(flush_tlb_page);
 263
 264 #endif /* CONFIG_SMP */
 265
 266 /*
 267  * Flush kernel TLB entries in the given range
 268  */
 269 void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 270 {
 271 #ifdef CONFIG_SMP
 272         preempt_disable();
 273         smp_call_function(do_flush_tlb_mm_ipi, NULL, 1);
 274         _tlbil_pid(0);
 275         preempt_enable();
 276 #else
 277         _tlbil_pid(0);
 278 #endif
 279 }
 280 EXPORT_SYMBOL(flush_tlb_kernel_range);
 281
 282 /*
 283  * Currently, for range flushing, we just do a full mm flush. This should
 284  * be optimized based on a threshold on the size of the range, since
 285  * some implementation can stack multiple tlbivax before a tlbsync but
 286  * for now, we keep it that way
 287  */
 288 void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 289                      unsigned long end)
 290
 291 {
 292         flush_tlb_mm(vma->vm_mm);
 293 }
 294 EXPORT_SYMBOL(flush_tlb_range);
 295
 296 void tlb_flush(struct mmu_gather *tlb)
 297 {
 298         flush_tlb_mm(tlb->mm);
 299
 300         /* Push out batch of freed page tables */
 301         pte_free_finish();
 302 }
 303
 304 /*
 305  * Below are functions specific to the 64-bit variant of Book3E though that
 306  * may change in the future
 307  */
 308
 309 #ifdef CONFIG_PPC64
 310
 311 /*
 312  * Handling of virtual linear page tables or indirect TLB entries
 313  * flushing when PTE pages are freed
 314  */
 315 void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address)
 316 {
 317         int tsize = mmu_psize_defs[mmu_pte_psize].enc;
 318
 319         if (book3e_htw_enabled) {
 320                 unsigned long start = address & PMD_MASK;
 321                 unsigned long end = address + PMD_SIZE;
 322                 unsigned long size = 1UL << mmu_psize_defs[mmu_pte_psize].shift;
 323
 324                 /* This isn't the most optimal, ideally we would factor out the
 325                  * while preempt & CPU mask mucking around, or even the IPI but
 326                  * it will do for now
 327                  */
 328                 while (start < end) {
 329                         __flush_tlb_page(tlb->mm, start, tsize, 1);
 330                         start += size;
 331                 }
 332         } else {
 333                 unsigned long rmask = 0xf000000000000000ul;
 334                 unsigned long rid = (address & rmask) | 0x1000000000000000ul;
 335                 unsigned long vpte = address & ~rmask;
 336
 337 #ifdef CONFIG_PPC_64K_PAGES
 338                 vpte = (vpte >> (PAGE_SHIFT - 4)) & ~0xfffful;
 339 #else
 340                 vpte = (vpte >> (PAGE_SHIFT - 3)) & ~0xffful;
 341 #endif
 342                 vpte |= rid;
 343                 __flush_tlb_page(tlb->mm, vpte, tsize, 0);
 344         }
 345 }
 346
 347 /*
 348  * Early initialization of the MMU TLB code
 349  */
 350 static void __early_init_mmu(int boot_cpu)
 351 {
 352         extern unsigned int interrupt_base_book3e;
 353         extern unsigned int exc_data_tlb_miss_htw_book3e;
 354         extern unsigned int exc_instruction_tlb_miss_htw_book3e;
 355
 356         unsigned int *ibase = &interrupt_base_book3e;
 357         unsigned int mas4;
 358
 359         /* XXX This will have to be decided at runtime, but right
 360          * now our boot and TLB miss code hard wires it. Ideally
 361          * we should find out a suitable page size and patch the
 362          * TLB miss code (either that or use the PACA to store
 363          * the value we want)
 364          */
 365         mmu_linear_psize = MMU_PAGE_1G;
 366
 367         /* XXX This should be decided at runtime based on supported
 368          * page sizes in the TLB, but for now let's assume 16M is
 369          * always there and a good fit (which it probably is)
 370          */
 371         mmu_vmemmap_psize = MMU_PAGE_16M;
 372
 373         /* Check if HW tablewalk is present, and if yes, enable it by:
 374          *
 375          * - patching the TLB miss handlers to branch to the
 376          *   one dedicates to it
 377          *
 378          * - setting the global book3e_htw_enabled
 379          *
 380          * - Set MAS4:INDD and default page size
 381          */
 382
 383         /* XXX This code only checks for TLB 0 capabilities and doesn't
 384          *     check what page size combos are supported by the HW. It
 385          *     also doesn't handle the case where a separate array holds
 386          *     the IND entries from the array loaded by the PT.
 387          */
 388         if (boot_cpu) {
 389                 unsigned int tlb0cfg = mfspr(SPRN_TLB0CFG);
 390
 391                 /* Check if HW loader is supported */
 392                 if ((tlb0cfg & TLBnCFG_IND) &&
 393                     (tlb0cfg & TLBnCFG_PT)) {
 394                         patch_branch(ibase + (0x1c0 / 4),
 395                              (unsigned long)&exc_data_tlb_miss_htw_book3e, 0);
 396                         patch_branch(ibase + (0x1e0 / 4),
 397                              (unsigned long)&exc_instruction_tlb_miss_htw_book3e, 0);
 398                         book3e_htw_enabled = 1;
 399                 }
 400                 pr_info("MMU: Book3E Page Tables %s\n",
 401                         book3e_htw_enabled ? "Enabled" : "Disabled");
 402         }
 403
 404         /* Set MAS4 based on page table setting */
 405
 406         mas4 = 0x4 << MAS4_WIMGED_SHIFT;
 407         if (book3e_htw_enabled) {
 408                 mas4 |= mas4 | MAS4_INDD;
 409 #ifdef CONFIG_PPC_64K_PAGES
 410                 mas4 |= BOOK3E_PAGESZ_256M << MAS4_TSIZED_SHIFT;
 411                 mmu_pte_psize = MMU_PAGE_256M;
 412 #else
 413                 mas4 |= BOOK3E_PAGESZ_1M << MAS4_TSIZED_SHIFT;
 414                 mmu_pte_psize = MMU_PAGE_1M;
 415 #endif
 416         } else {
 417 #ifdef CONFIG_PPC_64K_PAGES
 418                 mas4 |= BOOK3E_PAGESZ_64K << MAS4_TSIZED_SHIFT;
 419 #else
 420                 mas4 |= BOOK3E_PAGESZ_4K << MAS4_TSIZED_SHIFT;
 421 #endif
 422                 mmu_pte_psize = mmu_virtual_psize;
 423         }
 424         mtspr(SPRN_MAS4, mas4);
 425
 426         /* Set the global containing the top of the linear mapping
 427          * for use by the TLB miss code
 428          */
 429         linear_map_top = memblock_end_of_DRAM();
 430
 431         /* A sync won't hurt us after mucking around with
 432          * the MMU configuration
 433          */
 434         mb();
 435
 436         memblock_set_current_limit(linear_map_top);
 437 }
 438
 439 void __init early_init_mmu(void)
 440 {
 441         __early_init_mmu(1);
 442 }
 443
 444 void __cpuinit early_init_mmu_secondary(void)
 445 {
 446         __early_init_mmu(0);
 447 }
 448
 449 void setup_initial_memory_limit(phys_addr_t first_memblock_base,
 450                                 phys_addr_t first_memblock_size)
 451 {
 452         /* On Embedded 64-bit, we adjust the RMA size to match
 453          * the bolted TLB entry. We know for now that only 1G
 454          * entries are supported though that may eventually
 455          * change. We crop it to the size of the first MEMBLOCK to
 456          * avoid going over total available memory just in case...
 457          */
 458         ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000);
 459
 460         /* Finally limit subsequent allocations */
 461         memblock_set_current_limit(ppc64_memblock_base + ppc64_rma_size);
 462 }
 463 #endif /* CONFIG_PPC64 */