Commit | Line | Data |
---|---|---|
f048aace BH |
1 | /* |
2 | * This file contains the routines for TLB flushing. | |
3 | * On machines where the MMU does not use a hash table to store virtual to | |
4 | * physical translations (ie, SW loaded TLBs or Book3E compliant processors, | |
5 | * this does -not- include 603 however which shares the implementation with | |
6 | * hash based processors) | |
7 | * | |
8 | * -- BenH | |
9 | * | |
25d21ad6 BH |
10 | * Copyright 2008,2009 Ben Herrenschmidt <benh@kernel.crashing.org> |
11 | * IBM Corp. | |
f048aace BH |
12 | * |
13 | * Derived from arch/ppc/mm/init.c: | |
14 | * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) | |
15 | * | |
16 | * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) | |
17 | * and Cort Dougan (PReP) (cort@cs.nmt.edu) | |
18 | * Copyright (C) 1996 Paul Mackerras | |
19 | * | |
20 | * Derived from "arch/i386/mm/init.c" | |
21 | * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds | |
22 | * | |
23 | * This program is free software; you can redistribute it and/or | |
24 | * modify it under the terms of the GNU General Public License | |
25 | * as published by the Free Software Foundation; either version | |
26 | * 2 of the License, or (at your option) any later version. | |
27 | * | |
28 | */ | |
29 | ||
30 | #include <linux/kernel.h> | |
31 | #include <linux/mm.h> | |
32 | #include <linux/init.h> | |
33 | #include <linux/highmem.h> | |
34 | #include <linux/pagemap.h> | |
35 | #include <linux/preempt.h> | |
36 | #include <linux/spinlock.h> | |
95f72d1e | 37 | #include <linux/memblock.h> |
f048aace BH |
38 | |
39 | #include <asm/tlbflush.h> | |
40 | #include <asm/tlb.h> | |
25d21ad6 | 41 | #include <asm/code-patching.h> |
f048aace BH |
42 | |
43 | #include "mmu_decl.h" | |
44 | ||
25d21ad6 BH |
45 | #ifdef CONFIG_PPC_BOOK3E |
46 | struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { | |
47 | [MMU_PAGE_4K] = { | |
48 | .shift = 12, | |
49 | .enc = BOOK3E_PAGESZ_4K, | |
50 | }, | |
51 | [MMU_PAGE_16K] = { | |
52 | .shift = 14, | |
53 | .enc = BOOK3E_PAGESZ_16K, | |
54 | }, | |
55 | [MMU_PAGE_64K] = { | |
56 | .shift = 16, | |
57 | .enc = BOOK3E_PAGESZ_64K, | |
58 | }, | |
59 | [MMU_PAGE_1M] = { | |
60 | .shift = 20, | |
61 | .enc = BOOK3E_PAGESZ_1M, | |
62 | }, | |
63 | [MMU_PAGE_16M] = { | |
64 | .shift = 24, | |
65 | .enc = BOOK3E_PAGESZ_16M, | |
66 | }, | |
67 | [MMU_PAGE_256M] = { | |
68 | .shift = 28, | |
69 | .enc = BOOK3E_PAGESZ_256M, | |
70 | }, | |
71 | [MMU_PAGE_1G] = { | |
72 | .shift = 30, | |
73 | .enc = BOOK3E_PAGESZ_1GB, | |
74 | }, | |
75 | }; | |
76 | static inline int mmu_get_tsize(int psize) | |
77 | { | |
78 | return mmu_psize_defs[psize].enc; | |
79 | } | |
80 | #else | |
/*
 * Stub used when CONFIG_PPC_BOOK3E is not set: the size encoding isn't
 * used on !Book3E for now, so every page size index maps to 0.
 */
static inline int mmu_get_tsize(int psize)
{
	(void)psize;	/* deliberately ignored */

	return 0;
}
86 | #endif | |
87 | ||
88 | /* The variables below are currently only used on 64-bit Book3E | |
89 | * though this will probably be made common with other nohash | |
90 | * implementations at some point | |
91 | */ | |
92 | #ifdef CONFIG_PPC64 | |
93 | ||
/* Page size (MMU_PAGE_* index) used for the linear mapping */
int mmu_linear_psize;

/* Page size (MMU_PAGE_* index) used for PTE pages */
int mmu_pte_psize;

/* Page size (MMU_PAGE_* index) used for the virtual mem map */
int mmu_vmemmap_psize;

/* Non-zero once HW tablewalk has been enabled */
int book3e_htw_enabled;

/* Top of linear mapping */
unsigned long linear_map_top;
99 | ||
100 | #endif /* CONFIG_PPC64 */ | |
101 | ||
f048aace BH |
102 | /* |
103 | * Base TLB flushing operations: | |
104 | * | |
105 | * - flush_tlb_mm(mm) flushes the specified mm context TLB's | |
106 | * - flush_tlb_page(vma, vmaddr) flushes one page | |
107 | * - flush_tlb_range(vma, start, end) flushes a range of pages | |
108 | * - flush_tlb_kernel_range(start, end) flushes kernel pages | |
109 | * | |
110 | * - local_* variants of page and mm only apply to the current | |
111 | * processor | |
112 | */ | |
113 | ||
114 | /* | |
115 | * These are the base non-SMP variants of page and mm flushing | |
116 | */ | |
117 | void local_flush_tlb_mm(struct mm_struct *mm) | |
118 | { | |
119 | unsigned int pid; | |
120 | ||
121 | preempt_disable(); | |
122 | pid = mm->context.id; | |
123 | if (pid != MMU_NO_CONTEXT) | |
124 | _tlbil_pid(pid); | |
125 | preempt_enable(); | |
126 | } | |
127 | EXPORT_SYMBOL(local_flush_tlb_mm); | |
128 | ||
d4e167da BH |
129 | void __local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, |
130 | int tsize, int ind) | |
f048aace BH |
131 | { |
132 | unsigned int pid; | |
133 | ||
134 | preempt_disable(); | |
d4e167da | 135 | pid = mm ? mm->context.id : 0; |
f048aace | 136 | if (pid != MMU_NO_CONTEXT) |
d4e167da | 137 | _tlbil_va(vmaddr, pid, tsize, ind); |
f048aace BH |
138 | preempt_enable(); |
139 | } | |
f048aace | 140 | |
d4e167da BH |
141 | void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) |
142 | { | |
143 | __local_flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr, | |
25d21ad6 | 144 | mmu_get_tsize(mmu_virtual_psize), 0); |
d4e167da BH |
145 | } |
146 | EXPORT_SYMBOL(local_flush_tlb_page); | |
f048aace BH |
147 | |
148 | /* | |
149 | * And here are the SMP non-local implementations | |
150 | */ | |
151 | #ifdef CONFIG_SMP | |
152 | ||
/*
 * Taken around _tlbivax_bcast() in __flush_tlb_page() when the MMU reports
 * MMU_FTR_LOCK_BCAST_INVAL.
 */
static DEFINE_RAW_SPINLOCK(tlbivax_lock);
f048aace | 154 | |
fcce8109 BH |
/*
 * Return non-zero when every CPU in mm_cpumask(@mm) is also in the thread
 * cpumask of the CPU we are running on.
 */
static int mm_is_core_local(struct mm_struct *mm)
{
	const struct cpumask *siblings =
		topology_thread_cpumask(smp_processor_id());

	return cpumask_subset(mm_cpumask(mm), siblings);
}
160 | ||
f048aace BH |
/* Argument bundle handed to the TLB flush IPI callbacks */
struct tlb_flush_param {
	/* Virtual address to invalidate (page flush only) */
	unsigned long addr;
	/* Context id (PID) whose entries are targeted */
	unsigned int pid;
	/* Size encoding and "ind" flag, forwarded to _tlbil_va() */
	unsigned int tsize;
	unsigned int ind;
};
167 | ||
168 | static void do_flush_tlb_mm_ipi(void *param) | |
169 | { | |
170 | struct tlb_flush_param *p = param; | |
171 | ||
172 | _tlbil_pid(p ? p->pid : 0); | |
173 | } | |
174 | ||
175 | static void do_flush_tlb_page_ipi(void *param) | |
176 | { | |
177 | struct tlb_flush_param *p = param; | |
178 | ||
d4e167da | 179 | _tlbil_va(p->addr, p->pid, p->tsize, p->ind); |
f048aace BH |
180 | } |
181 | ||
182 | ||
183 | /* Note on invalidations and PID: | |
184 | * | |
185 | * We snapshot the PID with preempt disabled. At this point, it can still | |
186 | * change either because: | |
187 | * - our context is being stolen (PID -> NO_CONTEXT) on another CPU | |
188 | * - we are invalidating some target that isn't currently running here | |
189 | * and is concurrently acquiring a new PID on another CPU | |
190 | * - some other CPU is re-acquiring a lost PID for this mm | |
191 | * etc... | |
192 | * | |
193 | * However, this shouldn't be a problem as we only guarantee | |
194 | * invalidation of TLB entries present prior to this call, so we | |
195 | * don't care about the PID changing, and invalidating a stale PID | |
196 | * is generally harmless. | |
197 | */ | |
198 | ||
199 | void flush_tlb_mm(struct mm_struct *mm) | |
200 | { | |
f048aace BH |
201 | unsigned int pid; |
202 | ||
203 | preempt_disable(); | |
204 | pid = mm->context.id; | |
205 | if (unlikely(pid == MMU_NO_CONTEXT)) | |
206 | goto no_context; | |
fcce8109 | 207 | if (!mm_is_core_local(mm)) { |
f048aace | 208 | struct tlb_flush_param p = { .pid = pid }; |
56aa4129 RR |
209 | /* Ignores smp_processor_id() even if set. */ |
210 | smp_call_function_many(mm_cpumask(mm), | |
211 | do_flush_tlb_mm_ipi, &p, 1); | |
f048aace BH |
212 | } |
213 | _tlbil_pid(pid); | |
214 | no_context: | |
215 | preempt_enable(); | |
216 | } | |
217 | EXPORT_SYMBOL(flush_tlb_mm); | |
218 | ||
d4e167da BH |
219 | void __flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, |
220 | int tsize, int ind) | |
f048aace | 221 | { |
56aa4129 | 222 | struct cpumask *cpu_mask; |
f048aace BH |
223 | unsigned int pid; |
224 | ||
225 | preempt_disable(); | |
d4e167da | 226 | pid = mm ? mm->context.id : 0; |
f048aace BH |
227 | if (unlikely(pid == MMU_NO_CONTEXT)) |
228 | goto bail; | |
d4e167da | 229 | cpu_mask = mm_cpumask(mm); |
fcce8109 | 230 | if (!mm_is_core_local(mm)) { |
f048aace BH |
231 | /* If broadcast tlbivax is supported, use it */ |
232 | if (mmu_has_feature(MMU_FTR_USE_TLBIVAX_BCAST)) { | |
233 | int lock = mmu_has_feature(MMU_FTR_LOCK_BCAST_INVAL); | |
234 | if (lock) | |
3eb93c55 | 235 | raw_spin_lock(&tlbivax_lock); |
d4e167da | 236 | _tlbivax_bcast(vmaddr, pid, tsize, ind); |
f048aace | 237 | if (lock) |
3eb93c55 | 238 | raw_spin_unlock(&tlbivax_lock); |
f048aace BH |
239 | goto bail; |
240 | } else { | |
d4e167da BH |
241 | struct tlb_flush_param p = { |
242 | .pid = pid, | |
243 | .addr = vmaddr, | |
244 | .tsize = tsize, | |
245 | .ind = ind, | |
246 | }; | |
56aa4129 RR |
247 | /* Ignores smp_processor_id() even if set in cpu_mask */ |
248 | smp_call_function_many(cpu_mask, | |
f048aace BH |
249 | do_flush_tlb_page_ipi, &p, 1); |
250 | } | |
251 | } | |
d4e167da | 252 | _tlbil_va(vmaddr, pid, tsize, ind); |
f048aace BH |
253 | bail: |
254 | preempt_enable(); | |
255 | } | |
d4e167da BH |
256 | |
257 | void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) | |
258 | { | |
259 | __flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr, | |
25d21ad6 | 260 | mmu_get_tsize(mmu_virtual_psize), 0); |
d4e167da | 261 | } |
f048aace BH |
262 | EXPORT_SYMBOL(flush_tlb_page); |
263 | ||
264 | #endif /* CONFIG_SMP */ | |
265 | ||
/*
 * Flush kernel TLB entries in the given range.
 *
 * @start and @end are currently not looked at: all PID 0 entries are
 * invalidated. On SMP the other CPUs are asked to do the same through
 * do_flush_tlb_mm_ipi() (NULL parameter, i.e. PID 0) before the local
 * invalidation, with preemption disabled around both steps.
 */
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
#ifdef CONFIG_SMP
	preempt_disable();
	smp_call_function(do_flush_tlb_mm_ipi, NULL, 1);
#endif
	_tlbil_pid(0);
#ifdef CONFIG_SMP
	preempt_enable();
#endif
}
EXPORT_SYMBOL(flush_tlb_kernel_range);
281 | ||
282 | /* | |
283 | * Currently, for range flushing, we just do a full mm flush. This should | |
284 | * be optimized based on a threshold on the size of the range, since | |
285 | * some implementation can stack multiple tlbivax before a tlbsync but | |
286 | * for now, we keep it that way | |
287 | */ | |
288 | void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, | |
289 | unsigned long end) | |
290 | ||
291 | { | |
292 | flush_tlb_mm(vma->vm_mm); | |
293 | } | |
294 | EXPORT_SYMBOL(flush_tlb_range); | |
c7cc58a1 BH |
295 | |
296 | void tlb_flush(struct mmu_gather *tlb) | |
297 | { | |
298 | flush_tlb_mm(tlb->mm); | |
299 | ||
300 | /* Push out batch of freed page tables */ | |
301 | pte_free_finish(); | |
302 | } | |
25d21ad6 BH |
303 | |
304 | /* | |
305 | * Below are functions specific to the 64-bit variant of Book3E though that | |
306 | * may change in the future | |
307 | */ | |
308 | ||
309 | #ifdef CONFIG_PPC64 | |
310 | ||
311 | /* | |
312 | * Handling of virtual linear page tables or indirect TLB entries | |
313 | * flushing when PTE pages are freed | |
314 | */ | |
315 | void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address) | |
316 | { | |
317 | int tsize = mmu_psize_defs[mmu_pte_psize].enc; | |
318 | ||
319 | if (book3e_htw_enabled) { | |
320 | unsigned long start = address & PMD_MASK; | |
321 | unsigned long end = address + PMD_SIZE; | |
322 | unsigned long size = 1UL << mmu_psize_defs[mmu_pte_psize].shift; | |
323 | ||
324 | /* This isn't the most optimal, ideally we would factor out the | |
325 | * while preempt & CPU mask mucking around, or even the IPI but | |
326 | * it will do for now | |
327 | */ | |
328 | while (start < end) { | |
329 | __flush_tlb_page(tlb->mm, start, tsize, 1); | |
330 | start += size; | |
331 | } | |
332 | } else { | |
333 | unsigned long rmask = 0xf000000000000000ul; | |
334 | unsigned long rid = (address & rmask) | 0x1000000000000000ul; | |
335 | unsigned long vpte = address & ~rmask; | |
336 | ||
337 | #ifdef CONFIG_PPC_64K_PAGES | |
338 | vpte = (vpte >> (PAGE_SHIFT - 4)) & ~0xfffful; | |
339 | #else | |
340 | vpte = (vpte >> (PAGE_SHIFT - 3)) & ~0xffful; | |
341 | #endif | |
342 | vpte |= rid; | |
343 | __flush_tlb_page(tlb->mm, vpte, tsize, 0); | |
344 | } | |
345 | } | |
346 | ||
347 | /* | |
348 | * Early initialization of the MMU TLB code | |
349 | */ | |
350 | static void __early_init_mmu(int boot_cpu) | |
351 | { | |
352 | extern unsigned int interrupt_base_book3e; | |
353 | extern unsigned int exc_data_tlb_miss_htw_book3e; | |
354 | extern unsigned int exc_instruction_tlb_miss_htw_book3e; | |
355 | ||
356 | unsigned int *ibase = &interrupt_base_book3e; | |
357 | unsigned int mas4; | |
358 | ||
359 | /* XXX This will have to be decided at runtime, but right | |
32a74949 BH |
360 | * now our boot and TLB miss code hard wires it. Ideally |
361 | * we should find out a suitable page size and patch the | |
362 | * TLB miss code (either that or use the PACA to store | |
363 | * the value we want) | |
25d21ad6 BH |
364 | */ |
365 | mmu_linear_psize = MMU_PAGE_1G; | |
366 | ||
32a74949 BH |
367 | /* XXX This should be decided at runtime based on supported |
368 | * page sizes in the TLB, but for now let's assume 16M is | |
369 | * always there and a good fit (which it probably is) | |
370 | */ | |
371 | mmu_vmemmap_psize = MMU_PAGE_16M; | |
25d21ad6 BH |
372 | |
373 | /* Check if HW tablewalk is present, and if yes, enable it by: | |
374 | * | |
375 | * - patching the TLB miss handlers to branch to the | |
376 | * one dedicates to it | |
377 | * | |
378 | * - setting the global book3e_htw_enabled | |
379 | * | |
380 | * - Set MAS4:INDD and default page size | |
381 | */ | |
382 | ||
383 | /* XXX This code only checks for TLB 0 capabilities and doesn't | |
384 | * check what page size combos are supported by the HW. It | |
385 | * also doesn't handle the case where a separate array holds | |
386 | * the IND entries from the array loaded by the PT. | |
387 | */ | |
388 | if (boot_cpu) { | |
389 | unsigned int tlb0cfg = mfspr(SPRN_TLB0CFG); | |
390 | ||
391 | /* Check if HW loader is supported */ | |
392 | if ((tlb0cfg & TLBnCFG_IND) && | |
393 | (tlb0cfg & TLBnCFG_PT)) { | |
394 | patch_branch(ibase + (0x1c0 / 4), | |
395 | (unsigned long)&exc_data_tlb_miss_htw_book3e, 0); | |
396 | patch_branch(ibase + (0x1e0 / 4), | |
397 | (unsigned long)&exc_instruction_tlb_miss_htw_book3e, 0); | |
398 | book3e_htw_enabled = 1; | |
399 | } | |
400 | pr_info("MMU: Book3E Page Tables %s\n", | |
401 | book3e_htw_enabled ? "Enabled" : "Disabled"); | |
402 | } | |
403 | ||
404 | /* Set MAS4 based on page table setting */ | |
405 | ||
406 | mas4 = 0x4 << MAS4_WIMGED_SHIFT; | |
407 | if (book3e_htw_enabled) { | |
408 | mas4 |= mas4 | MAS4_INDD; | |
409 | #ifdef CONFIG_PPC_64K_PAGES | |
410 | mas4 |= BOOK3E_PAGESZ_256M << MAS4_TSIZED_SHIFT; | |
411 | mmu_pte_psize = MMU_PAGE_256M; | |
412 | #else | |
413 | mas4 |= BOOK3E_PAGESZ_1M << MAS4_TSIZED_SHIFT; | |
414 | mmu_pte_psize = MMU_PAGE_1M; | |
415 | #endif | |
416 | } else { | |
417 | #ifdef CONFIG_PPC_64K_PAGES | |
418 | mas4 |= BOOK3E_PAGESZ_64K << MAS4_TSIZED_SHIFT; | |
419 | #else | |
420 | mas4 |= BOOK3E_PAGESZ_4K << MAS4_TSIZED_SHIFT; | |
421 | #endif | |
422 | mmu_pte_psize = mmu_virtual_psize; | |
423 | } | |
424 | mtspr(SPRN_MAS4, mas4); | |
425 | ||
426 | /* Set the global containing the top of the linear mapping | |
427 | * for use by the TLB miss code | |
428 | */ | |
95f72d1e | 429 | linear_map_top = memblock_end_of_DRAM(); |
25d21ad6 BH |
430 | |
431 | /* A sync won't hurt us after mucking around with | |
432 | * the MMU configuration | |
433 | */ | |
434 | mb(); | |
e63075a3 BH |
435 | |
436 | memblock_set_current_limit(linear_map_top); | |
25d21ad6 BH |
437 | } |
438 | ||
439 | void __init early_init_mmu(void) | |
440 | { | |
441 | __early_init_mmu(1); | |
442 | } | |
443 | ||
444 | void __cpuinit early_init_mmu_secondary(void) | |
445 | { | |
446 | __early_init_mmu(0); | |
447 | } | |
448 | ||
449 | #endif /* CONFIG_PPC64 */ |