Commit | Line | Data |
---|---|---|
1da177e4 | 1 | /* |
a23ba435 | 2 | * arch/sh/mm/cache-sh5.c |
1da177e4 LT |
3 | * |
4 | * Original version Copyright (C) 2000, 2001 Paolo Alberelli | |
5 | * Second version Copyright (C) benedict.gaster@superh.com 2002 | |
6 | * Third version Copyright Richard.Curnow@superh.com 2003 | |
7 | * Hacks to third version Copyright (C) 2003 Paul Mundt | |
a23ba435 PM |
8 | * |
9 | * This file is subject to the terms and conditions of the GNU General Public | |
10 | * License. See the file "COPYING" in the main directory of this archive | |
11 | * for more details. | |
1da177e4 | 12 | */ |
1da177e4 LT |
13 | #include <linux/init.h> |
14 | #include <linux/mman.h> | |
15 | #include <linux/mm.h> | |
16 | #include <linux/threads.h> | |
17 | #include <asm/page.h> | |
18 | #include <asm/pgtable.h> | |
19 | #include <asm/processor.h> | |
20 | #include <asm/cache.h> | |
21 | #include <asm/tlb.h> | |
22 | #include <asm/io.h> | |
23 | #include <asm/uaccess.h> | |
24 | #include <asm/mmu_context.h> | |
25 | #include <asm/pgalloc.h> /* for flush_itlb_range */ | |
26 | ||
27 | #include <linux/proc_fs.h> | |
28 | ||
29 | /* This function is in entry.S */ | |
30 | extern unsigned long switch_and_save_asid(unsigned long new_asid); | |
31 | ||
32 | /* Wired TLB entry for the D-cache */ | |
33 | static unsigned long long dtlb_cache_slot; | |
34 | ||
35 | /** | |
36 | * sh64_cache_init() | |
37 | * | |
38 | * This is pretty much just a straightforward clone of the SH | |
39 | * detect_cpu_and_cache_system(). | |
40 | * | |
41 | * This function is responsible for setting up all of the cache | |
42 | * info dynamically as well as taking care of CPU probing and | |
43 | * setting up the relevant subtype data. | |
44 | * | |
45 | * FIXME: For the time being, we only really support the SH5-101 | |
46 | * out of the box, and don't support dynamic probing for things | |
47 | * like the SH5-103 or even cut2 of the SH5-101. Implement this | |
48 | * later! | |
49 | */ | |
50 | int __init sh64_cache_init(void) | |
51 | { | |
52 | /* | |
53 | * First, setup some sane values for the I-cache. | |
54 | */ | |
55 | cpu_data->icache.ways = 4; | |
56 | cpu_data->icache.sets = 256; | |
57 | cpu_data->icache.linesz = L1_CACHE_BYTES; | |
58 | ||
59 | /* | |
60 | * FIXME: This can probably be cleaned up a bit as well.. for example, | |
61 | * do we really need the way shift _and_ the way_step_shift? Judging | |
62 | * by the existing code, I would guess not. Is there any valid reason | |
63 | * why we need to be carrying this around? | |
64 | */ | |
65 | cpu_data->icache.way_shift = 13; | |
66 | cpu_data->icache.entry_shift = 5; | |
67 | cpu_data->icache.set_shift = 4; | |
68 | cpu_data->icache.way_step_shift = 16; | |
69 | cpu_data->icache.asid_shift = 2; | |
70 | ||
71 | /* | |
72 | * way offset = cache size / associativity, so just don't factor in | |
73 | * associativity in the first place.. | |
74 | */ | |
75 | cpu_data->icache.way_ofs = cpu_data->icache.sets * | |
76 | cpu_data->icache.linesz; | |
77 | ||
78 | cpu_data->icache.asid_mask = 0x3fc; | |
79 | cpu_data->icache.idx_mask = 0x1fe0; | |
80 | cpu_data->icache.epn_mask = 0xffffe000; | |
81 | cpu_data->icache.flags = 0; | |
82 | ||
83 | /* | |
84 | * Next, setup some sane values for the D-cache. | |
85 | * | |
86 | * On the SH5, these are pretty consistent with the I-cache settings, | |
87 | * so we just copy over the existing definitions.. these can be fixed | |
88 | * up later, especially if we add runtime CPU probing. | |
89 | * | |
90 | * Though in the meantime it saves us from having to duplicate all of | |
91 | * the above definitions.. | |
92 | */ | |
93 | cpu_data->dcache = cpu_data->icache; | |
94 | ||
95 | /* | |
96 | * Setup any cache-related flags here | |
97 | */ | |
98 | #if defined(CONFIG_DCACHE_WRITE_THROUGH) | |
99 | set_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags)); | |
100 | #elif defined(CONFIG_DCACHE_WRITE_BACK) | |
101 | set_bit(SH_CACHE_MODE_WB, &(cpu_data->dcache.flags)); | |
102 | #endif | |
103 | ||
104 | /* | |
105 | * We also need to reserve a slot for the D-cache in the DTLB, so we | |
106 | * do this now .. | |
107 | */ | |
108 | dtlb_cache_slot = sh64_get_wired_dtlb_entry(); | |
109 | ||
110 | return 0; | |
111 | } | |
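
The hard-coded figures above are mutually consistent; below is a minimal standalone check of the arithmetic, assuming a 32-byte line size (L1_CACHE_BYTES == 32). This is illustrative only, not derived from the hardware manuals.

```c
#include <assert.h>

/* Sanity-check the SH5-101 cache geometry hard-coded in sh64_cache_init(). */
int main(void)
{
	const unsigned ways = 4, sets = 256, linesz = 32;
	const unsigned way_ofs = sets * linesz;		/* 8192 == 1 << way_shift (13) */

	assert(way_ofs == (1u << 13));
	assert(((sets - 1) << 5) == 0x1fe0);			/* idx_mask, entry_shift == 5 */
	assert((~(way_ofs - 1) & 0xffffffffu) == 0xffffe000u);	/* matches the epn_mask above */
	assert(ways * way_ofs == 32 * 1024);			/* 32 KiB per cache */
	return 0;
}
```
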
112 | ||
113 | #ifdef CONFIG_DCACHE_DISABLED | |
114 | #define sh64_dcache_purge_all() do { } while (0) | |
115 | #define sh64_dcache_purge_coloured_phy_page(paddr, eaddr) do { } while (0) | |
116 | #define sh64_dcache_purge_user_range(mm, start, end) do { } while (0) | |
117 | #define sh64_dcache_purge_phy_page(paddr) do { } while (0) | |
118 | #define sh64_dcache_purge_virt_page(mm, eaddr) do { } while (0) | |
119 | #define sh64_dcache_purge_kernel_range(start, end) do { } while (0) | |
120 | #define sh64_dcache_wback_current_user_range(start, end) do { } while (0) | |
121 | #endif | |
122 | ||
123 | /*##########################################################################*/ | |
124 | ||
125 | /* From here onwards, a rewrite of the implementation, | |
126 | by Richard.Curnow@superh.com. | |
127 | ||
128 | The major changes in this compared to the old version are: | |
129 | 1. use more selective purging through OCBP instead of using ALLOCO to purge | |
130 | by natural replacement. This avoids purging out unrelated cache lines | |
131 | that happen to be in the same set. | |
132 | 2. exploit the APIs copy_user_page and clear_user_page better | |
133 | 3. be more selective about I-cache purging, in particular use invalidate_all | |
134 | more sparingly. | |
135 | ||
136 | */ | |
137 | ||
138 | /*########################################################################## | |
139 | SUPPORT FUNCTIONS | |
140 | ##########################################################################*/ | |
141 | ||
142 | /****************************************************************************/ | |
143 | /* The following group of functions deals with mapping and unmapping a temporary | |
144 | page into the DTLB slot that has been set aside for our exclusive use. */ | |
145 | /* In order to accomplish this, we use the generic interface for adding and | |
a23ba435 | 146 | removing a wired slot entry as defined in arch/sh/mm/tlb-sh5.c */ |
1da177e4 LT |
147 | /****************************************************************************/ |
148 | ||
149 | static unsigned long slot_own_flags; | |
150 | ||
151 | static inline void sh64_setup_dtlb_cache_slot(unsigned long eaddr, unsigned long asid, unsigned long paddr) | |
152 | { | |
153 | local_irq_save(slot_own_flags); | |
154 | sh64_setup_tlb_slot(dtlb_cache_slot, eaddr, asid, paddr); | |
155 | } | |
156 | ||
157 | static inline void sh64_teardown_dtlb_cache_slot(void) | |
158 | { | |
159 | sh64_teardown_tlb_slot(dtlb_cache_slot); | |
160 | local_irq_restore(slot_own_flags); | |
161 | } | |
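
A hedged sketch of the intended usage pattern for these two helpers follows (the real callers, e.g. sh64_dcache_purge_coloured_phy_page(), appear further down in this file). Note that the saved IRQ flags live in the static slot_own_flags, so the slot behaves as a single-owner critical section between setup and teardown. The function name is purely illustrative.

```c
/*
 * Illustrative caller only: map 'paddr' at effective address 'eaddr' via the
 * wired DTLB slot, purge the page from the D-cache through that alias, then
 * unmap. Relies on the helpers defined above plus the usual kernel
 * definitions (PAGE_SIZE, L1_CACHE_BYTES, get_asid()).
 */
static void example_purge_via_dtlb_slot(unsigned long eaddr, unsigned long paddr)
{
	unsigned long long p;

	sh64_setup_dtlb_cache_slot(eaddr, get_asid(), paddr);	/* IRQs off from here */
	for (p = eaddr; p < eaddr + PAGE_SIZE; p += L1_CACHE_BYTES)
		asm __volatile__ ("ocbp %0, 0" : : "r" (p));
	sh64_teardown_dtlb_cache_slot();			/* IRQs restored */
}
```
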
162 | ||
163 | /****************************************************************************/ | |
164 | ||
165 | #ifndef CONFIG_ICACHE_DISABLED | |
166 | ||
167 | static void __inline__ sh64_icache_inv_all(void) | |
168 | { | |
169 | unsigned long long addr, flag, data; | |
170 | unsigned int flags; | |
171 | ||
172 | addr=ICCR0; | |
173 | flag=ICCR0_ICI; | |
174 | data=0; | |
175 | ||
176 | /* Make this a critical section for safety (probably not strictly necessary.) */ | |
177 | local_irq_save(flags); | |
178 | ||
179 | /* Without %1 it gets inexplicably wrong */ | |
180 | asm volatile("getcfg %3, 0, %0\n\t" | |
181 | "or %0, %2, %0\n\t" | |
182 | "putcfg %3, 0, %0\n\t" | |
183 | "synci" | |
184 | : "=&r" (data) | |
185 | : "0" (data), "r" (flag), "r" (addr)); | |
186 | ||
187 | local_irq_restore(flags); | |
188 | } | |
189 | ||
190 | static void sh64_icache_inv_kernel_range(unsigned long start, unsigned long end) | |
191 | { | |
192 | /* Invalidate range of addresses [start,end] from the I-cache, where | |
193 | * the addresses lie in the kernel superpage. */ | |
194 | ||
195 | unsigned long long ullend, addr, aligned_start; | |
196 | #if (NEFF == 32) | |
197 | aligned_start = (unsigned long long)(signed long long)(signed long) start; | |
198 | #else | |
199 | #error "NEFF != 32" | |
200 | #endif | |
201 | aligned_start &= L1_CACHE_ALIGN_MASK; | |
202 | addr = aligned_start; | |
203 | #if (NEFF == 32) | |
204 | ullend = (unsigned long long) (signed long long) (signed long) end; | |
205 | #else | |
206 | #error "NEFF != 32" | |
207 | #endif | |
208 | while (addr <= ullend) { | |
209 | asm __volatile__ ("icbi %0, 0" : : "r" (addr)); | |
210 | addr += L1_CACHE_BYTES; | |
211 | } | |
212 | } | |
213 | ||
214 | static void sh64_icache_inv_user_page(struct vm_area_struct *vma, unsigned long eaddr) | |
215 | { | |
216 | /* If we get called, we know that vma->vm_flags contains VM_EXEC. | |
217 | Also, eaddr is page-aligned. */ | |
218 | ||
219 | unsigned long long addr, end_addr; | |
220 | unsigned long flags = 0; | |
221 | unsigned long running_asid, vma_asid; | |
222 | addr = eaddr; | |
223 | end_addr = addr + PAGE_SIZE; | |
224 | ||
225 | /* Check whether we can use the current ASID for the I-cache | |
226 | invalidation. For example, if we're called via | |
227 | access_process_vm->flush_cache_page->here, (e.g. when reading from | |
228 | /proc), 'running_asid' will be that of the reader, not of the | |
229 | victim. | |
230 | ||
231 | Also, note the risk that we might get pre-empted between the ASID | |
232 | compare and blocking IRQs, and before we regain control, the | |
233 | pid->ASID mapping changes. However, the whole cache will get | |
234 | invalidated when the mapping is renewed, so the worst that can | |
235 | happen is that the loop below ends up invalidating somebody else's | |
236 | cache entries. | |
237 | */ | |
238 | ||
239 | running_asid = get_asid(); | |
240 | vma_asid = (vma->vm_mm->context & MMU_CONTEXT_ASID_MASK); | |
241 | if (running_asid != vma_asid) { | |
242 | local_irq_save(flags); | |
243 | switch_and_save_asid(vma_asid); | |
244 | } | |
245 | while (addr < end_addr) { | |
246 | /* Worth unrolling a little */ | |
247 | asm __volatile__("icbi %0, 0" : : "r" (addr)); | |
248 | asm __volatile__("icbi %0, 32" : : "r" (addr)); | |
249 | asm __volatile__("icbi %0, 64" : : "r" (addr)); | |
250 | asm __volatile__("icbi %0, 96" : : "r" (addr)); | |
251 | addr += 128; | |
252 | } | |
253 | if (running_asid != vma_asid) { | |
254 | switch_and_save_asid(running_asid); | |
255 | local_irq_restore(flags); | |
256 | } | |
257 | } | |
258 | ||
259 | /****************************************************************************/ | |
260 | ||
261 | static void sh64_icache_inv_user_page_range(struct mm_struct *mm, | |
262 | unsigned long start, unsigned long end) | |
263 | { | |
264 | /* Used for invalidating big chunks of I-cache, i.e. it assumes the | |
265 | range is whole pages. If 'start' or 'end' is not page aligned, the code | |
266 | is conservative and invalidates to the ends of the enclosing pages. | |
267 | This is functionally OK, just a performance loss. */ | |
268 | ||
269 | /* See the comments below in sh64_dcache_purge_user_range() regarding | |
270 | the choice of algorithm. However, for the I-cache option (2) isn't | |
271 | available because there are no physical tags so aliases can't be | |
272 | resolved. The icbi instruction has to be used through the user | |
273 | mapping. Because icbi is cheaper than ocbp on a cache hit, it | |
274 | would be cheaper to use the selective code for a large range than is | |
275 | possible with the D-cache. Just assume 64 for now as a working | |
276 | figure. | |
277 | */ | |
278 | ||
279 | int n_pages; | |
280 | ||
281 | if (!mm) return; | |
282 | ||
283 | n_pages = ((end - start) >> PAGE_SHIFT); | |
284 | if (n_pages >= 64) { | |
285 | sh64_icache_inv_all(); | |
286 | } else { | |
287 | unsigned long aligned_start; | |
288 | unsigned long eaddr; | |
289 | unsigned long after_last_page_start; | |
290 | unsigned long mm_asid, current_asid; | |
291 | unsigned long long flags = 0ULL; | |
292 | ||
293 | mm_asid = mm->context & MMU_CONTEXT_ASID_MASK; | |
294 | current_asid = get_asid(); | |
295 | ||
296 | if (mm_asid != current_asid) { | |
297 | /* Switch ASID and run the invalidate loop under cli */ | |
298 | local_irq_save(flags); | |
299 | switch_and_save_asid(mm_asid); | |
300 | } | |
301 | ||
302 | aligned_start = start & PAGE_MASK; | |
303 | after_last_page_start = PAGE_SIZE + ((end - 1) & PAGE_MASK); | |
304 | ||
305 | while (aligned_start < after_last_page_start) { | |
306 | struct vm_area_struct *vma; | |
307 | unsigned long vma_end; | |
308 | vma = find_vma(mm, aligned_start); | |
309 | if (!vma || (aligned_start <= vma->vm_end)) { | |
310 | /* Avoid getting stuck in an error condition */ | |
311 | aligned_start += PAGE_SIZE; | |
312 | continue; | |
313 | } | |
314 | vma_end = vma->vm_end; | |
315 | if (vma->vm_flags & VM_EXEC) { | |
316 | /* Executable */ | |
317 | eaddr = aligned_start; | |
318 | while (eaddr < vma_end) { | |
319 | sh64_icache_inv_user_page(vma, eaddr); | |
320 | eaddr += PAGE_SIZE; | |
321 | } | |
322 | } | |
323 | aligned_start = vma->vm_end; /* Skip to start of next region */ | |
324 | } | |
325 | if (mm_asid != current_asid) { | |
326 | switch_and_save_asid(current_asid); | |
327 | local_irq_restore(flags); | |
328 | } | |
329 | } | |
330 | } | |
331 | ||
332 | static void sh64_icache_inv_user_small_range(struct mm_struct *mm, | |
333 | unsigned long start, int len) | |
334 | { | |
335 | ||
336 | /* Invalidate a small range of user context I-cache, not necessarily | |
337 | page (or even cache-line) aligned. */ | |
338 | ||
339 | unsigned long long eaddr = start; | |
340 | unsigned long long eaddr_end = start + len; | |
341 | unsigned long current_asid, mm_asid; | |
342 | unsigned long long flags; | |
343 | unsigned long long epage_start; | |
344 | ||
345 | /* Since this is used inside ptrace, the ASID in the mm context | |
346 | typically won't match current_asid. We'll have to switch ASID to do | |
347 | this. For safety, and given that the range will be small, do all | |
348 | this under cli. | |
349 | ||
350 | Note, there is a hazard that the ASID in mm->context is no longer | |
351 | actually associated with mm, i.e. if the mm->context has started a | |
352 | new cycle since mm was last active. However, this is just a | |
353 | performance issue: all that happens is that we invalidate lines | |
354 | belonging to another mm, so the owning process has to refill them | |
355 | when that mm goes live again. mm itself can't have any cache | |
356 | entries because there will have been a flush_cache_all when the new | |
357 | mm->context cycle started. */ | |
358 | ||
359 | /* Align to start of cache line. Otherwise, suppose len==8 and start | |
360 | was at 32N+28 : the last 4 bytes wouldn't get invalidated. */ | |
361 | eaddr = start & L1_CACHE_ALIGN_MASK; | |
362 | eaddr_end = start + len; | |
363 | ||
364 | local_irq_save(flags); | |
365 | mm_asid = mm->context & MMU_CONTEXT_ASID_MASK; | |
366 | current_asid = switch_and_save_asid(mm_asid); | |
367 | ||
368 | epage_start = eaddr & PAGE_MASK; | |
369 | ||
370 | while (eaddr < eaddr_end) | |
371 | { | |
372 | asm __volatile__("icbi %0, 0" : : "r" (eaddr)); | |
373 | eaddr += L1_CACHE_BYTES; | |
374 | } | |
375 | switch_and_save_asid(current_asid); | |
376 | local_irq_restore(flags); | |
377 | } | |
378 | ||
379 | static void sh64_icache_inv_current_user_range(unsigned long start, unsigned long end) | |
380 | { | |
381 | /* The icbi instruction never raises ITLBMISS. i.e. if there's not a | |
382 | cache hit on the virtual tag the instruction ends there, without a | |
383 | TLB lookup. */ | |
384 | ||
385 | unsigned long long aligned_start; | |
386 | unsigned long long ull_end; | |
387 | unsigned long long addr; | |
388 | ||
389 | ull_end = end; | |
390 | ||
391 | /* Just invalidate over the range using the natural addresses. TLB | |
392 | miss handling will be OK (TBC). Since it's for the current process, | |
393 | either we're already in the right ASID context, or the ASIDs have | |
394 | been recycled since we were last active in which case we might just | |
395 | invalidate another process's I-cache entries: no worries, just a | |
396 | performance drop for him. */ | |
397 | aligned_start = start & L1_CACHE_ALIGN_MASK; | |
398 | addr = aligned_start; | |
399 | while (addr < ull_end) { | |
400 | asm __volatile__ ("icbi %0, 0" : : "r" (addr)); | |
401 | asm __volatile__ ("nop"); | |
402 | asm __volatile__ ("nop"); | |
403 | addr += L1_CACHE_BYTES; | |
404 | } | |
405 | } | |
406 | ||
407 | #endif /* !CONFIG_ICACHE_DISABLED */ | |
408 | ||
409 | /****************************************************************************/ | |
410 | ||
411 | #ifndef CONFIG_DCACHE_DISABLED | |
412 | ||
413 | /* Buffer used as the target of alloco instructions to purge data from cache | |
414 | sets by natural eviction. -- RPC */ | |
415 | #define DUMMY_ALLOCO_AREA_SIZE L1_CACHE_SIZE_BYTES + (1024 * 4) | |
416 | static unsigned char dummy_alloco_area[DUMMY_ALLOCO_AREA_SIZE] __cacheline_aligned = { 0, }; | |
417 | ||
418 | /****************************************************************************/ | |
419 | ||
420 | static void __inline__ sh64_dcache_purge_sets(int sets_to_purge_base, int n_sets) | |
421 | { | |
422 | /* Purge all ways in a particular block of sets, specified by the base | |
423 | set number and number of sets. Can handle wrap-around, if that's | |
424 | needed. */ | |
425 | ||
426 | int dummy_buffer_base_set; | |
427 | unsigned long long eaddr, eaddr0, eaddr1; | |
428 | int j; | |
429 | int set_offset; | |
430 | ||
431 | dummy_buffer_base_set = ((int)&dummy_alloco_area & cpu_data->dcache.idx_mask) >> cpu_data->dcache.entry_shift; | |
432 | set_offset = sets_to_purge_base - dummy_buffer_base_set; | |
433 | ||
434 | for (j=0; j<n_sets; j++, set_offset++) { | |
435 | set_offset &= (cpu_data->dcache.sets - 1); | |
436 | eaddr0 = (unsigned long long)dummy_alloco_area + (set_offset << cpu_data->dcache.entry_shift); | |
437 | ||
438 | /* Do one alloco which hits the required set per cache way. For | |
439 | write-back mode, this will purge the #ways resident lines. There's | |
440 | little point unrolling this loop because the allocos stall more if | |
441 | they're too close together. */ | |
442 | eaddr1 = eaddr0 + cpu_data->dcache.way_ofs * cpu_data->dcache.ways; | |
443 | for (eaddr=eaddr0; eaddr<eaddr1; eaddr+=cpu_data->dcache.way_ofs) { | |
444 | asm __volatile__ ("alloco %0, 0" : : "r" (eaddr)); | |
445 | asm __volatile__ ("synco"); /* TAKum03020 */ | |
446 | } | |
447 | ||
448 | eaddr1 = eaddr0 + cpu_data->dcache.way_ofs * cpu_data->dcache.ways; | |
449 | for (eaddr=eaddr0; eaddr<eaddr1; eaddr+=cpu_data->dcache.way_ofs) { | |
450 | /* Load from each address. Required because alloco is a NOP if | |
451 | the cache is write-through. Write-through is a config option. */ | |
452 | if (test_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags))) | |
453 | *(volatile unsigned char *)(int)eaddr; | |
454 | } | |
455 | } | |
456 | ||
457 | /* Don't use OCBI to invalidate the lines. That costs cycles directly. | |
458 | If the dummy block is just left resident, it will naturally get | |
459 | evicted as required. */ | |
460 | ||
461 | return; | |
462 | } | |
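
For reference, a minimal sketch of how an effective address maps onto a D-cache set under the geometry hard-coded in sh64_cache_init() above (idx_mask 0x1fe0, entry_shift 5, 256 sets); the function name is purely illustrative.

```c
/* Illustrative only: which of the 256 sets a given effective address indexes. */
static inline int example_dcache_set_of(unsigned long eaddr)
{
	return (int)((eaddr & 0x1fe0UL) >> 5);	/* bits [12:5] select the set */
}
```

sh64_dcache_purge_sets() applies exactly this computation to &dummy_alloco_area to find which set the dummy buffer starts at, then walks set_offset forward from there, wrapping modulo the number of sets.
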
463 | ||
464 | /****************************************************************************/ | |
465 | ||
466 | static void sh64_dcache_purge_all(void) | |
467 | { | |
468 | /* Purge the entire contents of the dcache. The most efficient way to | |
469 | achieve this is to use alloco instructions on a region of unused | |
470 | memory equal in size to the cache, thereby causing the current | |
471 | contents to be discarded by natural eviction. The alternative, | |
472 | namely reading every tag, setting up a mapping for the corresponding | |
473 | page and doing an OCBP for the line, would be much more expensive. | |
474 | */ | |
475 | ||
476 | sh64_dcache_purge_sets(0, cpu_data->dcache.sets); | |
477 | ||
478 | return; | |
479 | ||
480 | } | |
481 | ||
482 | /****************************************************************************/ | |
483 | ||
484 | static void sh64_dcache_purge_kernel_range(unsigned long start, unsigned long end) | |
485 | { | |
486 | /* Purge the range of addresses [start,end] from the D-cache. The | |
487 | addresses lie in the superpage mapping. There's no harm if we | |
488 | overpurge at either end - just a small performance loss. */ | |
489 | unsigned long long ullend, addr, aligned_start; | |
490 | #if (NEFF == 32) | |
491 | aligned_start = (unsigned long long)(signed long long)(signed long) start; | |
492 | #else | |
493 | #error "NEFF != 32" | |
494 | #endif | |
495 | aligned_start &= L1_CACHE_ALIGN_MASK; | |
496 | addr = aligned_start; | |
497 | #if (NEFF == 32) | |
498 | ullend = (unsigned long long) (signed long long) (signed long) end; | |
499 | #else | |
500 | #error "NEFF != 32" | |
501 | #endif | |
502 | while (addr <= ullend) { | |
503 | asm __volatile__ ("ocbp %0, 0" : : "r" (addr)); | |
504 | addr += L1_CACHE_BYTES; | |
505 | } | |
506 | return; | |
507 | } | |
508 | ||
509 | /* Assumes this address (and the (2**n_synbits) pages above it) aren't used for | |
510 | anything else in the kernel */ | |
511 | #define MAGIC_PAGE0_START 0xffffffffec000000ULL | |
512 | ||
513 | static void sh64_dcache_purge_coloured_phy_page(unsigned long paddr, unsigned long eaddr) | |
514 | { | |
515 | /* Purge the physical page 'paddr' from the cache. It's known that any | |
516 | cache lines requiring attention have the same page colour as the | |
517 | address 'eaddr'. | |
518 | ||
519 | This relies on the fact that the D-cache matches on physical tags | |
520 | when no virtual tag matches. So we create an alias for the original | |
521 | page and purge through that. (Alternatively, we could have done | |
522 | this by switching ASID to match the original mapping and purged | |
523 | through that, but that involves ASID switching cost + probably a | |
524 | TLBMISS + refill anyway.) | |
525 | */ | |
526 | ||
527 | unsigned long long magic_page_start; | |
528 | unsigned long long magic_eaddr, magic_eaddr_end; | |
529 | ||
530 | magic_page_start = MAGIC_PAGE0_START + (eaddr & CACHE_OC_SYN_MASK); | |
531 | ||
532 | /* As long as the kernel is not pre-emptible, this doesn't need to be | |
533 | under cli/sti. */ | |
534 | ||
535 | sh64_setup_dtlb_cache_slot(magic_page_start, get_asid(), paddr); | |
536 | ||
537 | magic_eaddr = magic_page_start; | |
538 | magic_eaddr_end = magic_eaddr + PAGE_SIZE; | |
539 | while (magic_eaddr < magic_eaddr_end) { | |
540 | /* Little point in unrolling this loop - the OCBPs are blocking | |
541 | and won't go any quicker (i.e. the loop overhead is parallel | |
542 | to part of the OCBP execution.) */ | |
543 | asm __volatile__ ("ocbp %0, 0" : : "r" (magic_eaddr)); | |
544 | magic_eaddr += L1_CACHE_BYTES; | |
545 | } | |
546 | ||
547 | sh64_teardown_dtlb_cache_slot(); | |
548 | } | |
549 | ||
550 | /****************************************************************************/ | |
551 | ||
552 | static void sh64_dcache_purge_phy_page(unsigned long paddr) | |
553 | { | |
554 | /* Purge a page given its physical start address, by creating a | |
555 | temporary 1 page mapping and purging across that. Even if we know | |
556 | the virtual address (& vma or mm) of the page, the method here is | |
557 | more elegant because it avoids issues of coping with page faults on | |
558 | the purge instructions (i.e. no special-case code required in the | |
559 | critical path in the TLB miss handling). */ | |
560 | ||
561 | unsigned long long eaddr_start, eaddr, eaddr_end; | |
562 | int i; | |
563 | ||
564 | /* As long as the kernel is not pre-emptible, this doesn't need to be | |
565 | under cli/sti. */ | |
566 | ||
567 | eaddr_start = MAGIC_PAGE0_START; | |
568 | for (i=0; i < (1 << CACHE_OC_N_SYNBITS); i++) { | |
569 | sh64_setup_dtlb_cache_slot(eaddr_start, get_asid(), paddr); | |
570 | ||
571 | eaddr = eaddr_start; | |
572 | eaddr_end = eaddr + PAGE_SIZE; | |
573 | while (eaddr < eaddr_end) { | |
574 | asm __volatile__ ("ocbp %0, 0" : : "r" (eaddr)); | |
575 | eaddr += L1_CACHE_BYTES; | |
576 | } | |
577 | ||
578 | sh64_teardown_dtlb_cache_slot(); | |
579 | eaddr_start += PAGE_SIZE; | |
580 | } | |
581 | } | |
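
As a rough cost sketch (hypothetical figures, assuming 4 KiB pages, 32-byte lines and a single synonym bit, i.e. an 8 KiB way size): purging one physical page this way issues PAGE_SIZE / L1_CACHE_BYTES = 128 OCBPs per colour, times two colours, plus one DTLB slot setup/teardown per colour.

```c
/* Hypothetical cost figures for sh64_dcache_purge_phy_page(), illustration only. */
enum {
	EXAMPLE_OC_N_SYNBITS   = 1,		/* log2(way size / page size)            */
	EXAMPLE_LINES_PER_PAGE = 4096 / 32,	/* 128 ocbp's per colour                 */
	EXAMPLE_TOTAL_OCBP     = (1 << EXAMPLE_OC_N_SYNBITS) * EXAMPLE_LINES_PER_PAGE, /* 256 */
};
```
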
582 | ||
60ec5585 HD |
583 | static void sh64_dcache_purge_user_pages(struct mm_struct *mm, |
584 | unsigned long addr, unsigned long end) | |
1da177e4 LT |
585 | { |
586 | pgd_t *pgd; | |
587 | pmd_t *pmd; | |
588 | pte_t *pte; | |
589 | pte_t entry; | |
60ec5585 | 590 | spinlock_t *ptl; |
1da177e4 LT |
591 | unsigned long paddr; |
592 | ||
60ec5585 HD |
593 | if (!mm) |
594 | return; /* No way to find physical address of page */ | |
595 | ||
596 | pgd = pgd_offset(mm, addr); | |
597 | if (pgd_bad(*pgd)) | |
598 | return; | |
599 | ||
600 | pmd = pmd_offset(pgd, addr); | |
601 | if (pmd_none(*pmd) || pmd_bad(*pmd)) | |
602 | return; | |
603 | ||
604 | pte = pte_offset_map_lock(mm, pmd, addr, &ptl); | |
605 | do { | |
606 | entry = *pte; | |
607 | if (pte_none(entry) || !pte_present(entry)) | |
608 | continue; | |
609 | paddr = pte_val(entry) & PAGE_MASK; | |
610 | sh64_dcache_purge_coloured_phy_page(paddr, addr); | |
611 | } while (pte++, addr += PAGE_SIZE, addr != end); | |
612 | pte_unmap_unlock(pte - 1, ptl); | |
1da177e4 LT |
613 | } |
614 | /****************************************************************************/ | |
615 | ||
616 | static void sh64_dcache_purge_user_range(struct mm_struct *mm, | |
617 | unsigned long start, unsigned long end) | |
618 | { | |
619 | /* There are at least 5 choices for the implementation of this, with | |
620 | pros (+), cons(-), comments(*): | |
621 | ||
622 | 1. ocbp each line in the range through the original user's ASID | |
623 | + no lines spuriously evicted | |
624 | - tlbmiss handling (must either handle faults on demand => extra | |
625 | special-case code in tlbmiss critical path), or map the page in | |
626 | advance (=> flush_tlb_range in advance to avoid multiple hits) | |
627 | - ASID switching | |
628 | - expensive for large ranges | |
629 | ||
630 | 2. temporarily map each page in the range to a special effective | |
631 | address and ocbp through the temporary mapping; relies on the | |
632 | fact that SH-5 OCB* always do TLB lookup and match on ptags (they | |
633 | never look at the etags) | |
634 | + no spurious evictions | |
635 | - expensive for large ranges | |
636 | * surely cheaper than (1) | |
637 | ||
638 | 3. walk all the lines in the cache, check the tags, if a match | |
639 | occurs create a page mapping to ocbp the line through | |
640 | + no spurious evictions | |
641 | - tag inspection overhead | |
642 | - (especially for small ranges) | |
643 | - potential cost of setting up/tearing down page mapping for | |
644 | every line that matches the range | |
645 | * cost partly independent of range size | |
646 | ||
647 | 4. walk all the lines in the cache, check the tags, if a match | |
648 | occurs use 4 * alloco to purge the line (+3 other probably | |
649 | innocent victims) by natural eviction | |
650 | + no tlb mapping overheads | |
651 | - spurious evictions | |
652 | - tag inspection overhead | |
653 | ||
654 | 5. implement like flush_cache_all | |
655 | + no tag inspection overhead | |
656 | - spurious evictions | |
657 | - bad for small ranges | |
658 | ||
659 | (1) can be ruled out as more expensive than (2). (2) appears best | |
660 | for small ranges. The choice between (3), (4) and (5) for large | |
661 | ranges and the range size for the large/small boundary need | |
662 | benchmarking to determine. | |
663 | ||
664 | For now use approach (2) for small ranges and (5) for large ones. | |
665 | ||
666 | */ | |
667 | ||
668 | int n_pages; | |
669 | ||
670 | n_pages = ((end - start) >> PAGE_SHIFT); | |
60ec5585 | 671 | if (n_pages >= 64 || ((start ^ (end - 1)) & PMD_MASK)) { |
1da177e4 LT |
672 | #if 1 |
673 | sh64_dcache_purge_all(); | |
674 | #else | |
675 | unsigned long long set, way; | |
676 | unsigned long mm_asid = mm->context & MMU_CONTEXT_ASID_MASK; | |
677 | for (set = 0; set < cpu_data->dcache.sets; set++) { | |
678 | unsigned long long set_base_config_addr = CACHE_OC_ADDRESS_ARRAY + (set << cpu_data->dcache.set_shift); | |
679 | for (way = 0; way < cpu_data->dcache.ways; way++) { | |
680 | unsigned long long config_addr = set_base_config_addr + (way << cpu_data->dcache.way_step_shift); | |
681 | unsigned long long tag0; | |
682 | unsigned long line_valid; | |
683 | ||
684 | asm __volatile__("getcfg %1, 0, %0" : "=r" (tag0) : "r" (config_addr)); | |
685 | line_valid = tag0 & SH_CACHE_VALID; | |
686 | if (line_valid) { | |
687 | unsigned long cache_asid; | |
688 | unsigned long epn; | |
689 | ||
690 | cache_asid = (tag0 & cpu_data->dcache.asid_mask) >> cpu_data->dcache.asid_shift; | |
691 | /* The next line needs some | |
692 | explanation. The virtual tags | |
693 | encode bits [31:13] of the virtual | |
694 | address, bit [12] of the 'tag' being | |
695 | implied by the cache set index. */ | |
696 | epn = (tag0 & cpu_data->dcache.epn_mask) | ((set & 0x80) << cpu_data->dcache.entry_shift); | |
697 | ||
698 | if ((cache_asid == mm_asid) && (start <= epn) && (epn < end)) { | |
699 | /* TODO : could optimise this | |
700 | call by batching multiple | |
701 | adjacent sets together. */ | |
702 | sh64_dcache_purge_sets(set, 1); | |
703 | break; /* Don't waste time inspecting other ways for this set */ | |
704 | } | |
705 | } | |
706 | } | |
707 | } | |
708 | #endif | |
709 | } else { | |
60ec5585 HD |
710 | /* Small range, covered by a single page table page */ |
711 | start &= PAGE_MASK; /* should already be so */ | |
712 | end = PAGE_ALIGN(end); /* should already be so */ | |
713 | sh64_dcache_purge_user_pages(mm, start, end); | |
1da177e4 LT |
714 | } |
715 | return; | |
716 | } | |
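
The `(start ^ (end - 1)) & PMD_MASK` test above falls back to the full purge whenever the range crosses a page-table-page boundary, since sh64_dcache_purge_user_pages() only walks a single pte page. A minimal illustration, using a hypothetical 2 MiB span per pte page (the real PMD_MASK depends on the configured page size):

```c
/* Illustrative only: does [start, end) cross a (hypothetical) 2 MiB pte-page boundary? */
static int example_spans_pte_pages(unsigned long start, unsigned long end)
{
	const unsigned long example_pmd_mask = ~0x1fffffUL;	/* hypothetical PMD_MASK */

	return ((start ^ (end - 1)) & example_pmd_mask) != 0;
}
```
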
717 | ||
718 | static void sh64_dcache_wback_current_user_range(unsigned long start, unsigned long end) | |
719 | { | |
720 | unsigned long long aligned_start; | |
721 | unsigned long long ull_end; | |
722 | unsigned long long addr; | |
723 | ||
724 | ull_end = end; | |
725 | ||
726 | /* Just wback over the range using the natural addresses. TLB miss | |
727 | handling will be OK (TBC) : the range has just been written to by | |
728 | the signal frame setup code, so the PTEs must exist. | |
729 | ||
730 | Note, if we have CONFIG_PREEMPT and get preempted inside this loop, | |
731 | it doesn't matter, even if the pid->ASID mapping changes whilst | |
732 | we're away. In that case the cache will have been flushed when the | |
733 | mapping was renewed. So the writebacks below will be nugatory (and | |
734 | we'll doubtless have to fault the TLB entry/ies in again with the | |
735 | new ASID), but it's a rare case. | |
736 | */ | |
737 | aligned_start = start & L1_CACHE_ALIGN_MASK; | |
738 | addr = aligned_start; | |
739 | while (addr < ull_end) { | |
740 | asm __volatile__ ("ocbwb %0, 0" : : "r" (addr)); | |
741 | addr += L1_CACHE_BYTES; | |
742 | } | |
743 | } | |
744 | ||
745 | /****************************************************************************/ | |
746 | ||
747 | /* These *MUST* lie in an area of virtual address space that's otherwise unused. */ | |
748 | #define UNIQUE_EADDR_START 0xe0000000UL | |
749 | #define UNIQUE_EADDR_END 0xe8000000UL | |
750 | ||
751 | static unsigned long sh64_make_unique_eaddr(unsigned long user_eaddr, unsigned long paddr) | |
752 | { | |
753 | /* Given a physical address paddr, and a user virtual address | |
754 | user_eaddr which will eventually be mapped to it, create a one-off | |
755 | kernel-private eaddr mapped to the same paddr. This is used for | |
756 | creating special destination pages for copy_user_page and | |
757 | clear_user_page */ | |
758 | ||
759 | static unsigned long current_pointer = UNIQUE_EADDR_START; | |
760 | unsigned long coloured_pointer; | |
761 | ||
762 | if (current_pointer == UNIQUE_EADDR_END) { | |
763 | sh64_dcache_purge_all(); | |
764 | current_pointer = UNIQUE_EADDR_START; | |
765 | } | |
766 | ||
767 | coloured_pointer = (current_pointer & ~CACHE_OC_SYN_MASK) | (user_eaddr & CACHE_OC_SYN_MASK); | |
768 | sh64_setup_dtlb_cache_slot(coloured_pointer, get_asid(), paddr); | |
769 | ||
770 | current_pointer += (PAGE_SIZE << CACHE_OC_N_SYNBITS); | |
771 | ||
772 | return coloured_pointer; | |
773 | } | |
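
A worked example of the colouring arithmetic above, using a hypothetical CACHE_OC_SYN_MASK of 0x1000 (one synonym bit, i.e. an 8 KiB way with 4 KiB pages); the addresses are made up for illustration.

```c
#include <stdio.h>

/* Worked example of sh64_make_unique_eaddr()'s colour selection (values hypothetical). */
int main(void)
{
	unsigned long syn_mask   = 0x1000UL;		/* hypothetical CACHE_OC_SYN_MASK */
	unsigned long user_eaddr = 0x00405000UL;	/* user page of colour 1 */
	unsigned long pointer    = 0xe0000000UL;	/* UNIQUE_EADDR_START */
	unsigned long coloured   = (pointer & ~syn_mask) | (user_eaddr & syn_mask);

	printf("%#lx\n", coloured);	/* prints 0xe0001000: same colour as the user mapping */
	return 0;
}
```
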
774 | ||
775 | /****************************************************************************/ | |
776 | ||
777 | static void sh64_copy_user_page_coloured(void *to, void *from, unsigned long address) | |
778 | { | |
779 | void *coloured_to; | |
780 | ||
781 | /* Discard any existing cache entries of the wrong colour. These are | |
782 | often present if the kernel has recently used the page internally, | |
783 | then given it up, and the page has since been allocated to the user. | |
784 | */ | |
785 | sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long) to); | |
786 | ||
787 | coloured_to = (void *) sh64_make_unique_eaddr(address, __pa(to)); | |
788 | sh64_page_copy(from, coloured_to); | |
789 | ||
790 | sh64_teardown_dtlb_cache_slot(); | |
791 | } | |
792 | ||
793 | static void sh64_clear_user_page_coloured(void *to, unsigned long address) | |
794 | { | |
795 | void *coloured_to; | |
796 | ||
797 | /* Discard any existing kernel-originated lines of the wrong colour (as | |
798 | above) */ | |
799 | sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long) to); | |
800 | ||
801 | coloured_to = (void *) sh64_make_unique_eaddr(address, __pa(to)); | |
802 | sh64_page_clear(coloured_to); | |
803 | ||
804 | sh64_teardown_dtlb_cache_slot(); | |
805 | } | |
806 | ||
807 | #endif /* !CONFIG_DCACHE_DISABLED */ | |
808 | ||
809 | /****************************************************************************/ | |
810 | ||
811 | /*########################################################################## | |
812 | EXTERNALLY CALLABLE API. | |
813 | ##########################################################################*/ | |
814 | ||
815 | /* These functions are described in Documentation/cachetlb.txt. | |
816 | Each one of these functions varies in behaviour depending on whether the | |
817 | I-cache and/or D-cache are configured out. | |
818 | ||
819 | Note that the Linux term 'flush' corresponds to what is termed 'purge' in | |
820 | the sh/sh64 jargon for the D-cache, i.e. write back dirty data then | |
821 | invalidate the cache lines, and 'invalidate' for the I-cache. | |
822 | */ | |
823 | ||
824 | #undef FLUSH_TRACE | |
825 | ||
826 | void flush_cache_all(void) | |
827 | { | |
828 | /* Invalidate the entire contents of both caches, after writing back to | |
829 | memory any dirty data from the D-cache. */ | |
830 | sh64_dcache_purge_all(); | |
831 | sh64_icache_inv_all(); | |
832 | } | |
833 | ||
834 | /****************************************************************************/ | |
835 | ||
836 | void flush_cache_mm(struct mm_struct *mm) | |
837 | { | |
838 | /* Invalidate an entire user-address space from both caches, after | |
839 | writing back dirty data (e.g. for shared mmap etc). */ | |
840 | ||
841 | /* This could be coded selectively by inspecting all the tags then | |
842 | doing 4*alloco on any set containing a match (as for | |
843 | flush_cache_range), but fork/exit/execve (where this is called from) | |
844 | are expensive anyway. */ | |
845 | ||
846 | /* Have to do a purge here, despite the comments re I-cache below. | |
847 | There could be odd-coloured dirty data associated with the mm still | |
848 | in the cache - if this gets written out through natural eviction | |
849 | after the kernel has reused the page there will be chaos. | |
850 | */ | |
851 | ||
852 | sh64_dcache_purge_all(); | |
853 | ||
854 | /* The mm being torn down won't ever be active again, so any Icache | |
855 | lines tagged with its ASID won't be visible for the rest of the | |
856 | lifetime of this ASID cycle. Before the ASID gets reused, there | |
857 | will be a flush_cache_all. Hence we don't need to touch the | |
858 | I-cache. This is similar to the lack of action needed in | |
859 | flush_tlb_mm - see fault.c. */ | |
860 | } | |
861 | ||
862 | /****************************************************************************/ | |
863 | ||
864 | void flush_cache_range(struct vm_area_struct *vma, unsigned long start, | |
865 | unsigned long end) | |
866 | { | |
867 | struct mm_struct *mm = vma->vm_mm; | |
868 | ||
869 | /* Invalidate (from both caches) the range [start,end) of virtual | |
870 | addresses from the user address space specified by mm, after writing | |
871 | back any dirty data. | |
872 | ||
60ec5585 | 873 | Note, 'end' is 1 byte beyond the end of the range to flush. */ |
1da177e4 LT |
874 | |
875 | sh64_dcache_purge_user_range(mm, start, end); | |
876 | sh64_icache_inv_user_page_range(mm, start, end); | |
877 | } | |
878 | ||
879 | /****************************************************************************/ | |
880 | ||
881 | void flush_cache_page(struct vm_area_struct *vma, unsigned long eaddr, unsigned long pfn) | |
882 | { | |
883 | /* Invalidate any entries in either cache for the vma within the user | |
884 | address space vma->vm_mm for the page starting at virtual address | |
885 | 'eaddr'. This seems to be used primarily in breaking COW. Note, | |
886 | the I-cache must be searched too in case the page in question is | |
887 | both writable and being executed from (e.g. stack trampolines.) | |
888 | ||
60ec5585 | 889 | Note, this is called with pte lock held. |
1da177e4 LT |
890 | */ |
891 | ||
892 | sh64_dcache_purge_phy_page(pfn << PAGE_SHIFT); | |
893 | ||
894 | if (vma->vm_flags & VM_EXEC) { | |
895 | sh64_icache_inv_user_page(vma, eaddr); | |
896 | } | |
897 | } | |
898 | ||
899 | /****************************************************************************/ | |
900 | ||
901 | #ifndef CONFIG_DCACHE_DISABLED | |
902 | ||
903 | void copy_user_page(void *to, void *from, unsigned long address, struct page *page) | |
904 | { | |
905 | /* 'from' and 'to' are kernel virtual addresses (within the superpage | |
906 | mapping of the physical RAM). 'address' is the user virtual address | |
907 | where the copy 'to' will be mapped afterwards. This allows a custom | |
908 | mapping to be used to ensure that the new copy is placed in the | |
909 | right cache sets for the user to see it without having to bounce it | |
910 | out via memory. Note however : the call to flush_page_to_ram in | |
911 | (generic)/mm/memory.c:(break_cow) undoes all this good work in that one | |
912 | very important case! | |
913 | ||
914 | TBD : can we guarantee that on every call, any cache entries for | |
915 | 'from' are in the same colour sets as 'address' also? i.e. is this | |
916 | always used just to deal with COW? (I suspect not). */ | |
917 | ||
918 | /* There are two possibilities here for when the page 'from' was last accessed: | |
919 | * by the kernel : this is OK, no purge required. | |
920 | * by the/a user (e.g. for break_COW) : need to purge. | |
921 | ||
922 | If the potential user mapping at 'address' is the same colour as | |
923 | 'from' there is no need to purge any cache lines from the 'from' | |
924 | page mapped into cache sets of colour 'address'. (The copy will be | |
925 | accessing the page through 'from'). | |
926 | */ | |
927 | ||
928 | if (((address ^ (unsigned long) from) & CACHE_OC_SYN_MASK) != 0) { | |
929 | sh64_dcache_purge_coloured_phy_page(__pa(from), address); | |
930 | } | |
931 | ||
932 | if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0) { | |
933 | /* No synonym problem on destination */ | |
934 | sh64_page_copy(from, to); | |
935 | } else { | |
936 | sh64_copy_user_page_coloured(to, from, address); | |
937 | } | |
938 | ||
939 | /* Note, don't need to flush 'from' page from the cache again - it's | |
940 | done anyway by the generic code */ | |
941 | } | |
942 | ||
943 | void clear_user_page(void *to, unsigned long address, struct page *page) | |
944 | { | |
945 | /* 'to' is a kernel virtual address (within the superpage | |
946 | mapping of the physical RAM). 'address' is the user virtual address | |
947 | where the 'to' page will be mapped afterwards. This allows a custom | |
948 | mapping to be used to ensure that the new copy is placed in the | |
949 | right cache sets for the user to see it without having to bounce it | |
950 | out via memory. | |
951 | */ | |
952 | ||
953 | if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0) { | |
954 | /* No synonym problem on destination */ | |
955 | sh64_page_clear(to); | |
956 | } else { | |
957 | sh64_clear_user_page_coloured(to, address); | |
958 | } | |
959 | } | |
960 | ||
961 | #endif /* !CONFIG_DCACHE_DISABLED */ | |
962 | ||
963 | /****************************************************************************/ | |
964 | ||
965 | void flush_dcache_page(struct page *page) | |
966 | { | |
967 | sh64_dcache_purge_phy_page(page_to_phys(page)); | |
968 | wmb(); | |
969 | } | |
970 | ||
971 | /****************************************************************************/ | |
972 | ||
973 | void flush_icache_range(unsigned long start, unsigned long end) | |
974 | { | |
975 | /* Flush the range [start,end] of kernel virtual address space from | |
976 | the I-cache. The corresponding range must be purged from the | |
977 | D-cache also because the SH-5 doesn't have cache snooping between | |
978 | the caches. The addresses will be visible through the superpage | |
979 | mapping, therefore it's guaranteed that there are no cache entries for | |
980 | the range in cache sets of the wrong colour. | |
981 | ||
982 | Primarily used for cohering the I-cache after a module has | |
983 | been loaded. */ | |
984 | ||
985 | /* We also make sure to purge the same range from the D-cache since | |
986 | flush_page_to_ram() won't be doing this for us! */ | |
987 | ||
988 | sh64_dcache_purge_kernel_range(start, end); | |
989 | wmb(); | |
990 | sh64_icache_inv_kernel_range(start, end); | |
991 | } | |
992 | ||
993 | /****************************************************************************/ | |
994 | ||
995 | void flush_icache_user_range(struct vm_area_struct *vma, | |
996 | struct page *page, unsigned long addr, int len) | |
997 | { | |
998 | /* Flush the range of user (defined by vma->vm_mm) address space | |
999 | starting at 'addr' for 'len' bytes from the cache. The range does | |
1000 | not straddle a page boundary; the unique physical page containing | |
1001 | the range is 'page'. This seems to be used mainly for invalidating | |
1002 | an address range following a poke into the program text through the | |
1003 | ptrace() call from another process (e.g. for BRK instruction | |
1004 | insertion). */ | |
1005 | ||
1006 | sh64_dcache_purge_coloured_phy_page(page_to_phys(page), addr); | |
1007 | mb(); | |
1008 | ||
1009 | if (vma->vm_flags & VM_EXEC) { | |
1010 | sh64_icache_inv_user_small_range(vma->vm_mm, addr, len); | |
1011 | } | |
1012 | } | |
1013 | ||
1014 | /*########################################################################## | |
1015 | ARCH/SH64 PRIVATE CALLABLE API. | |
1016 | ##########################################################################*/ | |
1017 | ||
1018 | void flush_cache_sigtramp(unsigned long start, unsigned long end) | |
1019 | { | |
1020 | /* For the address range [start,end), write back the data from the | |
1021 | D-cache and invalidate the corresponding region of the I-cache for | |
1022 | the current process. Used to flush signal trampolines on the stack | |
1023 | to make them executable. */ | |
1024 | ||
1025 | sh64_dcache_wback_current_user_range(start, end); | |
1026 | wmb(); | |
1027 | sh64_icache_inv_current_user_range(start, end); | |
1028 | } | |
1029 |