microblaze: Improve TLB calculation for small systems
[deliverable/linux.git] arch/microblaze/kernel/head.S
/*
 * Copyright (C) 2007-2009 Michal Simek <monstr@monstr.eu>
 * Copyright (C) 2007-2009 PetaLogix
 * Copyright (C) 2006 Atmark Techno, Inc.
 *
 * MMU code derived from arch/ppc/kernel/head_4xx.S:
 *	Copyright (c) 1995-1996 Gary Thomas <gdt@linuxppc.org>
 *		Initial PowerPC version.
 *	Copyright (c) 1996 Cort Dougan <cort@cs.nmt.edu>
 *		Rewritten for PReP
 *	Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au>
 *		Low-level exception handlers, MMU support, and rewrite.
 *	Copyright (c) 1997 Dan Malek <dmalek@jlc.net>
 *		PowerPC 8xx modifications.
 *	Copyright (c) 1998-1999 TiVo, Inc.
 *		PowerPC 403GCX modifications.
 *	Copyright (c) 1999 Grant Erickson <grant@lcse.umn.edu>
 *		PowerPC 403GCX/405GP modifications.
 *	Copyright 2000 MontaVista Software Inc.
 *		PPC405 modifications
 *		PowerPC 403GCX/405GP modifications.
 *	Author: MontaVista Software, Inc.
 *		frank_rowand@mvista.com or source@mvista.com
 *		debbie_chu@mvista.com
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 */

#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/thread_info.h>
#include <asm/page.h>
#include <linux/of_fdt.h>		/* for OF_DT_HEADER */

#ifdef CONFIG_MMU
#include <asm/setup.h>			/* COMMAND_LINE_SIZE */
#include <asm/mmu.h>
#include <asm/processor.h>

.section .data
.global empty_zero_page
.align 12
empty_zero_page:
        .space PAGE_SIZE
.global swapper_pg_dir
swapper_pg_dir:
        .space PAGE_SIZE

#endif /* CONFIG_MMU */

.section .rodata
.align 4
endian_check:
        .word 1

        __HEAD
ENTRY(_start)
#if CONFIG_KERNEL_BASE_ADDR == 0
        brai    TOPHYS(real_start)
        .org    0x100
real_start:
#endif

        mts     rmsr, r0
/*
 * According to Xilinx, msrclr instruction behaves like 'mfs rX,rpc'
 * if the msrclr instruction is not enabled. We use this to detect
 * if the opcode is available, by issuing msrclr and then testing the result.
 * r8 == 0 - msr instructions are implemented
 * r8 != 0 - msr instructions are not implemented
 */
        mfs     r1, rmsr
        msrclr  r8, 0           /* clear nothing - just read msr for test */
        cmpu    r8, r8, r1      /* r1 must contain msr reg content */
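/*
 * If msrclr is implemented it just returned the MSR contents, so r8
 * matches r1 and the cmpu result in r8 is zero; if not, r8 held the PC
 * instead and the result is non-zero, giving the encoding noted above.
 */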

/* r7 may point to an FDT, or there may be one linked in.
   If it's in r7, we've got to save it away ASAP.
   We ensure r7 points to a valid FDT, just in case the bootloader
   is broken or non-existent. */
        beqi    r7, no_fdt_arg                  /* NULL pointer? don't copy */
/* Does r7 point to a valid FDT? Load HEADER magic number */
        /* Run time Big/Little endian platform */
        /* Save 1 as word and load byte - 0 - BIG, 1 - LITTLE */
        lbui    r11, r0, TOPHYS(endian_check)
        beqid   r11, big_endian /* DO NOT break delay stop dependency */
        lw      r11, r0, r7     /* Big endian load in delay slot */
        lwr     r11, r0, r7     /* Little endian load */
big_endian:
        rsubi   r11, r11, OF_DT_HEADER  /* Check FDT header */
        beqi    r11, _prepare_copy_fdt
        or      r7, r0, r0      /* clear R7 when not valid DTB */
        bnei    r11, no_fdt_arg /* No - get out of here */
_prepare_copy_fdt:
        or      r11, r0, r0             /* clear the copy index */
        ori     r4, r0, TOPHYS(_fdt_start)
        ori     r3, r0, (0x8000 - 4)
_copy_fdt:
        lw      r12, r7, r11    /* r12 = *(r7 + r11) */
        sw      r12, r4, r11    /* *(r4 + r11) = r12 */
        addik   r11, r11, 4     /* advance the copy index */
        bgtid   r3, _copy_fdt   /* loop for all entries */
        addik   r3, r3, -4      /* decrement loop counter (delay slot) */
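/*
 * The loop above copies 0x8000 bytes (32 kB) of the device tree blob,
 * word by word, from the bootloader-supplied pointer in r7 into the
 * kernel's own _fdt_start buffer, with the byte counter in r3
 * decremented in the branch delay slot.
 */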
no_fdt_arg:

#ifdef CONFIG_MMU

#ifndef CONFIG_CMDLINE_BOOL
/*
 * Handle the command line: copy it to __init_end, where space is
 * reserved for storing it.
 */
        or      r6, r0, r0              /* clear the copy index */
        ori     r4, r0, __init_end      /* load address of command line */
        tophys(r4,r4)                   /* convert to phys address */
        ori     r3, r0, COMMAND_LINE_SIZE - 1   /* number of loops */
_copy_command_line:
        lbu     r2, r5, r6      /* r2 = *(r5 + r6) - r5 holds the command line pointer */
        sb      r2, r4, r6      /* *(r4 + r6) = r2 */
        addik   r6, r6, 1       /* advance the copy index */
        bgtid   r3, _copy_command_line  /* loop for all entries */
        addik   r3, r3, -1      /* decrement loop counter (delay slot) */
        addik   r5, r4, 0       /* r5 now points to the copied command line */
        tovirt(r5,r5)
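/*
 * With the copy done, r5 is redirected to the (virtual address of the)
 * buffer at __init_end, so later code no longer depends on the memory
 * the bootloader used for the original command line.
 */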
#endif /* CONFIG_CMDLINE_BOOL */

#ifdef NOT_COMPILE
/* save bram context */
        or      r6, r0, r0              /* clear the copy index */
        ori     r4, r0, TOPHYS(_bram_load_start)        /* save bram context */
        ori     r3, r0, (LMB_SIZE - 4)
_copy_bram:
        lw      r7, r0, r6      /* r7 = *(r0 + r6) */
        sw      r7, r4, r6      /* *(r4 + r6) = r7 */
        addik   r6, r6, 4       /* advance the copy index */
        bgtid   r3, _copy_bram  /* loop for all entries */
        addik   r3, r3, -4      /* decrement loop counter (delay slot) */
#endif
        /* We have to turn on the MMU right away. */

        /*
         * Set up the initial MMU state so we can do the first level of
         * kernel initialization. This maps the first 16 MBytes of memory 1:1
         * virtual to physical.
         */
        nop
        addik   r3, r0, MICROBLAZE_TLB_SIZE - 1 /* Invalidate all TLB entries */
_invalidate:
        mts     rtlbx, r3
        mts     rtlbhi, r0      /* flush: ensure V is clear */
        bgtid   r3, _invalidate /* loop for all entries */
        addik   r3, r3, -1
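        /*
         * The loop above has stepped through TLB indices
         * MICROBLAZE_TLB_SIZE - 1 down to 0, selecting each entry via
         * rtlbx and writing zero to its tag, so every entry ends up
         * with the valid bit clear.
         */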
        /* sync */

        /* Setup the kernel PID */
        mts     rpid,r0         /* Load the kernel PID */
        nop
        bri     4

        /*
         * We should still be executing code at physical address area
         * RAM_BASEADDR at this point. However, kernel code is at
         * a virtual address. So, set up a TLB mapping to cover this once
         * translation is enabled.
         */

        addik   r3, r0, CONFIG_KERNEL_START     /* Load the kernel virtual address */
        tophys(r4,r3)                           /* Load the kernel physical address */

        /* start to do TLB calculation */
        addik   r12, r0, _end                   /* r12 = end of the kernel image */
        rsub    r12, r3, r12                    /* r12 = image size (_end - CONFIG_KERNEL_START) */
        addik   r12, r12, CONFIG_KERNEL_PAD     /* add the configured pad on top */

        or      r9, r0, r0                      /* TLB0 = 0 */
        or      r10, r0, r0                     /* TLB1 = 0 */
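        /*
         * Pick the TLB0 (r9) and TLB1 (r10) page sizes so that the two
         * pinned entries cover the image size computed in r12. Only 1 MB,
         * 4 MB and 16 MB pages are used; reading the branches below, the
         * selection works out roughly as:
         *
         *      image size      TLB0    TLB1
         *        <  2 MB        1 MB   0 or 1 MB
         *       2 -  4 MB       0      4 MB
         *       4 -  8 MB       4 MB   1 or 4 MB
         *       8 - 16 MB       0      16 MB
         *      16 - 20 MB      16 MB   1 or 4 MB
         *        >= 20 MB      16 MB   16 MB
         *
         * e.g. an ~3 MB image ends up with a single 4 MB entry in TLB1,
         * and an ~18 MB image with 16 MB in TLB0 plus 4 MB in TLB1.
         */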

        addik   r11, r12, -0x1000000
        bgei    r11, GT16                       /* size is greater than 16MB */
        addik   r11, r12, -0x0800000
        bgei    r11, GT8                        /* size is greater than 8MB */
        addik   r11, r12, -0x0400000
        bgei    r11, GT4                        /* size is greater than 4MB */
        /* size is less than 4MB */
        addik   r11, r12, -0x0200000
        bgei    r11, GT2                        /* size is greater than 2MB */
        addik   r9, r0, 0x0100000               /* TLB0 must be 1MB */
        addik   r11, r12, -0x0100000
        bgei    r11, GT1                        /* size is greater than 1MB */
        /* TLB1 is 0 which is setup above */
        bri     tlb_end
GT4: /* r11 contains the rest - will be either 1 or 4 */
        ori     r9, r0, 0x400000                /* TLB0 is 4MB */
        bri     TLB1
GT16: /* TLB0 is 16MB */
        addik   r9, r0, 0x1000000               /* means TLB0 is 16MB */
TLB1:
        /* use r2 here - r11 must be preserved in case the subtraction goes negative */
        addik   r2, r11, -0x0400000
        bgei    r2, GT20                        /* size is greater than 20MB */
        /* size is >16MB and <20MB */
        addik   r11, r11, -0x0100000
        bgei    r11, GT17                       /* size is greater than 17MB */
        /* kernel is >16MB and <17MB */
GT1:
        addik   r10, r0, 0x0100000              /* means TLB1 is 1MB */
        bri     tlb_end
GT2:    /* TLB0 is 0 and TLB1 will be 4MB */
GT17:   /* TLB1 is 4MB - kernel size <20MB */
        addik   r10, r0, 0x0400000              /* means TLB1 is 4MB */
        bri     tlb_end
GT8:    /* TLB0 is still zero, so only TLB1 can be used */
GT20:   /* TLB1 is 16MB - kernel size >20MB */
        addik   r10, r0, 0x1000000              /* means TLB1 is 16MB */
tlb_end:

        /*
         * Configure and load two entries into TLB slots 0 and 1.
         * In case we are pinning TLBs, these are reserved by the
         * other TLB functions. If not reserving, then it doesn't
         * matter where they are loaded.
         */
        andi    r4,r4,0xfffffc00        /* Mask off the real page number */
        ori     r4,r4,(TLB_WR | TLB_EX) /* Set the write and execute bits */
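        /*
         * For each pinned entry below, rtlblo gets the physical base from
         * r4 together with the WR/EX permission bits, while rtlbhi gets
         * the virtual base from r3 together with the page-size field and
         * TLB_VALID.
         */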

        /* TLB0 may have a zero size, in which case we skip programming it */
        beqi    r9, jump_over

        /* compute the size field of the TLB tag for the TLB0 size in r9 */
        ori     r30, r0, 0x200
        andi    r29, r9, 0x100000
        bneid   r29, 1f
        addik   r30, r30, 0x80
        andi    r29, r9, 0x400000
        bneid   r29, 1f
        addik   r30, r30, 0x80
        andi    r29, r9, 0x1000000
        bneid   r29, 1f
        addik   r30, r30, 0x80
1:
        ori     r11, r30, 0             /* r11 = size field for TLB0 */
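        /*
         * The addik in each delay slot above executes even when the branch
         * is taken, so r30 ends up as 0x280, 0x300 or 0x380 for a 1 MB,
         * 4 MB or 16 MB TLB0 mapping - values which should correspond to
         * TLB_PAGESZ(PAGESZ_1M/4M/16M) in the tag register layout.
         */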

        andi    r3,r3,0xfffffc00        /* Mask off the effective page number */
        ori     r3,r3,(TLB_VALID)
        or      r3, r3, r11

        mts     rtlbx,r0                /* TLB slot 0 */

        mts     rtlblo,r4               /* Load the data portion of the entry */
        mts     rtlbhi,r3               /* Load the tag portion of the entry */

jump_over:
        /* TLB1 may have a zero size, in which case we skip programming it */
        beqi    r10, jump_over2

        /* same size-field computation as above, this time for TLB1 (r10) */
        ori     r30, r0, 0x200
        andi    r29, r10, 0x100000
        bneid   r29, 1f
        addik   r30, r30, 0x80
        andi    r29, r10, 0x400000
        bneid   r29, 1f
        addik   r30, r30, 0x80
        andi    r29, r10, 0x1000000
        bneid   r29, 1f
        addik   r30, r30, 0x80
1:
        ori     r12, r30, 0             /* r12 = size field for TLB1 */

        addk    r4, r4, r9              /* previous addr + TLB0 size */
        addk    r3, r3, r9

        andi    r3,r3,0xfffffc00        /* Mask off the effective page number */
        ori     r3,r3,(TLB_VALID)
        or      r3, r3, r12

        ori     r6,r0,1                 /* TLB slot 1 */
        mts     rtlbx,r6

        mts     rtlblo,r4               /* Load the data portion of the entry */
        mts     rtlbhi,r3               /* Load the tag portion of the entry */

jump_over2:
        /*
         * Load a TLB entry for LMB, since we need access to
         * the exception vectors, using a 4k real==virtual mapping.
         */
        ori     r6,r0,3                 /* TLB slot 3 */
        mts     rtlbx,r6

        ori     r4,r0,(TLB_WR | TLB_EX)
        ori     r3,r0,(TLB_VALID | TLB_PAGESZ(PAGESZ_4K))

        mts     rtlblo,r4               /* Load the data portion of the entry */
        mts     rtlbhi,r3               /* Load the tag portion of the entry */
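        /*
         * This 4 kB identity mapping keeps the LMB/BRAM vector area at
         * address 0 reachable while the kernel runs in virtual mode; it is
         * invalidated again in kernel_load_context below once the kernel's
         * own exception vectors are in use.
         */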

        /*
         * We now have the lower 16 Meg of RAM mapped into TLB entries, and the
         * caches ready to work.
         */
turn_on_mmu:
        ori     r15,r0,start_here
        ori     r4,r0,MSR_KERNEL_VMS
        mts     rmsr,r4
        nop
        rted    r15,0                   /* enables MMU */
        nop
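        /*
         * MSR_KERNEL_VMS has the VM-save bit set, and rted copies the
         * UMS/VMS shadow bits into UM/VM as it returns, so translation is
         * switched on exactly as control reaches start_here, which is a
         * kernel virtual address.
         */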

start_here:
#endif /* CONFIG_MMU */

        /* Initialize small data anchors */
        addik   r13, r0, _KERNEL_SDA_BASE_
        addik   r2, r0, _KERNEL_SDA2_BASE_

        /* Initialize stack pointer */
        addik   r1, r0, init_thread_union + THREAD_SIZE - 4

        /* Initialize r31 with current task address */
        addik   r31, r0, init_task

        /*
         * Call the platform-dependent init function.
         * Please see $(ARCH)/mach-$(SUBARCH)/setup.c for
         * the function.
         */
        addik   r11, r0, machine_early_init
        brald   r15, r11
        nop

#ifndef CONFIG_MMU
        addik   r15, r0, machine_halt
        braid   start_kernel
        nop
#else
        /*
         * Initialize the MMU.
         */
        bralid  r15, mmu_init
        nop

        /* Go back to running unmapped so we can load up new values
         * and change to using our exception vectors.
         * On the MicroBlaze, all we have to do is invalidate the used
         * TLB entries to clear the old 16M byte TLB mappings.
         */
        ori     r15,r0,TOPHYS(kernel_load_context)
        ori     r4,r0,MSR_KERNEL
        mts     rmsr,r4
        nop
        bri     4
        rted    r15,0
        nop
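        /*
         * This mirrors turn_on_mmu above: MSR_KERNEL leaves the VM-save bit
         * clear, so the rted drops back to real (unmapped) mode as it jumps
         * to the physical address of kernel_load_context.
         */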

        /* Load up the kernel context */
kernel_load_context:
        # Keep entry 0 and 1 valid. Entry 3 mapped to LMB can go away.
        ori     r5,r0,3
        mts     rtlbx,r5
        nop
        mts     rtlbhi,r0
        nop
        addi    r15, r0, machine_halt
        ori     r17, r0, start_kernel
        ori     r4, r0, MSR_KERNEL_VMS
        mts     rmsr, r4
        nop
        rted    r17, 0                  /* enable MMU and jump to start_kernel */
        nop
#endif /* CONFIG_MMU */