ARM: Don't allow highmem on SMP platforms without h/w TLB ops broadcast
[deliverable/linux.git] / arch / arm / mm / mmu.c
1 /*
2 * linux/arch/arm/mm/mmu.c
3 *
4 * Copyright (C) 1995-2005 Russell King
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10 #include <linux/module.h>
11 #include <linux/kernel.h>
12 #include <linux/errno.h>
13 #include <linux/init.h>
14 #include <linux/bootmem.h>
15 #include <linux/mman.h>
16 #include <linux/nodemask.h>
17
18 #include <asm/cputype.h>
19 #include <asm/mach-types.h>
20 #include <asm/sections.h>
21 #include <asm/cachetype.h>
22 #include <asm/setup.h>
23 #include <asm/sizes.h>
24 #include <asm/smp_plat.h>
25 #include <asm/tlb.h>
26 #include <asm/highmem.h>
27
28 #include <asm/mach/arch.h>
29 #include <asm/mach/map.h>
30
31 #include "mm.h"
32
33 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
34
35 /*
36 * empty_zero_page is a special page that is used for
37 * zero-initialized data and COW.
38 */
39 struct page *empty_zero_page;
40 EXPORT_SYMBOL(empty_zero_page);
41
42 /*
43 * The pmd table for the upper-most set of pages.
44 */
45 pmd_t *top_pmd;
46
47 #define CPOLICY_UNCACHED 0
48 #define CPOLICY_BUFFERED 1
49 #define CPOLICY_WRITETHROUGH 2
50 #define CPOLICY_WRITEBACK 3
51 #define CPOLICY_WRITEALLOC 4
52
53 static unsigned int cachepolicy __initdata = CPOLICY_WRITEBACK;
54 static unsigned int ecc_mask __initdata = 0;
55 pgprot_t pgprot_user;
56 pgprot_t pgprot_kernel;
57
58 EXPORT_SYMBOL(pgprot_user);
59 EXPORT_SYMBOL(pgprot_kernel);
60
61 struct cachepolicy {
62 const char policy[16];
63 unsigned int cr_mask;
64 unsigned int pmd;
65 unsigned int pte;
66 };
67
68 static struct cachepolicy cache_policies[] __initdata = {
69 {
70 .policy = "uncached",
71 .cr_mask = CR_W|CR_C,
72 .pmd = PMD_SECT_UNCACHED,
73 .pte = L_PTE_MT_UNCACHED,
74 }, {
75 .policy = "buffered",
76 .cr_mask = CR_C,
77 .pmd = PMD_SECT_BUFFERED,
78 .pte = L_PTE_MT_BUFFERABLE,
79 }, {
80 .policy = "writethrough",
81 .cr_mask = 0,
82 .pmd = PMD_SECT_WT,
83 .pte = L_PTE_MT_WRITETHROUGH,
84 }, {
85 .policy = "writeback",
86 .cr_mask = 0,
87 .pmd = PMD_SECT_WB,
88 .pte = L_PTE_MT_WRITEBACK,
89 }, {
90 .policy = "writealloc",
91 .cr_mask = 0,
92 .pmd = PMD_SECT_WBWA,
93 .pte = L_PTE_MT_WRITEALLOC,
94 }
95 };
96
97 /*
98 * These are useful for identifying cache coherency
99 * problems by allowing the cache or the cache and
100 * writebuffer to be turned off. (Note: the write
101 * buffer should not be on and the cache off).
102 */
103 static void __init early_cachepolicy(char **p)
104 {
105 int i;
106
107 for (i = 0; i < ARRAY_SIZE(cache_policies); i++) {
108 int len = strlen(cache_policies[i].policy);
109
110 if (memcmp(*p, cache_policies[i].policy, len) == 0) {
111 cachepolicy = i;
112 cr_alignment &= ~cache_policies[i].cr_mask;
113 cr_no_alignment &= ~cache_policies[i].cr_mask;
114 *p += len;
115 break;
116 }
117 }
118 if (i == ARRAY_SIZE(cache_policies))
119 printk(KERN_ERR "ERROR: unknown or unsupported cache policy\n");
120 if (cpu_architecture() >= CPU_ARCH_ARMv6) {
121 printk(KERN_WARNING "Only cachepolicy=writeback supported on ARMv6 and later\n");
122 cachepolicy = CPOLICY_WRITEBACK;
123 }
124 flush_cache_all();
125 set_cr(cr_alignment);
126 }
127 __early_param("cachepolicy=", early_cachepolicy);
128
129 static void __init early_nocache(char **__unused)
130 {
131 char *p = "buffered";
132 printk(KERN_WARNING "nocache is deprecated; use cachepolicy=%s\n", p);
133 early_cachepolicy(&p);
134 }
135 __early_param("nocache", early_nocache);
136
137 static void __init early_nowrite(char **__unused)
138 {
139 char *p = "uncached";
140 printk(KERN_WARNING "nowb is deprecated; use cachepolicy=%s\n", p);
141 early_cachepolicy(&p);
142 }
143 __early_param("nowb", early_nowrite);
144
145 static void __init early_ecc(char **p)
146 {
147 if (memcmp(*p, "on", 2) == 0) {
148 ecc_mask = PMD_PROTECTION;
149 *p += 2;
150 } else if (memcmp(*p, "off", 3) == 0) {
151 ecc_mask = 0;
152 *p += 3;
153 }
154 }
155 __early_param("ecc=", early_ecc);
156
157 static int __init noalign_setup(char *__unused)
158 {
159 cr_alignment &= ~CR_A;
160 cr_no_alignment &= ~CR_A;
161 set_cr(cr_alignment);
162 return 1;
163 }
164 __setup("noalign", noalign_setup);
165
166 #ifndef CONFIG_SMP
167 void adjust_cr(unsigned long mask, unsigned long set)
168 {
169 unsigned long flags;
170
171 mask &= ~CR_A;
172
173 set &= mask;
174
175 local_irq_save(flags);
176
177 cr_no_alignment = (cr_no_alignment & ~mask) | set;
178 cr_alignment = (cr_alignment & ~mask) | set;
179
180 set_cr((get_cr() & ~mask) | set);
181
182 local_irq_restore(flags);
183 }
184 #endif
185
/*
 * Base protection bits shared by all device mappings.  The expansions
 * are fully parenthesized so they stay a single operand when combined
 * with operators that bind tighter than '|' (for example '&' or '~').
 */
#define PROT_PTE_DEVICE		(L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | L_PTE_WRITE)
#define PROT_SECT_DEVICE	(PMD_TYPE_SECT | PMD_SECT_AP_WRITE)
188
189 static struct mem_type mem_types[] = {
190 [MT_DEVICE] = { /* Strongly ordered / ARMv6 shared device */
191 .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_SHARED |
192 L_PTE_SHARED,
193 .prot_l1 = PMD_TYPE_TABLE,
194 .prot_sect = PROT_SECT_DEVICE | PMD_SECT_S,
195 .domain = DOMAIN_IO,
196 },
197 [MT_DEVICE_NONSHARED] = { /* ARMv6 non-shared device */
198 .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_NONSHARED,
199 .prot_l1 = PMD_TYPE_TABLE,
200 .prot_sect = PROT_SECT_DEVICE,
201 .domain = DOMAIN_IO,
202 },
203 [MT_DEVICE_CACHED] = { /* ioremap_cached */
204 .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_CACHED,
205 .prot_l1 = PMD_TYPE_TABLE,
206 .prot_sect = PROT_SECT_DEVICE | PMD_SECT_WB,
207 .domain = DOMAIN_IO,
208 },
209 [MT_DEVICE_WC] = { /* ioremap_wc */
210 .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_WC,
211 .prot_l1 = PMD_TYPE_TABLE,
212 .prot_sect = PROT_SECT_DEVICE,
213 .domain = DOMAIN_IO,
214 },
215 [MT_UNCACHED] = {
216 .prot_pte = PROT_PTE_DEVICE,
217 .prot_l1 = PMD_TYPE_TABLE,
218 .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
219 .domain = DOMAIN_IO,
220 },
221 [MT_CACHECLEAN] = {
222 .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
223 .domain = DOMAIN_KERNEL,
224 },
225 [MT_MINICLEAN] = {
226 .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN | PMD_SECT_MINICACHE,
227 .domain = DOMAIN_KERNEL,
228 },
229 [MT_LOW_VECTORS] = {
230 .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
231 L_PTE_EXEC,
232 .prot_l1 = PMD_TYPE_TABLE,
233 .domain = DOMAIN_USER,
234 },
235 [MT_HIGH_VECTORS] = {
236 .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
237 L_PTE_USER | L_PTE_EXEC,
238 .prot_l1 = PMD_TYPE_TABLE,
239 .domain = DOMAIN_USER,
240 },
241 [MT_MEMORY] = {
242 .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
243 .domain = DOMAIN_KERNEL,
244 },
245 [MT_ROM] = {
246 .prot_sect = PMD_TYPE_SECT,
247 .domain = DOMAIN_KERNEL,
248 },
249 [MT_MEMORY_NONCACHED] = {
250 .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
251 .domain = DOMAIN_KERNEL,
252 },
253 };
254
255 const struct mem_type *get_mem_type(unsigned int type)
256 {
257 return type < ARRAY_SIZE(mem_types) ? &mem_types[type] : NULL;
258 }
259 EXPORT_SYMBOL(get_mem_type);
260
261 /*
262 * Adjust the PMD section entries according to the CPU in use.
263 */
264 static void __init build_mem_type_table(void)
265 {
266 struct cachepolicy *cp;
267 unsigned int cr = get_cr();
268 unsigned int user_pgprot, kern_pgprot, vecs_pgprot;
269 int cpu_arch = cpu_architecture();
270 int i;
271
272 if (cpu_arch < CPU_ARCH_ARMv6) {
273 #if defined(CONFIG_CPU_DCACHE_DISABLE)
274 if (cachepolicy > CPOLICY_BUFFERED)
275 cachepolicy = CPOLICY_BUFFERED;
276 #elif defined(CONFIG_CPU_DCACHE_WRITETHROUGH)
277 if (cachepolicy > CPOLICY_WRITETHROUGH)
278 cachepolicy = CPOLICY_WRITETHROUGH;
279 #endif
280 }
281 if (cpu_arch < CPU_ARCH_ARMv5) {
282 if (cachepolicy >= CPOLICY_WRITEALLOC)
283 cachepolicy = CPOLICY_WRITEBACK;
284 ecc_mask = 0;
285 }
286 #ifdef CONFIG_SMP
287 cachepolicy = CPOLICY_WRITEALLOC;
288 #endif
289
290 /*
291 * Strip out features not present on earlier architectures.
292 * Pre-ARMv5 CPUs don't have TEX bits. Pre-ARMv6 CPUs or those
293 * without extended page tables don't have the 'Shared' bit.
294 */
295 if (cpu_arch < CPU_ARCH_ARMv5)
296 for (i = 0; i < ARRAY_SIZE(mem_types); i++)
297 mem_types[i].prot_sect &= ~PMD_SECT_TEX(7);
298 if ((cpu_arch < CPU_ARCH_ARMv6 || !(cr & CR_XP)) && !cpu_is_xsc3())
299 for (i = 0; i < ARRAY_SIZE(mem_types); i++)
300 mem_types[i].prot_sect &= ~PMD_SECT_S;
301
302 /*
303 * ARMv5 and lower, bit 4 must be set for page tables (was: cache
304 * "update-able on write" bit on ARM610). However, Xscale and
305 * Xscale3 require this bit to be cleared.
306 */
307 if (cpu_is_xscale() || cpu_is_xsc3()) {
308 for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
309 mem_types[i].prot_sect &= ~PMD_BIT4;
310 mem_types[i].prot_l1 &= ~PMD_BIT4;
311 }
312 } else if (cpu_arch < CPU_ARCH_ARMv6) {
313 for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
314 if (mem_types[i].prot_l1)
315 mem_types[i].prot_l1 |= PMD_BIT4;
316 if (mem_types[i].prot_sect)
317 mem_types[i].prot_sect |= PMD_BIT4;
318 }
319 }
320
321 /*
322 * Mark the device areas according to the CPU/architecture.
323 */
324 if (cpu_is_xsc3() || (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP))) {
325 if (!cpu_is_xsc3()) {
326 /*
327 * Mark device regions on ARMv6+ as execute-never
328 * to prevent speculative instruction fetches.
329 */
330 mem_types[MT_DEVICE].prot_sect |= PMD_SECT_XN;
331 mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_XN;
332 mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_XN;
333 mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_XN;
334 }
335 if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) {
336 /*
337 * For ARMv7 with TEX remapping,
338 * - shared device is SXCB=1100
339 * - nonshared device is SXCB=0100
340 * - write combine device mem is SXCB=0001
341 * (Uncached Normal memory)
342 */
343 mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1);
344 mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(1);
345 mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE;
346 } else if (cpu_is_xsc3()) {
347 /*
348 * For Xscale3,
349 * - shared device is TEXCB=00101
350 * - nonshared device is TEXCB=01000
351 * - write combine device mem is TEXCB=00100
352 * (Inner/Outer Uncacheable in xsc3 parlance)
353 */
354 mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1) | PMD_SECT_BUFFERED;
355 mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2);
356 mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1);
357 } else {
358 /*
359 * For ARMv6 and ARMv7 without TEX remapping,
360 * - shared device is TEXCB=00001
361 * - nonshared device is TEXCB=01000
362 * - write combine device mem is TEXCB=00100
363 * (Uncached Normal in ARMv6 parlance).
364 */
365 mem_types[MT_DEVICE].prot_sect |= PMD_SECT_BUFFERED;
366 mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2);
367 mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1);
368 }
369 } else {
370 /*
371 * On others, write combining is "Uncached/Buffered"
372 */
373 mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE;
374 }
375
376 /*
377 * Now deal with the memory-type mappings
378 */
379 cp = &cache_policies[cachepolicy];
380 vecs_pgprot = kern_pgprot = user_pgprot = cp->pte;
381
382 #ifndef CONFIG_SMP
383 /*
384 * Only use write-through for non-SMP systems
385 */
386 if (cpu_arch >= CPU_ARCH_ARMv5 && cachepolicy > CPOLICY_WRITETHROUGH)
387 vecs_pgprot = cache_policies[CPOLICY_WRITETHROUGH].pte;
388 #endif
389
390 /*
391 * Enable CPU-specific coherency if supported.
392 * (Only available on XSC3 at the moment.)
393 */
394 if (arch_is_coherent() && cpu_is_xsc3())
395 mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
396
397 /*
398 * ARMv6 and above have extended page tables.
399 */
400 if (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP)) {
401 /*
402 * Mark cache clean areas and XIP ROM read only
403 * from SVC mode and no access from userspace.
404 */
405 mem_types[MT_ROM].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
406 mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
407 mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
408
409 #ifdef CONFIG_SMP
410 /*
411 * Mark memory with the "shared" attribute for SMP systems
412 */
413 user_pgprot |= L_PTE_SHARED;
414 kern_pgprot |= L_PTE_SHARED;
415 vecs_pgprot |= L_PTE_SHARED;
416 mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
417 mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S;
418 #endif
419 }
420
421 /*
422 * Non-cacheable Normal - intended for memory areas that must
423 * not cause dirty cache line writebacks when used
424 */
425 if (cpu_arch >= CPU_ARCH_ARMv6) {
426 if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) {
427 /* Non-cacheable Normal is XCB = 001 */
428 mem_types[MT_MEMORY_NONCACHED].prot_sect |=
429 PMD_SECT_BUFFERED;
430 } else {
431 /* For both ARMv6 and non-TEX-remapping ARMv7 */
432 mem_types[MT_MEMORY_NONCACHED].prot_sect |=
433 PMD_SECT_TEX(1);
434 }
435 } else {
436 mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_BUFFERABLE;
437 }
438
439 for (i = 0; i < 16; i++) {
440 unsigned long v = pgprot_val(protection_map[i]);
441 protection_map[i] = __pgprot(v | user_pgprot);
442 }
443
444 mem_types[MT_LOW_VECTORS].prot_pte |= vecs_pgprot;
445 mem_types[MT_HIGH_VECTORS].prot_pte |= vecs_pgprot;
446
447 pgprot_user = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot);
448 pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |
449 L_PTE_DIRTY | L_PTE_WRITE |
450 L_PTE_EXEC | kern_pgprot);
451
452 mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask;
453 mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask;
454 mem_types[MT_MEMORY].prot_sect |= ecc_mask | cp->pmd;
455 mem_types[MT_ROM].prot_sect |= cp->pmd;
456
457 switch (cp->pmd) {
458 case PMD_SECT_WT:
459 mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WT;
460 break;
461 case PMD_SECT_WB:
462 case PMD_SECT_WBWA:
463 mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WB;
464 break;
465 }
466 printk("Memory policy: ECC %sabled, Data cache %s\n",
467 ecc_mask ? "en" : "dis", cp->policy);
468
469 for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
470 struct mem_type *t = &mem_types[i];
471 if (t->prot_l1)
472 t->prot_l1 |= PMD_DOMAIN(t->domain);
473 if (t->prot_sect)
474 t->prot_sect |= PMD_DOMAIN(t->domain);
475 }
476 }
477
/*
 * Virtual address of the exception vectors: 0xffff0000 when
 * vectors_high() is true, 0 otherwise.
 */
#define vectors_base()	(vectors_high() ? 0xffff0000 : 0)
479
480 static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
481 unsigned long end, unsigned long pfn,
482 const struct mem_type *type)
483 {
484 pte_t *pte;
485
486 if (pmd_none(*pmd)) {
487 pte = alloc_bootmem_low_pages(2 * PTRS_PER_PTE * sizeof(pte_t));
488 __pmd_populate(pmd, __pa(pte) | type->prot_l1);
489 }
490
491 pte = pte_offset_kernel(pmd, addr);
492 do {
493 set_pte_ext(pte, pfn_pte(pfn, __pgprot(type->prot_pte)), 0);
494 pfn++;
495 } while (pte++, addr += PAGE_SIZE, addr != end);
496 }
497
498 static void __init alloc_init_section(pgd_t *pgd, unsigned long addr,
499 unsigned long end, unsigned long phys,
500 const struct mem_type *type)
501 {
502 pmd_t *pmd = pmd_offset(pgd, addr);
503
504 /*
505 * Try a section mapping - end, addr and phys must all be aligned
506 * to a section boundary. Note that PMDs refer to the individual
507 * L1 entries, whereas PGDs refer to a group of L1 entries making
508 * up one logical pointer to an L2 table.
509 */
510 if (((addr | end | phys) & ~SECTION_MASK) == 0) {
511 pmd_t *p = pmd;
512
513 if (addr & SECTION_SIZE)
514 pmd++;
515
516 do {
517 *pmd = __pmd(phys | type->prot_sect);
518 phys += SECTION_SIZE;
519 } while (pmd++, addr += SECTION_SIZE, addr != end);
520
521 flush_pmd_entry(p);
522 } else {
523 /*
524 * No need to loop; pte's aren't interested in the
525 * individual L1 entries.
526 */
527 alloc_init_pte(pmd, addr, end, __phys_to_pfn(phys), type);
528 }
529 }
530
531 static void __init create_36bit_mapping(struct map_desc *md,
532 const struct mem_type *type)
533 {
534 unsigned long phys, addr, length, end;
535 pgd_t *pgd;
536
537 addr = md->virtual;
538 phys = (unsigned long)__pfn_to_phys(md->pfn);
539 length = PAGE_ALIGN(md->length);
540
541 if (!(cpu_architecture() >= CPU_ARCH_ARMv6 || cpu_is_xsc3())) {
542 printk(KERN_ERR "MM: CPU does not support supersection "
543 "mapping for 0x%08llx at 0x%08lx\n",
544 __pfn_to_phys((u64)md->pfn), addr);
545 return;
546 }
547
548 /* N.B. ARMv6 supersections are only defined to work with domain 0.
549 * Since domain assignments can in fact be arbitrary, the
550 * 'domain == 0' check below is required to insure that ARMv6
551 * supersections are only allocated for domain 0 regardless
552 * of the actual domain assignments in use.
553 */
554 if (type->domain) {
555 printk(KERN_ERR "MM: invalid domain in supersection "
556 "mapping for 0x%08llx at 0x%08lx\n",
557 __pfn_to_phys((u64)md->pfn), addr);
558 return;
559 }
560
561 if ((addr | length | __pfn_to_phys(md->pfn)) & ~SUPERSECTION_MASK) {
562 printk(KERN_ERR "MM: cannot create mapping for "
563 "0x%08llx at 0x%08lx invalid alignment\n",
564 __pfn_to_phys((u64)md->pfn), addr);
565 return;
566 }
567
568 /*
569 * Shift bits [35:32] of address into bits [23:20] of PMD
570 * (See ARMv6 spec).
571 */
572 phys |= (((md->pfn >> (32 - PAGE_SHIFT)) & 0xF) << 20);
573
574 pgd = pgd_offset_k(addr);
575 end = addr + length;
576 do {
577 pmd_t *pmd = pmd_offset(pgd, addr);
578 int i;
579
580 for (i = 0; i < 16; i++)
581 *pmd++ = __pmd(phys | type->prot_sect | PMD_SECT_SUPER);
582
583 addr += SUPERSECTION_SIZE;
584 phys += SUPERSECTION_SIZE;
585 pgd += SUPERSECTION_SIZE >> PGDIR_SHIFT;
586 } while (addr != end);
587 }
588
589 /*
590 * Create the page directory entries and any necessary
591 * page tables for the mapping specified by `md'. We
592 * are able to cope here with varying sizes and address
593 * offsets, and we take full advantage of sections and
594 * supersections.
595 */
596 void __init create_mapping(struct map_desc *md)
597 {
598 unsigned long phys, addr, length, end;
599 const struct mem_type *type;
600 pgd_t *pgd;
601
602 if (md->virtual != vectors_base() && md->virtual < TASK_SIZE) {
603 printk(KERN_WARNING "BUG: not creating mapping for "
604 "0x%08llx at 0x%08lx in user region\n",
605 __pfn_to_phys((u64)md->pfn), md->virtual);
606 return;
607 }
608
609 if ((md->type == MT_DEVICE || md->type == MT_ROM) &&
610 md->virtual >= PAGE_OFFSET && md->virtual < VMALLOC_END) {
611 printk(KERN_WARNING "BUG: mapping for 0x%08llx at 0x%08lx "
612 "overlaps vmalloc space\n",
613 __pfn_to_phys((u64)md->pfn), md->virtual);
614 }
615
616 type = &mem_types[md->type];
617
618 /*
619 * Catch 36-bit addresses
620 */
621 if (md->pfn >= 0x100000) {
622 create_36bit_mapping(md, type);
623 return;
624 }
625
626 addr = md->virtual & PAGE_MASK;
627 phys = (unsigned long)__pfn_to_phys(md->pfn);
628 length = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK));
629
630 if (type->prot_l1 == 0 && ((addr | phys | length) & ~SECTION_MASK)) {
631 printk(KERN_WARNING "BUG: map for 0x%08lx at 0x%08lx can not "
632 "be mapped using pages, ignoring.\n",
633 __pfn_to_phys(md->pfn), addr);
634 return;
635 }
636
637 pgd = pgd_offset_k(addr);
638 end = addr + length;
639 do {
640 unsigned long next = pgd_addr_end(addr, end);
641
642 alloc_init_section(pgd, addr, next, phys, type);
643
644 phys += next - addr;
645 addr = next;
646 } while (pgd++, addr != end);
647 }
648
649 /*
650 * Create the architecture specific mappings
651 */
652 void __init iotable_init(struct map_desc *io_desc, int nr)
653 {
654 int i;
655
656 for (i = 0; i < nr; i++)
657 create_mapping(io_desc + i);
658 }
659
660 static unsigned long __initdata vmalloc_reserve = SZ_128M;
661
662 /*
663 * vmalloc=size forces the vmalloc area to be exactly 'size'
664 * bytes. This can be used to increase (or decrease) the vmalloc
665 * area - the default is 128m.
666 */
667 static void __init early_vmalloc(char **arg)
668 {
669 vmalloc_reserve = memparse(*arg, arg);
670
671 if (vmalloc_reserve < SZ_16M) {
672 vmalloc_reserve = SZ_16M;
673 printk(KERN_WARNING
674 "vmalloc area too small, limiting to %luMB\n",
675 vmalloc_reserve >> 20);
676 }
677
678 if (vmalloc_reserve > VMALLOC_END - (PAGE_OFFSET + SZ_32M)) {
679 vmalloc_reserve = VMALLOC_END - (PAGE_OFFSET + SZ_32M);
680 printk(KERN_WARNING
681 "vmalloc area is too big, limiting to %luMB\n",
682 vmalloc_reserve >> 20);
683 }
684 }
685 __early_param("vmalloc=", early_vmalloc);
686
/*
 * Lowest virtual address of the vmalloc area, as a void pointer.  The
 * whole expansion is parenthesized so the cast cannot be split apart
 * by a neighbouring postfix operator at the point of use.
 */
#define VMALLOC_MIN	((void *)(VMALLOC_END - vmalloc_reserve))
688
689 static void __init sanity_check_meminfo(void)
690 {
691 int i, j, highmem = 0;
692
693 for (i = 0, j = 0; i < meminfo.nr_banks; i++) {
694 struct membank *bank = &meminfo.bank[j];
695 *bank = meminfo.bank[i];
696
697 #ifdef CONFIG_HIGHMEM
698 if (__va(bank->start) > VMALLOC_MIN ||
699 __va(bank->start) < (void *)PAGE_OFFSET)
700 highmem = 1;
701
702 bank->highmem = highmem;
703
704 /*
705 * Split those memory banks which are partially overlapping
706 * the vmalloc area greatly simplifying things later.
707 */
708 if (__va(bank->start) < VMALLOC_MIN &&
709 bank->size > VMALLOC_MIN - __va(bank->start)) {
710 if (meminfo.nr_banks >= NR_BANKS) {
711 printk(KERN_CRIT "NR_BANKS too low, "
712 "ignoring high memory\n");
713 } else {
714 memmove(bank + 1, bank,
715 (meminfo.nr_banks - i) * sizeof(*bank));
716 meminfo.nr_banks++;
717 i++;
718 bank[1].size -= VMALLOC_MIN - __va(bank->start);
719 bank[1].start = __pa(VMALLOC_MIN - 1) + 1;
720 bank[1].highmem = highmem = 1;
721 j++;
722 }
723 bank->size = VMALLOC_MIN - __va(bank->start);
724 }
725 #else
726 bank->highmem = highmem;
727
728 /*
729 * Check whether this memory bank would entirely overlap
730 * the vmalloc area.
731 */
732 if (__va(bank->start) >= VMALLOC_MIN ||
733 __va(bank->start) < (void *)PAGE_OFFSET) {
734 printk(KERN_NOTICE "Ignoring RAM at %.8lx-%.8lx "
735 "(vmalloc region overlap).\n",
736 bank->start, bank->start + bank->size - 1);
737 continue;
738 }
739
740 /*
741 * Check whether this memory bank would partially overlap
742 * the vmalloc area.
743 */
744 if (__va(bank->start + bank->size) > VMALLOC_MIN ||
745 __va(bank->start + bank->size) < __va(bank->start)) {
746 unsigned long newsize = VMALLOC_MIN - __va(bank->start);
747 printk(KERN_NOTICE "Truncating RAM at %.8lx-%.8lx "
748 "to -%.8lx (vmalloc region overlap).\n",
749 bank->start, bank->start + bank->size - 1,
750 bank->start + newsize - 1);
751 bank->size = newsize;
752 }
753 #endif
754 j++;
755 }
756 #ifdef CONFIG_HIGHMEM
757 if (highmem) {
758 const char *reason = NULL;
759
760 if (cache_is_vipt_aliasing()) {
761 /*
762 * Interactions between kmap and other mappings
763 * make highmem support with aliasing VIPT caches
764 * rather difficult.
765 */
766 reason = "with VIPT aliasing cache";
767 #ifdef CONFIG_SMP
768 } else if (tlb_ops_need_broadcast()) {
769 /*
770 * kmap_high needs to occasionally flush TLB entries,
771 * however, if the TLB entries need to be broadcast
772 * we may deadlock:
773 * kmap_high(irqs off)->flush_all_zero_pkmaps->
774 * flush_tlb_kernel_range->smp_call_function_many
775 * (must not be called with irqs off)
776 */
777 reason = "without hardware TLB ops broadcasting";
778 #endif
779 }
780 if (reason) {
781 printk(KERN_CRIT "HIGHMEM is not supported %s, ignoring high memory\n",
782 reason);
783 while (j > 0 && meminfo.bank[j - 1].highmem)
784 j--;
785 }
786 }
787 #endif
788 meminfo.nr_banks = j;
789 }
790
791 static inline void prepare_page_table(void)
792 {
793 unsigned long addr;
794
795 /*
796 * Clear out all the mappings below the kernel image.
797 */
798 for (addr = 0; addr < MODULES_VADDR; addr += PGDIR_SIZE)
799 pmd_clear(pmd_off_k(addr));
800
801 #ifdef CONFIG_XIP_KERNEL
802 /* The XIP kernel is mapped in the module area -- skip over it */
803 addr = ((unsigned long)_etext + PGDIR_SIZE - 1) & PGDIR_MASK;
804 #endif
805 for ( ; addr < PAGE_OFFSET; addr += PGDIR_SIZE)
806 pmd_clear(pmd_off_k(addr));
807
808 /*
809 * Clear out all the kernel space mappings, except for the first
810 * memory bank, up to the end of the vmalloc region.
811 */
812 for (addr = __phys_to_virt(bank_phys_end(&meminfo.bank[0]));
813 addr < VMALLOC_END; addr += PGDIR_SIZE)
814 pmd_clear(pmd_off_k(addr));
815 }
816
817 /*
818 * Reserve the various regions of node 0
819 */
820 void __init reserve_node_zero(pg_data_t *pgdat)
821 {
822 unsigned long res_size = 0;
823
824 /*
825 * Register the kernel text and data with bootmem.
826 * Note that this can only be in node 0.
827 */
828 #ifdef CONFIG_XIP_KERNEL
829 reserve_bootmem_node(pgdat, __pa(_data), _end - _data,
830 BOOTMEM_DEFAULT);
831 #else
832 reserve_bootmem_node(pgdat, __pa(_stext), _end - _stext,
833 BOOTMEM_DEFAULT);
834 #endif
835
836 /*
837 * Reserve the page tables. These are already in use,
838 * and can only be in node 0.
839 */
840 reserve_bootmem_node(pgdat, __pa(swapper_pg_dir),
841 PTRS_PER_PGD * sizeof(pgd_t), BOOTMEM_DEFAULT);
842
843 /*
844 * Hmm... This should go elsewhere, but we really really need to
845 * stop things allocating the low memory; ideally we need a better
846 * implementation of GFP_DMA which does not assume that DMA-able
847 * memory starts at zero.
848 */
849 if (machine_is_integrator() || machine_is_cintegrator())
850 res_size = __pa(swapper_pg_dir) - PHYS_OFFSET;
851
852 /*
853 * These should likewise go elsewhere. They pre-reserve the
854 * screen memory region at the start of main system memory.
855 */
856 if (machine_is_edb7211())
857 res_size = 0x00020000;
858 if (machine_is_p720t())
859 res_size = 0x00014000;
860
861 /* H1940 and RX3715 need to reserve this for suspend */
862
863 if (machine_is_h1940() || machine_is_rx3715()) {
864 reserve_bootmem_node(pgdat, 0x30003000, 0x1000,
865 BOOTMEM_DEFAULT);
866 reserve_bootmem_node(pgdat, 0x30081000, 0x1000,
867 BOOTMEM_DEFAULT);
868 }
869
870 if (machine_is_palmld() || machine_is_palmtx()) {
871 reserve_bootmem_node(pgdat, 0xa0000000, 0x1000,
872 BOOTMEM_EXCLUSIVE);
873 reserve_bootmem_node(pgdat, 0xa0200000, 0x1000,
874 BOOTMEM_EXCLUSIVE);
875 }
876
877 if (machine_is_treo680()) {
878 reserve_bootmem_node(pgdat, 0xa0000000, 0x1000,
879 BOOTMEM_EXCLUSIVE);
880 reserve_bootmem_node(pgdat, 0xa2000000, 0x1000,
881 BOOTMEM_EXCLUSIVE);
882 }
883
884 if (machine_is_palmt5())
885 reserve_bootmem_node(pgdat, 0xa0200000, 0x1000,
886 BOOTMEM_EXCLUSIVE);
887
888 /*
889 * U300 - This platform family can share physical memory
890 * between two ARM cpus, one running Linux and the other
891 * running another OS.
892 */
893 if (machine_is_u300()) {
894 #ifdef CONFIG_MACH_U300_SINGLE_RAM
895 #if ((CONFIG_MACH_U300_ACCESS_MEM_SIZE & 1) == 1) && \
896 CONFIG_MACH_U300_2MB_ALIGNMENT_FIX
897 res_size = 0x00100000;
898 #endif
899 #endif
900 }
901
902 #ifdef CONFIG_SA1111
903 /*
904 * Because of the SA1111 DMA bug, we want to preserve our
905 * precious DMA-able memory...
906 */
907 res_size = __pa(swapper_pg_dir) - PHYS_OFFSET;
908 #endif
909 if (res_size)
910 reserve_bootmem_node(pgdat, PHYS_OFFSET, res_size,
911 BOOTMEM_DEFAULT);
912 }
913
914 /*
915 * Set up device the mappings. Since we clear out the page tables for all
916 * mappings above VMALLOC_END, we will remove any debug device mappings.
917 * This means you have to be careful how you debug this function, or any
918 * called function. This means you can't use any function or debugging
919 * method which may touch any device, otherwise the kernel _will_ crash.
920 */
921 static void __init devicemaps_init(struct machine_desc *mdesc)
922 {
923 struct map_desc map;
924 unsigned long addr;
925 void *vectors;
926
927 /*
928 * Allocate the vector page early.
929 */
930 vectors = alloc_bootmem_low_pages(PAGE_SIZE);
931
932 for (addr = VMALLOC_END; addr; addr += PGDIR_SIZE)
933 pmd_clear(pmd_off_k(addr));
934
935 /*
936 * Map the kernel if it is XIP.
937 * It is always first in the modulearea.
938 */
939 #ifdef CONFIG_XIP_KERNEL
940 map.pfn = __phys_to_pfn(CONFIG_XIP_PHYS_ADDR & SECTION_MASK);
941 map.virtual = MODULES_VADDR;
942 map.length = ((unsigned long)_etext - map.virtual + ~SECTION_MASK) & SECTION_MASK;
943 map.type = MT_ROM;
944 create_mapping(&map);
945 #endif
946
947 /*
948 * Map the cache flushing regions.
949 */
950 #ifdef FLUSH_BASE
951 map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS);
952 map.virtual = FLUSH_BASE;
953 map.length = SZ_1M;
954 map.type = MT_CACHECLEAN;
955 create_mapping(&map);
956 #endif
957 #ifdef FLUSH_BASE_MINICACHE
958 map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS + SZ_1M);
959 map.virtual = FLUSH_BASE_MINICACHE;
960 map.length = SZ_1M;
961 map.type = MT_MINICLEAN;
962 create_mapping(&map);
963 #endif
964
965 /*
966 * Create a mapping for the machine vectors at the high-vectors
967 * location (0xffff0000). If we aren't using high-vectors, also
968 * create a mapping at the low-vectors virtual address.
969 */
970 map.pfn = __phys_to_pfn(virt_to_phys(vectors));
971 map.virtual = 0xffff0000;
972 map.length = PAGE_SIZE;
973 map.type = MT_HIGH_VECTORS;
974 create_mapping(&map);
975
976 if (!vectors_high()) {
977 map.virtual = 0;
978 map.type = MT_LOW_VECTORS;
979 create_mapping(&map);
980 }
981
982 /*
983 * Ask the machine support to map in the statically mapped devices.
984 */
985 if (mdesc->map_io)
986 mdesc->map_io();
987
988 /*
989 * Finally flush the caches and tlb to ensure that we're in a
990 * consistent state wrt the writebuffer. This also ensures that
991 * any write-allocated cache lines in the vector page are written
992 * back. After this point, we can start to touch devices again.
993 */
994 local_flush_tlb_all();
995 flush_cache_all();
996 }
997
998 static void __init kmap_init(void)
999 {
1000 #ifdef CONFIG_HIGHMEM
1001 pmd_t *pmd = pmd_off_k(PKMAP_BASE);
1002 pte_t *pte = alloc_bootmem_low_pages(2 * PTRS_PER_PTE * sizeof(pte_t));
1003 BUG_ON(!pmd_none(*pmd) || !pte);
1004 __pmd_populate(pmd, __pa(pte) | _PAGE_KERNEL_TABLE);
1005 pkmap_page_table = pte + PTRS_PER_PTE;
1006 #endif
1007 }
1008
1009 /*
1010 * paging_init() sets up the page tables, initialises the zone memory
1011 * maps, and sets up the zero page, bad page and bad page tables.
1012 */
1013 void __init paging_init(struct machine_desc *mdesc)
1014 {
1015 void *zero_page;
1016
1017 build_mem_type_table();
1018 sanity_check_meminfo();
1019 prepare_page_table();
1020 bootmem_init();
1021 devicemaps_init(mdesc);
1022 kmap_init();
1023
1024 top_pmd = pmd_off_k(0xffff0000);
1025
1026 /*
1027 * allocate the zero page. Note that this always succeeds and
1028 * returns a zeroed result.
1029 */
1030 zero_page = alloc_bootmem_low_pages(PAGE_SIZE);
1031 empty_zero_page = virt_to_page(zero_page);
1032 flush_dcache_page(empty_zero_page);
1033 }
1034
1035 /*
1036 * In order to soft-boot, we need to insert a 1:1 mapping in place of
1037 * the user-mode pages. This will then ensure that we have predictable
1038 * results when turning the mmu off
1039 */
1040 void setup_mm_for_reboot(char mode)
1041 {
1042 unsigned long base_pmdval;
1043 pgd_t *pgd;
1044 int i;
1045
1046 if (current->mm && current->mm->pgd)
1047 pgd = current->mm->pgd;
1048 else
1049 pgd = init_mm.pgd;
1050
1051 base_pmdval = PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | PMD_TYPE_SECT;
1052 if (cpu_architecture() <= CPU_ARCH_ARMv5TEJ && !cpu_is_xscale())
1053 base_pmdval |= PMD_BIT4;
1054
1055 for (i = 0; i < FIRST_USER_PGD_NR + USER_PTRS_PER_PGD; i++, pgd++) {
1056 unsigned long pmdval = (i << PGDIR_SHIFT) | base_pmdval;
1057 pmd_t *pmd;
1058
1059 pmd = pmd_off(pgd, i << PGDIR_SHIFT);
1060 pmd[0] = __pmd(pmdval);
1061 pmd[1] = __pmd(pmdval + (1 << (PGDIR_SHIFT - 1)));
1062 flush_pmd_entry(pmd);
1063 }
1064 }
This page took 0.078672 seconds and 5 git commands to generate.