x86: fix early_ioremap() on 64-bit
[deliverable/linux.git] arch/x86/mm/init_64.c
/*
 *  linux/arch/x86_64/mm/init.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 *  Copyright (C) 2002,2003 Andi Kleen <ak@suse.de>
 */

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/proc_fs.h>
#include <linux/pci.h>
#include <linux/pfn.h>
#include <linux/poison.h>
#include <linux/dma-mapping.h>
#include <linux/module.h>
#include <linux/memory_hotplug.h>
#include <linux/nmi.h>

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/dma.h>
#include <asm/fixmap.h>
#include <asm/e820.h>
#include <asm/apic.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/smp.h>
#include <asm/sections.h>
#include <asm/kdebug.h>
#include <asm/numa.h>

#ifndef Dprintk
#define Dprintk(x...)
#endif

const struct dma_mapping_ops *dma_ops;
EXPORT_SYMBOL(dma_ops);

static unsigned long dma_reserve __initdata;

DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);

/*
 * NOTE: pagetable_init allocates all the fixmap pagetables contiguously
 * in physical space, so we can cache the location of the first one and
 * move around without checking the pgd every time.
 */

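/*
 * Print a summary of memory usage (free areas, plus per-page counts of
 * reserved, shared and swap-cached pages) for every online node.
 */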
void show_mem(void)
{
        long i, total = 0, reserved = 0;
        long shared = 0, cached = 0;
        pg_data_t *pgdat;
        struct page *page;

        printk(KERN_INFO "Mem-info:\n");
        show_free_areas();
        printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));

        for_each_online_pgdat(pgdat) {
                for (i = 0; i < pgdat->node_spanned_pages; ++i) {
                        /* this loop can take a while with 256 GB and
                           4k pages, so update the NMI watchdog */
                        if (unlikely(i % MAX_ORDER_NR_PAGES == 0))
                                touch_nmi_watchdog();

                        if (!pfn_valid(pgdat->node_start_pfn + i))
                                continue;
                        page = pfn_to_page(pgdat->node_start_pfn + i);
                        total++;
                        if (PageReserved(page))
                                reserved++;
                        else if (PageSwapCache(page))
                                cached++;
                        else if (page_count(page))
                                shared += page_count(page) - 1;
                }
        }
        printk(KERN_INFO "%ld pages of RAM\n", total);
        printk(KERN_INFO "%ld reserved pages\n", reserved);
        printk(KERN_INFO "%ld pages shared\n", shared);
        printk(KERN_INFO "%ld pages swap cached\n", cached);
}

int after_bootmem;

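/*
 * Allocate one zeroed page for a kernel page table: from the bootmem
 * allocator during early boot, from the page allocator afterwards.
 */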
static __init void *spp_getpage(void)
{
        void *ptr;

        if (after_bootmem)
                ptr = (void *) get_zeroed_page(GFP_ATOMIC);
        else
                ptr = alloc_bootmem_pages(PAGE_SIZE);
        if (!ptr || ((unsigned long)ptr & ~PAGE_MASK))
                panic("set_pte_phys: cannot allocate page data %s\n",
                        after_bootmem ? "after bootmem" : "");

        Dprintk("spp_getpage %p\n", ptr);
        return ptr;
}

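/*
 * Install a single kernel PTE mapping vaddr to phys with the given
 * protection, allocating intermediate page-table levels as needed.
 * Used to set up the fixmap entries.
 */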
static __init void set_pte_phys(unsigned long vaddr,
                                unsigned long phys, pgprot_t prot)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte, new_pte;

        Dprintk("set_pte_phys %lx to %lx\n", vaddr, phys);

        pgd = pgd_offset_k(vaddr);
        if (pgd_none(*pgd)) {
                printk(KERN_ERR "PGD FIXMAP MISSING, it should be setup in head.S!\n");
                return;
        }
        pud = pud_offset(pgd, vaddr);
        if (pud_none(*pud)) {
                pmd = (pmd_t *) spp_getpage();
                set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
                if (pmd != pmd_offset(pud, 0)) {
                        printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n",
                                pmd, pmd_offset(pud, 0));
                        return;
                }
        }
        pmd = pmd_offset(pud, vaddr);
        if (pmd_none(*pmd)) {
                pte = (pte_t *) spp_getpage();
                set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
                if (pte != pte_offset_kernel(pmd, 0)) {
                        printk(KERN_ERR "PAGETABLE BUG #02!\n");
                        return;
                }
        }
        new_pte = pfn_pte(phys >> PAGE_SHIFT, prot);

        pte = pte_offset_kernel(pmd, vaddr);
        if (!pte_none(*pte) &&
            pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask))
                pte_ERROR(*pte);
        set_pte(pte, new_pte);

        /*
         * It's enough to flush this one mapping.
         * (PGE mappings get flushed as well)
         */
        __flush_tlb_one(vaddr);
}

/* NOTE: this is meant to be run only at boot */
void __init
__set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
{
        unsigned long address = __fix_to_virt(idx);

        if (idx >= __end_of_fixed_addresses) {
                printk(KERN_ERR "Invalid __set_fixmap\n");
                return;
        }
        set_pte_phys(address, phys, prot);
}

static unsigned long __initdata table_start;
static unsigned long __meminitdata table_end;

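/*
 * Allocate a zeroed page for the early direct-mapping page tables.
 * Before bootmem is up this hands out pages from the range reserved by
 * find_early_table_space(), mapping them temporarily with early_ioremap().
 */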
static __meminit void *alloc_low_page(unsigned long *phys)
{
        unsigned long pfn = table_end++;
        void *adr;

        if (after_bootmem) {
                adr = (void *)get_zeroed_page(GFP_ATOMIC);
                *phys = __pa(adr);
                return adr;
        }

        if (pfn >= end_pfn)
                panic("alloc_low_page: ran out of memory");

        adr = early_ioremap(pfn * PAGE_SIZE, PAGE_SIZE);
        memset(adr, 0, PAGE_SIZE);
        *phys = pfn * PAGE_SIZE;
        return adr;
}

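/* Undo the temporary mapping created by alloc_low_page(). */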
static __meminit void unmap_low_page(void *adr)
{
        if (after_bootmem)
                return;

        early_iounmap(adr, PAGE_SIZE);
}

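/*
 * Map [addr, addr + size) with temporary 2MB PMDs in the kernel-image
 * mapping (level2_kernel_pgt), so early code can touch physical memory
 * before the direct mapping exists.  The pmds calculation below rounds
 * up to whole 2MB units: e.g. an addr offset of 0x1ff000 with size
 * 0x2000 straddles a 2MB boundary and therefore needs two PMDs.
 */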
/* Must run before zap_low_mappings */
__meminit void *early_ioremap(unsigned long addr, unsigned long size)
{
        unsigned long vaddr;
        pmd_t *pmd, *last_pmd;
        int i, pmds;

        pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
        vaddr = __START_KERNEL_map;
        pmd = level2_kernel_pgt;
        last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1;
        for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) {
                for (i = 0; i < pmds; i++) {
                        if (pmd_present(pmd[i]))
                                goto next;
                }
                vaddr += addr & ~PMD_MASK;
                addr &= PMD_MASK;
                for (i = 0; i < pmds; i++, addr += PMD_SIZE)
                        set_pmd(pmd+i, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
                __flush_tlb_all();
                return (void *)vaddr;
        next:
                ;
        }
        printk(KERN_ERR "early_ioremap(0x%lx, %lu) failed\n", addr, size);
        return NULL;
}

/* To avoid virtual aliases later */
__meminit void early_iounmap(void *addr, unsigned long size)
{
        unsigned long vaddr;
        pmd_t *pmd;
        int i, pmds;

        vaddr = (unsigned long)addr;
        pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
        pmd = level2_kernel_pgt + pmd_index(vaddr);
        for (i = 0; i < pmds; i++)
                pmd_clear(pmd + i);
        __flush_tlb_all();
}

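/*
 * Create 2MB page-size kernel mappings in a PMD page for the physical
 * range [address, end).  Entries that are already populated are left
 * untouched.
 */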
static void __meminit
phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
{
        int i = pmd_index(address);

        for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
                unsigned long entry;
                pmd_t *pmd = pmd_page + pmd_index(address);

                if (address >= end) {
                        if (!after_bootmem)
                                for (; i < PTRS_PER_PMD; i++, pmd++)
                                        set_pmd(pmd, __pmd(0));
                        break;
                }

                if (pmd_val(*pmd))
                        continue;

                entry = __PAGE_KERNEL_LARGE|_PAGE_GLOBAL|address;
                entry &= __supported_pte_mask;
                set_pmd(pmd, __pmd(entry));
        }
}

static void __meminit
phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
{
        pmd_t *pmd = pmd_offset(pud, 0);

        spin_lock(&init_mm.page_table_lock);
        phys_pmd_init(pmd, address, end);
        spin_unlock(&init_mm.page_table_lock);
        __flush_tlb_all();
}

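/*
 * Fill a PUD page with PMD tables covering the physical range [addr, end),
 * allocating new PMD pages as needed and skipping holes that the e820 map
 * reports as unmapped.
 */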
static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
{
        int i = pud_index(addr);

        for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE) {
                unsigned long pmd_phys;
                pud_t *pud = pud_page + pud_index(addr);
                pmd_t *pmd;

                if (addr >= end)
                        break;

                if (!after_bootmem && !e820_any_mapped(addr, addr+PUD_SIZE, 0)) {
                        set_pud(pud, __pud(0));
                        continue;
                }

                if (pud_val(*pud)) {
                        phys_pmd_update(pud, addr, end);
                        continue;
                }

                pmd = alloc_low_page(&pmd_phys);
                spin_lock(&init_mm.page_table_lock);
                set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
                phys_pmd_init(pmd, addr, end);
                spin_unlock(&init_mm.page_table_lock);
                unmap_low_page(pmd);
        }
        __flush_tlb_all();
}

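/*
 * Estimate how much memory the kernel direct-mapping page tables will
 * need for memory up to 'end' and reserve a suitable region for them in
 * the e820 map, starting at table_start.
 */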
static void __init find_early_table_space(unsigned long end)
{
        unsigned long puds, pmds, tables, start;

        puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
        pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
        tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) +
                 round_up(pmds * sizeof(pmd_t), PAGE_SIZE);

        /*
         * RED-PEN putting page tables only on node 0 could
         * cause a hotspot and fill up ZONE_DMA. The page tables
         * need roughly 0.5KB per GB.
         */
        start = 0x8000;
        table_start = find_e820_area(start, end, tables);
        if (table_start == -1UL)
                panic("Cannot find space for the kernel page tables");

        table_start >>= PAGE_SHIFT;
        table_end = table_start;

        early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n",
                end, table_start << PAGE_SHIFT,
                (table_start << PAGE_SHIFT) + tables);
}

/*
 * Set up the direct mapping of the physical memory at PAGE_OFFSET.
 * This runs before bootmem is initialized and gets pages directly from
 * the physical memory. To access them they are temporarily mapped.
 */
void __init_refok init_memory_mapping(unsigned long start, unsigned long end)
{
        unsigned long next;

        Dprintk("init_memory_mapping\n");

        /*
         * Find space for the kernel direct mapping tables.
         * Later we should allocate these tables in the local node of the
         * memory mapped. Unfortunately this is done currently before the
         * nodes are discovered.
         */
        if (!after_bootmem)
                find_early_table_space(end);

        start = (unsigned long)__va(start);
        end = (unsigned long)__va(end);

        for (; start < end; start = next) {
                unsigned long pud_phys;
                pgd_t *pgd = pgd_offset_k(start);
                pud_t *pud;

                if (after_bootmem)
                        pud = pud_offset(pgd, start & PGDIR_MASK);
                else
                        pud = alloc_low_page(&pud_phys);

                next = start + PGDIR_SIZE;
                if (next > end)
                        next = end;
                phys_pud_init(pud, __pa(start), __pa(next));
                if (!after_bootmem)
                        set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
                unmap_low_page(pud);
        }

        if (!after_bootmem)
                mmu_cr4_features = read_cr4();
        __flush_tlb_all();

        reserve_early(table_start << PAGE_SHIFT, table_end << PAGE_SHIFT);
}

#ifndef CONFIG_NUMA
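/*
 * Set up the zone size limits and the sparse memmap for the non-NUMA
 * case, then hand the page ranges to the core mm.
 */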
void __init paging_init(void)
{
        unsigned long max_zone_pfns[MAX_NR_ZONES];

        memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
        max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
        max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
        max_zone_pfns[ZONE_NORMAL] = end_pfn;

        memory_present(0, 0, end_pfn);
        sparse_init();
        free_area_init_nodes(max_zone_pfns);
}
#endif

/*
 * Unmap a kernel mapping if it exists. This is useful to avoid
 * prefetches from the CPU leading to inconsistent cache lines.
 * address and size must be aligned to 2MB boundaries.
 * Does nothing when the mapping doesn't exist.
 */
void __init clear_kernel_mapping(unsigned long address, unsigned long size)
{
        unsigned long end = address + size;

        BUG_ON(address & ~LARGE_PAGE_MASK);
        BUG_ON(size & ~LARGE_PAGE_MASK);

        for (; address < end; address += LARGE_PAGE_SIZE) {
                pgd_t *pgd = pgd_offset_k(address);
                pud_t *pud;
                pmd_t *pmd;

                if (pgd_none(*pgd))
                        continue;
                pud = pud_offset(pgd, address);
                if (pud_none(*pud))
                        continue;
                pmd = pmd_offset(pud, address);
                if (!pmd || pmd_none(*pmd))
                        continue;
                if (!(pmd_val(*pmd) & _PAGE_PSE)) {
                        /* Could handle this, but it should not happen currently. */
                        printk(KERN_ERR "clear_kernel_mapping: mapping has been split. will leak memory\n");
                        pmd_ERROR(*pmd);
                }
                set_pmd(pmd, __pmd(0));
        }
        __flush_tlb_all();
}

/*
 * Memory hotplug specific functions
 */
void online_page(struct page *page)
{
        ClearPageReserved(page);
        init_page_count(page);
        __free_page(page);
        totalram_pages++;
        num_physpages++;
}

#ifdef CONFIG_MEMORY_HOTPLUG
/*
 * Memory is always added to the NORMAL zone. This means you will never
 * get additional DMA/DMA32 memory.
 */
int arch_add_memory(int nid, u64 start, u64 size)
{
        struct pglist_data *pgdat = NODE_DATA(nid);
        struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
        unsigned long start_pfn = start >> PAGE_SHIFT;
        unsigned long nr_pages = size >> PAGE_SHIFT;
        int ret;

        init_memory_mapping(start, start + size - 1);

        ret = __add_pages(zone, start_pfn, nr_pages);
        if (ret)
                goto error;

        return ret;
error:
        printk(KERN_ERR "%s: Problem encountered in __add_pages!\n", __func__);
        return ret;
}
EXPORT_SYMBOL_GPL(arch_add_memory);

#if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA)
int memory_add_physaddr_to_nid(u64 start)
{
        return 0;
}
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif

#endif /* CONFIG_MEMORY_HOTPLUG */

static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
                         kcore_vsyscall;

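/*
 * Late memory setup: release bootmem into the page allocator, register
 * the /proc/kcore regions and print the memory banner.
 */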
void __init mem_init(void)
{
        long codesize, reservedpages, datasize, initsize;

        pci_iommu_alloc();

        /* clear_bss() already cleared the empty_zero_page */

        /* temporary debugging - double check it's true: */
        {
                int i;

                for (i = 0; i < 1024; i++)
                        WARN_ON_ONCE(empty_zero_page[i]);
        }

        reservedpages = 0;

        /* this will put all low memory onto the freelists */
#ifdef CONFIG_NUMA
        totalram_pages = numa_free_all_bootmem();
#else
        totalram_pages = free_all_bootmem();
#endif
        reservedpages = end_pfn - totalram_pages -
                                absent_pages_in_range(0, end_pfn);

        after_bootmem = 1;

        codesize = (unsigned long) &_etext - (unsigned long) &_text;
        datasize = (unsigned long) &_edata - (unsigned long) &_etext;
        initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;

        /* Register memory areas for /proc/kcore */
        kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
        kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
                   VMALLOC_END-VMALLOC_START);
        kclist_add(&kcore_kernel, &_stext, _end - _stext);
        kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN);
        kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
                   VSYSCALL_END - VSYSCALL_START);

        printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n",
                (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
                end_pfn << (PAGE_SHIFT-10),
                codesize >> 10,
                reservedpages << (PAGE_SHIFT-10),
                datasize >> 10,
                initsize >> 10);
}

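/*
 * Poison and free the pages in [begin, end) back to the page allocator.
 * Used for the init sections and the initrd.
 */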
void free_init_pages(char *what, unsigned long begin, unsigned long end)
{
        unsigned long addr;

        if (begin >= end)
                return;

        printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
        for (addr = begin; addr < end; addr += PAGE_SIZE) {
                ClearPageReserved(virt_to_page(addr));
                init_page_count(virt_to_page(addr));
                memset((void *)(addr & ~(PAGE_SIZE-1)),
                        POISON_FREE_INITMEM, PAGE_SIZE);
                if (addr >= __START_KERNEL_map)
                        change_page_attr_addr(addr, 1, __pgprot(0));
                free_page(addr);
                totalram_pages++;
        }
        if (addr > __START_KERNEL_map)
                global_flush_tlb();
}

void free_initmem(void)
{
        free_init_pages("unused kernel memory",
                        (unsigned long)(&__init_begin),
                        (unsigned long)(&__init_end));
}

#ifdef CONFIG_DEBUG_RODATA

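/*
 * Write-protect the kernel read-only data (and, when SMP alternatives no
 * longer need to be patched and kprobes is off, the text as well) by
 * remapping it with PAGE_KERNEL_RO.
 */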
void mark_rodata_ro(void)
{
        unsigned long start = (unsigned long)_stext, end;

#ifdef CONFIG_HOTPLUG_CPU
        /* It must still be possible to apply SMP alternatives. */
        if (num_possible_cpus() > 1)
                start = (unsigned long)_etext;
#endif

#ifdef CONFIG_KPROBES
        start = (unsigned long)__start_rodata;
#endif

        end = (unsigned long)__end_rodata;
        start = (start + PAGE_SIZE - 1) & PAGE_MASK;
        end &= PAGE_MASK;
        if (end <= start)
                return;

        change_page_attr_addr(start, (end - start) >> PAGE_SHIFT, PAGE_KERNEL_RO);

        printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
               (end - start) >> 10);

        /*
         * change_page_attr_addr() requires a global_flush_tlb() call after it.
         * We do this after the printk so that if something went wrong in the
         * change, the printk gets out at least to give a better debug hint
         * of who is the culprit.
         */
        global_flush_tlb();

#ifdef CONFIG_CPA_DEBUG
        printk(KERN_INFO "Testing CPA: undo %lx-%lx\n", start, end);
        change_page_attr_addr(start, (end - start) >> PAGE_SHIFT, PAGE_KERNEL);
        global_flush_tlb();

        printk(KERN_INFO "Testing CPA: again\n");
        change_page_attr_addr(start, (end - start) >> PAGE_SHIFT, PAGE_KERNEL_RO);
        global_flush_tlb();
#endif
}
#endif

#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
        free_init_pages("initrd memory", start, end);
}
#endif

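/*
 * Reserve a physical range in the bootmem allocator (per node on NUMA),
 * tracking how much of ZONE_DMA is reserved so the core mm can account
 * for it.
 */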
void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
{
#ifdef CONFIG_NUMA
        int nid = phys_to_nid(phys);
#endif
        unsigned long pfn = phys >> PAGE_SHIFT;

        if (pfn >= end_pfn) {
                /* This can happen with kdump kernels when accessing
                   firmware tables. */
                if (pfn < end_pfn_map)
                        return;
                printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n",
                                phys, len);
                return;
        }

        /* Should check here against the e820 map to avoid double free */
#ifdef CONFIG_NUMA
        reserve_bootmem_node(NODE_DATA(nid), phys, len);
#else
        reserve_bootmem(phys, len);
#endif
        if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
                dma_reserve += len / PAGE_SIZE;
                set_dma_reserve(dma_reserve);
        }
}

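/*
 * Return whether a kernel virtual address is backed by a valid page:
 * check that the address is canonical, then walk the kernel page tables
 * down to the PTE (or large-PMD) level.
 */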
int kern_addr_valid(unsigned long addr)
{
        unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;

        if (above != 0 && above != -1UL)
                return 0;

        pgd = pgd_offset_k(addr);
        if (pgd_none(*pgd))
                return 0;

        pud = pud_offset(pgd, addr);
        if (pud_none(*pud))
                return 0;

        pmd = pmd_offset(pud, addr);
        if (pmd_none(*pmd))
                return 0;
        if (pmd_large(*pmd))
                return pfn_valid(pmd_pfn(*pmd));

        pte = pte_offset_kernel(pmd, addr);
        if (pte_none(*pte))
                return 0;
        return pfn_valid(pte_pfn(*pte));
}

/*
 * A pseudo VMA to allow ptrace access for the vsyscall page. This only
 * covers the 64-bit vsyscall page now. 32-bit has a real VMA now and does
 * not need special handling anymore.
 */
static struct vm_area_struct gate_vma = {
        .vm_start       = VSYSCALL_START,
        .vm_end         = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES << PAGE_SHIFT),
        .vm_page_prot   = PAGE_READONLY_EXEC,
        .vm_flags       = VM_READ | VM_EXEC
};

struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
{
#ifdef CONFIG_IA32_EMULATION
        if (test_tsk_thread_flag(tsk, TIF_IA32))
                return NULL;
#endif
        return &gate_vma;
}

int in_gate_area(struct task_struct *task, unsigned long addr)
{
        struct vm_area_struct *vma = get_gate_vma(task);

        if (!vma)
                return 0;
        return (addr >= vma->vm_start) && (addr < vma->vm_end);
}

/*
 * Use this when you have no reliable task/vma, typically from interrupt
 * context. It is less reliable than using the task's vma and may give
 * false positives.
 */
int in_gate_area_no_task(unsigned long addr)
{
        return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
}

const char *arch_vma_name(struct vm_area_struct *vma)
{
        if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
                return "[vdso]";
        if (vma == &gate_vma)
                return "[vsyscall]";
        return NULL;
}

#ifdef CONFIG_SPARSEMEM_VMEMMAP
/*
 * Initialise the sparsemem vmemmap using huge-pages at the PMD level.
 */
int __meminit vmemmap_populate(struct page *start_page,
                               unsigned long size, int node)
{
        unsigned long addr = (unsigned long)start_page;
        unsigned long end = (unsigned long)(start_page + size);
        unsigned long next;
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;

        for (; addr < end; addr = next) {
                next = pmd_addr_end(addr, end);

                pgd = vmemmap_pgd_populate(addr, node);
                if (!pgd)
                        return -ENOMEM;
                pud = vmemmap_pud_populate(pgd, addr, node);
                if (!pud)
                        return -ENOMEM;

                pmd = pmd_offset(pud, addr);
                if (pmd_none(*pmd)) {
                        pte_t entry;
                        void *p = vmemmap_alloc_block(PMD_SIZE, node);

                        if (!p)
                                return -ENOMEM;

                        entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL_LARGE);
                        set_pmd(pmd, __pmd(pte_val(entry)));

                        printk(KERN_DEBUG " [%lx-%lx] PMD ->%p on node %d\n",
                                addr, addr + PMD_SIZE - 1, p, node);
                } else
                        vmemmap_verify((pte_t *)pmd, node, addr, next);
        }

        return 0;
}
#endif