/*
 * linux/arch/x86_64/mm/init.c
 *
 * Copyright (C) 1995  Linus Torvalds
 * Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 * Copyright (C) 2002,2003 Andi Kleen <ak@suse.de>
 */

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/proc_fs.h>
#include <linux/pci.h>
#include <linux/pfn.h>
#include <linux/poison.h>
#include <linux/dma-mapping.h>
#include <linux/module.h>
#include <linux/memory_hotplug.h>
#include <linux/nmi.h>

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/dma.h>
#include <asm/fixmap.h>
#include <asm/e820.h>
#include <asm/apic.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/smp.h>
#include <asm/sections.h>

#ifndef Dprintk
#define Dprintk(x...)
#endif

const struct dma_mapping_ops *dma_ops;
EXPORT_SYMBOL(dma_ops);

static unsigned long dma_reserve __initdata;

DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);

/*
 * NOTE: pagetable_init allocates all the fixmap pagetables contiguously
 * in physical space so we can cache the place of the first one and move
 * around without checking the pgd every time.
 */

void show_mem(void)
{
        long i, total = 0, reserved = 0;
        long shared = 0, cached = 0;
        pg_data_t *pgdat;
        struct page *page;

        printk(KERN_INFO "Mem-info:\n");
        show_free_areas();
        printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));

        for_each_online_pgdat(pgdat) {
                for (i = 0; i < pgdat->node_spanned_pages; ++i) {
                        /* this loop can take a while with 256 GB and 4k pages
                           so update the NMI watchdog */
                        if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) {
                                touch_nmi_watchdog();
                        }
                        page = pfn_to_page(pgdat->node_start_pfn + i);
                        total++;
                        if (PageReserved(page))
                                reserved++;
                        else if (PageSwapCache(page))
                                cached++;
                        else if (page_count(page))
                                shared += page_count(page) - 1;
                }
        }
        printk(KERN_INFO "%ld pages of RAM\n", total);
        printk(KERN_INFO "%ld reserved pages\n", reserved);
        printk(KERN_INFO "%ld pages shared\n", shared);
        printk(KERN_INFO "%ld pages swap cached\n", cached);
}

int after_bootmem;

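/*
 * Allocate a zeroed page for building kernel page tables: from the
 * bootmem allocator before mem_init() has run, from the page allocator
 * (GFP_ATOMIC) afterwards.
 */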
static __init void *spp_getpage(void)
{
        void *ptr;
        if (after_bootmem)
                ptr = (void *) get_zeroed_page(GFP_ATOMIC);
        else
                ptr = alloc_bootmem_pages(PAGE_SIZE);
        if (!ptr || ((unsigned long)ptr & ~PAGE_MASK))
                panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem ? "after bootmem" : "");

        Dprintk("spp_getpage %p\n", ptr);
        return ptr;
}

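/*
 * Install a single kernel PTE mapping vaddr to phys with the given
 * protection, allocating the intermediate pmd/pte tables as needed.
 * The pgd entry must already exist (it is set up in head.S).
 */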
static __init void set_pte_phys(unsigned long vaddr,
                                unsigned long phys, pgprot_t prot)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte, new_pte;

        Dprintk("set_pte_phys %lx to %lx\n", vaddr, phys);

        pgd = pgd_offset_k(vaddr);
        if (pgd_none(*pgd)) {
                printk("PGD FIXMAP MISSING, it should be setup in head.S!\n");
                return;
        }
        pud = pud_offset(pgd, vaddr);
        if (pud_none(*pud)) {
                pmd = (pmd_t *) spp_getpage();
                set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
                if (pmd != pmd_offset(pud, 0)) {
                        printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud, 0));
                        return;
                }
        }
        pmd = pmd_offset(pud, vaddr);
        if (pmd_none(*pmd)) {
                pte = (pte_t *) spp_getpage();
                set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
                if (pte != pte_offset_kernel(pmd, 0)) {
                        printk("PAGETABLE BUG #02!\n");
                        return;
                }
        }
        new_pte = pfn_pte(phys >> PAGE_SHIFT, prot);

        pte = pte_offset_kernel(pmd, vaddr);
        if (!pte_none(*pte) &&
            pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask))
                pte_ERROR(*pte);
        set_pte(pte, new_pte);

        /*
         * It's enough to flush this one mapping.
         * (PGE mappings get flushed as well)
         */
        __flush_tlb_one(vaddr);
}

/* NOTE: this is meant to be run only at boot */
void __init
__set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
{
        unsigned long address = __fix_to_virt(idx);

        if (idx >= __end_of_fixed_addresses) {
                printk("Invalid __set_fixmap\n");
                return;
        }
        set_pte_phys(address, phys, prot);
}

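/*
 * Callers normally go through the set_fixmap()/set_fixmap_nocache()
 * wrappers in <asm/fixmap.h>, which pass PAGE_KERNEL or
 * PAGE_KERNEL_NOCACHE here; the local APIC base, for instance, is
 * mapped this way at boot.
 */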
unsigned long __meminitdata table_start, table_end;

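/*
 * Grab a page to hold an intermediate page table while the direct
 * mapping is being built.  Before bootmem is up the page comes from the
 * table_start..table_end window reserved by find_early_table_space()
 * and is temporarily mapped with early_ioremap(); afterwards it is a
 * normal zeroed page.
 */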
static __meminit void *alloc_low_page(unsigned long *phys)
{
        unsigned long pfn = table_end++;
        void *adr;

        if (after_bootmem) {
                adr = (void *)get_zeroed_page(GFP_ATOMIC);
                *phys = __pa(adr);
                return adr;
        }

        if (pfn >= end_pfn)
                panic("alloc_low_page: ran out of memory");

        adr = early_ioremap(pfn * PAGE_SIZE, PAGE_SIZE);
        memset(adr, 0, PAGE_SIZE);
        *phys = pfn * PAGE_SIZE;
        return adr;
}

static __meminit void unmap_low_page(void *adr)
{
        if (after_bootmem)
                return;

        early_iounmap(adr, PAGE_SIZE);
}

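/*
 * Early, boot-time ioremap: borrows unused 2MB PSE slots in
 * level2_kernel_pgt (the kernel-text mapping) to map a physical range,
 * returning a virtual address inside __START_KERNEL_map.
 */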
/* Must run before zap_low_mappings */
__meminit void *early_ioremap(unsigned long addr, unsigned long size)
{
        unsigned long vaddr;
        pmd_t *pmd, *last_pmd;
        int i, pmds;

        pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
        vaddr = __START_KERNEL_map;
        pmd = level2_kernel_pgt;
        last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1;
        for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) {
                for (i = 0; i < pmds; i++) {
                        if (pmd_present(pmd[i]))
                                goto next;
                }
                vaddr += addr & ~PMD_MASK;
                addr &= PMD_MASK;
                for (i = 0; i < pmds; i++, addr += PMD_SIZE)
                        set_pmd(pmd + i, __pmd(addr | _KERNPG_TABLE | _PAGE_PSE));
                __flush_tlb();
                return (void *)vaddr;
        next:
                ;
        }
        printk("early_ioremap(0x%lx, %lu) failed\n", addr, size);
        return NULL;
}

/* To avoid virtual aliases later */
__meminit void early_iounmap(void *addr, unsigned long size)
{
        unsigned long vaddr;
        pmd_t *pmd;
        int i, pmds;

        vaddr = (unsigned long)addr;
        pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
        pmd = level2_kernel_pgt + pmd_index(vaddr);
        for (i = 0; i < pmds; i++)
                pmd_clear(pmd + i);
        __flush_tlb();
}

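/*
 * Fill one pmd page with 2MB PSE entries for the kernel direct mapping,
 * starting at the entry that covers 'address' and stopping at 'end'.
 * Entries that are already present are left untouched.
 */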
static void __meminit
phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
{
        int i = pmd_index(address);

        for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
                unsigned long entry;
                pmd_t *pmd = pmd_page + pmd_index(address);

                if (address >= end) {
                        if (!after_bootmem)
                                for (; i < PTRS_PER_PMD; i++, pmd++)
                                        set_pmd(pmd, __pmd(0));
                        break;
                }

                if (pmd_val(*pmd))
                        continue;

                entry = _PAGE_NX|_PAGE_PSE|_KERNPG_TABLE|_PAGE_GLOBAL|address;
                entry &= __supported_pte_mask;
                set_pmd(pmd, __pmd(entry));
        }
}

static void __meminit
phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
{
        pmd_t *pmd = pmd_offset(pud, 0);
        spin_lock(&init_mm.page_table_lock);
        phys_pmd_init(pmd, address, end);
        spin_unlock(&init_mm.page_table_lock);
        __flush_tlb_all();
}

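/*
 * Build the pud level of the direct mapping for [addr, end): allocate a
 * pmd page for every 1GB slot that the e820 map says contains memory,
 * or extend an already populated slot via phys_pmd_update() when memory
 * is hot-added.
 */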
static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
{
        int i = pud_index(addr);

        for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE) {
                unsigned long pmd_phys;
                pud_t *pud = pud_page + pud_index(addr);
                pmd_t *pmd;

                if (addr >= end)
                        break;

                if (!after_bootmem && !e820_any_mapped(addr, addr+PUD_SIZE, 0)) {
                        set_pud(pud, __pud(0));
                        continue;
                }

                if (pud_val(*pud)) {
                        phys_pmd_update(pud, addr, end);
                        continue;
                }

                pmd = alloc_low_page(&pmd_phys);
                spin_lock(&init_mm.page_table_lock);
                set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
                phys_pmd_init(pmd, addr, end);
                spin_unlock(&init_mm.page_table_lock);
                unmap_low_page(pmd);
        }
        __flush_tlb();
}

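/*
 * Estimate how much memory the boot-time page tables for the direct
 * mapping will need (one pud page per 512GB, one pmd page per 1GB of
 * address space) and reserve a contiguous window for them in the e820
 * map.
 */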
static void __init find_early_table_space(unsigned long end)
{
        unsigned long puds, pmds, tables, start;

        puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
        pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
        tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) +
                 round_up(pmds * sizeof(pmd_t), PAGE_SIZE);

        /* RED-PEN putting page tables only on node 0 could
           cause a hotspot and fill up ZONE_DMA. The page tables
           need roughly 0.5KB per GB. */
        start = 0x8000;
        table_start = find_e820_area(start, end, tables);
        if (table_start == -1UL)
                panic("Cannot find space for the kernel page tables");

        table_start >>= PAGE_SHIFT;
        table_end = table_start;

        early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n",
                end, table_start << PAGE_SHIFT,
                (table_start << PAGE_SHIFT) + tables);
}

/* Set up the direct mapping of the physical memory at PAGE_OFFSET.
   This runs before bootmem is initialized and gets pages directly from
   physical memory. To access them they are temporarily mapped. */
void __meminit init_memory_mapping(unsigned long start, unsigned long end)
{
        unsigned long next;

        Dprintk("init_memory_mapping\n");

        /*
         * Find space for the kernel direct mapping tables.
         * Later we should allocate these tables in the local node of the memory
         * mapped. Unfortunately this is done currently before the nodes are
         * discovered.
         */
        if (!after_bootmem)
                find_early_table_space(end);

        start = (unsigned long)__va(start);
        end = (unsigned long)__va(end);

        for (; start < end; start = next) {
                unsigned long pud_phys;
                pgd_t *pgd = pgd_offset_k(start);
                pud_t *pud;

                if (after_bootmem)
                        pud = pud_offset(pgd, start & PGDIR_MASK);
                else
                        pud = alloc_low_page(&pud_phys);

                next = start + PGDIR_SIZE;
                if (next > end)
                        next = end;
                phys_pud_init(pud, __pa(start), __pa(next));
                if (!after_bootmem)
                        set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
                unmap_low_page(pud);
        }

        if (!after_bootmem)
                asm volatile("movq %%cr4,%0" : "=r" (mmu_cr4_features));
        __flush_tlb_all();
}

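/*
 * Non-NUMA zone setup: ZONE_DMA covers the legacy ISA DMA window
 * (MAX_DMA_PFN), ZONE_DMA32 everything below MAX_DMA32_PFN, and
 * ZONE_NORMAL the rest of memory up to end_pfn.
 */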
#ifndef CONFIG_NUMA
void __init paging_init(void)
{
        unsigned long max_zone_pfns[MAX_NR_ZONES];
        memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
        max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
        max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
        max_zone_pfns[ZONE_NORMAL] = end_pfn;

        memory_present(0, 0, end_pfn);
        sparse_init();
        free_area_init_nodes(max_zone_pfns);
}
#endif

/* Unmap a kernel mapping if it exists. This is useful to avoid prefetches
   from the CPU leading to inconsistent cache lines. address and size
   must be aligned to 2MB boundaries.
   Does nothing when the mapping doesn't exist. */
void __init clear_kernel_mapping(unsigned long address, unsigned long size)
{
        unsigned long end = address + size;

        BUG_ON(address & ~LARGE_PAGE_MASK);
        BUG_ON(size & ~LARGE_PAGE_MASK);

        for (; address < end; address += LARGE_PAGE_SIZE) {
                pgd_t *pgd = pgd_offset_k(address);
                pud_t *pud;
                pmd_t *pmd;
                if (pgd_none(*pgd))
                        continue;
                pud = pud_offset(pgd, address);
                if (pud_none(*pud))
                        continue;
                pmd = pmd_offset(pud, address);
                if (!pmd || pmd_none(*pmd))
                        continue;
                if (0 == (pmd_val(*pmd) & _PAGE_PSE)) {
                        /* Could handle this, but it should not happen currently. */
                        printk(KERN_ERR
                               "clear_kernel_mapping: mapping has been split. will leak memory\n");
                        pmd_ERROR(*pmd);
                }
                set_pmd(pmd, __pmd(0));
        }
        __flush_tlb_all();
}

/*
 * Memory hotplug specific functions
 */
void online_page(struct page *page)
{
        ClearPageReserved(page);
        init_page_count(page);
        __free_page(page);
        totalram_pages++;
        num_physpages++;
}

#ifdef CONFIG_MEMORY_HOTPLUG
/*
 * Memory is always added to the NORMAL zone. This means you will never get
 * additional DMA/DMA32 memory.
 */
int arch_add_memory(int nid, u64 start, u64 size)
{
        struct pglist_data *pgdat = NODE_DATA(nid);
        struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
        unsigned long start_pfn = start >> PAGE_SHIFT;
        unsigned long nr_pages = size >> PAGE_SHIFT;
        int ret;

        init_memory_mapping(start, start + size - 1);

        ret = __add_pages(zone, start_pfn, nr_pages);
        if (ret)
                goto error;

        return ret;
error:
        printk("%s: Problem encountered in __add_pages!\n", __func__);
        return ret;
}
EXPORT_SYMBOL_GPL(arch_add_memory);

int remove_memory(u64 start, u64 size)
{
        return -EINVAL;
}
EXPORT_SYMBOL_GPL(remove_memory);

#if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA)
int memory_add_physaddr_to_nid(u64 start)
{
        return 0;
}
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif

#endif /* CONFIG_MEMORY_HOTPLUG */

#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
/*
 * Memory hotadd without sparsemem. The mem_maps have been allocated in
 * advance, just online the pages.
 */
int __add_pages(struct zone *z, unsigned long start_pfn, unsigned long nr_pages)
{
        int err = -EIO;
        unsigned long pfn;
        unsigned long total = 0, mem = 0;
        for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) {
                if (pfn_valid(pfn)) {
                        online_page(pfn_to_page(pfn));
                        err = 0;
                        mem++;
                }
                total++;
        }
        if (!err) {
                z->spanned_pages += total;
                z->present_pages += mem;
                z->zone_pgdat->node_spanned_pages += total;
                z->zone_pgdat->node_present_pages += mem;
        }
        return err;
}
#endif

static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
                         kcore_vsyscall;

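/*
 * Late memory init: hand the bootmem pages over to the buddy allocator,
 * register the /proc/kcore regions and print the final memory summary.
 */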
void __init mem_init(void)
{
        long codesize, reservedpages, datasize, initsize;

        pci_iommu_alloc();

        /* clear the zero-page */
        memset(empty_zero_page, 0, PAGE_SIZE);

        reservedpages = 0;

        /* this will put all low memory onto the freelists */
#ifdef CONFIG_NUMA
        totalram_pages = numa_free_all_bootmem();
#else
        totalram_pages = free_all_bootmem();
#endif
        reservedpages = end_pfn - totalram_pages -
                                        absent_pages_in_range(0, end_pfn);

        after_bootmem = 1;

        codesize = (unsigned long) &_etext - (unsigned long) &_text;
        datasize = (unsigned long) &_edata - (unsigned long) &_etext;
        initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;

        /* Register memory areas for /proc/kcore */
        kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
        kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
                   VMALLOC_END-VMALLOC_START);
        kclist_add(&kcore_kernel, &_stext, _end - _stext);
        kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN);
        kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
                   VSYSCALL_END - VSYSCALL_START);

        printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n",
                (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
                end_pfn << (PAGE_SHIFT-10),
                codesize >> 10,
                reservedpages << (PAGE_SHIFT-10),
                datasize >> 10,
                initsize >> 10);
}

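/*
 * Return a range of init memory to the page allocator, poisoning it
 * first so that stale references are easier to spot.
 */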
void free_init_pages(char *what, unsigned long begin, unsigned long end)
{
        unsigned long addr;

        if (begin >= end)
                return;

        printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
        for (addr = begin; addr < end; addr += PAGE_SIZE) {
                ClearPageReserved(virt_to_page(addr));
                init_page_count(virt_to_page(addr));
                memset((void *)(addr & ~(PAGE_SIZE-1)),
                        POISON_FREE_INITMEM, PAGE_SIZE);
                if (addr >= __START_KERNEL_map)
                        change_page_attr_addr(addr, 1, __pgprot(0));
                free_page(addr);
                totalram_pages++;
        }
        if (addr > __START_KERNEL_map)
                global_flush_tlb();
}

void free_initmem(void)
{
        free_init_pages("unused kernel memory",
                        (unsigned long)(&__init_begin),
                        (unsigned long)(&__init_end));
}

#ifdef CONFIG_DEBUG_RODATA

void mark_rodata_ro(void)
{
        unsigned long start = (unsigned long)_stext, end;

#ifdef CONFIG_HOTPLUG_CPU
        /* It must still be possible to apply SMP alternatives. */
        if (num_possible_cpus() > 1)
                start = (unsigned long)_etext;
#endif
        end = (unsigned long)__end_rodata;
        start = (start + PAGE_SIZE - 1) & PAGE_MASK;
        end &= PAGE_MASK;
        if (end <= start)
                return;

        change_page_attr_addr(start, (end - start) >> PAGE_SHIFT, PAGE_KERNEL_RO);

        printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
               (end - start) >> 10);

        /*
         * change_page_attr_addr() requires a global_flush_tlb() call after it.
         * We do this after the printk so that if something went wrong in the
         * change, the printk gets out at least to give a better debug hint
         * of who is the culprit.
         */
        global_flush_tlb();
}
#endif

#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
        free_init_pages("initrd memory", start, end);
}
#endif

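/*
 * Reserve a boot-time memory range on the owning node (with CONFIG_NUMA)
 * and keep the ZONE_DMA watermark accounting up to date when the range
 * falls below the DMA zone limit.
 */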
void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
{
#ifdef CONFIG_NUMA
        int nid = phys_to_nid(phys);
#endif
        unsigned long pfn = phys >> PAGE_SHIFT;
        if (pfn >= end_pfn) {
                /* This can happen with kdump kernels when accessing firmware
                   tables. */
                if (pfn < end_pfn_map)
                        return;
                printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n",
                                phys, len);
                return;
        }

        /* Should check here against the e820 map to avoid double free */
#ifdef CONFIG_NUMA
        reserve_bootmem_node(NODE_DATA(nid), phys, len);
#else
        reserve_bootmem(phys, len);
#endif
        if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
                dma_reserve += len / PAGE_SIZE;
                set_dma_reserve(dma_reserve);
        }
}

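/*
 * Walk the kernel page tables to check whether a kernel virtual address
 * is actually backed by a valid page; used, for instance, by /proc/kcore
 * before it dereferences an address.
 */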
int kern_addr_valid(unsigned long addr)
{
        unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;

        if (above != 0 && above != -1UL)
                return 0;

        pgd = pgd_offset_k(addr);
        if (pgd_none(*pgd))
                return 0;

        pud = pud_offset(pgd, addr);
        if (pud_none(*pud))
                return 0;

        pmd = pmd_offset(pud, addr);
        if (pmd_none(*pmd))
                return 0;
        if (pmd_large(*pmd))
                return pfn_valid(pmd_pfn(*pmd));

        pte = pte_offset_kernel(pmd, addr);
        if (pte_none(*pte))
                return 0;
        return pfn_valid(pte_pfn(*pte));
}

#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>

extern int exception_trace, page_fault_trace;

static ctl_table debug_table2[] = {
        {
                .ctl_name       = 99,
                .procname       = "exception-trace",
                .data           = &exception_trace,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec
        },
        {}
};

static ctl_table debug_root_table2[] = {
        {
                .ctl_name = CTL_DEBUG,
                .procname = "debug",
                .mode = 0555,
                .child = debug_table2
        },
        {}
};

static __init int x8664_sysctl_init(void)
{
        register_sysctl_table(debug_root_table2);
        return 0;
}
__initcall(x8664_sysctl_init);
#endif

/* A pseudo VMA to allow ptrace access for the vsyscall page.  This only
   covers the 64bit vsyscall page now. 32bit has a real VMA now and does
   not need special handling anymore. */

static struct vm_area_struct gate_vma = {
        .vm_start = VSYSCALL_START,
        .vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES << PAGE_SHIFT),
        .vm_page_prot = PAGE_READONLY_EXEC,
        .vm_flags = VM_READ | VM_EXEC
};

struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
{
#ifdef CONFIG_IA32_EMULATION
        if (test_tsk_thread_flag(tsk, TIF_IA32))
                return NULL;
#endif
        return &gate_vma;
}

int in_gate_area(struct task_struct *task, unsigned long addr)
{
        struct vm_area_struct *vma = get_gate_vma(task);
        if (!vma)
                return 0;
        return (addr >= vma->vm_start) && (addr < vma->vm_end);
}

/* Use this when you have no reliable task/vma, typically from interrupt
 * context. It is less reliable than using the task's vma and may give
 * false positives.
 */
int in_gate_area_no_task(unsigned long addr)
{
        return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
}