[PATCH] x86_64: Support memory hotadd without sparsemem
arch/x86_64/mm/init.c
/*
 * linux/arch/x86_64/mm/init.c
 *
 * Copyright (C) 1995  Linus Torvalds
 * Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 * Copyright (C) 2002,2003 Andi Kleen <ak@suse.de>
 */

#include <linux/config.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/proc_fs.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/module.h>
#include <linux/memory_hotplug.h>

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/dma.h>
#include <asm/fixmap.h>
#include <asm/e820.h>
#include <asm/apic.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/smp.h>
#include <asm/sections.h>
#include <asm/dma-mapping.h>
#include <asm/swiotlb.h>

#ifndef Dprintk
#define Dprintk(x...)
#endif

struct dma_mapping_ops* dma_ops;
EXPORT_SYMBOL(dma_ops);

static unsigned long dma_reserve __initdata;

DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);

/*
 * NOTE: pagetable_init allocates all the fixmap pagetables contiguously in
 * physical space so we can cache the place of the first one and move
 * around without checking the pgd every time.
 */

void show_mem(void)
{
	long i, total = 0, reserved = 0;
	long shared = 0, cached = 0;
	pg_data_t *pgdat;
	struct page *page;

	printk(KERN_INFO "Mem-info:\n");
	show_free_areas();
	printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));

	for_each_online_pgdat(pgdat) {
		for (i = 0; i < pgdat->node_spanned_pages; ++i) {
			page = pfn_to_page(pgdat->node_start_pfn + i);
			total++;
			if (PageReserved(page))
				reserved++;
			else if (PageSwapCache(page))
				cached++;
			else if (page_count(page))
				shared += page_count(page) - 1;
		}
	}
	printk(KERN_INFO "%lu pages of RAM\n", total);
	printk(KERN_INFO "%lu reserved pages\n", reserved);
	printk(KERN_INFO "%lu pages shared\n", shared);
	printk(KERN_INFO "%lu pages swap cached\n", cached);
}

/* References to section boundaries */

int after_bootmem;

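/*
 * Allocate a zeroed page for early kernel page table construction: from the
 * bootmem allocator while it is still available, from the normal page
 * allocator (GFP_ATOMIC) once after_bootmem is set.
 */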
static __init void *spp_getpage(void)
{
	void *ptr;
	if (after_bootmem)
		ptr = (void *) get_zeroed_page(GFP_ATOMIC);
	else
		ptr = alloc_bootmem_pages(PAGE_SIZE);
	if (!ptr || ((unsigned long)ptr & ~PAGE_MASK))
		panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem?"after bootmem":"");

	Dprintk("spp_getpage %p\n", ptr);
	return ptr;
}

static __init void set_pte_phys(unsigned long vaddr,
				unsigned long phys, pgprot_t prot)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte, new_pte;

	Dprintk("set_pte_phys %lx to %lx\n", vaddr, phys);

	pgd = pgd_offset_k(vaddr);
	if (pgd_none(*pgd)) {
		printk("PGD FIXMAP MISSING, it should be setup in head.S!\n");
		return;
	}
	pud = pud_offset(pgd, vaddr);
	if (pud_none(*pud)) {
		pmd = (pmd_t *) spp_getpage();
		set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
		if (pmd != pmd_offset(pud, 0)) {
			printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud,0));
			return;
		}
	}
	pmd = pmd_offset(pud, vaddr);
	if (pmd_none(*pmd)) {
		pte = (pte_t *) spp_getpage();
		set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
		if (pte != pte_offset_kernel(pmd, 0)) {
			printk("PAGETABLE BUG #02!\n");
			return;
		}
	}
	new_pte = pfn_pte(phys >> PAGE_SHIFT, prot);

	pte = pte_offset_kernel(pmd, vaddr);
	if (!pte_none(*pte) &&
	    pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask))
		pte_ERROR(*pte);
	set_pte(pte, new_pte);

	/*
	 * It's enough to flush this one mapping.
	 * (PGE mappings get flushed as well)
	 */
	__flush_tlb_one(vaddr);
}

/* NOTE: this is meant to be run only at boot */
void __init
__set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
{
	unsigned long address = __fix_to_virt(idx);

	if (idx >= __end_of_fixed_addresses) {
		printk("Invalid __set_fixmap\n");
		return;
	}
	set_pte_phys(address, phys, prot);
}

unsigned long __initdata table_start, table_end;

extern pmd_t temp_boot_pmds[];

static struct temp_map {
	pmd_t *pmd;
	void  *address;
	int    allocated;
} temp_mappings[] __initdata = {
	{ &temp_boot_pmds[0], (void *)(40UL * 1024 * 1024) },
	{ &temp_boot_pmds[1], (void *)(42UL * 1024 * 1024) },
	{}
};

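/*
 * Before bootmem is available, pages for new page tables are taken from the
 * physical range set aside by find_early_table_space() (table_start..table_end)
 * and accessed through one of the temporary 2MB mappings above. After bootmem
 * they simply come from the page allocator.
 */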
static __meminit void *alloc_low_page(int *index, unsigned long *phys)
{
	struct temp_map *ti;
	int i;
	unsigned long pfn = table_end++, paddr;
	void *adr;

	if (after_bootmem) {
		adr = (void *)get_zeroed_page(GFP_ATOMIC);
		*phys = __pa(adr);
		return adr;
	}

	if (pfn >= end_pfn)
		panic("alloc_low_page: ran out of memory");
	for (i = 0; temp_mappings[i].allocated; i++) {
		if (!temp_mappings[i].pmd)
			panic("alloc_low_page: ran out of temp mappings");
	}
	ti = &temp_mappings[i];
	paddr = (pfn << PAGE_SHIFT) & PMD_MASK;
	set_pmd(ti->pmd, __pmd(paddr | _KERNPG_TABLE | _PAGE_PSE));
	ti->allocated = 1;
	__flush_tlb();
	adr = ti->address + ((pfn << PAGE_SHIFT) & ~PMD_MASK);
	memset(adr, 0, PAGE_SIZE);
	*index = i;
	*phys = pfn * PAGE_SIZE;
	return adr;
}

static __meminit void unmap_low_page(int i)
{
	struct temp_map *ti;

	if (after_bootmem)
		return;

	ti = &temp_mappings[i];
	set_pmd(ti->pmd, __pmd(0));
	ti->allocated = 0;
}

/* Must run before zap_low_mappings */
__init void *early_ioremap(unsigned long addr, unsigned long size)
{
	unsigned long map = round_down(addr, LARGE_PAGE_SIZE);

	/* actually usually some more */
	if (size >= LARGE_PAGE_SIZE) {
		printk("SMBIOS area too long %lu\n", size);
		return NULL;
	}
	set_pmd(temp_mappings[0].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE));
	map += LARGE_PAGE_SIZE;
	set_pmd(temp_mappings[1].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE));
	__flush_tlb();
	return temp_mappings[0].address + (addr & (LARGE_PAGE_SIZE-1));
}

/* To avoid virtual aliases later */
__init void early_iounmap(void *addr, unsigned long size)
{
	if ((void *)round_down((unsigned long)addr, LARGE_PAGE_SIZE) != temp_mappings[0].address)
		printk("early_iounmap: bad address %p\n", addr);
	set_pmd(temp_mappings[0].pmd, __pmd(0));
	set_pmd(temp_mappings[1].pmd, __pmd(0));
	__flush_tlb();
}

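/*
 * Fill one page worth of pmd entries with 2MB (_PAGE_PSE) kernel mappings
 * covering [address, end); entries past 'end' are cleared.
 */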
static void __meminit
phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end)
{
	int i;

	for (i = 0; i < PTRS_PER_PMD; pmd++, i++, address += PMD_SIZE) {
		unsigned long entry;

		if (address > end) {
			for (; i < PTRS_PER_PMD; i++, pmd++)
				set_pmd(pmd, __pmd(0));
			break;
		}
		entry = _PAGE_NX|_PAGE_PSE|_KERNPG_TABLE|_PAGE_GLOBAL|address;
		entry &= __supported_pte_mask;
		set_pmd(pmd, __pmd(entry));
	}
}

static void __meminit
phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
{
	pmd_t *pmd = pmd_offset(pud, (unsigned long)__va(address));

	if (pmd_none(*pmd)) {
		spin_lock(&init_mm.page_table_lock);
		phys_pmd_init(pmd, address, end);
		spin_unlock(&init_mm.page_table_lock);
		__flush_tlb_all();
	}
}

static void __meminit phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
{
	long i = pud_index(address);

	pud = pud + i;

	if (after_bootmem && pud_val(*pud)) {
		phys_pmd_update(pud, address, end);
		return;
	}

	for (; i < PTRS_PER_PUD; pud++, i++) {
		int map;
		unsigned long paddr, pmd_phys;
		pmd_t *pmd;

		paddr = (address & PGDIR_MASK) + i*PUD_SIZE;
		if (paddr >= end)
			break;

		if (!after_bootmem && !e820_mapped(paddr, paddr+PUD_SIZE, 0)) {
			set_pud(pud, __pud(0));
			continue;
		}

		pmd = alloc_low_page(&map, &pmd_phys);
		spin_lock(&init_mm.page_table_lock);
		set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
		phys_pmd_init(pmd, paddr, end);
		spin_unlock(&init_mm.page_table_lock);
		unmap_low_page(map);
	}
	__flush_tlb();
}

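/*
 * Estimate how much memory the kernel direct-mapping page tables will need
 * (one pud page per 512GB and one pmd page per 1GB of mapped memory, each
 * rounded up to whole pages) and find a physically contiguous area for them
 * in the e820 map, starting the search at 0x8000.
 */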
static void __init find_early_table_space(unsigned long end)
{
	unsigned long puds, pmds, tables, start;

	puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
	pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
	tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) +
		 round_up(pmds * sizeof(pmd_t), PAGE_SIZE);

	/* RED-PEN putting page tables only on node 0 could
	   cause a hotspot and fill up ZONE_DMA. The page tables
	   need roughly 0.5KB per GB. */
	start = 0x8000;
	table_start = find_e820_area(start, end, tables);
	if (table_start == -1UL)
		panic("Cannot find space for the kernel page tables");

	table_start >>= PAGE_SHIFT;
	table_end = table_start;

	early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n",
		end, table_start << PAGE_SHIFT, table_end << PAGE_SHIFT);
}

/* Set up the direct mapping of the physical memory at PAGE_OFFSET.
   This runs before bootmem is initialized and gets pages directly from the
   physical memory. To access them they are temporarily mapped. */
void __meminit init_memory_mapping(unsigned long start, unsigned long end)
{
	unsigned long next;

	Dprintk("init_memory_mapping\n");

	/*
	 * Find space for the kernel direct mapping tables.
	 * Later we should allocate these tables in the local node of the memory
	 * mapped. Unfortunately this is done currently before the nodes are
	 * discovered.
	 */
	if (!after_bootmem)
		find_early_table_space(end);

	start = (unsigned long)__va(start);
	end = (unsigned long)__va(end);

	for (; start < end; start = next) {
		int map;
		unsigned long pud_phys;
		pgd_t *pgd = pgd_offset_k(start);
		pud_t *pud;

		if (after_bootmem)
			pud = pud_offset_k(pgd, start & PGDIR_MASK);
		else
			pud = alloc_low_page(&map, &pud_phys);

		next = start + PGDIR_SIZE;
		if (next > end)
			next = end;
		phys_pud_init(pud, __pa(start), __pa(next));
		if (!after_bootmem)
			set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
		unmap_low_page(map);
	}

	if (!after_bootmem)
		asm volatile("movq %%cr4,%0" : "=r" (mmu_cr4_features));
	__flush_tlb_all();
}

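/*
 * Remove the low identity mappings that were only needed to bring the CPU
 * up: the boot CPU clears pgd entry 0 of the kernel page table, while APs
 * switch cr3 to init_level4_pgt, where those mappings are already gone.
 */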
void __cpuinit zap_low_mappings(int cpu)
{
	if (cpu == 0) {
		pgd_t *pgd = pgd_offset_k(0UL);
		pgd_clear(pgd);
	} else {
		/*
		 * For AP's, zap the low identity mappings by changing the cr3
		 * to init_level4_pgt and doing local flush tlb all
		 */
		asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt)));
	}
	__flush_tlb_all();
}

/* Compute zone sizes for the DMA and DMA32 zones in a node. */
__init void
size_zones(unsigned long *z, unsigned long *h,
	   unsigned long start_pfn, unsigned long end_pfn)
{
	int i;
	unsigned long w;

	for (i = 0; i < MAX_NR_ZONES; i++)
		z[i] = 0;

	if (start_pfn < MAX_DMA_PFN)
		z[ZONE_DMA] = MAX_DMA_PFN - start_pfn;
	if (start_pfn < MAX_DMA32_PFN) {
		unsigned long dma32_pfn = MAX_DMA32_PFN;
		if (dma32_pfn > end_pfn)
			dma32_pfn = end_pfn;
		z[ZONE_DMA32] = dma32_pfn - start_pfn;
	}
	z[ZONE_NORMAL] = end_pfn - start_pfn;

	/* Remove lower zones from higher ones. */
	w = 0;
	for (i = 0; i < MAX_NR_ZONES; i++) {
		if (z[i])
			z[i] -= w;
		w += z[i];
	}

	/* Compute holes */
	w = start_pfn;
	for (i = 0; i < MAX_NR_ZONES; i++) {
		unsigned long s = w;
		w += z[i];
		h[i] = e820_hole_size(s, w);
	}

	/* Add the space needed for mem_map to the holes too. */
	for (i = 0; i < MAX_NR_ZONES; i++)
		h[i] += (z[i] * sizeof(struct page)) / PAGE_SIZE;

	/* The 16MB DMA zone has the kernel and other misc mappings.
	   Account them too */
	if (h[ZONE_DMA]) {
		h[ZONE_DMA] += dma_reserve;
		if (h[ZONE_DMA] >= z[ZONE_DMA]) {
			printk(KERN_WARNING
				"Kernel too large and filling up ZONE_DMA?\n");
			h[ZONE_DMA] = z[ZONE_DMA];
		}
	}
}

#ifndef CONFIG_NUMA
void __init paging_init(void)
{
	unsigned long zones[MAX_NR_ZONES], holes[MAX_NR_ZONES];

	memory_present(0, 0, end_pfn);
	sparse_init();
	size_zones(zones, holes, 0, end_pfn);
	free_area_init_node(0, NODE_DATA(0), zones,
			    __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes);
}
#endif

/* Unmap a kernel mapping if it exists. This is useful to avoid prefetches
   from the CPU leading to inconsistent cache lines. address and size
   must be aligned to 2MB boundaries.
   Does nothing when the mapping doesn't exist. */
void __init clear_kernel_mapping(unsigned long address, unsigned long size)
{
	unsigned long end = address + size;

	BUG_ON(address & ~LARGE_PAGE_MASK);
	BUG_ON(size & ~LARGE_PAGE_MASK);

	for (; address < end; address += LARGE_PAGE_SIZE) {
		pgd_t *pgd = pgd_offset_k(address);
		pud_t *pud;
		pmd_t *pmd;
		if (pgd_none(*pgd))
			continue;
		pud = pud_offset(pgd, address);
		if (pud_none(*pud))
			continue;
		pmd = pmd_offset(pud, address);
		if (!pmd || pmd_none(*pmd))
			continue;
		if (0 == (pmd_val(*pmd) & _PAGE_PSE)) {
			/* Could handle this, but it should not happen currently. */
			printk(KERN_ERR
		"clear_kernel_mapping: mapping has been split. will leak memory\n");
			pmd_ERROR(*pmd);
		}
		set_pmd(pmd, __pmd(0));
	}
	__flush_tlb_all();
}

/*
 * Memory hotplug specific functions
 */
#if defined(CONFIG_ACPI_HOTPLUG_MEMORY) || defined(CONFIG_ACPI_HOTPLUG_MEMORY_MODULE)

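/*
 * Hand a hot-added page to the buddy allocator: clear PG_reserved, give it
 * a reference count of one and free it, updating the global page accounting.
 */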
void online_page(struct page *page)
{
	ClearPageReserved(page);
	init_page_count(page);
	__free_page(page);
	totalram_pages++;
	num_physpages++;
}

#ifndef CONFIG_MEMORY_HOTPLUG
/*
 * Memory Hotadd without sparsemem. The mem_maps have been allocated in advance,
 * just online the pages.
 */
int __add_pages(struct zone *z, unsigned long start_pfn, unsigned long nr_pages)
{
	int err = -EIO;
	unsigned long pfn;
	unsigned long total = 0, mem = 0;
	for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) {
		unsigned long addr = pfn << PAGE_SHIFT;
		if (pfn_valid(pfn) && e820_mapped(addr, addr+1, E820_RAM)) {
			online_page(pfn_to_page(pfn));
			err = 0;
			mem++;
		}
		total++;
	}
	if (!err) {
		z->spanned_pages += total;
		z->present_pages += mem;
		z->zone_pgdat->node_spanned_pages += total;
		z->zone_pgdat->node_present_pages += mem;
	}
	return err;
}
#endif

/*
 * Memory is always added to the NORMAL zone. This means you will never get
 * additional DMA/DMA32 memory.
 */
int add_memory(u64 start, u64 size)
{
	struct pglist_data *pgdat = NODE_DATA(0);
	struct zone *zone = pgdat->node_zones + MAX_NR_ZONES-2;
	unsigned long start_pfn = start >> PAGE_SHIFT;
	unsigned long nr_pages = size >> PAGE_SHIFT;
	int ret;

	ret = __add_pages(zone, start_pfn, nr_pages);
	if (ret)
		goto error;

	init_memory_mapping(start, (start + size -1));

	return ret;
error:
	printk("%s: Problem encountered in __add_pages!\n", __func__);
	return ret;
}
EXPORT_SYMBOL_GPL(add_memory);

int remove_memory(u64 start, u64 size)
{
	return -EINVAL;
}
EXPORT_SYMBOL_GPL(remove_memory);

#endif

static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
			 kcore_vsyscall;

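/*
 * Late boot memory setup: release bootmem to the buddy allocator, account
 * reserved pages, register /proc/kcore regions and print the memory summary.
 */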
void __init mem_init(void)
{
	long codesize, reservedpages, datasize, initsize;

#ifdef CONFIG_SWIOTLB
	pci_swiotlb_init();
#endif
	no_iommu_init();

	/* How many end-of-memory variables you have, grandma! */
	max_low_pfn = end_pfn;
	max_pfn = end_pfn;
	num_physpages = end_pfn;
	high_memory = (void *) __va(end_pfn * PAGE_SIZE);

	/* clear the zero-page */
	memset(empty_zero_page, 0, PAGE_SIZE);

	reservedpages = 0;

	/* this will put all low memory onto the freelists */
#ifdef CONFIG_NUMA
	totalram_pages = numa_free_all_bootmem();
#else
	totalram_pages = free_all_bootmem();
#endif
	reservedpages = end_pfn - totalram_pages - e820_hole_size(0, end_pfn);

	after_bootmem = 1;

	codesize = (unsigned long) &_etext - (unsigned long) &_text;
	datasize = (unsigned long) &_edata - (unsigned long) &_etext;
	initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;

	/* Register memory areas for /proc/kcore */
	kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
	kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
		   VMALLOC_END-VMALLOC_START);
	kclist_add(&kcore_kernel, &_stext, _end - _stext);
	kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN);
	kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
		   VSYSCALL_END - VSYSCALL_START);

	printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n",
		(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
		end_pfn << (PAGE_SHIFT-10),
		codesize >> 10,
		reservedpages << (PAGE_SHIFT-10),
		datasize >> 10,
		initsize >> 10);

#ifdef CONFIG_SMP
	/*
	 * Sync boot_level4_pgt mappings with the init_level4_pgt
	 * except for the low identity mappings which are already zapped
	 * in init_level4_pgt. This sync-up is essential for AP's bringup
	 */
	memcpy(boot_level4_pgt+1, init_level4_pgt+1, (PTRS_PER_PGD-1)*sizeof(pgd_t));
#endif
}

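/*
 * Free the pages holding __init code and data: poison them with 0xcc,
 * then hand them back to the page allocator.
 */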
void free_initmem(void)
{
	unsigned long addr;

	addr = (unsigned long)(&__init_begin);
	for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
		ClearPageReserved(virt_to_page(addr));
		init_page_count(virt_to_page(addr));
		memset((void *)(addr & ~(PAGE_SIZE-1)), 0xcc, PAGE_SIZE);
		free_page(addr);
		totalram_pages++;
	}
	memset(__initdata_begin, 0xba, __initdata_end - __initdata_begin);
	printk ("Freeing unused kernel memory: %luk freed\n", (__init_end - __init_begin) >> 10);
}

#ifdef CONFIG_DEBUG_RODATA

extern char __start_rodata, __end_rodata;
void mark_rodata_ro(void)
{
	unsigned long addr = (unsigned long)&__start_rodata;

	for (; addr < (unsigned long)&__end_rodata; addr += PAGE_SIZE)
		change_page_attr_addr(addr, 1, PAGE_KERNEL_RO);

	printk ("Write protecting the kernel read-only data: %luk\n",
			(&__end_rodata - &__start_rodata) >> 10);

	/*
	 * change_page_attr_addr() requires a global_flush_tlb() call after it.
	 * We do this after the printk so that if something went wrong in the
	 * change, the printk gets out at least to give a better debug hint
	 * of who is the culprit.
	 */
	global_flush_tlb();
}
#endif

#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
	if (start >= end)
		return;
	printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
	for (; start < end; start += PAGE_SIZE) {
		ClearPageReserved(virt_to_page(start));
		init_page_count(virt_to_page(start));
		free_page(start);
		totalram_pages++;
	}
}
#endif

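/*
 * Reserve a bootmem range on the right node and remember how much of the
 * DMA zone is taken by such reservations (used later by size_zones()).
 */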
void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
{
	/* Should check here against the e820 map to avoid double free */
#ifdef CONFIG_NUMA
	int nid = phys_to_nid(phys);
	reserve_bootmem_node(NODE_DATA(nid), phys, len);
#else
	reserve_bootmem(phys, len);
#endif
	if (phys+len <= MAX_DMA_PFN*PAGE_SIZE)
		dma_reserve += len / PAGE_SIZE;
}

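/*
 * Walk the kernel page tables to check whether a kernel virtual address is
 * actually mapped to a valid page frame, stopping at 2MB large pages.
 */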
int kern_addr_valid(unsigned long addr)
{
	unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	if (above != 0 && above != -1UL)
		return 0;

	pgd = pgd_offset_k(addr);
	if (pgd_none(*pgd))
		return 0;

	pud = pud_offset(pgd, addr);
	if (pud_none(*pud))
		return 0;

	pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd))
		return 0;
	if (pmd_large(*pmd))
		return pfn_valid(pmd_pfn(*pmd));

	pte = pte_offset_kernel(pmd, addr);
	if (pte_none(*pte))
		return 0;
	return pfn_valid(pte_pfn(*pte));
}

#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>

extern int exception_trace, page_fault_trace;

static ctl_table debug_table2[] = {
	{ 99, "exception-trace", &exception_trace, sizeof(int), 0644, NULL,
	  proc_dointvec },
	{ 0, }
};

static ctl_table debug_root_table2[] = {
	{ .ctl_name = CTL_DEBUG, .procname = "debug", .mode = 0555,
	  .child = debug_table2 },
	{ 0 },
};

static __init int x8664_sysctl_init(void)
{
	register_sysctl_table(debug_root_table2, 1);
	return 0;
}
__initcall(x8664_sysctl_init);
#endif

/* A pseudo VMA to allow ptrace access to the vsyscall page. This only
   covers the 64bit vsyscall page now. 32bit has a real VMA now and does
   not need special handling anymore. */

static struct vm_area_struct gate_vma = {
	.vm_start = VSYSCALL_START,
	.vm_end = VSYSCALL_END,
	.vm_page_prot = PAGE_READONLY
};

struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
{
#ifdef CONFIG_IA32_EMULATION
	if (test_tsk_thread_flag(tsk, TIF_IA32))
		return NULL;
#endif
	return &gate_vma;
}

int in_gate_area(struct task_struct *task, unsigned long addr)
{
	struct vm_area_struct *vma = get_gate_vma(task);
	if (!vma)
		return 0;
	return (addr >= vma->vm_start) && (addr < vma->vm_end);
}

/* Use this when you have no reliable task/vma, typically from interrupt
 * context. It is less reliable than using the task's vma and may give
 * false positives.
 */
int in_gate_area_no_task(unsigned long addr)
{
	return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
}