mm: make defensive checks around PFN values registered for memory usage
/*
 * linux/mm/bootmem.c
 *
 * Copyright (C) 1999 Ingo Molnar
 * Discontiguous memory support, Kanoj Sarcar, SGI, Nov 1999
 *
 * simple boot-time physical memory area allocator and
 * free memory collector. It's used to deal with reserved
 * system memory and memory holes as well.
 */
#include <linux/init.h>
#include <linux/pfn.h>
#include <linux/bootmem.h>
#include <linux/module.h>

#include <asm/bug.h>
#include <asm/io.h>
#include <asm/processor.h>

#include "internal.h"

/*
 * Access to this subsystem has to be serialized externally. (this is
 * true for the boot process anyway)
 */
unsigned long max_low_pfn;
unsigned long min_low_pfn;
unsigned long max_pfn;

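/* every node's bootmem_data, kept sorted by node_boot_start; see link_bootmem() */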
static LIST_HEAD(bdata_list);
#ifdef CONFIG_CRASH_DUMP
/*
 * If we have booted due to a crash, max_pfn will be a very low value. We need
 * to know the amount of memory that the previous kernel used.
 */
unsigned long saved_max_pfn;
#endif

/* return the number of _pages_ that will be allocated for the boot bitmap */
unsigned long __init bootmem_bootmap_pages(unsigned long pages)
{
	unsigned long mapsize;

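	/*
	 * One bit of bitmap per page: round the page count up to whole
	 * bytes, then round the byte count up to whole pages. E.g. with
	 * 4K pages, covering 128K pages takes a 16K bitmap = 4 pages.
	 */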
	mapsize = (pages + 7) / 8;
	mapsize = (mapsize + ~PAGE_MASK) & PAGE_MASK;
	mapsize >>= PAGE_SHIFT;

	return mapsize;
}

/*
 * link bdata into bdata_list, keeping the list sorted by node_boot_start
 */
static void __init link_bootmem(bootmem_data_t *bdata)
{
	bootmem_data_t *ent;

	if (list_empty(&bdata_list)) {
		list_add(&bdata->list, &bdata_list);
		return;
	}
	/* insert in order */
	list_for_each_entry(ent, &bdata_list, list) {
		if (bdata->node_boot_start < ent->node_boot_start) {
			list_add_tail(&bdata->list, &ent->list);
			return;
		}
	}
	list_add_tail(&bdata->list, &bdata_list);
}

/*
 * Given an initialised bdata, return the size in bytes of its boot bitmap
 */
static unsigned long __init get_mapsize(bootmem_data_t *bdata)
{
	unsigned long mapsize;
	unsigned long start = PFN_DOWN(bdata->node_boot_start);
	unsigned long end = bdata->node_low_pfn;

	mapsize = ((end - start) + 7) / 8;
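	/* pad to a whole number of longs; the map is scanned a long at a time */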
	return ALIGN(mapsize, sizeof(long));
}

/*
 * Called once per node to set up that node's allocator.
 */
static unsigned long __init init_bootmem_core(pg_data_t *pgdat,
	unsigned long mapstart, unsigned long start, unsigned long end)
{
	bootmem_data_t *bdata = pgdat->bdata;
	unsigned long mapsize;

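	/*
	 * Defensively clamp the registered PFN range to what the memory
	 * model can represent before trusting it any further.
	 */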
	mminit_validate_memmodel_limits(&start, &end);
	bdata->node_bootmem_map = phys_to_virt(PFN_PHYS(mapstart));
	bdata->node_boot_start = PFN_PHYS(start);
	bdata->node_low_pfn = end;
	link_bootmem(bdata);

	/*
	 * Initially all pages are reserved - setup_arch() has to
	 * register free RAM areas explicitly.
	 */
	mapsize = get_mapsize(bdata);
	memset(bdata->node_bootmem_map, 0xff, mapsize);

	return mapsize;
}

/*
 * Checks whether a particular physical memory range can be marked
 * unallocatable. Usable RAM might be used for boot-time allocations -
 * or it might get added to the free page pool later on.
 */
static int __init can_reserve_bootmem_core(bootmem_data_t *bdata,
			unsigned long addr, unsigned long size, int flags)
{
	unsigned long sidx, eidx;
	unsigned long i;

	BUG_ON(!size);

	/* out of range for this node; nothing to check here */
	if (addr + size < bdata->node_boot_start ||
		PFN_DOWN(addr) > bdata->node_low_pfn)
		return 0;

	/*
	 * Convert to bitmap indexes; the start rounds down so that
	 * partially covered pages are included.
	 */
	if (addr > bdata->node_boot_start)
		sidx = PFN_DOWN(addr - bdata->node_boot_start);
	else
		sidx = 0;

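	/* clamp the end index to the last page this node covers */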
	eidx = PFN_UP(addr + size - bdata->node_boot_start);
	if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start))
		eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start);

	for (i = sidx; i < eidx; i++) {
		if (test_bit(i, bdata->node_bootmem_map)) {
			if (flags & BOOTMEM_EXCLUSIVE)
				return -EBUSY;
		}
	}

	return 0;
}
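
/*
 * Callers are expected to run can_reserve_bootmem_core() over every
 * relevant node first and only then commit with reserve_bootmem_core(),
 * so a rejected BOOTMEM_EXCLUSIVE request leaves no bits half-set (see
 * reserve_bootmem() below).
 */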

static void __init reserve_bootmem_core(bootmem_data_t *bdata,
			unsigned long addr, unsigned long size, int flags)
{
	unsigned long sidx, eidx;
	unsigned long i;

	BUG_ON(!size);

	/* out of range */
	if (addr + size < bdata->node_boot_start ||
		PFN_DOWN(addr) > bdata->node_low_pfn)
		return;

	/*
	 * Convert to bitmap indexes; the start rounds down so that
	 * partially covered pages are reserved too.
	 */
	if (addr > bdata->node_boot_start)
		sidx = PFN_DOWN(addr - bdata->node_boot_start);
	else
		sidx = 0;

	eidx = PFN_UP(addr + size - bdata->node_boot_start);
	if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start))
		eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start);

	for (i = sidx; i < eidx; i++) {
		if (test_and_set_bit(i, bdata->node_bootmem_map)) {
#ifdef CONFIG_DEBUG_BOOTMEM
			printk("hm, page %08lx reserved twice.\n", i * PAGE_SIZE);
#endif
		}
	}
}

static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
				     unsigned long size)
{
	unsigned long sidx, eidx;
	unsigned long i;

	BUG_ON(!size);

	/* out of range */
	if (addr + size < bdata->node_boot_start ||
		PFN_DOWN(addr) > bdata->node_low_pfn)
		return;
	/*
	 * round down end of usable mem, partially free pages are
	 * considered reserved.
	 */

	if (addr >= bdata->node_boot_start && addr < bdata->last_success)
		bdata->last_success = addr;

	/*
	 * Convert to bitmap indexes, rounding inward: the start rounds up
	 * and the end rounds down, so partially freed pages stay reserved.
	 */
	if (PFN_UP(addr) > PFN_DOWN(bdata->node_boot_start))
		sidx = PFN_UP(addr) - PFN_DOWN(bdata->node_boot_start);
	else
		sidx = 0;

	eidx = PFN_DOWN(addr + size - bdata->node_boot_start);
	if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start))
		eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start);

	for (i = sidx; i < eidx; i++) {
		if (unlikely(!test_and_clear_bit(i, bdata->node_bootmem_map)))
			BUG();
	}
}

/*
 * We 'merge' subsequent allocations to save space. We might 'lose'
 * some fraction of a page if allocations cannot be satisfied due to
 * size constraints on boxes where there is physical RAM space
 * fragmentation - in these cases (mostly large memory boxes) this
 * is not a problem.
 *
 * On low memory boxes we get it right in 100% of the cases.
 *
 * alignment has to be a power of 2 value.
 *
 * NOTE: This function is _not_ reentrant.
 */
void * __init
__alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
		     unsigned long align, unsigned long goal, unsigned long limit)
{
	unsigned long areasize, preferred;
	unsigned long i, start = 0, incr, eidx, end_pfn;
	void *ret;
	unsigned long node_boot_start;
	void *node_bootmem_map;

	if (!size) {
		printk("__alloc_bootmem_core(): zero-sized request\n");
		BUG();
	}
	BUG_ON(align & (align - 1));

	/* on nodes without memory - bootmem_map is NULL */
	if (!bdata->node_bootmem_map)
		return NULL;

	/* bdata->node_boot_start is supposed to be (12+6)-bit aligned on x86_64? */
	node_boot_start = bdata->node_boot_start;
	node_bootmem_map = bdata->node_bootmem_map;
	if (align) {
		node_boot_start = ALIGN(bdata->node_boot_start, align);
		if (node_boot_start > bdata->node_boot_start)
			node_bootmem_map = (unsigned long *)bdata->node_bootmem_map +
				PFN_DOWN(node_boot_start - bdata->node_boot_start) / BITS_PER_LONG;
	}

	if (limit && node_boot_start >= limit)
		return NULL;

	end_pfn = bdata->node_low_pfn;
	limit = PFN_DOWN(limit);
	if (limit && end_pfn > limit)
		end_pfn = limit;

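	/* indexes below are relative to the aligned node_boot_start */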
	eidx = end_pfn - PFN_DOWN(node_boot_start);

	/*
	 * We try to allocate bootmem pages above 'goal'
	 * first, then we try to allocate lower pages.
	 */
	preferred = 0;
	if (goal && PFN_DOWN(goal) < end_pfn) {
		if (goal > node_boot_start)
			preferred = goal - node_boot_start;

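		/*
		 * last_success caches where the last allocation succeeded;
		 * resuming the scan there skips ground already known to be
		 * allocated.
		 */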
		if (bdata->last_success > node_boot_start &&
			bdata->last_success - node_boot_start >= preferred)
			if (!limit || limit > bdata->last_success)
				preferred = bdata->last_success - node_boot_start;
	}

	preferred = PFN_DOWN(ALIGN(preferred, align));
	areasize = (size + PAGE_SIZE - 1) / PAGE_SIZE;
	incr = align >> PAGE_SHIFT ? : 1;

restart_scan:
	for (i = preferred; i < eidx;) {
		unsigned long j;

		i = find_next_zero_bit(node_bootmem_map, eidx, i);
		i = ALIGN(i, incr);
		if (i >= eidx)
			break;
		if (test_bit(i, node_bootmem_map)) {
			i += incr;
			continue;
		}
		for (j = i + 1; j < i + areasize; ++j) {
			if (j >= eidx)
				goto fail_block;
			if (test_bit(j, node_bootmem_map))
				goto fail_block;
		}
		start = i;
		goto found;
	fail_block:
		i = ALIGN(j, incr);
		if (i == j)
			i += incr;
	}

	if (preferred > 0) {
		preferred = 0;
		goto restart_scan;
	}
	return NULL;

found:
	bdata->last_success = PFN_PHYS(start) + node_boot_start;
	BUG_ON(start >= eidx);

	/*
	 * Is the next page of the previous allocation-end the start
	 * of this allocation's buffer? If yes then we can 'merge'
	 * the previous partial page with this allocation.
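	 *
	 * Example (hypothetical numbers): if the previous request ended
	 * 100 bytes into its final page, a byte-aligned request smaller
	 * than PAGE_SIZE - 100 bytes is carved from the rest of that page.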
	 */
	if (align < PAGE_SIZE &&
	    bdata->last_offset && bdata->last_pos + 1 == start) {
		unsigned long offset, remaining_size;
		offset = ALIGN(bdata->last_offset, align);
		BUG_ON(offset > PAGE_SIZE);
		remaining_size = PAGE_SIZE - offset;
		if (size < remaining_size) {
			areasize = 0;
			/* last_pos unchanged */
			bdata->last_offset = offset + size;
			ret = phys_to_virt(bdata->last_pos * PAGE_SIZE +
					   offset + node_boot_start);
		} else {
			remaining_size = size - remaining_size;
			areasize = (remaining_size + PAGE_SIZE - 1) / PAGE_SIZE;
			ret = phys_to_virt(bdata->last_pos * PAGE_SIZE +
					   offset + node_boot_start);
			bdata->last_pos = start + areasize - 1;
			bdata->last_offset = remaining_size;
		}
		bdata->last_offset &= ~PAGE_MASK;
	} else {
		bdata->last_pos = start + areasize - 1;
		bdata->last_offset = size & ~PAGE_MASK;
		ret = phys_to_virt(start * PAGE_SIZE + node_boot_start);
	}

	/*
	 * Reserve the area now:
	 */
	for (i = start; i < start + areasize; i++)
		if (unlikely(test_and_set_bit(i, node_bootmem_map)))
			BUG();
	memset(ret, 0, size);
	return ret;
}

static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat)
{
	struct page *page;
	unsigned long pfn;
	bootmem_data_t *bdata = pgdat->bdata;
	unsigned long i, count, total = 0;
	unsigned long idx;
	unsigned long *map;
	int gofast = 0;

	BUG_ON(!bdata->node_bootmem_map);

	count = 0;
	/* first extant page of the node */
	pfn = PFN_DOWN(bdata->node_boot_start);
	idx = bdata->node_low_pfn - pfn;
	map = bdata->node_bootmem_map;
	/*
	 * If node_boot_start is aligned to a multiple of BITS_PER_LONG
	 * pages, each bitmap word maps a naturally aligned block of
	 * BITS_PER_LONG pages, which can be freed in one go below.
	 */
	if (bdata->node_boot_start == 0 ||
	    ffs(bdata->node_boot_start) - PAGE_SHIFT > ffs(BITS_PER_LONG))
		gofast = 1;
	for (i = 0; i < idx; ) {
		unsigned long v = ~map[i / BITS_PER_LONG];

		if (gofast && v == ~0UL) {
			int order;

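			/*
			 * This word of the bitmap is completely free and,
			 * thanks to the gofast alignment check above, covers
			 * a naturally aligned block of BITS_PER_LONG pages -
			 * hand it to the page allocator as one high-order block.
			 */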
			page = pfn_to_page(pfn);
			count += BITS_PER_LONG;
			order = ffs(BITS_PER_LONG) - 1;
			__free_pages_bootmem(page, order);
			i += BITS_PER_LONG;
			page += BITS_PER_LONG;
		} else if (v) {
			unsigned long m;

			page = pfn_to_page(pfn);
			for (m = 1; m && i < idx; m <<= 1, page++, i++) {
				if (v & m) {
					count++;
					__free_pages_bootmem(page, 0);
				}
			}
		} else {
			i += BITS_PER_LONG;
		}
		pfn += BITS_PER_LONG;
	}
	total += count;

	/*
	 * Now free the allocator bitmap itself, it's not
	 * needed anymore:
	 */
	page = virt_to_page(bdata->node_bootmem_map);
	count = 0;
	idx = (get_mapsize(bdata) + PAGE_SIZE - 1) >> PAGE_SHIFT;
	for (i = 0; i < idx; i++, page++) {
		__free_pages_bootmem(page, 0);
		count++;
	}
	total += count;
	bdata->node_bootmem_map = NULL;

	return total;
}

unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn,
				unsigned long startpfn, unsigned long endpfn)
{
	return init_bootmem_core(pgdat, freepfn, startpfn, endpfn);
}

int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
				 unsigned long size, int flags)
{
	int ret;

	ret = can_reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
	if (ret < 0)
		return -ENOMEM;
	reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);

	return 0;
}

void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
			      unsigned long size)
{
	free_bootmem_core(pgdat->bdata, physaddr, size);
}

unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
{
	register_page_bootmem_info_node(pgdat);
	return free_all_bootmem_core(pgdat);
}

unsigned long __init init_bootmem(unsigned long start, unsigned long pages)
{
	max_low_pfn = pages;
	min_low_pfn = start;
	return init_bootmem_core(NODE_DATA(0), start, 0, pages);
}

#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
int __init reserve_bootmem(unsigned long addr, unsigned long size,
			    int flags)
{
	bootmem_data_t *bdata;
	int ret;

	list_for_each_entry(bdata, &bdata_list, list) {
		ret = can_reserve_bootmem_core(bdata, addr, size, flags);
		if (ret < 0)
			return ret;
	}
	list_for_each_entry(bdata, &bdata_list, list)
		reserve_bootmem_core(bdata, addr, size, flags);

	return 0;
}
#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */

void __init free_bootmem(unsigned long addr, unsigned long size)
{
	bootmem_data_t *bdata;
	list_for_each_entry(bdata, &bdata_list, list)
		free_bootmem_core(bdata, addr, size);
}

unsigned long __init free_all_bootmem(void)
{
	return free_all_bootmem_core(NODE_DATA(0));
}

void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align,
				      unsigned long goal)
{
	bootmem_data_t *bdata;
	void *ptr;

	list_for_each_entry(bdata, &bdata_list, list) {
		ptr = __alloc_bootmem_core(bdata, size, align, goal, 0);
		if (ptr)
			return ptr;
	}
	return NULL;
}

void * __init __alloc_bootmem(unsigned long size, unsigned long align,
			      unsigned long goal)
{
	void *mem = __alloc_bootmem_nopanic(size, align, goal);

	if (mem)
		return mem;
	/*
	 * Whoops, we cannot satisfy the allocation request.
	 */
	printk(KERN_ALERT "bootmem alloc of %lu bytes failed!\n", size);
	panic("Out of memory");
	return NULL;
}
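
/*
 * A typical early caller looks like (cf. the alloc_bootmem() wrapper in
 * <linux/bootmem.h>):
 *
 *	ptr = __alloc_bootmem(size, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS));
 *
 * i.e. cache-line aligned and preferring memory above the DMA zone.
 */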

void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
				   unsigned long align, unsigned long goal)
{
	void *ptr;

	ptr = __alloc_bootmem_core(pgdat->bdata, size, align, goal, 0);
	if (ptr)
		return ptr;

	return __alloc_bootmem(size, align, goal);
}

#ifdef CONFIG_SPARSEMEM
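/*
 * Allocate size bytes from within a single memory section: the result is
 * rejected and freed again if it does not fit entirely inside section_nr.
 * Used by the sparsemem code so per-section metadata can live in the
 * section it describes.
 */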
void * __init alloc_bootmem_section(unsigned long size,
				    unsigned long section_nr)
{
	void *ptr;
	unsigned long limit, goal, start_nr, end_nr, pfn;
	struct pglist_data *pgdat;

	pfn = section_nr_to_pfn(section_nr);
	goal = PFN_PHYS(pfn);
	limit = PFN_PHYS(section_nr_to_pfn(section_nr + 1)) - 1;
	pgdat = NODE_DATA(early_pfn_to_nid(pfn));
	ptr = __alloc_bootmem_core(pgdat->bdata, size, SMP_CACHE_BYTES, goal,
				   limit);

	if (!ptr)
		return NULL;

	start_nr = pfn_to_section_nr(PFN_DOWN(__pa(ptr)));
	end_nr = pfn_to_section_nr(PFN_DOWN(__pa(ptr) + size));
	if (start_nr != section_nr || end_nr != section_nr) {
		printk(KERN_WARNING "alloc_bootmem failed on section %ld.\n",
		       section_nr);
		free_bootmem_core(pgdat->bdata, __pa(ptr), size);
		ptr = NULL;
	}

	return ptr;
}
#endif

#ifndef ARCH_LOW_ADDRESS_LIMIT
#define ARCH_LOW_ADDRESS_LIMIT	0xffffffffUL
#endif

void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
				  unsigned long goal)
{
	bootmem_data_t *bdata;
	void *ptr;

	list_for_each_entry(bdata, &bdata_list, list) {
		ptr = __alloc_bootmem_core(bdata, size, align, goal,
					   ARCH_LOW_ADDRESS_LIMIT);
		if (ptr)
			return ptr;
	}

	/*
	 * Whoops, we cannot satisfy the allocation request.
	 */
	printk(KERN_ALERT "low bootmem alloc of %lu bytes failed!\n", size);
	panic("Out of low memory");
	return NULL;
}

void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
				       unsigned long align, unsigned long goal)
{
	return __alloc_bootmem_core(pgdat->bdata, size, align, goal,
				    ARCH_LOW_ADDRESS_LIMIT);
}