/*
 * Procedures for maintaining information about logical memory blocks.
 *
 * Peter Bergner, IBM Corp.	June 2001.
 * Copyright (C) 2001 Peter Bergner.
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/bitops.h>
#include <linux/poison.h>
#include <linux/pfn.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/memblock.h>

struct memblock memblock __initdata_memblock;

int memblock_debug __initdata_memblock;
int memblock_can_resize __initdata_memblock;
static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS + 1] __initdata_memblock;
static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS + 1] __initdata_memblock;

/* inline so we don't get a warning when pr_debug is compiled out */
static inline const char *memblock_type_name(struct memblock_type *type)
{
	if (type == &memblock.memory)
		return "memory";
	else if (type == &memblock.reserved)
		return "reserved";
	else
		return "unknown";
}

/*
 * Address comparison utilities
 */

static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1,
					phys_addr_t base2, phys_addr_t size2)
{
	return ((base1 < (base2 + size2)) && (base2 < (base1 + size1)));
}

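/*
 * Worked example (added for illustration; the values are invented, not from
 * the original source): base1/size1 = 0x1000/0x1000 covers [0x1000, 0x2000)
 * and base2/size2 = 0x1800/0x1000 covers [0x1800, 0x2800).  Both half-open
 * checks hold (0x1000 < 0x2800 and 0x1800 < 0x2000), so the ranges overlap.
 * Merely touching ranges such as [0x1000, 0x2000) and [0x2000, 0x3000) do
 * not overlap by this test.
 */
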
long __init_memblock memblock_overlaps_region(struct memblock_type *type,
					phys_addr_t base, phys_addr_t size)
{
	unsigned long i;

	for (i = 0; i < type->cnt; i++) {
		phys_addr_t rgnbase = type->regions[i].base;
		phys_addr_t rgnsize = type->regions[i].size;
		if (memblock_addrs_overlap(base, size, rgnbase, rgnsize))
			break;
	}

	return (i < type->cnt) ? i : -1;
}

/*
 * Find, allocate, deallocate or reserve unreserved regions. All allocations
 * are top-down.
 */

static phys_addr_t __init_memblock memblock_find_region(phys_addr_t start, phys_addr_t end,
					phys_addr_t size, phys_addr_t align)
{
	phys_addr_t base, res_base;
	long j;

	/* Bail out in case a huge size is requested */
	if (end < size)
		return 0;

	base = round_down(end - size, align);

	/* Prevent allocations returning 0 as it's also used to
	 * indicate an allocation failure
	 */
	if (start == 0)
		start = PAGE_SIZE;

	while (start <= base) {
		j = memblock_overlaps_region(&memblock.reserved, base, size);
		if (j < 0)
			return base;
		res_base = memblock.reserved.regions[j].base;
		if (res_base < size)
			break;
		base = round_down(res_base - size, align);
	}

	return 0;
}

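/*
 * Illustrative walk-through (added for clarity; numbers are invented): with
 * end = 0x10000, size = 0x1000 and align = 0x1000, the scan first tries
 * base = 0xf000.  If that range collides with a reserved region whose base
 * is 0xe800, the next candidate becomes
 * round_down(0xe800 - 0x1000, 0x1000) = 0xd000, and so on until either a
 * free slot is found or base drops below start.
 */
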
static phys_addr_t __init_memblock memblock_find_base(phys_addr_t size,
			phys_addr_t align, phys_addr_t start, phys_addr_t end)
{
	long i;

	BUG_ON(0 == size);

	/* Pump up max_addr */
	if (end == MEMBLOCK_ALLOC_ACCESSIBLE)
		end = memblock.current_limit;

	/* We do a top-down search, this tends to limit memory
	 * fragmentation by keeping early boot allocs near the
	 * top of memory
	 */
	for (i = memblock.memory.cnt - 1; i >= 0; i--) {
		phys_addr_t memblockbase = memblock.memory.regions[i].base;
		phys_addr_t memblocksize = memblock.memory.regions[i].size;
		phys_addr_t bottom, top, found;

		if (memblocksize < size)
			continue;
		if ((memblockbase + memblocksize) <= start)
			break;
		bottom = max(memblockbase, start);
		top = min(memblockbase + memblocksize, end);
		if (bottom >= top)
			continue;
		found = memblock_find_region(bottom, top, size, align);
		if (found)
			return found;
	}
	return 0;
}

/*
 * Find a free area with specified alignment in a specific range.
 */
u64 __init_memblock memblock_find_in_range(u64 start, u64 end, u64 size, u64 align)
{
	return memblock_find_base(size, align, start, end);
}

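/*
 * Usage sketch (added for illustration; the caller and addresses are invented,
 * not taken from this file): an early-boot caller first searches for a free
 * range and then marks it reserved, e.g.
 *
 *	u64 addr = memblock_find_in_range(0, 1ULL << 32, 0x100000, 0x100000);
 *	if (addr)
 *		memblock_reserve(addr, 0x100000);
 *
 * memblock_find_in_range() only locates space; nothing is recorded until
 * memblock_reserve() (or one of the allocators below) is called.
 */
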
/*
 * Free memblock.reserved.regions
 */
int __init_memblock memblock_free_reserved_regions(void)
{
	if (memblock.reserved.regions == memblock_reserved_init_regions)
		return 0;

	return memblock_free(__pa(memblock.reserved.regions),
		 sizeof(struct memblock_region) * memblock.reserved.max);
}

/*
 * Reserve memblock.reserved.regions
 */
int __init_memblock memblock_reserve_reserved_regions(void)
{
	if (memblock.reserved.regions == memblock_reserved_init_regions)
		return 0;

	return memblock_reserve(__pa(memblock.reserved.regions),
		 sizeof(struct memblock_region) * memblock.reserved.max);
}

static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r)
{
	unsigned long i;

	for (i = r; i < type->cnt - 1; i++) {
		type->regions[i].base = type->regions[i + 1].base;
		type->regions[i].size = type->regions[i + 1].size;
	}
	type->cnt--;

	/* Special case for empty arrays */
	if (type->cnt == 0) {
		type->cnt = 1;
		type->regions[0].base = 0;
		type->regions[0].size = 0;
	}
}

/* Defined below but needed now */
static long memblock_add_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size);

static int __init_memblock memblock_double_array(struct memblock_type *type)
{
	struct memblock_region *new_array, *old_array;
	phys_addr_t old_size, new_size, addr;
	int use_slab = slab_is_available();

	/* We don't allow resizing until we know about the reserved regions
	 * of memory that aren't suitable for allocation
	 */
	if (!memblock_can_resize)
		return -1;

	/* Calculate new doubled size */
	old_size = type->max * sizeof(struct memblock_region);
	new_size = old_size << 1;

	/* Try to find some space for it.
	 *
	 * WARNING: We assume that either slab_is_available() is true and we
	 * use it, or we use MEMBLOCK for allocations. That means that this is
	 * unsafe to use when bootmem is currently active (unless bootmem
	 * itself is implemented on top of MEMBLOCK which isn't the case yet).
	 *
	 * This should however not be an issue for now, as we currently only
	 * call into MEMBLOCK while it's still active, or much later when slab
	 * is active for memory hotplug operations.
	 */
	if (use_slab) {
		new_array = kmalloc(new_size, GFP_KERNEL);
		addr = new_array ? __pa(new_array) : 0;
	} else
		addr = memblock_find_base(new_size, sizeof(phys_addr_t), 0, MEMBLOCK_ALLOC_ACCESSIBLE);
	if (!addr) {
		pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n",
		       memblock_type_name(type), type->max, type->max * 2);
		return -1;
	}
	new_array = __va(addr);

	memblock_dbg("memblock: %s array is doubled to %ld at [%#010llx-%#010llx]",
		 memblock_type_name(type), type->max * 2, (u64)addr, (u64)addr + new_size - 1);

	/* Found space, we now need to move the array over before
	 * we add the reserved region since it may be our reserved
	 * array itself that is full.
	 */
	memcpy(new_array, type->regions, old_size);
	memset(new_array + type->max, 0, old_size);
	old_array = type->regions;
	type->regions = new_array;
	type->max <<= 1;

	/* If we use SLAB that's it, we are done */
	if (use_slab)
		return 0;

	/* Add the new reserved region now. Should not fail! */
	BUG_ON(memblock_add_region(&memblock.reserved, addr, new_size));

	/* If the array wasn't our static init one, then free it. We only do
	 * that before SLAB is available as later on, we don't know whether
	 * to use kfree or free_bootmem_pages(). Shouldn't be a big deal
	 * anyway.
	 */
	if (old_array != memblock_memory_init_regions &&
	    old_array != memblock_reserved_init_regions)
		memblock_free(__pa(old_array), old_size);

	return 0;
}

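/*
 * Sizing example (added for illustration; the exact INIT_MEMBLOCK_REGIONS
 * value is set in <linux/memblock.h> and may differ): if type->max starts at
 * 128 entries, old_size is 128 * sizeof(struct memblock_region) and the new
 * array holds 256 entries.  Because the capacity only ever doubles, the
 * amortized cost of growing stays linear in the number of registered regions.
 */
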
extern int __init_memblock __weak memblock_memory_can_coalesce(phys_addr_t addr1, phys_addr_t size1,
					phys_addr_t addr2, phys_addr_t size2)
{
	return 1;
}

static long __init_memblock memblock_add_region(struct memblock_type *type,
						phys_addr_t base, phys_addr_t size)
{
	phys_addr_t end = base + size;
	int i, slot = -1;

	/* First try and coalesce this MEMBLOCK with others */
	for (i = 0; i < type->cnt; i++) {
		struct memblock_region *rgn = &type->regions[i];
		phys_addr_t rend = rgn->base + rgn->size;

		/* Exit if there's no possible hits */
		if (rgn->base > end || rgn->size == 0)
			break;

		/* Check if we are fully enclosed within an existing
		 * block
		 */
		if (rgn->base <= base && rend >= end)
			return 0;

		/* Check if we overlap or are adjacent with the bottom
		 * of a block.
		 */
		if (base < rgn->base && end >= rgn->base) {
			/* If we can't coalesce, create a new block */
			if (!memblock_memory_can_coalesce(base, size,
							  rgn->base,
							  rgn->size)) {
				/* Overlap & can't coalesce are mutually
				 * exclusive, if you do that, be prepared
				 * for trouble
				 */
				WARN_ON(end != rgn->base);
				goto new_block;
			}
			/* We extend the bottom of the block down to our
			 * base
			 */
			rgn->base = base;
			rgn->size = rend - base;

			/* Return if we have nothing else to allocate
			 * (fully coalesced)
			 */
			if (rend >= end)
				return 0;

			/* We continue processing from the end of the
			 * coalesced block.
			 */
			base = rend;
			size = end - base;
		}

		/* Now check if we overlap or are adjacent with the
		 * top of a block
		 */
		if (base <= rend && end >= rend) {
			/* If we can't coalesce, create a new block */
			if (!memblock_memory_can_coalesce(rgn->base,
							  rgn->size,
							  base, size)) {
				/* Overlap & can't coalesce are mutually
				 * exclusive, if you do that, be prepared
				 * for trouble
				 */
				WARN_ON(rend != base);
				goto new_block;
			}
			/* We adjust our base down to enclose the
			 * original block and destroy it. It will be
			 * part of our new allocation. Since we've
			 * freed an entry, we know we won't fail
			 * to allocate one later, so we won't risk
			 * losing the original block allocation.
			 */
			size += (base - rgn->base);
			base = rgn->base;
			memblock_remove_region(type, i--);
		}
	}

	/* If the array is empty, special case, replace the fake
	 * filler region and return
	 */
	if ((type->cnt == 1) && (type->regions[0].size == 0)) {
		type->regions[0].base = base;
		type->regions[0].size = size;
		return 0;
	}

 new_block:
	/* If we are out of space, we fail. It's too late to resize the array
	 * but then this shouldn't have happened in the first place.
	 */
	if (WARN_ON(type->cnt >= type->max))
		return -1;

	/* Couldn't coalesce the MEMBLOCK, so add it to the sorted table. */
	for (i = type->cnt - 1; i >= 0; i--) {
		if (base < type->regions[i].base) {
			type->regions[i+1].base = type->regions[i].base;
			type->regions[i+1].size = type->regions[i].size;
		} else {
			type->regions[i+1].base = base;
			type->regions[i+1].size = size;
			slot = i + 1;
			break;
		}
	}
	if (base < type->regions[0].base) {
		type->regions[0].base = base;
		type->regions[0].size = size;
		slot = 0;
	}
	type->cnt++;

	/* The array is full? Try to resize it. If that fails, we undo
	 * our allocation and return an error
	 */
	if (type->cnt == type->max && memblock_double_array(type)) {
		memblock_remove_region(type, slot);
		return -1;
	}

	return 0;
}

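/*
 * Coalescing example (added for illustration; the values are invented): with
 * a single existing region [0x1000, 0x2000), adding [0x2000, 0x3000) hits the
 * "adjacent with the top of a block" case, the old entry is removed and the
 * enlarged range is re-installed as one region [0x1000, 0x3000), so
 * type->cnt stays at 1.  Adding a disjoint range such as [0x8000, 0x9000)
 * instead falls through to new_block and is inserted into the sorted table.
 */
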
long __init_memblock memblock_add(phys_addr_t base, phys_addr_t size)
{
	return memblock_add_region(&memblock.memory, base, size);
}

static long __init_memblock __memblock_remove(struct memblock_type *type,
					      phys_addr_t base, phys_addr_t size)
{
	phys_addr_t end = base + size;
	int i;

	/* Walk through the array for collisions */
	for (i = 0; i < type->cnt; i++) {
		struct memblock_region *rgn = &type->regions[i];
		phys_addr_t rend = rgn->base + rgn->size;

		/* Nothing more to do, exit */
		if (rgn->base > end || rgn->size == 0)
			break;

		/* If we fully enclose the block, drop it */
		if (base <= rgn->base && end >= rend) {
			memblock_remove_region(type, i--);
			continue;
		}

		/* If we are fully enclosed within a block
		 * then we need to split it and we are done
		 */
		if (base > rgn->base && end < rend) {
			rgn->size = base - rgn->base;
			if (!memblock_add_region(type, end, rend - end))
				return 0;
			/* Failure to split is bad, we at least
			 * restore the block before erroring
			 */
			rgn->size = rend - rgn->base;
			WARN_ON(1);
			return -1;
		}

		/* Check if we need to trim the bottom of a block */
		if (rgn->base < end && rend > end) {
			rgn->size -= end - rgn->base;
			rgn->base = end;
			break;
		}

		/* And check if we need to trim the top of a block */
		if (base < rend)
			rgn->size -= rend - base;
	}
	return 0;
}

long __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size)
{
	return __memblock_remove(&memblock.memory, base, size);
}

long __init_memblock memblock_free(phys_addr_t base, phys_addr_t size)
{
	return __memblock_remove(&memblock.reserved, base, size);
}

long __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size)
{
	struct memblock_type *_rgn = &memblock.reserved;

	BUG_ON(0 == size);

	return memblock_add_region(_rgn, base, size);
}

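/*
 * Illustrative use (the caller and addresses are invented for this example):
 * platform code that must keep a firmware table at 0x9d000000..0x9d00ffff
 * out of the allocator would call
 *
 *	memblock_reserve(0x9d000000, 0x10000);
 *
 * before memblock_alloc() and friends are used, so that the range ends up in
 * memblock.reserved and is skipped by memblock_find_region().
 */
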
phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)
{
	phys_addr_t found;

	/* We align the size to limit fragmentation. Without this, a lot of
	 * small allocs quickly eat up the whole reserve array on sparc
	 */
	size = round_up(size, align);

	found = memblock_find_base(size, align, 0, max_addr);
	if (found && !memblock_add_region(&memblock.reserved, found, size))
		return found;

	return 0;
}

phys_addr_t __init memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)
{
	phys_addr_t alloc;

	alloc = __memblock_alloc_base(size, align, max_addr);

	if (alloc == 0)
		panic("ERROR: Failed to allocate 0x%llx bytes below 0x%llx.\n",
		      (unsigned long long) size, (unsigned long long) max_addr);

	return alloc;
}

phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align)
{
	return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
}

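/*
 * Allocation example (added for illustration): an early caller that needs a
 * page-aligned scratch buffer below memblock.current_limit could do
 *
 *	phys_addr_t pa = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
 *	void *buf = __va(pa);
 *
 * memblock_alloc_base() panics on failure, so unlike __memblock_alloc_base()
 * the caller does not need to check for 0 here.
 */
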
/*
 * Additional node-local allocators. Search for node memory is bottom up
 * and walks memblock regions within that node bottom-up as well, but allocation
 * within a memblock region is top-down. XXX I plan to fix that at some stage
 *
 * WARNING: Only available after early_node_map[] has been populated,
 * on some architectures, that is after all the calls to add_active_range()
 * have been done to populate it.
 */

phys_addr_t __weak __init memblock_nid_range(phys_addr_t start, phys_addr_t end, int *nid)
{
#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
	/*
	 * This code originates from sparc which really wants us to walk by
	 * addresses and returns the nid. This is not very convenient for
	 * early_pfn_map[] users as the map isn't sorted yet, and it really
	 * wants to be walked by nid.
	 *
	 * For now, I implement the inefficient method below which walks the
	 * early map multiple times. Eventually we may want to use an ARCH
	 * config option to implement a completely different method for both
	 * cases.
	 */
	unsigned long start_pfn, end_pfn;
	int i;

	for (i = 0; i < MAX_NUMNODES; i++) {
		get_pfn_range_for_nid(i, &start_pfn, &end_pfn);
		if (start < PFN_PHYS(start_pfn) || start >= PFN_PHYS(end_pfn))
			continue;
		*nid = i;
		return min(end, PFN_PHYS(end_pfn));
	}
#endif
	*nid = 0;

	return end;
}

static phys_addr_t __init memblock_alloc_nid_region(struct memblock_region *mp,
					phys_addr_t size,
					phys_addr_t align, int nid)
{
	phys_addr_t start, end;

	start = mp->base;
	end = start + mp->size;

	start = round_up(start, align);
	while (start < end) {
		phys_addr_t this_end;
		int this_nid;

		this_end = memblock_nid_range(start, end, &this_nid);
		if (this_nid == nid) {
			phys_addr_t ret = memblock_find_region(start, this_end, size, align);
			if (ret &&
			    !memblock_add_region(&memblock.reserved, ret, size))
				return ret;
		}
		start = this_end;
	}

	return 0;
}

phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid)
{
	struct memblock_type *mem = &memblock.memory;
	int i;

	BUG_ON(0 == size);

	/* We align the size to limit fragmentation. Without this, a lot of
	 * small allocs quickly eat up the whole reserve array on sparc
	 */
	size = round_up(size, align);

	/* We do a bottom-up search for a region with the right
	 * nid since that's easier considering how memblock_nid_range()
	 * works
	 */
	for (i = 0; i < mem->cnt; i++) {
		phys_addr_t ret = memblock_alloc_nid_region(&mem->regions[i],
					size, align, nid);
		if (ret)
			return ret;
	}

	return 0;
}

phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid)
{
	phys_addr_t res = memblock_alloc_nid(size, align, nid);

	if (res)
		return res;
	return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
}

/*
 * Remaining API functions
 */

/* You must call memblock_analyze() before this. */
phys_addr_t __init memblock_phys_mem_size(void)
{
	return memblock.memory_size;
}

phys_addr_t __init_memblock memblock_end_of_DRAM(void)
{
	int idx = memblock.memory.cnt - 1;

	return (memblock.memory.regions[idx].base + memblock.memory.regions[idx].size);
}

/* You must call memblock_analyze() after this. */
void __init memblock_enforce_memory_limit(phys_addr_t memory_limit)
{
	unsigned long i;
	phys_addr_t limit;
	struct memblock_region *p;

	if (!memory_limit)
		return;

	/* Truncate the memblock regions to satisfy the memory limit. */
	limit = memory_limit;
	for (i = 0; i < memblock.memory.cnt; i++) {
		if (limit > memblock.memory.regions[i].size) {
			limit -= memblock.memory.regions[i].size;
			continue;
		}

		memblock.memory.regions[i].size = limit;
		memblock.memory.cnt = i + 1;
		break;
	}

	memory_limit = memblock_end_of_DRAM();

	/* And truncate any reserves above the limit also. */
	for (i = 0; i < memblock.reserved.cnt; i++) {
		p = &memblock.reserved.regions[i];

		if (p->base > memory_limit)
			p->size = 0;
		else if ((p->base + p->size) > memory_limit)
			p->size = memory_limit - p->base;

		if (p->size == 0) {
			memblock_remove_region(&memblock.reserved, i);
			i--;
		}
	}
}

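/*
 * Worked example (added for illustration; the numbers are invented): with two
 * memory regions of 512MB and 1GB and memory_limit = 768MB, the first region
 * (512MB) fits and limit drops to 256MB; the second region is then truncated
 * to 256MB and memblock.memory.cnt becomes 2.  Reserved regions that now lie
 * above the new end of DRAM are shrunk to zero size and removed.
 */
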
static int __init_memblock memblock_search(struct memblock_type *type, phys_addr_t addr)
{
	unsigned int left = 0, right = type->cnt;

	do {
		unsigned int mid = (right + left) / 2;

		if (addr < type->regions[mid].base)
			right = mid;
		else if (addr >= (type->regions[mid].base +
				  type->regions[mid].size))
			left = mid + 1;
		else
			return mid;
	} while (left < right);

	return -1;
}

int __init memblock_is_reserved(phys_addr_t addr)
{
	return memblock_search(&memblock.reserved, addr) != -1;
}

int __init_memblock memblock_is_memory(phys_addr_t addr)
{
	return memblock_search(&memblock.memory, addr) != -1;
}

int __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size)
{
	int idx = memblock_search(&memblock.memory, base);

	if (idx == -1)
		return 0;
	return memblock.memory.regions[idx].base <= base &&
		(memblock.memory.regions[idx].base +
		 memblock.memory.regions[idx].size) >= (base + size);
}

int __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t size)
{
	return memblock_overlaps_region(&memblock.reserved, base, size) >= 0;
}

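/*
 * Note the asymmetry between the two region checks above (comment added for
 * clarity): memblock_is_region_memory() requires the whole [base, base+size)
 * range to be contained in a single memory region, while
 * memblock_is_region_reserved() returns true if the range overlaps any
 * reserved region at all.  E.g. with a reserved region [0x2000, 0x3000), a
 * query for base = 0x2800, size = 0x1000 counts as reserved even though only
 * its first half collides.
 */
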
void __init_memblock memblock_set_current_limit(phys_addr_t limit)
{
	memblock.current_limit = limit;
}

static void __init_memblock memblock_dump(struct memblock_type *region, char *name)
{
	unsigned long long base, size;
	int i;

	pr_info(" %s.cnt = 0x%lx\n", name, region->cnt);

	for (i = 0; i < region->cnt; i++) {
		base = region->regions[i].base;
		size = region->regions[i].size;

		pr_info(" %s[%#x]\t[%#016llx-%#016llx], %#llx bytes\n",
		    name, i, base, base + size - 1, size);
	}
}

void __init_memblock memblock_dump_all(void)
{
	if (!memblock_debug)
		return;

	pr_info("MEMBLOCK configuration:\n");
	pr_info(" memory size = 0x%llx\n", (unsigned long long)memblock.memory_size);

	memblock_dump(&memblock.memory, "memory");
	memblock_dump(&memblock.reserved, "reserved");
}

void __init memblock_analyze(void)
{
	int i;

	/* Check marker in the unused last array entry */
	WARN_ON(memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS].base
		!= (phys_addr_t)RED_INACTIVE);
	WARN_ON(memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS].base
		!= (phys_addr_t)RED_INACTIVE);

	memblock.memory_size = 0;

	for (i = 0; i < memblock.memory.cnt; i++)
		memblock.memory_size += memblock.memory.regions[i].size;

	/* We allow resizing from there */
	memblock_can_resize = 1;
}

void __init memblock_init(void)
{
	static int init_done __initdata = 0;

	if (init_done)
		return;
	init_done = 1;

	/* Hookup the initial arrays */
	memblock.memory.regions = memblock_memory_init_regions;
	memblock.memory.max = INIT_MEMBLOCK_REGIONS;
	memblock.reserved.regions = memblock_reserved_init_regions;
	memblock.reserved.max = INIT_MEMBLOCK_REGIONS;

	/* Write a marker in the unused last array entry */
	memblock.memory.regions[INIT_MEMBLOCK_REGIONS].base = (phys_addr_t)RED_INACTIVE;
	memblock.reserved.regions[INIT_MEMBLOCK_REGIONS].base = (phys_addr_t)RED_INACTIVE;

	/* Create a dummy zero size MEMBLOCK which will get coalesced away later.
	 * This simplifies the memblock_add() code below...
	 */
	memblock.memory.regions[0].base = 0;
	memblock.memory.regions[0].size = 0;
	memblock.memory.cnt = 1;

	/* Ditto for the reserved array. */
	memblock.reserved.regions[0].base = 0;
	memblock.reserved.regions[0].size = 0;
	memblock.reserved.cnt = 1;

	memblock.current_limit = MEMBLOCK_ALLOC_ANYWHERE;
}

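/*
 * Typical boot-time call sequence (illustrative sketch only; the hook points
 * are architecture specific and the addresses below are invented):
 *
 *	memblock_init();
 *	memblock_add(0x00000000, 0x40000000);		// register 1GB of RAM
 *	memblock_reserve(__pa(_text), _end - _text);	// keep the kernel image
 *	memblock_analyze();				// compute memory_size, allow resizing
 *	ptr = __va(memblock_alloc(0x10000, 0x1000));	// early allocations
 *
 * memblock_init() must come first, and memblock_analyze() must run before
 * memblock_phys_mem_size() is meaningful (see the comment above it).
 */
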
static int __init early_memblock(char *p)
{
	if (p && strstr(p, "debug"))
		memblock_debug = 1;
	return 0;
}
early_param("memblock", early_memblock);

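/*
 * E.g. booting with "memblock=debug" on the kernel command line sets
 * memblock_debug, which enables the memblock_dbg() messages in this file and
 * the dump performed by memblock_dump_all().
 */
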
#if defined(CONFIG_DEBUG_FS) && !defined(ARCH_DISCARD_MEMBLOCK)

static int memblock_debug_show(struct seq_file *m, void *private)
{
	struct memblock_type *type = m->private;
	struct memblock_region *reg;
	int i;

	for (i = 0; i < type->cnt; i++) {
		reg = &type->regions[i];
		seq_printf(m, "%4d: ", i);
		if (sizeof(phys_addr_t) == 4)
			seq_printf(m, "0x%08lx..0x%08lx\n",
				   (unsigned long)reg->base,
				   (unsigned long)(reg->base + reg->size - 1));
		else
			seq_printf(m, "0x%016llx..0x%016llx\n",
				   (unsigned long long)reg->base,
				   (unsigned long long)(reg->base + reg->size - 1));
	}
	return 0;
}

static int memblock_debug_open(struct inode *inode, struct file *file)
{
	return single_open(file, memblock_debug_show, inode->i_private);
}

static const struct file_operations memblock_debug_fops = {
	.open = memblock_debug_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

static int __init memblock_init_debugfs(void)
{
	struct dentry *root = debugfs_create_dir("memblock", NULL);

	if (!root)
		return -ENXIO;
	debugfs_create_file("memory", S_IRUGO, root, &memblock.memory, &memblock_debug_fops);
	debugfs_create_file("reserved", S_IRUGO, root, &memblock.reserved, &memblock_debug_fops);

	return 0;
}
__initcall(memblock_init_debugfs);

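/*
 * With debugfs mounted at /sys/kernel/debug (the usual mount point, not
 * mandated by this file), the two read-only files created above appear as
 * /sys/kernel/debug/memblock/memory and /sys/kernel/debug/memblock/reserved,
 * each listing one "index: start..end" line per region in the format printed
 * by memblock_debug_show().
 */
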
#endif /* CONFIG_DEBUG_FS */