/*
 * Procedures for maintaining information about logical memory blocks.
 *
 * Peter Bergner, IBM Corp.	June 2001.
 * Copyright (C) 2001 Peter Bergner.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/bitops.h>
#include <linux/poison.h>
#include <linux/pfn.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/memblock.h>

struct memblock memblock;

static int memblock_debug, memblock_can_resize;
static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS + 1];
static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS + 1];

#define MEMBLOCK_ERROR	(~(phys_addr_t)0)

/* inline so we don't get a warning when pr_debug is compiled out */
static inline const char *memblock_type_name(struct memblock_type *type)
{
	if (type == &memblock.memory)
		return "memory";
	else if (type == &memblock.reserved)
		return "reserved";
	else
		return "unknown";
}

/*
 * Address comparison utilities
 */

static phys_addr_t memblock_align_down(phys_addr_t addr, phys_addr_t size)
{
	return addr & ~(size - 1);
}

static phys_addr_t memblock_align_up(phys_addr_t addr, phys_addr_t size)
{
	return (addr + (size - 1)) & ~(size - 1);
}

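/*
 * Worked example for the two helpers above, assuming @size is a power
 * of two (the ~(size - 1) mask trick is only meaningful then):
 *
 *	memblock_align_down(0x12345, 0x1000) == 0x12000
 *	memblock_align_up(0x12345, 0x1000)   == 0x13000
 *	memblock_align_up(0x12000, 0x1000)   == 0x12000	(already aligned)
 */
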
static unsigned long memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1,
					    phys_addr_t base2, phys_addr_t size2)
{
	return ((base1 < (base2 + size2)) && (base2 < (base1 + size1)));
}

static long memblock_addrs_adjacent(phys_addr_t base1, phys_addr_t size1,
				    phys_addr_t base2, phys_addr_t size2)
{
	if (base2 == base1 + size1)
		return 1;
	else if (base1 == base2 + size2)
		return -1;

	return 0;
}

static long memblock_regions_adjacent(struct memblock_type *type,
				      unsigned long r1, unsigned long r2)
{
	phys_addr_t base1 = type->regions[r1].base;
	phys_addr_t size1 = type->regions[r1].size;
	phys_addr_t base2 = type->regions[r2].base;
	phys_addr_t size2 = type->regions[r2].size;

	return memblock_addrs_adjacent(base1, size1, base2, size2);
}

long memblock_overlaps_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size)
{
	unsigned long i;

	for (i = 0; i < type->cnt; i++) {
		phys_addr_t rgnbase = type->regions[i].base;
		phys_addr_t rgnsize = type->regions[i].size;
		if (memblock_addrs_overlap(base, size, rgnbase, rgnsize))
			break;
	}

	return (i < type->cnt) ? i : -1;
}

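/*
 * Example, with hypothetical addresses: if @type holds the regions
 * [0x1000..0x2000) and [0x8000..0x9000), then
 * memblock_overlaps_region(type, 0x1800, 0x1000) returns 0 (it overlaps
 * the first region) while memblock_overlaps_region(type, 0x3000, 0x1000)
 * returns -1.
 */
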
/*
 * Find, allocate, deallocate or reserve unreserved regions. All allocations
 * are top-down.
 */

static phys_addr_t __init memblock_find_region(phys_addr_t start, phys_addr_t end,
					       phys_addr_t size, phys_addr_t align)
{
	phys_addr_t base, res_base;
	long j;

	base = memblock_align_down((end - size), align);
	while (start <= base) {
		j = memblock_overlaps_region(&memblock.reserved, base, size);
		if (j < 0)
			return base;
		res_base = memblock.reserved.regions[j].base;
		if (res_base < size)
			break;
		base = memblock_align_down(res_base - size, align);
	}

	return MEMBLOCK_ERROR;
}

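/*
 * A minimal sketch of the top-down scan above, with hypothetical
 * addresses: for [start..end) = [0x0..0x10000), size = align = 0x1000
 * and one reserved region [0xE000..0x10000), the first candidate is
 * memblock_align_down(0x10000 - 0x1000, 0x1000) == 0xF000, which
 * overlaps the reserve; the next candidate is placed just below it at
 * memblock_align_down(0xE000 - 0x1000, 0x1000) == 0xD000, which is
 * clean and gets returned.
 */
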
static phys_addr_t __init memblock_find_base(phys_addr_t size, phys_addr_t align,
					     phys_addr_t start, phys_addr_t end)
{
	long i;

	BUG_ON(0 == size);

	size = memblock_align_up(size, align);

	/* Pump up max_addr */
	if (end == MEMBLOCK_ALLOC_ACCESSIBLE)
		end = memblock.current_limit;

	/* We do a top-down search, this tends to limit memory
	 * fragmentation by keeping early boot allocs near the
	 * top of memory
	 */
	for (i = memblock.memory.cnt - 1; i >= 0; i--) {
		phys_addr_t memblockbase = memblock.memory.regions[i].base;
		phys_addr_t memblocksize = memblock.memory.regions[i].size;
		phys_addr_t bottom, top, found;

		if (memblocksize < size)
			continue;
		if ((memblockbase + memblocksize) <= start)
			break;
		bottom = max(memblockbase, start);
		top = min(memblockbase + memblocksize, end);
		if (bottom >= top)
			continue;
		found = memblock_find_region(bottom, top, size, align);
		if (found != MEMBLOCK_ERROR)
			return found;
	}
	return MEMBLOCK_ERROR;
}

static void memblock_remove_region(struct memblock_type *type, unsigned long r)
{
	unsigned long i;

	for (i = r; i < type->cnt - 1; i++) {
		type->regions[i].base = type->regions[i + 1].base;
		type->regions[i].size = type->regions[i + 1].size;
	}
	type->cnt--;
}

/* Assumption: base addr of region 1 < base addr of region 2 */
static void memblock_coalesce_regions(struct memblock_type *type,
				      unsigned long r1, unsigned long r2)
{
	type->regions[r1].size += type->regions[r2].size;
	memblock_remove_region(type, r2);
}

/* Defined below but needed now */
static long memblock_add_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size);

static int memblock_double_array(struct memblock_type *type)
{
	struct memblock_region *new_array, *old_array;
	phys_addr_t old_size, new_size, addr;
	int use_slab = slab_is_available();

	/* We don't allow resizing until we know about the reserved regions
	 * of memory that aren't suitable for allocation
	 */
	if (!memblock_can_resize)
		return -1;

	pr_debug("memblock: %s array full, doubling...", memblock_type_name(type));

	/* Calculate new doubled size */
	old_size = type->max * sizeof(struct memblock_region);
	new_size = old_size << 1;

	/* Try to find some space for it.
	 *
	 * WARNING: We assume that either slab_is_available() and we use it or
	 * we use MEMBLOCK for allocations. That means that this is unsafe to use
	 * when bootmem is currently active (unless bootmem itself is implemented
	 * on top of MEMBLOCK which isn't the case yet)
	 *
	 * This should however not be an issue for now, as we currently only
	 * call into MEMBLOCK while it's still active, or much later when slab is
	 * active for memory hotplug operations
	 */
	if (use_slab) {
		new_array = kmalloc(new_size, GFP_KERNEL);
		addr = new_array == NULL ? MEMBLOCK_ERROR : __pa(new_array);
	} else
		addr = memblock_find_base(new_size, sizeof(phys_addr_t), 0, MEMBLOCK_ALLOC_ACCESSIBLE);
	if (addr == MEMBLOCK_ERROR) {
		pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n",
		       memblock_type_name(type), type->max, type->max * 2);
		return -1;
	}
	new_array = __va(addr);

	/* Found space, we now need to move the array over before
	 * we add the reserved region since it may be our reserved
	 * array itself that is full.
	 */
	memcpy(new_array, type->regions, old_size);
	memset(new_array + type->max, 0, old_size);
	old_array = type->regions;
	type->regions = new_array;
	type->max <<= 1;

	/* If we use SLAB that's it, we are done */
	if (use_slab)
		return 0;

	/* Add the new reserved region now. Should not fail ! */
	BUG_ON(memblock_add_region(&memblock.reserved, addr, new_size) < 0);

	/* If the array wasn't our static init one, then free it. We only do
	 * that before SLAB is available as later on, we don't know whether
	 * to use kfree or free_bootmem_pages(). Shouldn't be a big deal
	 * anyways.
	 */
	if (old_array != memblock_memory_init_regions &&
	    old_array != memblock_reserved_init_regions)
		memblock_free(__pa(old_array), old_size);

	return 0;
}

extern int __weak memblock_memory_can_coalesce(phys_addr_t addr1, phys_addr_t size1,
					       phys_addr_t addr2, phys_addr_t size2)
{
	return 1;
}

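/*
 * An architecture can override the __weak default above to veto merges
 * of otherwise-adjacent memory regions. A minimal sketch (disabled here;
 * it would live in arch code, and the boundary value is hypothetical):
 */
#if 0
#define ARCH_COALESCE_BOUNDARY	((phys_addr_t)0x80000000)	/* hypothetical */

int memblock_memory_can_coalesce(phys_addr_t addr1, phys_addr_t size1,
				 phys_addr_t addr2, phys_addr_t size2)
{
	phys_addr_t start = min(addr1, addr2);
	phys_addr_t end = max(addr1 + size1, addr2 + size2);

	/* Allow the merge only if the result stays on one side of the boundary */
	return start >= ARCH_COALESCE_BOUNDARY || end <= ARCH_COALESCE_BOUNDARY;
}
#endif
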
static long memblock_add_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size)
{
	unsigned long coalesced = 0;
	long adjacent, i;

	if ((type->cnt == 1) && (type->regions[0].size == 0)) {
		type->regions[0].base = base;
		type->regions[0].size = size;
		return 0;
	}

	/* First try and coalesce this MEMBLOCK with another. */
	for (i = 0; i < type->cnt; i++) {
		phys_addr_t rgnbase = type->regions[i].base;
		phys_addr_t rgnsize = type->regions[i].size;

		if ((rgnbase == base) && (rgnsize == size))
			/* Already have this region, so we're done */
			return 0;

		adjacent = memblock_addrs_adjacent(base, size, rgnbase, rgnsize);
		/* Check if arch allows coalescing */
		if (adjacent != 0 && type == &memblock.memory &&
		    !memblock_memory_can_coalesce(base, size, rgnbase, rgnsize))
			break;
		if (adjacent > 0) {
			type->regions[i].base -= size;
			type->regions[i].size += size;
			coalesced++;
			break;
		} else if (adjacent < 0) {
			type->regions[i].size += size;
			coalesced++;
			break;
		}
	}

	/* If we plugged a hole, we may want to also coalesce with the
	 * next region
	 */
	if ((i < type->cnt - 1) && memblock_regions_adjacent(type, i, i+1) &&
	    ((type != &memblock.memory ||
	      memblock_memory_can_coalesce(type->regions[i].base,
					   type->regions[i].size,
					   type->regions[i+1].base,
					   type->regions[i+1].size)))) {
		memblock_coalesce_regions(type, i, i+1);
		coalesced++;
	}

	if (coalesced)
		return coalesced;

	/* If we are out of space, we fail. It's too late to resize the array
	 * but then this shouldn't have happened in the first place.
	 */
	if (WARN_ON(type->cnt >= type->max))
		return -1;

	/* Couldn't coalesce the MEMBLOCK, so add it to the sorted table. */
	for (i = type->cnt - 1; i >= 0; i--) {
		if (base < type->regions[i].base) {
			type->regions[i+1].base = type->regions[i].base;
			type->regions[i+1].size = type->regions[i].size;
		} else {
			type->regions[i+1].base = base;
			type->regions[i+1].size = size;
			break;
		}
	}

	if (base < type->regions[0].base) {
		type->regions[0].base = base;
		type->regions[0].size = size;
	}
	type->cnt++;

	/* The array is full? Try to resize it. If that fails, we undo
	 * our allocation and return an error
	 */
	if (type->cnt == type->max && memblock_double_array(type)) {
		type->cnt--;
		return -1;
	}

	return 0;
}

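/*
 * Example of the coalescing behaviour, with hypothetical addresses:
 * starting from a single region [0x1000..0x2000), adding (0x2000, 0x1000)
 * extends the entry in place to [0x1000..0x3000) via the adjacent < 0
 * path, while adding (0x4000, 0x1000) inserts a second entry into the
 * sorted table. Adding (0x3000, 0x1000) then plugs the hole and the two
 * entries are merged back into one region [0x1000..0x5000).
 */
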
long memblock_add(phys_addr_t base, phys_addr_t size)
{
	return memblock_add_region(&memblock.memory, base, size);
}

static long __memblock_remove(struct memblock_type *type, phys_addr_t base, phys_addr_t size)
{
	phys_addr_t rgnbegin, rgnend;
	phys_addr_t end = base + size;
	int i;

	rgnbegin = rgnend = 0; /* suppress gcc warnings */

	/* Find the region where (base, size) belongs to */
	for (i = 0; i < type->cnt; i++) {
		rgnbegin = type->regions[i].base;
		rgnend = rgnbegin + type->regions[i].size;

		if ((rgnbegin <= base) && (end <= rgnend))
			break;
	}

	/* Didn't find the region */
	if (i == type->cnt)
		return -1;

	/* Check to see if we are removing entire region */
	if ((rgnbegin == base) && (rgnend == end)) {
		memblock_remove_region(type, i);
		return 0;
	}

	/* Check to see if region is matching at the front */
	if (rgnbegin == base) {
		type->regions[i].base = end;
		type->regions[i].size -= size;
		return 0;
	}

	/* Check to see if the region is matching at the end */
	if (rgnend == end) {
		type->regions[i].size -= size;
		return 0;
	}

	/*
	 * We need to split the entry - adjust the current one to the
	 * beginning of the hole and add the region after hole.
	 */
	type->regions[i].size = base - type->regions[i].base;
	return memblock_add_region(type, end, rgnend - end);
}

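/*
 * Example of the split case, with hypothetical addresses: removing
 * (0x2000, 0x1000) from a region [0x1000..0x4000) shrinks the entry to
 * [0x1000..0x2000) and re-adds the tail [0x3000..0x4000) as a new
 * region via memblock_add_region(type, 0x3000, 0x1000).
 */
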
long memblock_remove(phys_addr_t base, phys_addr_t size)
{
	return __memblock_remove(&memblock.memory, base, size);
}

long __init memblock_free(phys_addr_t base, phys_addr_t size)
{
	return __memblock_remove(&memblock.reserved, base, size);
}

long __init memblock_reserve(phys_addr_t base, phys_addr_t size)
{
	struct memblock_type *_rgn = &memblock.reserved;

	BUG_ON(0 == size);

	return memblock_add_region(_rgn, base, size);
}

phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)
{
	phys_addr_t found;

	/* We align the size to limit fragmentation. Without this, a lot of
	 * small allocs quickly eat up the whole reserve array on sparc
	 */
	size = memblock_align_up(size, align);

	found = memblock_find_base(size, align, 0, max_addr);
	if (found != MEMBLOCK_ERROR &&
	    memblock_add_region(&memblock.reserved, found, size) >= 0)
		return found;

	return 0;
}

phys_addr_t __init memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)
{
	phys_addr_t alloc;

	alloc = __memblock_alloc_base(size, align, max_addr);

	if (alloc == 0)
		panic("ERROR: Failed to allocate 0x%llx bytes below 0x%llx.\n",
		      (unsigned long long) size, (unsigned long long) max_addr);

	return alloc;
}

phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align)
{
	return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
}

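/*
 * Typical early-boot usage of the allocators above, a sketch with
 * hypothetical sizes (the caller would be arch setup code, before the
 * page allocator is up):
 */
#if 0
	phys_addr_t pa;

	/* 16KB, 4KB-aligned, below memblock.current_limit; panics on failure */
	pa = memblock_alloc(0x4000, 0x1000);

	/* Same, but capped below 4GB; returns 0 on failure instead */
	pa = __memblock_alloc_base(0x4000, 0x1000, (phys_addr_t)0x100000000ULL);
#endif
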
/*
 * Additional node-local allocators. Search for node memory is bottom up
 * and walks memblock regions within that node bottom-up as well, but allocation
 * within a memblock region is top-down. XXX I plan to fix that at some stage
 *
 * WARNING: Only available after early_node_map[] has been populated,
 * on some architectures, that is after all the calls to add_active_range()
 * have been done to populate it.
 */

phys_addr_t __weak __init memblock_nid_range(phys_addr_t start, phys_addr_t end, int *nid)
{
#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
	/*
	 * This code originates from sparc which really wants us to walk by addresses
	 * and returns the nid. This is not very convenient for early_pfn_map[] users
	 * as the map isn't sorted yet, and it really wants to be walked by nid.
	 *
	 * For now, I implement the inefficient method below which walks the early
	 * map multiple times. Eventually we may want to use an ARCH config option
	 * to implement a completely different method for both cases.
	 */
	unsigned long start_pfn, end_pfn;
	int i;

	for (i = 0; i < MAX_NUMNODES; i++) {
		get_pfn_range_for_nid(i, &start_pfn, &end_pfn);
		if (start < PFN_PHYS(start_pfn) || start >= PFN_PHYS(end_pfn))
			continue;
		*nid = i;
		return min(end, PFN_PHYS(end_pfn));
	}
#endif
	*nid = 0;

	return end;
}

static phys_addr_t __init memblock_alloc_nid_region(struct memblock_region *mp,
						    phys_addr_t size,
						    phys_addr_t align, int nid)
{
	phys_addr_t start, end;

	start = mp->base;
	end = start + mp->size;

	start = memblock_align_up(start, align);
	while (start < end) {
		phys_addr_t this_end;
		int this_nid;

		this_end = memblock_nid_range(start, end, &this_nid);
		if (this_nid == nid) {
			phys_addr_t ret = memblock_find_region(start, this_end, size, align);
			if (ret != MEMBLOCK_ERROR &&
			    memblock_add_region(&memblock.reserved, ret, size) >= 0)
				return ret;
		}
		start = this_end;
	}

	return MEMBLOCK_ERROR;
}

phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid)
{
	struct memblock_type *mem = &memblock.memory;
	int i;

	BUG_ON(0 == size);

	/* We align the size to limit fragmentation. Without this, a lot of
	 * small allocs quickly eat up the whole reserve array on sparc
	 */
	size = memblock_align_up(size, align);

	/* We do a bottom-up search for a region with the right
	 * nid since that's easier considering how memblock_nid_range()
	 * works
	 */
	for (i = 0; i < mem->cnt; i++) {
		phys_addr_t ret = memblock_alloc_nid_region(&mem->regions[i],
							    size, align, nid);
		if (ret != MEMBLOCK_ERROR)
			return ret;
	}

	return 0;
}

phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid)
{
	phys_addr_t res = memblock_alloc_nid(size, align, nid);

	if (res)
		return res;
	return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ANYWHERE);
}

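/*
 * Sketch of the node-local API, with a hypothetical node id: try to
 * place a page-sized allocation on node 1 first, falling back to any
 * node if node 1 has no suitable range.
 */
#if 0
	phys_addr_t pa = memblock_alloc_try_nid(0x1000, 0x1000, 1);
#endif
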
/*
 * Remaining API functions
 */

/* You must call memblock_analyze() before this. */
phys_addr_t __init memblock_phys_mem_size(void)
{
	return memblock.memory_size;
}

phys_addr_t memblock_end_of_DRAM(void)
{
	int idx = memblock.memory.cnt - 1;

	return (memblock.memory.regions[idx].base + memblock.memory.regions[idx].size);
}

/* You must call memblock_analyze() after this. */
void __init memblock_enforce_memory_limit(phys_addr_t memory_limit)
{
	unsigned long i;
	phys_addr_t limit;
	struct memblock_region *p;

	if (!memory_limit)
		return;

	/* Truncate the memblock regions to satisfy the memory limit. */
	limit = memory_limit;
	for (i = 0; i < memblock.memory.cnt; i++) {
		if (limit > memblock.memory.regions[i].size) {
			limit -= memblock.memory.regions[i].size;
			continue;
		}

		memblock.memory.regions[i].size = limit;
		memblock.memory.cnt = i + 1;
		break;
	}

	memory_limit = memblock_end_of_DRAM();

	/* And truncate any reserves above the limit also. */
	for (i = 0; i < memblock.reserved.cnt; i++) {
		p = &memblock.reserved.regions[i];

		if (p->base > memory_limit)
			p->size = 0;
		else if ((p->base + p->size) > memory_limit)
			p->size = memory_limit - p->base;

		if (p->size == 0) {
			memblock_remove_region(&memblock.reserved, i);
			i--;
		}
	}
}

static int memblock_search(struct memblock_type *type, phys_addr_t addr)
{
	unsigned int left = 0, right = type->cnt;

	do {
		unsigned int mid = (right + left) / 2;

		if (addr < type->regions[mid].base)
			right = mid;
		else if (addr >= (type->regions[mid].base +
				  type->regions[mid].size))
			left = mid + 1;
		else
			return mid;
	} while (left < right);
	return -1;
}

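/*
 * memblock_search() relies on the invariant, maintained by
 * memblock_add_region(), that regions are sorted by base and do not
 * overlap. Example with hypothetical regions [0x1000..0x2000) and
 * [0x8000..0x9000): searching for 0x1800 returns index 0, searching
 * for 0x5000 returns -1.
 */
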
int __init memblock_is_reserved(phys_addr_t addr)
{
	return memblock_search(&memblock.reserved, addr) != -1;
}

int memblock_is_memory(phys_addr_t addr)
{
	return memblock_search(&memblock.memory, addr) != -1;
}

int memblock_is_region_memory(phys_addr_t base, phys_addr_t size)
{
	/* Check against the memory type, not the reserved one */
	int idx = memblock_search(&memblock.memory, base);

	if (idx == -1)
		return 0;
	return memblock.memory.regions[idx].base <= base &&
		(memblock.memory.regions[idx].base +
		 memblock.memory.regions[idx].size) >= (base + size);
}

int memblock_is_region_reserved(phys_addr_t base, phys_addr_t size)
{
	return memblock_overlaps_region(&memblock.reserved, base, size) >= 0;
}

void __init memblock_set_current_limit(phys_addr_t limit)
{
	memblock.current_limit = limit;
}

static void memblock_dump(struct memblock_type *region, char *name)
{
	unsigned long long base, size;
	int i;

	pr_info(" %s.cnt  = 0x%lx\n", name, region->cnt);

	for (i = 0; i < region->cnt; i++) {
		base = region->regions[i].base;
		size = region->regions[i].size;

		pr_info(" %s[0x%x]\t0x%016llx - 0x%016llx, 0x%llx bytes\n",
			name, i, base, base + size - 1, size);
	}
}

void memblock_dump_all(void)
{
	if (!memblock_debug)
		return;

	pr_info("MEMBLOCK configuration:\n");
	pr_info(" memory size = 0x%llx\n", (unsigned long long)memblock.memory_size);

	memblock_dump(&memblock.memory, "memory");
	memblock_dump(&memblock.reserved, "reserved");
}

void __init memblock_analyze(void)
{
	int i;

	/* Check marker in the unused last array entry */
	WARN_ON(memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS].base
		!= (phys_addr_t)RED_INACTIVE);
	WARN_ON(memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS].base
		!= (phys_addr_t)RED_INACTIVE);

	memblock.memory_size = 0;

	for (i = 0; i < memblock.memory.cnt; i++)
		memblock.memory_size += memblock.memory.regions[i].size;

	/* We allow resizing from there */
	memblock_can_resize = 1;
}

void __init memblock_init(void)
{
	/* Hookup the initial arrays */
	memblock.memory.regions = memblock_memory_init_regions;
	memblock.memory.max = INIT_MEMBLOCK_REGIONS;
	memblock.reserved.regions = memblock_reserved_init_regions;
	memblock.reserved.max = INIT_MEMBLOCK_REGIONS;

	/* Write a marker in the unused last array entry */
	memblock.memory.regions[INIT_MEMBLOCK_REGIONS].base = (phys_addr_t)RED_INACTIVE;
	memblock.reserved.regions[INIT_MEMBLOCK_REGIONS].base = (phys_addr_t)RED_INACTIVE;

	/* Create a dummy zero size MEMBLOCK which will get coalesced away later.
	 * This simplifies the memblock_add() code below...
	 */
	memblock.memory.regions[0].base = 0;
	memblock.memory.regions[0].size = 0;
	memblock.memory.cnt = 1;

	/* Ditto. */
	memblock.reserved.regions[0].base = 0;
	memblock.reserved.regions[0].size = 0;
	memblock.reserved.cnt = 1;

	memblock.current_limit = MEMBLOCK_ALLOC_ANYWHERE;
}

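/*
 * Sketch of the expected boot-time call sequence from arch code (the
 * range variables are hypothetical):
 */
#if 0
	memblock_init();				/* hook up the static arrays */
	memblock_add(ram_base, ram_size);		/* register available RAM */
	memblock_reserve(kernel_base, kernel_size);	/* protect the kernel image */
	memblock_analyze();				/* compute memory_size, allow resizing */
	/* memblock_alloc() and friends may be used from here on */
#endif
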
static int __init early_memblock(char *p)
{
	if (p && strstr(p, "debug"))
		memblock_debug = 1;
	return 0;
}
early_param("memblock", early_memblock);

#ifdef CONFIG_DEBUG_FS

static int memblock_debug_show(struct seq_file *m, void *private)
{
	struct memblock_type *type = m->private;
	struct memblock_region *reg;
	int i;

	for (i = 0; i < type->cnt; i++) {
		reg = &type->regions[i];
		seq_printf(m, "%4d: ", i);
		if (sizeof(phys_addr_t) == 4)
			seq_printf(m, "0x%08lx..0x%08lx\n",
				   (unsigned long)reg->base,
				   (unsigned long)(reg->base + reg->size - 1));
		else
			seq_printf(m, "0x%016llx..0x%016llx\n",
				   (unsigned long long)reg->base,
				   (unsigned long long)(reg->base + reg->size - 1));
	}
	return 0;
}

static int memblock_debug_open(struct inode *inode, struct file *file)
{
	return single_open(file, memblock_debug_show, inode->i_private);
}

static const struct file_operations memblock_debug_fops = {
	.open = memblock_debug_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

static int __init memblock_init_debugfs(void)
{
	struct dentry *root = debugfs_create_dir("memblock", NULL);

	if (!root)
		return -ENXIO;
	debugfs_create_file("memory", S_IRUGO, root, &memblock.memory, &memblock_debug_fops);
	debugfs_create_file("reserved", S_IRUGO, root, &memblock.reserved, &memblock_debug_fops);

	return 0;
}
__initcall(memblock_init_debugfs);

#endif /* CONFIG_DEBUG_FS */