Commit | Line | Data |
---|---|---|
b79cd8f1 YL |
1 | /* |
2 | * Handle the memory map. | |
3 | * The functions here do the job until bootmem takes over. | |
4 | * | |
5 | * Getting sanitize_e820_map() in sync with i386 version by applying change: | |
6 | * - Provisions for empty E820 memory regions (reported by certain BIOSes). | |
7 | * Alex Achenbach <xela@slit.de>, December 2002. | |
8 | * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> | |
9 | * | |
10 | */ | |
11 | #include <linux/kernel.h> | |
12 | #include <linux/types.h> | |
13 | #include <linux/init.h> | |
14 | #include <linux/bootmem.h> | |
15 | #include <linux/ioport.h> | |
16 | #include <linux/string.h> | |
17 | #include <linux/kexec.h> | |
18 | #include <linux/module.h> | |
19 | #include <linux/mm.h> | |
20 | #include <linux/pfn.h> | |
bf62f398 | 21 | #include <linux/suspend.h> |
5dfcf14d | 22 | #include <linux/firmware-map.h> |
b79cd8f1 YL |
23 | |
24 | #include <asm/pgtable.h> | |
25 | #include <asm/page.h> | |
26 | #include <asm/e820.h> | |
a4c81cf6 | 27 | #include <asm/proto.h> |
b79cd8f1 | 28 | #include <asm/setup.h> |
a4c81cf6 | 29 | #include <asm/trampoline.h> |
b79cd8f1 | 30 | |
5dfcf14d BW |
31 | /* |
32 | * The e820 map is the map that gets modified e.g. with command line parameters | |
33 | * and that is also registered with modifications in the kernel resource tree | |
34 | * with the iomem_resource as parent. | |
35 | * | |
36 | * The e820_saved is directly saved after the BIOS-provided memory map is | |
37 | * copied. It doesn't get modified afterwards. It's registered for the | |
38 | * /sys/firmware/memmap interface. | |
39 | * | |
40 | * That memory map is not modified and is used as base for kexec. The kexec'd | |
41 | * kernel should get the same memory map as the firmware provides. Then the | |
42 | * user can e.g. boot the original kernel with mem=1G while still booting the | |
43 | * next kernel with full memory. | |
44 | */ | |
b79cd8f1 | 45 | struct e820map e820; |
5dfcf14d | 46 | struct e820map e820_saved; |
b79cd8f1 YL |
47 | |
48 | /* For PCI or other memory-mapped resources */ | |
49 | unsigned long pci_mem_start = 0xaeedbabe; | |
50 | #ifdef CONFIG_PCI | |
51 | EXPORT_SYMBOL(pci_mem_start); | |
52 | #endif | |
53 | ||
54 | /* | |
55 | * This function checks if any part of the range <start,end> is mapped | |
56 | * with type. | |
57 | */ | |
58 | int | |
59 | e820_any_mapped(u64 start, u64 end, unsigned type) | |
60 | { | |
61 | int i; | |
62 | ||
63 | for (i = 0; i < e820.nr_map; i++) { | |
64 | struct e820entry *ei = &e820.map[i]; | |
65 | ||
66 | if (type && ei->type != type) | |
67 | continue; | |
68 | if (ei->addr >= end || ei->addr + ei->size <= start) | |
69 | continue; | |
70 | return 1; | |
71 | } | |
72 | return 0; | |
73 | } | |
74 | EXPORT_SYMBOL_GPL(e820_any_mapped); | |
75 | ||
76 | /* | |
77 | * This function checks if the entire range <start,end> is mapped with type. | |
78 | * | |
79 | * Note: this function only works correct if the e820 table is sorted and | |
80 | * not-overlapping, which is the case | |
81 | */ | |
82 | int __init e820_all_mapped(u64 start, u64 end, unsigned type) | |
83 | { | |
84 | int i; | |
85 | ||
86 | for (i = 0; i < e820.nr_map; i++) { | |
87 | struct e820entry *ei = &e820.map[i]; | |
88 | ||
89 | if (type && ei->type != type) | |
90 | continue; | |
91 | /* is the region (part) in overlap with the current region ?*/ | |
92 | if (ei->addr >= end || ei->addr + ei->size <= start) | |
93 | continue; | |
94 | ||
95 | /* if the region is at the beginning of <start,end> we move | |
96 | * start to the end of the region since it's ok until there | |
97 | */ | |
98 | if (ei->addr <= start) | |
99 | start = ei->addr + ei->size; | |
100 | /* | |
101 | * if start is now at or beyond end, we're done, full | |
102 | * coverage | |
103 | */ | |
104 | if (start >= end) | |
105 | return 1; | |
106 | } | |
107 | return 0; | |
108 | } | |
109 | ||
110 | /* | |
111 | * Add a memory region to the kernel e820 map. | |
112 | */ | |
773e673d YL |
113 | static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size, |
114 | int type) | |
b79cd8f1 | 115 | { |
773e673d | 116 | int x = e820x->nr_map; |
b79cd8f1 | 117 | |
773e673d | 118 | if (x == ARRAY_SIZE(e820x->map)) { |
b79cd8f1 YL |
119 | printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); |
120 | return; | |
121 | } | |
122 | ||
773e673d YL |
123 | e820x->map[x].addr = start; |
124 | e820x->map[x].size = size; | |
125 | e820x->map[x].type = type; | |
126 | e820x->nr_map++; | |
127 | } | |
128 | ||
129 | void __init e820_add_region(u64 start, u64 size, int type) | |
130 | { | |
131 | __e820_add_region(&e820, start, size, type); | |
b79cd8f1 YL |
132 | } |
133 | ||
c61cf4cf YL |
134 | static void __init e820_print_type(u32 type) |
135 | { | |
136 | switch (type) { | |
137 | case E820_RAM: | |
138 | case E820_RESERVED_KERN: | |
139 | printk(KERN_CONT "(usable)"); | |
140 | break; | |
141 | case E820_RESERVED: | |
142 | printk(KERN_CONT "(reserved)"); | |
143 | break; | |
144 | case E820_ACPI: | |
145 | printk(KERN_CONT "(ACPI data)"); | |
146 | break; | |
147 | case E820_NVS: | |
148 | printk(KERN_CONT "(ACPI NVS)"); | |
149 | break; | |
150 | case E820_UNUSABLE: | |
151 | printk(KERN_CONT "(unusable)"); | |
152 | break; | |
153 | default: | |
154 | printk(KERN_CONT "type %u", type); | |
155 | break; | |
156 | } | |
157 | } | |
158 | ||
b79cd8f1 YL |
159 | void __init e820_print_map(char *who) |
160 | { | |
161 | int i; | |
162 | ||
163 | for (i = 0; i < e820.nr_map; i++) { | |
164 | printk(KERN_INFO " %s: %016Lx - %016Lx ", who, | |
165 | (unsigned long long) e820.map[i].addr, | |
166 | (unsigned long long) | |
167 | (e820.map[i].addr + e820.map[i].size)); | |
c61cf4cf YL |
168 | e820_print_type(e820.map[i].type); |
169 | printk(KERN_CONT "\n"); | |
b79cd8f1 YL |
170 | } |
171 | } | |
172 | ||
173 | /* | |
174 | * Sanitize the BIOS e820 map. | |
175 | * | |
176 | * Some e820 responses include overlapping entries. The following | |
5b7eb2e9 PJ |
177 | * replaces the original e820 map with a new one, removing overlaps, |
178 | * and resolving conflicting memory types in favor of highest | |
179 | * numbered type. | |
b79cd8f1 | 180 | * |
5b7eb2e9 PJ |
181 | * The input parameter biosmap points to an array of 'struct |
182 | * e820entry' which on entry has elements in the range [0, *pnr_map) | |
183 | * valid, and which has space for up to max_nr_map entries. | |
184 | * On return, the resulting sanitized e820 map entries will be | |
185 | * overwritten in the same location, starting at biosmap. | |
186 | * | |
187 | * The integer pointed to by pnr_map must be valid on entry (the | |
188 | * current number of valid entries located at biosmap) and will | |
189 | * be updated on return, with the new number of valid entries | |
190 | * (something no more than max_nr_map.) | |
191 | * | |
192 | * The return value from sanitize_e820_map() is zero if it | |
193 | * successfully 'sanitized' the map entries passed in, and is -1 | |
194 | * if it did nothing, which can happen if either of (1) it was | |
195 | * only passed one map entry, or (2) any of the input map entries | |
196 | * were invalid (start + size < start, meaning that the size was | |
197 | * so big the described memory range wrapped around through zero.) | |
198 | * | |
199 | * Visually we're performing the following | |
200 | * (1,2,3,4 = memory types)... | |
201 | * | |
202 | * Sample memory map (w/overlaps): | |
203 | * ____22__________________ | |
204 | * ______________________4_ | |
205 | * ____1111________________ | |
206 | * _44_____________________ | |
207 | * 11111111________________ | |
208 | * ____________________33__ | |
209 | * ___________44___________ | |
210 | * __________33333_________ | |
211 | * ______________22________ | |
212 | * ___________________2222_ | |
213 | * _________111111111______ | |
214 | * _____________________11_ | |
215 | * _________________4______ | |
216 | * | |
217 | * Sanitized equivalent (no overlap): | |
218 | * 1_______________________ | |
219 | * _44_____________________ | |
220 | * ___1____________________ | |
221 | * ____22__________________ | |
222 | * ______11________________ | |
223 | * _________1______________ | |
224 | * __________3_____________ | |
225 | * ___________44___________ | |
226 | * _____________33_________ | |
227 | * _______________2________ | |
228 | * ________________1_______ | |
229 | * _________________4______ | |
230 | * ___________________2____ | |
231 | * ____________________33__ | |
232 | * ______________________4_ | |
b79cd8f1 | 233 | */ |
5b7eb2e9 | 234 | |
c3965bd1 | 235 | int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, |
ba639039 | 236 | u32 *pnr_map) |
b79cd8f1 YL |
237 | { |
238 | struct change_member { | |
239 | struct e820entry *pbios; /* pointer to original bios entry */ | |
240 | unsigned long long addr; /* address for this change point */ | |
241 | }; | |
157fabf0 PJ |
242 | static struct change_member change_point_list[2*E820_X_MAX] __initdata; |
243 | static struct change_member *change_point[2*E820_X_MAX] __initdata; | |
244 | static struct e820entry *overlap_list[E820_X_MAX] __initdata; | |
245 | static struct e820entry new_bios[E820_X_MAX] __initdata; | |
b79cd8f1 YL |
246 | struct change_member *change_tmp; |
247 | unsigned long current_type, last_type; | |
248 | unsigned long long last_addr; | |
249 | int chgidx, still_changing; | |
250 | int overlap_entries; | |
251 | int new_bios_entry; | |
252 | int old_nr, new_nr, chg_nr; | |
253 | int i; | |
254 | ||
b79cd8f1 YL |
255 | /* if there's only one memory region, don't bother */ |
256 | if (*pnr_map < 2) | |
257 | return -1; | |
258 | ||
259 | old_nr = *pnr_map; | |
6e9bcc79 | 260 | BUG_ON(old_nr > max_nr_map); |
b79cd8f1 YL |
261 | |
262 | /* bail out if we find any unreasonable addresses in bios map */ | |
263 | for (i = 0; i < old_nr; i++) | |
264 | if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) | |
265 | return -1; | |
266 | ||
267 | /* create pointers for initial change-point information (for sorting) */ | |
268 | for (i = 0; i < 2 * old_nr; i++) | |
269 | change_point[i] = &change_point_list[i]; | |
270 | ||
271 | /* record all known change-points (starting and ending addresses), | |
272 | omitting those that are for empty memory regions */ | |
273 | chgidx = 0; | |
274 | for (i = 0; i < old_nr; i++) { | |
275 | if (biosmap[i].size != 0) { | |
276 | change_point[chgidx]->addr = biosmap[i].addr; | |
277 | change_point[chgidx++]->pbios = &biosmap[i]; | |
278 | change_point[chgidx]->addr = biosmap[i].addr + | |
279 | biosmap[i].size; | |
280 | change_point[chgidx++]->pbios = &biosmap[i]; | |
281 | } | |
282 | } | |
283 | chg_nr = chgidx; | |
284 | ||
285 | /* sort change-point list by memory addresses (low -> high) */ | |
286 | still_changing = 1; | |
287 | while (still_changing) { | |
288 | still_changing = 0; | |
289 | for (i = 1; i < chg_nr; i++) { | |
290 | unsigned long long curaddr, lastaddr; | |
291 | unsigned long long curpbaddr, lastpbaddr; | |
292 | ||
293 | curaddr = change_point[i]->addr; | |
294 | lastaddr = change_point[i - 1]->addr; | |
295 | curpbaddr = change_point[i]->pbios->addr; | |
296 | lastpbaddr = change_point[i - 1]->pbios->addr; | |
297 | ||
298 | /* | |
299 | * swap entries, when: | |
300 | * | |
301 | * curaddr > lastaddr or | |
302 | * curaddr == lastaddr and curaddr == curpbaddr and | |
303 | * lastaddr != lastpbaddr | |
304 | */ | |
305 | if (curaddr < lastaddr || | |
306 | (curaddr == lastaddr && curaddr == curpbaddr && | |
307 | lastaddr != lastpbaddr)) { | |
308 | change_tmp = change_point[i]; | |
309 | change_point[i] = change_point[i-1]; | |
310 | change_point[i-1] = change_tmp; | |
311 | still_changing = 1; | |
312 | } | |
313 | } | |
314 | } | |
315 | ||
316 | /* create a new bios memory map, removing overlaps */ | |
317 | overlap_entries = 0; /* number of entries in the overlap table */ | |
318 | new_bios_entry = 0; /* index for creating new bios map entries */ | |
319 | last_type = 0; /* start with undefined memory type */ | |
320 | last_addr = 0; /* start with 0 as last starting address */ | |
321 | ||
322 | /* loop through change-points, determining affect on the new bios map */ | |
323 | for (chgidx = 0; chgidx < chg_nr; chgidx++) { | |
324 | /* keep track of all overlapping bios entries */ | |
325 | if (change_point[chgidx]->addr == | |
326 | change_point[chgidx]->pbios->addr) { | |
327 | /* | |
328 | * add map entry to overlap list (> 1 entry | |
329 | * implies an overlap) | |
330 | */ | |
331 | overlap_list[overlap_entries++] = | |
332 | change_point[chgidx]->pbios; | |
333 | } else { | |
334 | /* | |
335 | * remove entry from list (order independent, | |
336 | * so swap with last) | |
337 | */ | |
338 | for (i = 0; i < overlap_entries; i++) { | |
339 | if (overlap_list[i] == | |
340 | change_point[chgidx]->pbios) | |
341 | overlap_list[i] = | |
342 | overlap_list[overlap_entries-1]; | |
343 | } | |
344 | overlap_entries--; | |
345 | } | |
346 | /* | |
347 | * if there are overlapping entries, decide which | |
348 | * "type" to use (larger value takes precedence -- | |
349 | * 1=usable, 2,3,4,4+=unusable) | |
350 | */ | |
351 | current_type = 0; | |
352 | for (i = 0; i < overlap_entries; i++) | |
353 | if (overlap_list[i]->type > current_type) | |
354 | current_type = overlap_list[i]->type; | |
355 | /* | |
356 | * continue building up new bios map based on this | |
357 | * information | |
358 | */ | |
359 | if (current_type != last_type) { | |
360 | if (last_type != 0) { | |
361 | new_bios[new_bios_entry].size = | |
362 | change_point[chgidx]->addr - last_addr; | |
363 | /* | |
364 | * move forward only if the new size | |
365 | * was non-zero | |
366 | */ | |
367 | if (new_bios[new_bios_entry].size != 0) | |
368 | /* | |
369 | * no more space left for new | |
370 | * bios entries ? | |
371 | */ | |
c3965bd1 | 372 | if (++new_bios_entry >= max_nr_map) |
b79cd8f1 YL |
373 | break; |
374 | } | |
375 | if (current_type != 0) { | |
376 | new_bios[new_bios_entry].addr = | |
377 | change_point[chgidx]->addr; | |
378 | new_bios[new_bios_entry].type = current_type; | |
379 | last_addr = change_point[chgidx]->addr; | |
380 | } | |
381 | last_type = current_type; | |
382 | } | |
383 | } | |
384 | /* retain count for new bios entries */ | |
385 | new_nr = new_bios_entry; | |
386 | ||
387 | /* copy new bios mapping into original location */ | |
388 | memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry)); | |
389 | *pnr_map = new_nr; | |
390 | ||
391 | return 0; | |
392 | } | |
393 | ||
dc8e8120 | 394 | static int __init __append_e820_map(struct e820entry *biosmap, int nr_map) |
8c5beb50 HY |
395 | { |
396 | while (nr_map) { | |
397 | u64 start = biosmap->addr; | |
398 | u64 size = biosmap->size; | |
399 | u64 end = start + size; | |
400 | u32 type = biosmap->type; | |
401 | ||
402 | /* Overflow in 64 bits? Ignore the memory map. */ | |
403 | if (start > end) | |
404 | return -1; | |
405 | ||
406 | e820_add_region(start, size, type); | |
407 | ||
408 | biosmap++; | |
409 | nr_map--; | |
410 | } | |
411 | return 0; | |
412 | } | |
413 | ||
b79cd8f1 YL |
414 | /* |
415 | * Copy the BIOS e820 map into a safe place. | |
416 | * | |
417 | * Sanity-check it while we're at it.. | |
418 | * | |
419 | * If we're lucky and live on a modern system, the setup code | |
420 | * will have given us a memory map that we can use to properly | |
421 | * set up memory. If we aren't, we'll fake a memory map. | |
422 | */ | |
dc8e8120 | 423 | static int __init append_e820_map(struct e820entry *biosmap, int nr_map) |
b79cd8f1 YL |
424 | { |
425 | /* Only one memory region (or negative)? Ignore it */ | |
426 | if (nr_map < 2) | |
427 | return -1; | |
428 | ||
dc8e8120 | 429 | return __append_e820_map(biosmap, nr_map); |
b79cd8f1 YL |
430 | } |
431 | ||
773e673d | 432 | static u64 __init __e820_update_range(struct e820map *e820x, u64 start, |
fc9036ea YL |
433 | u64 size, unsigned old_type, |
434 | unsigned new_type) | |
b79cd8f1 | 435 | { |
78a8b35b | 436 | u64 end; |
773e673d | 437 | unsigned int i; |
b79cd8f1 YL |
438 | u64 real_updated_size = 0; |
439 | ||
440 | BUG_ON(old_type == new_type); | |
441 | ||
232b957a YL |
442 | if (size > (ULLONG_MAX - start)) |
443 | size = ULLONG_MAX - start; | |
444 | ||
78a8b35b | 445 | end = start + size; |
c61cf4cf YL |
446 | printk(KERN_DEBUG "e820 update range: %016Lx - %016Lx ", |
447 | (unsigned long long) start, | |
448 | (unsigned long long) end); | |
449 | e820_print_type(old_type); | |
450 | printk(KERN_CONT " ==> "); | |
451 | e820_print_type(new_type); | |
452 | printk(KERN_CONT "\n"); | |
453 | ||
5c0e6f03 | 454 | for (i = 0; i < e820x->nr_map; i++) { |
fc9036ea | 455 | struct e820entry *ei = &e820x->map[i]; |
b79cd8f1 | 456 | u64 final_start, final_end; |
78a8b35b YL |
457 | u64 ei_end; |
458 | ||
b79cd8f1 YL |
459 | if (ei->type != old_type) |
460 | continue; | |
78a8b35b YL |
461 | |
462 | ei_end = ei->addr + ei->size; | |
463 | /* totally covered by new range? */ | |
464 | if (ei->addr >= start && ei_end <= end) { | |
b79cd8f1 YL |
465 | ei->type = new_type; |
466 | real_updated_size += ei->size; | |
467 | continue; | |
468 | } | |
78a8b35b YL |
469 | |
470 | /* new range is totally covered? */ | |
471 | if (ei->addr < start && ei_end > end) { | |
472 | __e820_add_region(e820x, start, size, new_type); | |
473 | __e820_add_region(e820x, end, ei_end - end, ei->type); | |
474 | ei->size = start - ei->addr; | |
475 | real_updated_size += size; | |
476 | continue; | |
477 | } | |
478 | ||
b79cd8f1 YL |
479 | /* partially covered */ |
480 | final_start = max(start, ei->addr); | |
78a8b35b | 481 | final_end = min(end, ei_end); |
b79cd8f1 YL |
482 | if (final_start >= final_end) |
483 | continue; | |
5c0e6f03 | 484 | |
773e673d YL |
485 | __e820_add_region(e820x, final_start, final_end - final_start, |
486 | new_type); | |
5c0e6f03 | 487 | |
b79cd8f1 | 488 | real_updated_size += final_end - final_start; |
976dd4dc | 489 | |
773e673d YL |
490 | /* |
491 | * left range could be head or tail, so need to update | |
492 | * size at first. | |
493 | */ | |
494 | ei->size -= final_end - final_start; | |
976dd4dc YL |
495 | if (ei->addr < final_start) |
496 | continue; | |
497 | ei->addr = final_end; | |
b79cd8f1 YL |
498 | } |
499 | return real_updated_size; | |
500 | } | |
501 | ||
fc9036ea YL |
502 | u64 __init e820_update_range(u64 start, u64 size, unsigned old_type, |
503 | unsigned new_type) | |
504 | { | |
773e673d | 505 | return __e820_update_range(&e820, start, size, old_type, new_type); |
fc9036ea YL |
506 | } |
507 | ||
508 | static u64 __init e820_update_range_saved(u64 start, u64 size, | |
509 | unsigned old_type, unsigned new_type) | |
510 | { | |
773e673d | 511 | return __e820_update_range(&e820_saved, start, size, old_type, |
fc9036ea YL |
512 | new_type); |
513 | } | |
514 | ||
7a1fd986 YL |
515 | /* make e820 not cover the range */ |
516 | u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type, | |
517 | int checktype) | |
518 | { | |
519 | int i; | |
520 | u64 real_removed_size = 0; | |
521 | ||
232b957a YL |
522 | if (size > (ULLONG_MAX - start)) |
523 | size = ULLONG_MAX - start; | |
524 | ||
7a1fd986 YL |
525 | for (i = 0; i < e820.nr_map; i++) { |
526 | struct e820entry *ei = &e820.map[i]; | |
527 | u64 final_start, final_end; | |
528 | ||
529 | if (checktype && ei->type != old_type) | |
530 | continue; | |
531 | /* totally covered? */ | |
532 | if (ei->addr >= start && | |
533 | (ei->addr + ei->size) <= (start + size)) { | |
534 | real_removed_size += ei->size; | |
535 | memset(ei, 0, sizeof(struct e820entry)); | |
536 | continue; | |
537 | } | |
538 | /* partially covered */ | |
539 | final_start = max(start, ei->addr); | |
540 | final_end = min(start + size, ei->addr + ei->size); | |
541 | if (final_start >= final_end) | |
542 | continue; | |
543 | real_removed_size += final_end - final_start; | |
544 | ||
545 | ei->size -= final_end - final_start; | |
546 | if (ei->addr < final_start) | |
547 | continue; | |
548 | ei->addr = final_end; | |
549 | } | |
550 | return real_removed_size; | |
551 | } | |
552 | ||
b79cd8f1 YL |
553 | void __init update_e820(void) |
554 | { | |
ba639039 | 555 | u32 nr_map; |
b79cd8f1 YL |
556 | |
557 | nr_map = e820.nr_map; | |
c3965bd1 | 558 | if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map)) |
b79cd8f1 YL |
559 | return; |
560 | e820.nr_map = nr_map; | |
561 | printk(KERN_INFO "modified physical RAM map:\n"); | |
562 | e820_print_map("modified"); | |
563 | } | |
fc9036ea YL |
564 | static void __init update_e820_saved(void) |
565 | { | |
ba639039 | 566 | u32 nr_map; |
fc9036ea YL |
567 | |
568 | nr_map = e820_saved.nr_map; | |
569 | if (sanitize_e820_map(e820_saved.map, ARRAY_SIZE(e820_saved.map), &nr_map)) | |
570 | return; | |
571 | e820_saved.nr_map = nr_map; | |
572 | } | |
fd6493e1 | 573 | #define MAX_GAP_END 0x100000000ull |
b79cd8f1 | 574 | /* |
fd6493e1 | 575 | * Search for a gap in the e820 memory space from start_addr to end_addr. |
b79cd8f1 | 576 | */ |
3381959d | 577 | __init int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize, |
fd6493e1 | 578 | unsigned long start_addr, unsigned long long end_addr) |
b79cd8f1 | 579 | { |
fd6493e1 | 580 | unsigned long long last; |
3381959d | 581 | int i = e820.nr_map; |
b79cd8f1 YL |
582 | int found = 0; |
583 | ||
fd6493e1 AK |
584 | last = (end_addr && end_addr < MAX_GAP_END) ? end_addr : MAX_GAP_END; |
585 | ||
b79cd8f1 YL |
586 | while (--i >= 0) { |
587 | unsigned long long start = e820.map[i].addr; | |
588 | unsigned long long end = start + e820.map[i].size; | |
589 | ||
3381959d AK |
590 | if (end < start_addr) |
591 | continue; | |
592 | ||
b79cd8f1 YL |
593 | /* |
594 | * Since "last" is at most 4GB, we know we'll | |
595 | * fit in 32 bits if this condition is true | |
596 | */ | |
597 | if (last > end) { | |
598 | unsigned long gap = last - end; | |
599 | ||
3381959d AK |
600 | if (gap >= *gapsize) { |
601 | *gapsize = gap; | |
602 | *gapstart = end; | |
b79cd8f1 YL |
603 | found = 1; |
604 | } | |
605 | } | |
606 | if (start < last) | |
607 | last = start; | |
608 | } | |
3381959d AK |
609 | return found; |
610 | } | |
611 | ||
612 | /* | |
613 | * Search for the biggest gap in the low 32 bits of the e820 | |
614 | * memory space. We pass this space to PCI to assign MMIO resources | |
615 | * for hotplug or unconfigured devices in. | |
616 | * Hopefully the BIOS let enough space left. | |
617 | */ | |
618 | __init void e820_setup_gap(void) | |
619 | { | |
5d423ccd | 620 | unsigned long gapstart, gapsize; |
3381959d AK |
621 | int found; |
622 | ||
623 | gapstart = 0x10000000; | |
624 | gapsize = 0x400000; | |
fd6493e1 | 625 | found = e820_search_gap(&gapstart, &gapsize, 0, MAX_GAP_END); |
b79cd8f1 YL |
626 | |
627 | #ifdef CONFIG_X86_64 | |
628 | if (!found) { | |
c987d12f | 629 | gapstart = (max_pfn << PAGE_SHIFT) + 1024*1024; |
ad361c98 JP |
630 | printk(KERN_ERR |
631 | "PCI: Warning: Cannot find a gap in the 32bit address range\n" | |
632 | "PCI: Unassigned devices with 32bit resource registers may break!\n"); | |
b79cd8f1 YL |
633 | } |
634 | #endif | |
635 | ||
636 | /* | |
5d423ccd | 637 | * e820_reserve_resources_late protect stolen RAM already |
b79cd8f1 | 638 | */ |
5d423ccd | 639 | pci_mem_start = gapstart; |
b79cd8f1 YL |
640 | |
641 | printk(KERN_INFO | |
642 | "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", | |
643 | pci_mem_start, gapstart, gapsize); | |
644 | } | |
645 | ||
8c5beb50 HY |
646 | /** |
647 | * Because of the size limitation of struct boot_params, only first | |
648 | * 128 E820 memory entries are passed to kernel via | |
649 | * boot_params.e820_map, others are passed via SETUP_E820_EXT node of | |
650 | * linked list of struct setup_data, which is parsed here. | |
651 | */ | |
652 | void __init parse_e820_ext(struct setup_data *sdata, unsigned long pa_data) | |
653 | { | |
654 | u32 map_len; | |
655 | int entries; | |
656 | struct e820entry *extmap; | |
657 | ||
658 | entries = sdata->len / sizeof(struct e820entry); | |
659 | map_len = sdata->len + sizeof(struct setup_data); | |
660 | if (map_len > PAGE_SIZE) | |
661 | sdata = early_ioremap(pa_data, map_len); | |
662 | extmap = (struct e820entry *)(sdata->data); | |
dc8e8120 | 663 | __append_e820_map(extmap, entries); |
8c5beb50 HY |
664 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); |
665 | if (map_len > PAGE_SIZE) | |
666 | early_iounmap(sdata, map_len); | |
667 | printk(KERN_INFO "extended physical RAM map:\n"); | |
668 | e820_print_map("extended"); | |
669 | } | |
670 | ||
bf62f398 YL |
671 | #if defined(CONFIG_X86_64) || \ |
672 | (defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION)) | |
673 | /** | |
674 | * Find the ranges of physical addresses that do not correspond to | |
675 | * e820 RAM areas and mark the corresponding pages as nosave for | |
676 | * hibernation (32 bit) or software suspend and suspend to RAM (64 bit). | |
677 | * | |
678 | * This function requires the e820 map to be sorted and without any | |
679 | * overlapping entries and assumes the first e820 area to be RAM. | |
680 | */ | |
681 | void __init e820_mark_nosave_regions(unsigned long limit_pfn) | |
682 | { | |
683 | int i; | |
684 | unsigned long pfn; | |
685 | ||
686 | pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size); | |
687 | for (i = 1; i < e820.nr_map; i++) { | |
688 | struct e820entry *ei = &e820.map[i]; | |
689 | ||
690 | if (pfn < PFN_UP(ei->addr)) | |
691 | register_nosave_region(pfn, PFN_UP(ei->addr)); | |
692 | ||
693 | pfn = PFN_DOWN(ei->addr + ei->size); | |
28bb2237 | 694 | if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN) |
bf62f398 YL |
695 | register_nosave_region(PFN_UP(ei->addr), pfn); |
696 | ||
697 | if (pfn >= limit_pfn) | |
698 | break; | |
699 | } | |
700 | } | |
701 | #endif | |
a4c81cf6 | 702 | |
b69edc76 RW |
703 | #ifdef CONFIG_HIBERNATION |
704 | /** | |
705 | * Mark ACPI NVS memory region, so that we can save/restore it during | |
706 | * hibernation and the subsequent resume. | |
707 | */ | |
708 | static int __init e820_mark_nvs_memory(void) | |
709 | { | |
710 | int i; | |
711 | ||
712 | for (i = 0; i < e820.nr_map; i++) { | |
713 | struct e820entry *ei = &e820.map[i]; | |
714 | ||
715 | if (ei->type == E820_NVS) | |
716 | hibernate_nvs_register(ei->addr, ei->size); | |
717 | } | |
718 | ||
719 | return 0; | |
720 | } | |
721 | core_initcall(e820_mark_nvs_memory); | |
722 | #endif | |
723 | ||
a4c81cf6 YL |
724 | /* |
725 | * Early reserved memory areas. | |
726 | */ | |
727 | #define MAX_EARLY_RES 20 | |
728 | ||
729 | struct early_res { | |
730 | u64 start, end; | |
731 | char name[16]; | |
c4ba1320 | 732 | char overlap_ok; |
a4c81cf6 YL |
733 | }; |
734 | static struct early_res early_res[MAX_EARLY_RES] __initdata = { | |
735 | { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */ | |
a4c81cf6 YL |
736 | {} |
737 | }; | |
738 | ||
d3fbe5ea | 739 | static int __init find_overlapped_early(u64 start, u64 end) |
a4c81cf6 YL |
740 | { |
741 | int i; | |
742 | struct early_res *r; | |
d3fbe5ea | 743 | |
a4c81cf6 YL |
744 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { |
745 | r = &early_res[i]; | |
746 | if (end > r->start && start < r->end) | |
d3fbe5ea | 747 | break; |
a4c81cf6 | 748 | } |
d3fbe5ea HY |
749 | |
750 | return i; | |
751 | } | |
752 | ||
c4ba1320 PJ |
753 | /* |
754 | * Drop the i-th range from the early reservation map, | |
755 | * by copying any higher ranges down one over it, and | |
756 | * clearing what had been the last slot. | |
757 | */ | |
758 | static void __init drop_range(int i) | |
759 | { | |
760 | int j; | |
761 | ||
762 | for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++) | |
763 | ; | |
764 | ||
765 | memmove(&early_res[i], &early_res[i + 1], | |
766 | (j - 1 - i) * sizeof(struct early_res)); | |
767 | ||
768 | early_res[j - 1].end = 0; | |
769 | } | |
770 | ||
771 | /* | |
772 | * Split any existing ranges that: | |
773 | * 1) are marked 'overlap_ok', and | |
774 | * 2) overlap with the stated range [start, end) | |
775 | * into whatever portion (if any) of the existing range is entirely | |
776 | * below or entirely above the stated range. Drop the portion | |
777 | * of the existing range that overlaps with the stated range, | |
778 | * which will allow the caller of this routine to then add that | |
779 | * stated range without conflicting with any existing range. | |
780 | */ | |
781 | static void __init drop_overlaps_that_are_ok(u64 start, u64 end) | |
782 | { | |
783 | int i; | |
784 | struct early_res *r; | |
785 | u64 lower_start, lower_end; | |
786 | u64 upper_start, upper_end; | |
787 | char name[16]; | |
788 | ||
789 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | |
790 | r = &early_res[i]; | |
791 | ||
792 | /* Continue past non-overlapping ranges */ | |
793 | if (end <= r->start || start >= r->end) | |
794 | continue; | |
795 | ||
796 | /* | |
797 | * Leave non-ok overlaps as is; let caller | |
798 | * panic "Overlapping early reservations" | |
799 | * when it hits this overlap. | |
800 | */ | |
801 | if (!r->overlap_ok) | |
802 | return; | |
803 | ||
804 | /* | |
805 | * We have an ok overlap. We will drop it from the early | |
806 | * reservation map, and add back in any non-overlapping | |
807 | * portions (lower or upper) as separate, overlap_ok, | |
808 | * non-overlapping ranges. | |
809 | */ | |
810 | ||
811 | /* 1. Note any non-overlapping (lower or upper) ranges. */ | |
812 | strncpy(name, r->name, sizeof(name) - 1); | |
813 | ||
814 | lower_start = lower_end = 0; | |
815 | upper_start = upper_end = 0; | |
816 | if (r->start < start) { | |
817 | lower_start = r->start; | |
818 | lower_end = start; | |
819 | } | |
820 | if (r->end > end) { | |
821 | upper_start = end; | |
822 | upper_end = r->end; | |
823 | } | |
824 | ||
825 | /* 2. Drop the original ok overlapping range */ | |
826 | drop_range(i); | |
827 | ||
828 | i--; /* resume for-loop on copied down entry */ | |
829 | ||
830 | /* 3. Add back in any non-overlapping ranges. */ | |
831 | if (lower_end) | |
832 | reserve_early_overlap_ok(lower_start, lower_end, name); | |
833 | if (upper_end) | |
834 | reserve_early_overlap_ok(upper_start, upper_end, name); | |
835 | } | |
836 | } | |
837 | ||
838 | static void __init __reserve_early(u64 start, u64 end, char *name, | |
839 | int overlap_ok) | |
d3fbe5ea HY |
840 | { |
841 | int i; | |
842 | struct early_res *r; | |
843 | ||
844 | i = find_overlapped_early(start, end); | |
a4c81cf6 YL |
845 | if (i >= MAX_EARLY_RES) |
846 | panic("Too many early reservations"); | |
847 | r = &early_res[i]; | |
d3fbe5ea HY |
848 | if (r->end) |
849 | panic("Overlapping early reservations " | |
850 | "%llx-%llx %s to %llx-%llx %s\n", | |
851 | start, end - 1, name?name:"", r->start, | |
852 | r->end - 1, r->name); | |
a4c81cf6 YL |
853 | r->start = start; |
854 | r->end = end; | |
c4ba1320 | 855 | r->overlap_ok = overlap_ok; |
a4c81cf6 YL |
856 | if (name) |
857 | strncpy(r->name, name, sizeof(r->name) - 1); | |
858 | } | |
859 | ||
c4ba1320 PJ |
860 | /* |
861 | * A few early reservtations come here. | |
862 | * | |
863 | * The 'overlap_ok' in the name of this routine does -not- mean it | |
864 | * is ok for these reservations to overlap an earlier reservation. | |
865 | * Rather it means that it is ok for subsequent reservations to | |
866 | * overlap this one. | |
867 | * | |
868 | * Use this entry point to reserve early ranges when you are doing | |
869 | * so out of "Paranoia", reserving perhaps more memory than you need, | |
870 | * just in case, and don't mind a subsequent overlapping reservation | |
871 | * that is known to be needed. | |
872 | * | |
873 | * The drop_overlaps_that_are_ok() call here isn't really needed. | |
874 | * It would be needed if we had two colliding 'overlap_ok' | |
875 | * reservations, so that the second such would not panic on the | |
876 | * overlap with the first. We don't have any such as of this | |
877 | * writing, but might as well tolerate such if it happens in | |
878 | * the future. | |
879 | */ | |
880 | void __init reserve_early_overlap_ok(u64 start, u64 end, char *name) | |
881 | { | |
882 | drop_overlaps_that_are_ok(start, end); | |
883 | __reserve_early(start, end, name, 1); | |
884 | } | |
885 | ||
886 | /* | |
887 | * Most early reservations come here. | |
888 | * | |
889 | * We first have drop_overlaps_that_are_ok() drop any pre-existing | |
890 | * 'overlap_ok' ranges, so that we can then reserve this memory | |
891 | * range without risk of panic'ing on an overlapping overlap_ok | |
892 | * early reservation. | |
893 | */ | |
894 | void __init reserve_early(u64 start, u64 end, char *name) | |
895 | { | |
46cb27f5 YL |
896 | if (start >= end) |
897 | return; | |
898 | ||
c4ba1320 PJ |
899 | drop_overlaps_that_are_ok(start, end); |
900 | __reserve_early(start, end, name, 0); | |
901 | } | |
902 | ||
a4c81cf6 YL |
903 | void __init free_early(u64 start, u64 end) |
904 | { | |
905 | struct early_res *r; | |
c4ba1320 | 906 | int i; |
a4c81cf6 | 907 | |
d3fbe5ea HY |
908 | i = find_overlapped_early(start, end); |
909 | r = &early_res[i]; | |
910 | if (i >= MAX_EARLY_RES || r->end != end || r->start != start) | |
a4c81cf6 | 911 | panic("free_early on not reserved area: %llx-%llx!", |
d3fbe5ea | 912 | start, end - 1); |
a4c81cf6 | 913 | |
c4ba1320 | 914 | drop_range(i); |
a4c81cf6 YL |
915 | } |
916 | ||
917 | void __init early_res_to_bootmem(u64 start, u64 end) | |
918 | { | |
ab67715c | 919 | int i, count; |
a4c81cf6 | 920 | u64 final_start, final_end; |
ab67715c YL |
921 | |
922 | count = 0; | |
923 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) | |
924 | count++; | |
925 | ||
5f1f2b3d YL |
926 | printk(KERN_INFO "(%d early reservations) ==> bootmem [%010llx - %010llx]\n", |
927 | count, start, end); | |
ab67715c | 928 | for (i = 0; i < count; i++) { |
a4c81cf6 | 929 | struct early_res *r = &early_res[i]; |
4fcc545a | 930 | printk(KERN_INFO " #%d [%010llx - %010llx] %16s", i, |
ab67715c | 931 | r->start, r->end, r->name); |
a4c81cf6 YL |
932 | final_start = max(start, r->start); |
933 | final_end = min(end, r->end); | |
ab67715c YL |
934 | if (final_start >= final_end) { |
935 | printk(KERN_CONT "\n"); | |
a4c81cf6 | 936 | continue; |
ab67715c | 937 | } |
4fcc545a | 938 | printk(KERN_CONT " ==> [%010llx - %010llx]\n", |
ab67715c | 939 | final_start, final_end); |
d2dbf343 | 940 | reserve_bootmem_generic(final_start, final_end - final_start, |
a4c81cf6 | 941 | BOOTMEM_DEFAULT); |
a4c81cf6 YL |
942 | } |
943 | } | |
944 | ||
945 | /* Check for already reserved areas */ | |
946 | static inline int __init bad_addr(u64 *addrp, u64 size, u64 align) | |
947 | { | |
948 | int i; | |
d3fbe5ea | 949 | u64 addr = *addrp; |
a4c81cf6 | 950 | int changed = 0; |
d3fbe5ea | 951 | struct early_res *r; |
a4c81cf6 | 952 | again: |
d3fbe5ea HY |
953 | i = find_overlapped_early(addr, addr + size); |
954 | r = &early_res[i]; | |
955 | if (i < MAX_EARLY_RES && r->end) { | |
956 | *addrp = addr = round_up(r->end, align); | |
957 | changed = 1; | |
958 | goto again; | |
a4c81cf6 YL |
959 | } |
960 | return changed; | |
961 | } | |
962 | ||
963 | /* Check for already reserved areas */ | |
964 | static inline int __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align) | |
965 | { | |
966 | int i; | |
967 | u64 addr = *addrp, last; | |
968 | u64 size = *sizep; | |
969 | int changed = 0; | |
970 | again: | |
971 | last = addr + size; | |
972 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | |
973 | struct early_res *r = &early_res[i]; | |
974 | if (last > r->start && addr < r->start) { | |
975 | size = r->start - addr; | |
976 | changed = 1; | |
977 | goto again; | |
978 | } | |
979 | if (last > r->end && addr < r->end) { | |
980 | addr = round_up(r->end, align); | |
981 | size = last - addr; | |
982 | changed = 1; | |
983 | goto again; | |
984 | } | |
985 | if (last <= r->end && addr >= r->start) { | |
986 | (*sizep)++; | |
987 | return 0; | |
988 | } | |
989 | } | |
990 | if (changed) { | |
991 | *addrp = addr; | |
992 | *sizep = size; | |
993 | } | |
994 | return changed; | |
995 | } | |
996 | ||
997 | /* | |
998 | * Find a free area with specified alignment in a specific range. | |
999 | */ | |
1000 | u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align) | |
1001 | { | |
1002 | int i; | |
1003 | ||
1004 | for (i = 0; i < e820.nr_map; i++) { | |
1005 | struct e820entry *ei = &e820.map[i]; | |
1006 | u64 addr, last; | |
1007 | u64 ei_last; | |
1008 | ||
1009 | if (ei->type != E820_RAM) | |
1010 | continue; | |
1011 | addr = round_up(ei->addr, align); | |
1012 | ei_last = ei->addr + ei->size; | |
1013 | if (addr < start) | |
1014 | addr = round_up(start, align); | |
1015 | if (addr >= ei_last) | |
1016 | continue; | |
1017 | while (bad_addr(&addr, size, align) && addr+size <= ei_last) | |
1018 | ; | |
1019 | last = addr + size; | |
1020 | if (last > ei_last) | |
1021 | continue; | |
1022 | if (last > end) | |
1023 | continue; | |
1024 | return addr; | |
1025 | } | |
1026 | return -1ULL; | |
1027 | } | |
1028 | ||
1029 | /* | |
1030 | * Find next free range after *start | |
1031 | */ | |
1032 | u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align) | |
1033 | { | |
1034 | int i; | |
1035 | ||
1036 | for (i = 0; i < e820.nr_map; i++) { | |
1037 | struct e820entry *ei = &e820.map[i]; | |
1038 | u64 addr, last; | |
1039 | u64 ei_last; | |
1040 | ||
1041 | if (ei->type != E820_RAM) | |
1042 | continue; | |
1043 | addr = round_up(ei->addr, align); | |
1044 | ei_last = ei->addr + ei->size; | |
1045 | if (addr < start) | |
1046 | addr = round_up(start, align); | |
1047 | if (addr >= ei_last) | |
1048 | continue; | |
1049 | *sizep = ei_last - addr; | |
1050 | while (bad_addr_size(&addr, sizep, align) && | |
1051 | addr + *sizep <= ei_last) | |
1052 | ; | |
1053 | last = addr + *sizep; | |
1054 | if (last > ei_last) | |
1055 | continue; | |
1056 | return addr; | |
1057 | } | |
a4c81cf6 | 1058 | |
5c0e6f03 | 1059 | return -1ULL; |
a4c81cf6 | 1060 | } |
2944e16b YL |
1061 | |
1062 | /* | |
1063 | * pre allocated 4k and reserved it in e820 | |
1064 | */ | |
1065 | u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align) | |
1066 | { | |
1067 | u64 size = 0; | |
1068 | u64 addr; | |
1069 | u64 start; | |
1070 | ||
61438766 | 1071 | for (start = startt; ; start += size) { |
2944e16b | 1072 | start = find_e820_area_size(start, &size, align); |
61438766 JB |
1073 | if (!(start + 1)) |
1074 | return 0; | |
1075 | if (size >= sizet) | |
1076 | break; | |
1077 | } | |
2944e16b | 1078 | |
5c0e6f03 JB |
1079 | #ifdef CONFIG_X86_32 |
1080 | if (start >= MAXMEM) | |
1081 | return 0; | |
1082 | if (start + size > MAXMEM) | |
1083 | size = MAXMEM - start; | |
1084 | #endif | |
1085 | ||
2944e16b | 1086 | addr = round_down(start + size - sizet, align); |
5c0e6f03 JB |
1087 | if (addr < start) |
1088 | return 0; | |
d0be6bde | 1089 | e820_update_range(addr, sizet, E820_RAM, E820_RESERVED); |
fc9036ea | 1090 | e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED); |
2944e16b YL |
1091 | printk(KERN_INFO "update e820 for early_reserve_e820\n"); |
1092 | update_e820(); | |
fc9036ea | 1093 | update_e820_saved(); |
2944e16b YL |
1094 | |
1095 | return addr; | |
1096 | } | |
1097 | ||
ee0c80fa YL |
/*
 * MAX_ARCH_PFN: one past the highest page frame number the architecture
 * can address.  32-bit uses 36 address bits with PAE and 32 without;
 * otherwise the limit is derived from MAXMEM.
 *
 * Every definition is fully parenthesized so the macro can be used
 * safely inside larger expressions.
 */
#ifdef CONFIG_X86_32
# ifdef CONFIG_X86_PAE
#  define MAX_ARCH_PFN		(1ULL<<(36-PAGE_SHIFT))
# else
#  define MAX_ARCH_PFN		(1ULL<<(32-PAGE_SHIFT))
# endif
#else /* CONFIG_X86_32 */
# define MAX_ARCH_PFN (MAXMEM >> PAGE_SHIFT)
#endif
1107 | ||
ee0c80fa YL |
1108 | /* |
1109 | * Find the highest page frame number we have available | |
1110 | */ | |
f361a450 | 1111 | static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type) |
ee0c80fa | 1112 | { |
2dc807b3 YL |
1113 | int i; |
1114 | unsigned long last_pfn = 0; | |
ee0c80fa YL |
1115 | unsigned long max_arch_pfn = MAX_ARCH_PFN; |
1116 | ||
2dc807b3 YL |
1117 | for (i = 0; i < e820.nr_map; i++) { |
1118 | struct e820entry *ei = &e820.map[i]; | |
f361a450 | 1119 | unsigned long start_pfn; |
2dc807b3 YL |
1120 | unsigned long end_pfn; |
1121 | ||
f361a450 | 1122 | if (ei->type != type) |
c22d4c18 | 1123 | continue; |
c22d4c18 | 1124 | |
f361a450 | 1125 | start_pfn = ei->addr >> PAGE_SHIFT; |
2dc807b3 | 1126 | end_pfn = (ei->addr + ei->size) >> PAGE_SHIFT; |
f361a450 YL |
1127 | |
1128 | if (start_pfn >= limit_pfn) | |
1129 | continue; | |
1130 | if (end_pfn > limit_pfn) { | |
1131 | last_pfn = limit_pfn; | |
1132 | break; | |
1133 | } | |
2dc807b3 YL |
1134 | if (end_pfn > last_pfn) |
1135 | last_pfn = end_pfn; | |
1136 | } | |
ee0c80fa YL |
1137 | |
1138 | if (last_pfn > max_arch_pfn) | |
1139 | last_pfn = max_arch_pfn; | |
ee0c80fa | 1140 | |
5dab8ec1 | 1141 | printk(KERN_INFO "last_pfn = %#lx max_arch_pfn = %#lx\n", |
ee0c80fa YL |
1142 | last_pfn, max_arch_pfn); |
1143 | return last_pfn; | |
1144 | } | |
f361a450 YL |
1145 | unsigned long __init e820_end_of_ram_pfn(void) |
1146 | { | |
1147 | return e820_end_pfn(MAX_ARCH_PFN, E820_RAM); | |
1148 | } | |
ee0c80fa | 1149 | |
f361a450 YL |
1150 | unsigned long __init e820_end_of_low_ram_pfn(void) |
1151 | { | |
1152 | return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM); | |
1153 | } | |
ee0c80fa YL |
1154 | /* |
1155 | * Finds an active region in the address range from start_pfn to last_pfn and | |
1156 | * returns its range in ei_startpfn and ei_endpfn for the e820 entry. | |
1157 | */ | |
1158 | int __init e820_find_active_region(const struct e820entry *ei, | |
1159 | unsigned long start_pfn, | |
1160 | unsigned long last_pfn, | |
1161 | unsigned long *ei_startpfn, | |
1162 | unsigned long *ei_endpfn) | |
1163 | { | |
1164 | u64 align = PAGE_SIZE; | |
1165 | ||
1166 | *ei_startpfn = round_up(ei->addr, align) >> PAGE_SHIFT; | |
1167 | *ei_endpfn = round_down(ei->addr + ei->size, align) >> PAGE_SHIFT; | |
1168 | ||
1169 | /* Skip map entries smaller than a page */ | |
1170 | if (*ei_startpfn >= *ei_endpfn) | |
1171 | return 0; | |
1172 | ||
1173 | /* Skip if map is outside the node */ | |
1174 | if (ei->type != E820_RAM || *ei_endpfn <= start_pfn || | |
1175 | *ei_startpfn >= last_pfn) | |
1176 | return 0; | |
1177 | ||
1178 | /* Check for overlaps */ | |
1179 | if (*ei_startpfn < start_pfn) | |
1180 | *ei_startpfn = start_pfn; | |
1181 | if (*ei_endpfn > last_pfn) | |
1182 | *ei_endpfn = last_pfn; | |
1183 | ||
ee0c80fa YL |
1184 | return 1; |
1185 | } | |
1186 | ||
1187 | /* Walk the e820 map and register active regions within a node */ | |
1188 | void __init e820_register_active_regions(int nid, unsigned long start_pfn, | |
1189 | unsigned long last_pfn) | |
1190 | { | |
1191 | unsigned long ei_startpfn; | |
1192 | unsigned long ei_endpfn; | |
1193 | int i; | |
1194 | ||
1195 | for (i = 0; i < e820.nr_map; i++) | |
1196 | if (e820_find_active_region(&e820.map[i], | |
1197 | start_pfn, last_pfn, | |
1198 | &ei_startpfn, &ei_endpfn)) | |
1199 | add_active_range(nid, ei_startpfn, ei_endpfn); | |
1200 | } | |
1201 | ||
1202 | /* | |
1203 | * Find the hole size (in bytes) in the memory range. | |
1204 | * @start: starting address of the memory range to scan | |
1205 | * @end: ending address of the memory range to scan | |
1206 | */ | |
1207 | u64 __init e820_hole_size(u64 start, u64 end) | |
1208 | { | |
1209 | unsigned long start_pfn = start >> PAGE_SHIFT; | |
1210 | unsigned long last_pfn = end >> PAGE_SHIFT; | |
1211 | unsigned long ei_startpfn, ei_endpfn, ram = 0; | |
1212 | int i; | |
1213 | ||
1214 | for (i = 0; i < e820.nr_map; i++) { | |
1215 | if (e820_find_active_region(&e820.map[i], | |
1216 | start_pfn, last_pfn, | |
1217 | &ei_startpfn, &ei_endpfn)) | |
1218 | ram += ei_endpfn - ei_startpfn; | |
1219 | } | |
1220 | return end - start - ((u64)ram << PAGE_SHIFT); | |
1221 | } | |
ab4a465e YL |
1222 | |
/*
 * Report a fatal error through early_printk() and then panic().
 *
 * @msg is passed as a "%s" argument rather than as the format string,
 * so a '%' in the message is printed literally instead of being
 * interpreted as a conversion.
 */
static void early_panic(char *msg)
{
	early_printk("%s", msg);
	panic("%s", msg);
}
1228 | ||
69a7704d YL |
1229 | static int userdef __initdata; |
1230 | ||
ab4a465e YL |
1231 | /* "mem=nopentium" disables the 4MB page tables. */ |
1232 | static int __init parse_memopt(char *p) | |
1233 | { | |
1234 | u64 mem_size; | |
1235 | ||
1236 | if (!p) | |
1237 | return -EINVAL; | |
1238 | ||
1239 | #ifdef CONFIG_X86_32 | |
1240 | if (!strcmp(p, "nopentium")) { | |
1241 | setup_clear_cpu_cap(X86_FEATURE_PSE); | |
1242 | return 0; | |
1243 | } | |
1244 | #endif | |
1245 | ||
69a7704d | 1246 | userdef = 1; |
ab4a465e | 1247 | mem_size = memparse(p, &p); |
69a7704d | 1248 | e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1); |
611dfd78 | 1249 | |
ab4a465e YL |
1250 | return 0; |
1251 | } | |
1252 | early_param("mem", parse_memopt); | |
1253 | ||
ab4a465e YL |
1254 | static int __init parse_memmap_opt(char *p) |
1255 | { | |
1256 | char *oldp; | |
1257 | u64 start_at, mem_size; | |
1258 | ||
a737abd1 CG |
1259 | if (!p) |
1260 | return -EINVAL; | |
1261 | ||
d6be118a | 1262 | if (!strncmp(p, "exactmap", 8)) { |
ab4a465e YL |
1263 | #ifdef CONFIG_CRASH_DUMP |
1264 | /* | |
1265 | * If we are doing a crash dump, we still need to know | |
1266 | * the real mem size before original memory map is | |
1267 | * reset. | |
1268 | */ | |
f361a450 | 1269 | saved_max_pfn = e820_end_of_ram_pfn(); |
ab4a465e YL |
1270 | #endif |
1271 | e820.nr_map = 0; | |
1272 | userdef = 1; | |
1273 | return 0; | |
1274 | } | |
1275 | ||
1276 | oldp = p; | |
1277 | mem_size = memparse(p, &p); | |
1278 | if (p == oldp) | |
1279 | return -EINVAL; | |
1280 | ||
1281 | userdef = 1; | |
1282 | if (*p == '@') { | |
1283 | start_at = memparse(p+1, &p); | |
d0be6bde | 1284 | e820_add_region(start_at, mem_size, E820_RAM); |
ab4a465e YL |
1285 | } else if (*p == '#') { |
1286 | start_at = memparse(p+1, &p); | |
d0be6bde | 1287 | e820_add_region(start_at, mem_size, E820_ACPI); |
ab4a465e YL |
1288 | } else if (*p == '$') { |
1289 | start_at = memparse(p+1, &p); | |
d0be6bde | 1290 | e820_add_region(start_at, mem_size, E820_RESERVED); |
7b479bec | 1291 | } else |
69a7704d | 1292 | e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1); |
7b479bec | 1293 | |
ab4a465e YL |
1294 | return *p == '\0' ? 0 : -EINVAL; |
1295 | } | |
1296 | early_param("memmap", parse_memmap_opt); | |
1297 | ||
1298 | void __init finish_e820_parsing(void) | |
1299 | { | |
1300 | if (userdef) { | |
ba639039 | 1301 | u32 nr = e820.nr_map; |
ab4a465e YL |
1302 | |
1303 | if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0) | |
1304 | early_panic("Invalid user supplied memory map"); | |
1305 | e820.nr_map = nr; | |
1306 | ||
1307 | printk(KERN_INFO "user-defined physical RAM map:\n"); | |
1308 | e820_print_map("user"); | |
1309 | } | |
1310 | } | |
41c094fd | 1311 | |
5dfcf14d BW |
1312 | static inline const char *e820_type_to_string(int e820_type) |
1313 | { | |
1314 | switch (e820_type) { | |
1315 | case E820_RESERVED_KERN: | |
1316 | case E820_RAM: return "System RAM"; | |
1317 | case E820_ACPI: return "ACPI Tables"; | |
1318 | case E820_NVS: return "ACPI Non-volatile Storage"; | |
671eef85 | 1319 | case E820_UNUSABLE: return "Unusable memory"; |
5dfcf14d BW |
1320 | default: return "reserved"; |
1321 | } | |
1322 | } | |
1323 | ||
41c094fd YL |
1324 | /* |
1325 | * Mark e820 reserved areas as busy for the resource manager. | |
1326 | */ | |
a5444d15 | 1327 | static struct resource __initdata *e820_res; |
41c094fd YL |
1328 | void __init e820_reserve_resources(void) |
1329 | { | |
1330 | int i; | |
58f7c988 | 1331 | struct resource *res; |
a5444d15 | 1332 | u64 end; |
41c094fd YL |
1333 | |
1334 | res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map); | |
58f7c988 | 1335 | e820_res = res; |
41c094fd | 1336 | for (i = 0; i < e820.nr_map; i++) { |
b4df32f4 | 1337 | end = e820.map[i].addr + e820.map[i].size - 1; |
8308c54d | 1338 | if (end != (resource_size_t)end) { |
41c094fd YL |
1339 | res++; |
1340 | continue; | |
1341 | } | |
5dfcf14d | 1342 | res->name = e820_type_to_string(e820.map[i].type); |
b4df32f4 YL |
1343 | res->start = e820.map[i].addr; |
1344 | res->end = end; | |
1345 | ||
1f987577 | 1346 | res->flags = IORESOURCE_MEM; |
a5444d15 IM |
1347 | |
1348 | /* | |
1349 | * don't register the region that could be conflicted with | |
1350 | * pci device BAR resource and insert them later in | |
1351 | * pcibios_resource_survey() | |
1352 | */ | |
1f987577 LT |
1353 | if (e820.map[i].type != E820_RESERVED || res->start < (1ULL<<20)) { |
1354 | res->flags |= IORESOURCE_BUSY; | |
58f7c988 | 1355 | insert_resource(&iomem_resource, res); |
1f987577 | 1356 | } |
41c094fd YL |
1357 | res++; |
1358 | } | |
5dfcf14d BW |
1359 | |
1360 | for (i = 0; i < e820_saved.nr_map; i++) { | |
1361 | struct e820entry *entry = &e820_saved.map[i]; | |
1362 | firmware_map_add_early(entry->addr, | |
1363 | entry->addr + entry->size - 1, | |
1364 | e820_type_to_string(entry->type)); | |
1365 | } | |
41c094fd YL |
1366 | } |
1367 | ||
45fbe3ee LT |
1368 | /* How much should we pad RAM ending depending on where it is? */ |
1369 | static unsigned long ram_alignment(resource_size_t pos) | |
1370 | { | |
1371 | unsigned long mb = pos >> 20; | |
1372 | ||
1373 | /* To 64kB in the first megabyte */ | |
1374 | if (!mb) | |
1375 | return 64*1024; | |
1376 | ||
1377 | /* To 1MB in the first 16MB */ | |
1378 | if (mb < 16) | |
1379 | return 1024*1024; | |
1380 | ||
1381 | /* To 32MB for anything above that */ | |
1382 | return 32*1024*1024; | |
1383 | } | |
1384 | ||
7c5371c4 YL |
1385 | #define MAX_RESOURCE_SIZE ((resource_size_t)-1) |
1386 | ||
58f7c988 YL |
1387 | void __init e820_reserve_resources_late(void) |
1388 | { | |
1389 | int i; | |
1390 | struct resource *res; | |
1391 | ||
1392 | res = e820_res; | |
1393 | for (i = 0; i < e820.nr_map; i++) { | |
a5444d15 | 1394 | if (!res->parent && res->end) |
1f987577 | 1395 | insert_resource_expand_to_fit(&iomem_resource, res); |
58f7c988 YL |
1396 | res++; |
1397 | } | |
45fbe3ee LT |
1398 | |
1399 | /* | |
1400 | * Try to bump up RAM regions to reasonable boundaries to | |
1401 | * avoid stolen RAM: | |
1402 | */ | |
1403 | for (i = 0; i < e820.nr_map; i++) { | |
7c5371c4 YL |
1404 | struct e820entry *entry = &e820.map[i]; |
1405 | u64 start, end; | |
45fbe3ee LT |
1406 | |
1407 | if (entry->type != E820_RAM) | |
1408 | continue; | |
1409 | start = entry->addr + entry->size; | |
7c5371c4 YL |
1410 | end = round_up(start, ram_alignment(start)) - 1; |
1411 | if (end > MAX_RESOURCE_SIZE) | |
1412 | end = MAX_RESOURCE_SIZE; | |
1413 | if (start >= end) | |
45fbe3ee | 1414 | continue; |
7c5371c4 YL |
1415 | reserve_region_with_split(&iomem_resource, start, end, |
1416 | "RAM buffer"); | |
45fbe3ee | 1417 | } |
58f7c988 YL |
1418 | } |
1419 | ||
95a71a45 | 1420 | char *__init default_machine_specific_memory_setup(void) |
064d25f1 YL |
1421 | { |
1422 | char *who = "BIOS-e820"; | |
ba639039 | 1423 | u32 new_nr; |
064d25f1 YL |
1424 | /* |
1425 | * Try to copy the BIOS-supplied E820-map. | |
1426 | * | |
1427 | * Otherwise fake a memory map; one section from 0k->640k, | |
1428 | * the next section from 1mb->appropriate_mem_k | |
1429 | */ | |
1430 | new_nr = boot_params.e820_entries; | |
1431 | sanitize_e820_map(boot_params.e820_map, | |
1432 | ARRAY_SIZE(boot_params.e820_map), | |
1433 | &new_nr); | |
1434 | boot_params.e820_entries = new_nr; | |
dc8e8120 YL |
1435 | if (append_e820_map(boot_params.e820_map, boot_params.e820_entries) |
1436 | < 0) { | |
95a71a45 | 1437 | u64 mem_size; |
064d25f1 YL |
1438 | |
1439 | /* compare results from other methods and take the greater */ | |
1440 | if (boot_params.alt_mem_k | |
1441 | < boot_params.screen_info.ext_mem_k) { | |
1442 | mem_size = boot_params.screen_info.ext_mem_k; | |
1443 | who = "BIOS-88"; | |
1444 | } else { | |
1445 | mem_size = boot_params.alt_mem_k; | |
1446 | who = "BIOS-e801"; | |
1447 | } | |
1448 | ||
1449 | e820.nr_map = 0; | |
1450 | e820_add_region(0, LOWMEMSIZE(), E820_RAM); | |
1451 | e820_add_region(HIGH_MEMORY, mem_size << 10, E820_RAM); | |
064d25f1 YL |
1452 | } |
1453 | ||
1454 | /* In case someone cares... */ | |
1455 | return who; | |
1456 | } | |
1457 | ||
95a71a45 YL |
1458 | char *__init __attribute__((weak)) machine_specific_memory_setup(void) |
1459 | { | |
3c9cb6de YL |
1460 | if (x86_quirks->arch_memory_setup) { |
1461 | char *who = x86_quirks->arch_memory_setup(); | |
3b33553b IM |
1462 | |
1463 | if (who) | |
1464 | return who; | |
1465 | } | |
95a71a45 YL |
1466 | return default_machine_specific_memory_setup(); |
1467 | } | |
1468 | ||
064d25f1 YL |
1469 | /* Overridden in paravirt.c if CONFIG_PARAVIRT */ |
1470 | char * __init __attribute__((weak)) memory_setup(void) | |
1471 | { | |
1472 | return machine_specific_memory_setup(); | |
1473 | } | |
1474 | ||
1475 | void __init setup_memory_map(void) | |
1476 | { | |
0be15526 YL |
1477 | char *who; |
1478 | ||
1479 | who = memory_setup(); | |
1480 | memcpy(&e820_saved, &e820, sizeof(struct e820map)); | |
064d25f1 | 1481 | printk(KERN_INFO "BIOS-provided physical RAM map:\n"); |
0be15526 | 1482 | e820_print_map(who); |
064d25f1 | 1483 | } |