Commit | Line | Data |
---|---|---|
b79cd8f1 YL |
1 | /* |
2 | * Handle the memory map. | |
3 | * The functions here do the job until bootmem takes over. | |
4 | * | |
5 | * Getting sanitize_e820_map() in sync with i386 version by applying change: | |
6 | * - Provisions for empty E820 memory regions (reported by certain BIOSes). | |
7 | * Alex Achenbach <xela@slit.de>, December 2002. | |
8 | * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> | |
9 | * | |
10 | */ | |
11 | #include <linux/kernel.h> | |
12 | #include <linux/types.h> | |
13 | #include <linux/init.h> | |
14 | #include <linux/bootmem.h> | |
15 | #include <linux/ioport.h> | |
16 | #include <linux/string.h> | |
17 | #include <linux/kexec.h> | |
18 | #include <linux/module.h> | |
19 | #include <linux/mm.h> | |
20 | #include <linux/pfn.h> | |
bf62f398 | 21 | #include <linux/suspend.h> |
5dfcf14d | 22 | #include <linux/firmware-map.h> |
b79cd8f1 YL |
23 | |
24 | #include <asm/pgtable.h> | |
25 | #include <asm/page.h> | |
26 | #include <asm/e820.h> | |
a4c81cf6 | 27 | #include <asm/proto.h> |
b79cd8f1 | 28 | #include <asm/setup.h> |
a4c81cf6 | 29 | #include <asm/trampoline.h> |
b79cd8f1 | 30 | |
5dfcf14d BW |
31 | /* |
32 | * The e820 map is the map that gets modified e.g. with command line parameters | |
33 | * and that is also registered with modifications in the kernel resource tree | |
34 | * with the iomem_resource as parent. | |
35 | * | |
36 | * The e820_saved is directly saved after the BIOS-provided memory map is | |
37 | * copied. It doesn't get modified afterwards. It's registered for the | |
38 | * /sys/firmware/memmap interface. | |
39 | * | |
40 | * That memory map is not modified and is used as base for kexec. The kexec'd | |
41 | * kernel should get the same memory map as the firmware provides. Then the | |
42 | * user can e.g. boot the original kernel with mem=1G while still booting the | |
43 | * next kernel with full memory. | |
44 | */ | |
b79cd8f1 | 45 | struct e820map e820; |
5dfcf14d | 46 | struct e820map e820_saved; |
b79cd8f1 YL |
47 | |
/*
 * Start address for PCI or other memory-mapped resources.  The initial
 * value is only a placeholder; e820_setup_gap() assigns the real one.
 */
unsigned long pci_mem_start = 0xaeedbabe;
#ifdef CONFIG_PCI
EXPORT_SYMBOL(pci_mem_start);
#endif
53 | ||
54 | /* | |
55 | * This function checks if any part of the range <start,end> is mapped | |
56 | * with type. | |
57 | */ | |
58 | int | |
59 | e820_any_mapped(u64 start, u64 end, unsigned type) | |
60 | { | |
61 | int i; | |
62 | ||
63 | for (i = 0; i < e820.nr_map; i++) { | |
64 | struct e820entry *ei = &e820.map[i]; | |
65 | ||
66 | if (type && ei->type != type) | |
67 | continue; | |
68 | if (ei->addr >= end || ei->addr + ei->size <= start) | |
69 | continue; | |
70 | return 1; | |
71 | } | |
72 | return 0; | |
73 | } | |
74 | EXPORT_SYMBOL_GPL(e820_any_mapped); | |
75 | ||
76 | /* | |
77 | * This function checks if the entire range <start,end> is mapped with type. | |
78 | * | |
79 | * Note: this function only works correct if the e820 table is sorted and | |
80 | * not-overlapping, which is the case | |
81 | */ | |
82 | int __init e820_all_mapped(u64 start, u64 end, unsigned type) | |
83 | { | |
84 | int i; | |
85 | ||
86 | for (i = 0; i < e820.nr_map; i++) { | |
87 | struct e820entry *ei = &e820.map[i]; | |
88 | ||
89 | if (type && ei->type != type) | |
90 | continue; | |
91 | /* is the region (part) in overlap with the current region ?*/ | |
92 | if (ei->addr >= end || ei->addr + ei->size <= start) | |
93 | continue; | |
94 | ||
95 | /* if the region is at the beginning of <start,end> we move | |
96 | * start to the end of the region since it's ok until there | |
97 | */ | |
98 | if (ei->addr <= start) | |
99 | start = ei->addr + ei->size; | |
100 | /* | |
101 | * if start is now at or beyond end, we're done, full | |
102 | * coverage | |
103 | */ | |
104 | if (start >= end) | |
105 | return 1; | |
106 | } | |
107 | return 0; | |
108 | } | |
109 | ||
110 | /* | |
111 | * Add a memory region to the kernel e820 map. | |
112 | */ | |
d0be6bde | 113 | void __init e820_add_region(u64 start, u64 size, int type) |
b79cd8f1 YL |
114 | { |
115 | int x = e820.nr_map; | |
116 | ||
c3965bd1 | 117 | if (x == ARRAY_SIZE(e820.map)) { |
b79cd8f1 YL |
118 | printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); |
119 | return; | |
120 | } | |
121 | ||
122 | e820.map[x].addr = start; | |
123 | e820.map[x].size = size; | |
124 | e820.map[x].type = type; | |
125 | e820.nr_map++; | |
126 | } | |
127 | ||
128 | void __init e820_print_map(char *who) | |
129 | { | |
130 | int i; | |
131 | ||
132 | for (i = 0; i < e820.nr_map; i++) { | |
133 | printk(KERN_INFO " %s: %016Lx - %016Lx ", who, | |
134 | (unsigned long long) e820.map[i].addr, | |
135 | (unsigned long long) | |
136 | (e820.map[i].addr + e820.map[i].size)); | |
137 | switch (e820.map[i].type) { | |
138 | case E820_RAM: | |
28bb2237 | 139 | case E820_RESERVED_KERN: |
b79cd8f1 YL |
140 | printk(KERN_CONT "(usable)\n"); |
141 | break; | |
142 | case E820_RESERVED: | |
143 | printk(KERN_CONT "(reserved)\n"); | |
144 | break; | |
145 | case E820_ACPI: | |
146 | printk(KERN_CONT "(ACPI data)\n"); | |
147 | break; | |
148 | case E820_NVS: | |
149 | printk(KERN_CONT "(ACPI NVS)\n"); | |
150 | break; | |
151 | default: | |
152 | printk(KERN_CONT "type %u\n", e820.map[i].type); | |
153 | break; | |
154 | } | |
155 | } | |
156 | } | |
157 | ||
158 | /* | |
159 | * Sanitize the BIOS e820 map. | |
160 | * | |
161 | * Some e820 responses include overlapping entries. The following | |
5b7eb2e9 PJ |
162 | * replaces the original e820 map with a new one, removing overlaps, |
163 | * and resolving conflicting memory types in favor of highest | |
164 | * numbered type. | |
b79cd8f1 | 165 | * |
5b7eb2e9 PJ |
166 | * The input parameter biosmap points to an array of 'struct |
167 | * e820entry' which on entry has elements in the range [0, *pnr_map) | |
168 | * valid, and which has space for up to max_nr_map entries. | |
169 | * On return, the resulting sanitized e820 map entries will have | |
170 | * been written back to the same location, starting at biosmap. | |
171 | * | |
172 | * The integer pointed to by pnr_map must be valid on entry (the | |
173 | * current number of valid entries located at biosmap) and will | |
174 | * be updated on return, with the new number of valid entries | |
175 | * (something no more than max_nr_map.) | |
176 | * | |
177 | * The return value from sanitize_e820_map() is zero if it | |
178 | * successfully 'sanitized' the map entries passed in, and is -1 | |
179 | * if it did nothing, which can happen if either of (1) it was | |
180 | * only passed one map entry, or (2) any of the input map entries | |
181 | * were invalid (start + size < start, meaning that the size was | |
182 | * so big the described memory range wrapped around through zero.) | |
183 | * | |
184 | * Visually we're performing the following | |
185 | * (1,2,3,4 = memory types)... | |
186 | * | |
187 | * Sample memory map (w/overlaps): | |
188 | * ____22__________________ | |
189 | * ______________________4_ | |
190 | * ____1111________________ | |
191 | * _44_____________________ | |
192 | * 11111111________________ | |
193 | * ____________________33__ | |
194 | * ___________44___________ | |
195 | * __________33333_________ | |
196 | * ______________22________ | |
197 | * ___________________2222_ | |
198 | * _________111111111______ | |
199 | * _____________________11_ | |
200 | * _________________4______ | |
201 | * | |
202 | * Sanitized equivalent (no overlap): | |
203 | * 1_______________________ | |
204 | * _44_____________________ | |
205 | * ___1____________________ | |
206 | * ____22__________________ | |
207 | * ______11________________ | |
208 | * _________1______________ | |
209 | * __________3_____________ | |
210 | * ___________44___________ | |
211 | * _____________33_________ | |
212 | * _______________2________ | |
213 | * ________________1_______ | |
214 | * _________________4______ | |
215 | * ___________________2____ | |
216 | * ____________________33__ | |
217 | * ______________________4_ | |
b79cd8f1 | 218 | */ |
5b7eb2e9 | 219 | |
c3965bd1 | 220 | int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, |
6e9bcc79 | 221 | int *pnr_map) |
b79cd8f1 YL |
222 | { |
223 | struct change_member { | |
224 | struct e820entry *pbios; /* pointer to original bios entry */ | |
225 | unsigned long long addr; /* address for this change point */ | |
226 | }; | |
157fabf0 PJ |
227 | static struct change_member change_point_list[2*E820_X_MAX] __initdata; |
228 | static struct change_member *change_point[2*E820_X_MAX] __initdata; | |
229 | static struct e820entry *overlap_list[E820_X_MAX] __initdata; | |
230 | static struct e820entry new_bios[E820_X_MAX] __initdata; | |
b79cd8f1 YL |
231 | struct change_member *change_tmp; |
232 | unsigned long current_type, last_type; | |
233 | unsigned long long last_addr; | |
234 | int chgidx, still_changing; | |
235 | int overlap_entries; | |
236 | int new_bios_entry; | |
237 | int old_nr, new_nr, chg_nr; | |
238 | int i; | |
239 | ||
b79cd8f1 YL |
240 | /* if there's only one memory region, don't bother */ |
241 | if (*pnr_map < 2) | |
242 | return -1; | |
243 | ||
244 | old_nr = *pnr_map; | |
6e9bcc79 | 245 | BUG_ON(old_nr > max_nr_map); |
b79cd8f1 YL |
246 | |
247 | /* bail out if we find any unreasonable addresses in bios map */ | |
248 | for (i = 0; i < old_nr; i++) | |
249 | if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) | |
250 | return -1; | |
251 | ||
252 | /* create pointers for initial change-point information (for sorting) */ | |
253 | for (i = 0; i < 2 * old_nr; i++) | |
254 | change_point[i] = &change_point_list[i]; | |
255 | ||
256 | /* record all known change-points (starting and ending addresses), | |
257 | omitting those that are for empty memory regions */ | |
258 | chgidx = 0; | |
259 | for (i = 0; i < old_nr; i++) { | |
260 | if (biosmap[i].size != 0) { | |
261 | change_point[chgidx]->addr = biosmap[i].addr; | |
262 | change_point[chgidx++]->pbios = &biosmap[i]; | |
263 | change_point[chgidx]->addr = biosmap[i].addr + | |
264 | biosmap[i].size; | |
265 | change_point[chgidx++]->pbios = &biosmap[i]; | |
266 | } | |
267 | } | |
268 | chg_nr = chgidx; | |
269 | ||
270 | /* sort change-point list by memory addresses (low -> high) */ | |
271 | still_changing = 1; | |
272 | while (still_changing) { | |
273 | still_changing = 0; | |
274 | for (i = 1; i < chg_nr; i++) { | |
275 | unsigned long long curaddr, lastaddr; | |
276 | unsigned long long curpbaddr, lastpbaddr; | |
277 | ||
278 | curaddr = change_point[i]->addr; | |
279 | lastaddr = change_point[i - 1]->addr; | |
280 | curpbaddr = change_point[i]->pbios->addr; | |
281 | lastpbaddr = change_point[i - 1]->pbios->addr; | |
282 | ||
283 | /* | |
284 | * swap entries, when: | |
285 | * | |
286 | * curaddr > lastaddr or | |
287 | * curaddr == lastaddr and curaddr == curpbaddr and | |
288 | * lastaddr != lastpbaddr | |
289 | */ | |
290 | if (curaddr < lastaddr || | |
291 | (curaddr == lastaddr && curaddr == curpbaddr && | |
292 | lastaddr != lastpbaddr)) { | |
293 | change_tmp = change_point[i]; | |
294 | change_point[i] = change_point[i-1]; | |
295 | change_point[i-1] = change_tmp; | |
296 | still_changing = 1; | |
297 | } | |
298 | } | |
299 | } | |
300 | ||
301 | /* create a new bios memory map, removing overlaps */ | |
302 | overlap_entries = 0; /* number of entries in the overlap table */ | |
303 | new_bios_entry = 0; /* index for creating new bios map entries */ | |
304 | last_type = 0; /* start with undefined memory type */ | |
305 | last_addr = 0; /* start with 0 as last starting address */ | |
306 | ||
307 | /* loop through change-points, determining affect on the new bios map */ | |
308 | for (chgidx = 0; chgidx < chg_nr; chgidx++) { | |
309 | /* keep track of all overlapping bios entries */ | |
310 | if (change_point[chgidx]->addr == | |
311 | change_point[chgidx]->pbios->addr) { | |
312 | /* | |
313 | * add map entry to overlap list (> 1 entry | |
314 | * implies an overlap) | |
315 | */ | |
316 | overlap_list[overlap_entries++] = | |
317 | change_point[chgidx]->pbios; | |
318 | } else { | |
319 | /* | |
320 | * remove entry from list (order independent, | |
321 | * so swap with last) | |
322 | */ | |
323 | for (i = 0; i < overlap_entries; i++) { | |
324 | if (overlap_list[i] == | |
325 | change_point[chgidx]->pbios) | |
326 | overlap_list[i] = | |
327 | overlap_list[overlap_entries-1]; | |
328 | } | |
329 | overlap_entries--; | |
330 | } | |
331 | /* | |
332 | * if there are overlapping entries, decide which | |
333 | * "type" to use (larger value takes precedence -- | |
334 | * 1=usable, 2,3,4,4+=unusable) | |
335 | */ | |
336 | current_type = 0; | |
337 | for (i = 0; i < overlap_entries; i++) | |
338 | if (overlap_list[i]->type > current_type) | |
339 | current_type = overlap_list[i]->type; | |
340 | /* | |
341 | * continue building up new bios map based on this | |
342 | * information | |
343 | */ | |
344 | if (current_type != last_type) { | |
345 | if (last_type != 0) { | |
346 | new_bios[new_bios_entry].size = | |
347 | change_point[chgidx]->addr - last_addr; | |
348 | /* | |
349 | * move forward only if the new size | |
350 | * was non-zero | |
351 | */ | |
352 | if (new_bios[new_bios_entry].size != 0) | |
353 | /* | |
354 | * no more space left for new | |
355 | * bios entries ? | |
356 | */ | |
c3965bd1 | 357 | if (++new_bios_entry >= max_nr_map) |
b79cd8f1 YL |
358 | break; |
359 | } | |
360 | if (current_type != 0) { | |
361 | new_bios[new_bios_entry].addr = | |
362 | change_point[chgidx]->addr; | |
363 | new_bios[new_bios_entry].type = current_type; | |
364 | last_addr = change_point[chgidx]->addr; | |
365 | } | |
366 | last_type = current_type; | |
367 | } | |
368 | } | |
369 | /* retain count for new bios entries */ | |
370 | new_nr = new_bios_entry; | |
371 | ||
372 | /* copy new bios mapping into original location */ | |
373 | memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry)); | |
374 | *pnr_map = new_nr; | |
375 | ||
376 | return 0; | |
377 | } | |
378 | ||
dc8e8120 | 379 | static int __init __append_e820_map(struct e820entry *biosmap, int nr_map) |
8c5beb50 HY |
380 | { |
381 | while (nr_map) { | |
382 | u64 start = biosmap->addr; | |
383 | u64 size = biosmap->size; | |
384 | u64 end = start + size; | |
385 | u32 type = biosmap->type; | |
386 | ||
387 | /* Overflow in 64 bits? Ignore the memory map. */ | |
388 | if (start > end) | |
389 | return -1; | |
390 | ||
391 | e820_add_region(start, size, type); | |
392 | ||
393 | biosmap++; | |
394 | nr_map--; | |
395 | } | |
396 | return 0; | |
397 | } | |
398 | ||
b79cd8f1 YL |
399 | /* |
400 | * Copy the BIOS e820 map into a safe place. | |
401 | * | |
402 | * Sanity-check it while we're at it.. | |
403 | * | |
404 | * If we're lucky and live on a modern system, the setup code | |
405 | * will have given us a memory map that we can use to properly | |
406 | * set up memory. If we aren't, we'll fake a memory map. | |
407 | */ | |
dc8e8120 | 408 | static int __init append_e820_map(struct e820entry *biosmap, int nr_map) |
b79cd8f1 YL |
409 | { |
410 | /* Only one memory region (or negative)? Ignore it */ | |
411 | if (nr_map < 2) | |
412 | return -1; | |
413 | ||
dc8e8120 | 414 | return __append_e820_map(biosmap, nr_map); |
b79cd8f1 YL |
415 | } |
416 | ||
fc9036ea YL |
417 | static u64 __init e820_update_range_map(struct e820map *e820x, u64 start, |
418 | u64 size, unsigned old_type, | |
419 | unsigned new_type) | |
b79cd8f1 YL |
420 | { |
421 | int i; | |
422 | u64 real_updated_size = 0; | |
423 | ||
424 | BUG_ON(old_type == new_type); | |
425 | ||
232b957a YL |
426 | if (size > (ULLONG_MAX - start)) |
427 | size = ULLONG_MAX - start; | |
428 | ||
b79cd8f1 | 429 | for (i = 0; i < e820.nr_map; i++) { |
fc9036ea | 430 | struct e820entry *ei = &e820x->map[i]; |
b79cd8f1 YL |
431 | u64 final_start, final_end; |
432 | if (ei->type != old_type) | |
433 | continue; | |
434 | /* totally covered? */ | |
435 | if (ei->addr >= start && | |
436 | (ei->addr + ei->size) <= (start + size)) { | |
437 | ei->type = new_type; | |
438 | real_updated_size += ei->size; | |
439 | continue; | |
440 | } | |
441 | /* partially covered */ | |
442 | final_start = max(start, ei->addr); | |
443 | final_end = min(start + size, ei->addr + ei->size); | |
444 | if (final_start >= final_end) | |
445 | continue; | |
d0be6bde | 446 | e820_add_region(final_start, final_end - final_start, |
b79cd8f1 YL |
447 | new_type); |
448 | real_updated_size += final_end - final_start; | |
976dd4dc YL |
449 | |
450 | ei->size -= final_end - final_start; | |
451 | if (ei->addr < final_start) | |
452 | continue; | |
453 | ei->addr = final_end; | |
b79cd8f1 YL |
454 | } |
455 | return real_updated_size; | |
456 | } | |
457 | ||
fc9036ea YL |
458 | u64 __init e820_update_range(u64 start, u64 size, unsigned old_type, |
459 | unsigned new_type) | |
460 | { | |
461 | return e820_update_range_map(&e820, start, size, old_type, new_type); | |
462 | } | |
463 | ||
464 | static u64 __init e820_update_range_saved(u64 start, u64 size, | |
465 | unsigned old_type, unsigned new_type) | |
466 | { | |
467 | return e820_update_range_map(&e820_saved, start, size, old_type, | |
468 | new_type); | |
469 | } | |
470 | ||
7a1fd986 YL |
471 | /* make e820 not cover the range */ |
472 | u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type, | |
473 | int checktype) | |
474 | { | |
475 | int i; | |
476 | u64 real_removed_size = 0; | |
477 | ||
232b957a YL |
478 | if (size > (ULLONG_MAX - start)) |
479 | size = ULLONG_MAX - start; | |
480 | ||
7a1fd986 YL |
481 | for (i = 0; i < e820.nr_map; i++) { |
482 | struct e820entry *ei = &e820.map[i]; | |
483 | u64 final_start, final_end; | |
484 | ||
485 | if (checktype && ei->type != old_type) | |
486 | continue; | |
487 | /* totally covered? */ | |
488 | if (ei->addr >= start && | |
489 | (ei->addr + ei->size) <= (start + size)) { | |
490 | real_removed_size += ei->size; | |
491 | memset(ei, 0, sizeof(struct e820entry)); | |
492 | continue; | |
493 | } | |
494 | /* partially covered */ | |
495 | final_start = max(start, ei->addr); | |
496 | final_end = min(start + size, ei->addr + ei->size); | |
497 | if (final_start >= final_end) | |
498 | continue; | |
499 | real_removed_size += final_end - final_start; | |
500 | ||
501 | ei->size -= final_end - final_start; | |
502 | if (ei->addr < final_start) | |
503 | continue; | |
504 | ei->addr = final_end; | |
505 | } | |
506 | return real_removed_size; | |
507 | } | |
508 | ||
b79cd8f1 YL |
509 | void __init update_e820(void) |
510 | { | |
6e9bcc79 | 511 | int nr_map; |
b79cd8f1 YL |
512 | |
513 | nr_map = e820.nr_map; | |
c3965bd1 | 514 | if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map)) |
b79cd8f1 YL |
515 | return; |
516 | e820.nr_map = nr_map; | |
517 | printk(KERN_INFO "modified physical RAM map:\n"); | |
518 | e820_print_map("modified"); | |
519 | } | |
fc9036ea YL |
520 | static void __init update_e820_saved(void) |
521 | { | |
522 | int nr_map; | |
523 | ||
524 | nr_map = e820_saved.nr_map; | |
525 | if (sanitize_e820_map(e820_saved.map, ARRAY_SIZE(e820_saved.map), &nr_map)) | |
526 | return; | |
527 | e820_saved.nr_map = nr_map; | |
528 | } | |
fd6493e1 | 529 | #define MAX_GAP_END 0x100000000ull |
b79cd8f1 | 530 | /* |
fd6493e1 | 531 | * Search for a gap in the e820 memory space from start_addr to end_addr. |
b79cd8f1 | 532 | */ |
3381959d | 533 | __init int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize, |
fd6493e1 | 534 | unsigned long start_addr, unsigned long long end_addr) |
b79cd8f1 | 535 | { |
fd6493e1 | 536 | unsigned long long last; |
3381959d | 537 | int i = e820.nr_map; |
b79cd8f1 YL |
538 | int found = 0; |
539 | ||
fd6493e1 AK |
540 | last = (end_addr && end_addr < MAX_GAP_END) ? end_addr : MAX_GAP_END; |
541 | ||
b79cd8f1 YL |
542 | while (--i >= 0) { |
543 | unsigned long long start = e820.map[i].addr; | |
544 | unsigned long long end = start + e820.map[i].size; | |
545 | ||
3381959d AK |
546 | if (end < start_addr) |
547 | continue; | |
548 | ||
b79cd8f1 YL |
549 | /* |
550 | * Since "last" is at most 4GB, we know we'll | |
551 | * fit in 32 bits if this condition is true | |
552 | */ | |
553 | if (last > end) { | |
554 | unsigned long gap = last - end; | |
555 | ||
3381959d AK |
556 | if (gap >= *gapsize) { |
557 | *gapsize = gap; | |
558 | *gapstart = end; | |
b79cd8f1 YL |
559 | found = 1; |
560 | } | |
561 | } | |
562 | if (start < last) | |
563 | last = start; | |
564 | } | |
3381959d AK |
565 | return found; |
566 | } | |
567 | ||
568 | /* | |
569 | * Search for the biggest gap in the low 32 bits of the e820 | |
570 | * memory space. We pass this space to PCI to assign MMIO resources | |
571 | * for hotplug or unconfigured devices in. | |
572 | * Hopefully the BIOS let enough space left. | |
573 | */ | |
574 | __init void e820_setup_gap(void) | |
575 | { | |
576 | unsigned long gapstart, gapsize, round; | |
577 | int found; | |
578 | ||
579 | gapstart = 0x10000000; | |
580 | gapsize = 0x400000; | |
fd6493e1 | 581 | found = e820_search_gap(&gapstart, &gapsize, 0, MAX_GAP_END); |
b79cd8f1 YL |
582 | |
583 | #ifdef CONFIG_X86_64 | |
584 | if (!found) { | |
c987d12f | 585 | gapstart = (max_pfn << PAGE_SHIFT) + 1024*1024; |
b79cd8f1 YL |
586 | printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit " |
587 | "address range\n" | |
588 | KERN_ERR "PCI: Unassigned devices with 32bit resource " | |
589 | "registers may break!\n"); | |
590 | } | |
591 | #endif | |
592 | ||
593 | /* | |
594 | * See how much we want to round up: start off with | |
595 | * rounding to the next 1MB area. | |
596 | */ | |
597 | round = 0x100000; | |
598 | while ((gapsize >> 4) > round) | |
599 | round += round; | |
600 | /* Fun with two's complement */ | |
601 | pci_mem_start = (gapstart + round) & -round; | |
602 | ||
603 | printk(KERN_INFO | |
604 | "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", | |
605 | pci_mem_start, gapstart, gapsize); | |
606 | } | |
607 | ||
8c5beb50 HY |
608 | /** |
609 | * Because of the size limitation of struct boot_params, only first | |
610 | * 128 E820 memory entries are passed to kernel via | |
611 | * boot_params.e820_map, others are passed via SETUP_E820_EXT node of | |
612 | * linked list of struct setup_data, which is parsed here. | |
613 | */ | |
614 | void __init parse_e820_ext(struct setup_data *sdata, unsigned long pa_data) | |
615 | { | |
616 | u32 map_len; | |
617 | int entries; | |
618 | struct e820entry *extmap; | |
619 | ||
620 | entries = sdata->len / sizeof(struct e820entry); | |
621 | map_len = sdata->len + sizeof(struct setup_data); | |
622 | if (map_len > PAGE_SIZE) | |
623 | sdata = early_ioremap(pa_data, map_len); | |
624 | extmap = (struct e820entry *)(sdata->data); | |
dc8e8120 | 625 | __append_e820_map(extmap, entries); |
8c5beb50 HY |
626 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); |
627 | if (map_len > PAGE_SIZE) | |
628 | early_iounmap(sdata, map_len); | |
629 | printk(KERN_INFO "extended physical RAM map:\n"); | |
630 | e820_print_map("extended"); | |
631 | } | |
632 | ||
bf62f398 YL |
#if defined(CONFIG_X86_64) || \
	(defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION))
/*
 * Find the ranges of physical addresses that do not correspond to
 * e820 RAM areas and mark the corresponding pages as nosave for
 * hibernation (32 bit) or software suspend and suspend to RAM (64 bit).
 *
 * Both the holes between consecutive entries and the entries that are
 * neither E820_RAM nor E820_RESERVED_KERN are registered.  The walk ends
 * once an entry reaches limit_pfn.
 *
 * This function requires the e820 map to be sorted and without any
 * overlapping entries and assumes the first e820 area to be RAM.
 */
void __init e820_mark_nosave_regions(unsigned long limit_pfn)
{
	unsigned long pfn;
	int idx;

	/* end of the first entry, which is taken to be RAM */
	pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size);

	for (idx = 1; idx < e820.nr_map; idx++) {
		struct e820entry *ent = &e820.map[idx];
		int saveable = ent->type == E820_RAM ||
			       ent->type == E820_RESERVED_KERN;

		/* hole between the previous entry and this one */
		if (pfn < PFN_UP(ent->addr))
			register_nosave_region(pfn, PFN_UP(ent->addr));

		pfn = PFN_DOWN(ent->addr + ent->size);

		/* the entry itself, unless it is usable memory */
		if (!saveable)
			register_nosave_region(PFN_UP(ent->addr), pfn);

		if (pfn >= limit_pfn)
			break;
	}
}
#endif
a4c81cf6 YL |
664 | |
665 | /* | |
666 | * Early reserved memory areas. | |
667 | */ | |
668 | #define MAX_EARLY_RES 20 | |
669 | ||
670 | struct early_res { | |
671 | u64 start, end; | |
672 | char name[16]; | |
c4ba1320 | 673 | char overlap_ok; |
a4c81cf6 YL |
674 | }; |
675 | static struct early_res early_res[MAX_EARLY_RES] __initdata = { | |
676 | { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */ | |
677 | #if defined(CONFIG_X86_64) && defined(CONFIG_X86_TRAMPOLINE) | |
678 | { TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" }, | |
679 | #endif | |
680 | #if defined(CONFIG_X86_32) && defined(CONFIG_SMP) | |
681 | /* | |
682 | * But first pinch a few for the stack/trampoline stuff | |
683 | * FIXME: Don't need the extra page at 4K, but need to fix | |
684 | * trampoline before removing it. (see the GDT stuff) | |
685 | */ | |
686 | { PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE" }, | |
687 | /* | |
688 | * Has to be in very low memory so we can execute | |
689 | * real-mode AP code. | |
690 | */ | |
691 | { TRAMPOLINE_BASE, TRAMPOLINE_BASE + PAGE_SIZE, "TRAMPOLINE" }, | |
692 | #endif | |
693 | {} | |
694 | }; | |
695 | ||
d3fbe5ea | 696 | static int __init find_overlapped_early(u64 start, u64 end) |
a4c81cf6 YL |
697 | { |
698 | int i; | |
699 | struct early_res *r; | |
d3fbe5ea | 700 | |
a4c81cf6 YL |
701 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { |
702 | r = &early_res[i]; | |
703 | if (end > r->start && start < r->end) | |
d3fbe5ea | 704 | break; |
a4c81cf6 | 705 | } |
d3fbe5ea HY |
706 | |
707 | return i; | |
708 | } | |
709 | ||
c4ba1320 PJ |
710 | /* |
711 | * Drop the i-th range from the early reservation map, | |
712 | * by copying any higher ranges down one over it, and | |
713 | * clearing what had been the last slot. | |
714 | */ | |
715 | static void __init drop_range(int i) | |
716 | { | |
717 | int j; | |
718 | ||
719 | for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++) | |
720 | ; | |
721 | ||
722 | memmove(&early_res[i], &early_res[i + 1], | |
723 | (j - 1 - i) * sizeof(struct early_res)); | |
724 | ||
725 | early_res[j - 1].end = 0; | |
726 | } | |
727 | ||
728 | /* | |
729 | * Split any existing ranges that: | |
730 | * 1) are marked 'overlap_ok', and | |
731 | * 2) overlap with the stated range [start, end) | |
732 | * into whatever portion (if any) of the existing range is entirely | |
733 | * below or entirely above the stated range. Drop the portion | |
734 | * of the existing range that overlaps with the stated range, | |
735 | * which will allow the caller of this routine to then add that | |
736 | * stated range without conflicting with any existing range. | |
737 | */ | |
738 | static void __init drop_overlaps_that_are_ok(u64 start, u64 end) | |
739 | { | |
740 | int i; | |
741 | struct early_res *r; | |
742 | u64 lower_start, lower_end; | |
743 | u64 upper_start, upper_end; | |
744 | char name[16]; | |
745 | ||
746 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | |
747 | r = &early_res[i]; | |
748 | ||
749 | /* Continue past non-overlapping ranges */ | |
750 | if (end <= r->start || start >= r->end) | |
751 | continue; | |
752 | ||
753 | /* | |
754 | * Leave non-ok overlaps as is; let caller | |
755 | * panic "Overlapping early reservations" | |
756 | * when it hits this overlap. | |
757 | */ | |
758 | if (!r->overlap_ok) | |
759 | return; | |
760 | ||
761 | /* | |
762 | * We have an ok overlap. We will drop it from the early | |
763 | * reservation map, and add back in any non-overlapping | |
764 | * portions (lower or upper) as separate, overlap_ok, | |
765 | * non-overlapping ranges. | |
766 | */ | |
767 | ||
768 | /* 1. Note any non-overlapping (lower or upper) ranges. */ | |
769 | strncpy(name, r->name, sizeof(name) - 1); | |
770 | ||
771 | lower_start = lower_end = 0; | |
772 | upper_start = upper_end = 0; | |
773 | if (r->start < start) { | |
774 | lower_start = r->start; | |
775 | lower_end = start; | |
776 | } | |
777 | if (r->end > end) { | |
778 | upper_start = end; | |
779 | upper_end = r->end; | |
780 | } | |
781 | ||
782 | /* 2. Drop the original ok overlapping range */ | |
783 | drop_range(i); | |
784 | ||
785 | i--; /* resume for-loop on copied down entry */ | |
786 | ||
787 | /* 3. Add back in any non-overlapping ranges. */ | |
788 | if (lower_end) | |
789 | reserve_early_overlap_ok(lower_start, lower_end, name); | |
790 | if (upper_end) | |
791 | reserve_early_overlap_ok(upper_start, upper_end, name); | |
792 | } | |
793 | } | |
794 | ||
795 | static void __init __reserve_early(u64 start, u64 end, char *name, | |
796 | int overlap_ok) | |
d3fbe5ea HY |
797 | { |
798 | int i; | |
799 | struct early_res *r; | |
800 | ||
801 | i = find_overlapped_early(start, end); | |
a4c81cf6 YL |
802 | if (i >= MAX_EARLY_RES) |
803 | panic("Too many early reservations"); | |
804 | r = &early_res[i]; | |
d3fbe5ea HY |
805 | if (r->end) |
806 | panic("Overlapping early reservations " | |
807 | "%llx-%llx %s to %llx-%llx %s\n", | |
808 | start, end - 1, name?name:"", r->start, | |
809 | r->end - 1, r->name); | |
a4c81cf6 YL |
810 | r->start = start; |
811 | r->end = end; | |
c4ba1320 | 812 | r->overlap_ok = overlap_ok; |
a4c81cf6 YL |
813 | if (name) |
814 | strncpy(r->name, name, sizeof(r->name) - 1); | |
815 | } | |
816 | ||
c4ba1320 PJ |
817 | /* |
818 | * A few early reservtations come here. | |
819 | * | |
820 | * The 'overlap_ok' in the name of this routine does -not- mean it | |
821 | * is ok for these reservations to overlap an earlier reservation. | |
822 | * Rather it means that it is ok for subsequent reservations to | |
823 | * overlap this one. | |
824 | * | |
825 | * Use this entry point to reserve early ranges when you are doing | |
826 | * so out of "Paranoia", reserving perhaps more memory than you need, | |
827 | * just in case, and don't mind a subsequent overlapping reservation | |
828 | * that is known to be needed. | |
829 | * | |
830 | * The drop_overlaps_that_are_ok() call here isn't really needed. | |
831 | * It would be needed if we had two colliding 'overlap_ok' | |
832 | * reservations, so that the second such would not panic on the | |
833 | * overlap with the first. We don't have any such as of this | |
834 | * writing, but might as well tolerate such if it happens in | |
835 | * the future. | |
836 | */ | |
837 | void __init reserve_early_overlap_ok(u64 start, u64 end, char *name) | |
838 | { | |
839 | drop_overlaps_that_are_ok(start, end); | |
840 | __reserve_early(start, end, name, 1); | |
841 | } | |
842 | ||
843 | /* | |
844 | * Most early reservations come here. | |
845 | * | |
846 | * We first have drop_overlaps_that_are_ok() drop any pre-existing | |
847 | * 'overlap_ok' ranges, so that we can then reserve this memory | |
848 | * range without risk of panic'ing on an overlapping overlap_ok | |
849 | * early reservation. | |
850 | */ | |
851 | void __init reserve_early(u64 start, u64 end, char *name) | |
852 | { | |
853 | drop_overlaps_that_are_ok(start, end); | |
854 | __reserve_early(start, end, name, 0); | |
855 | } | |
856 | ||
a4c81cf6 YL |
857 | void __init free_early(u64 start, u64 end) |
858 | { | |
859 | struct early_res *r; | |
c4ba1320 | 860 | int i; |
a4c81cf6 | 861 | |
d3fbe5ea HY |
862 | i = find_overlapped_early(start, end); |
863 | r = &early_res[i]; | |
864 | if (i >= MAX_EARLY_RES || r->end != end || r->start != start) | |
a4c81cf6 | 865 | panic("free_early on not reserved area: %llx-%llx!", |
d3fbe5ea | 866 | start, end - 1); |
a4c81cf6 | 867 | |
c4ba1320 | 868 | drop_range(i); |
a4c81cf6 YL |
869 | } |
870 | ||
871 | void __init early_res_to_bootmem(u64 start, u64 end) | |
872 | { | |
ab67715c | 873 | int i, count; |
a4c81cf6 | 874 | u64 final_start, final_end; |
ab67715c YL |
875 | |
876 | count = 0; | |
877 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) | |
878 | count++; | |
879 | ||
5f1f2b3d YL |
880 | printk(KERN_INFO "(%d early reservations) ==> bootmem [%010llx - %010llx]\n", |
881 | count, start, end); | |
ab67715c | 882 | for (i = 0; i < count; i++) { |
a4c81cf6 | 883 | struct early_res *r = &early_res[i]; |
4fcc545a | 884 | printk(KERN_INFO " #%d [%010llx - %010llx] %16s", i, |
ab67715c | 885 | r->start, r->end, r->name); |
a4c81cf6 YL |
886 | final_start = max(start, r->start); |
887 | final_end = min(end, r->end); | |
ab67715c YL |
888 | if (final_start >= final_end) { |
889 | printk(KERN_CONT "\n"); | |
a4c81cf6 | 890 | continue; |
ab67715c | 891 | } |
4fcc545a | 892 | printk(KERN_CONT " ==> [%010llx - %010llx]\n", |
ab67715c | 893 | final_start, final_end); |
d2dbf343 | 894 | reserve_bootmem_generic(final_start, final_end - final_start, |
a4c81cf6 | 895 | BOOTMEM_DEFAULT); |
a4c81cf6 YL |
896 | } |
897 | } | |
898 | ||
899 | /* Check for already reserved areas */ | |
900 | static inline int __init bad_addr(u64 *addrp, u64 size, u64 align) | |
901 | { | |
902 | int i; | |
d3fbe5ea | 903 | u64 addr = *addrp; |
a4c81cf6 | 904 | int changed = 0; |
d3fbe5ea | 905 | struct early_res *r; |
a4c81cf6 | 906 | again: |
d3fbe5ea HY |
907 | i = find_overlapped_early(addr, addr + size); |
908 | r = &early_res[i]; | |
909 | if (i < MAX_EARLY_RES && r->end) { | |
910 | *addrp = addr = round_up(r->end, align); | |
911 | changed = 1; | |
912 | goto again; | |
a4c81cf6 YL |
913 | } |
914 | return changed; | |
915 | } | |
916 | ||
917 | /* Check for already reserved areas */ | |
918 | static inline int __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align) | |
919 | { | |
920 | int i; | |
921 | u64 addr = *addrp, last; | |
922 | u64 size = *sizep; | |
923 | int changed = 0; | |
924 | again: | |
925 | last = addr + size; | |
926 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | |
927 | struct early_res *r = &early_res[i]; | |
928 | if (last > r->start && addr < r->start) { | |
929 | size = r->start - addr; | |
930 | changed = 1; | |
931 | goto again; | |
932 | } | |
933 | if (last > r->end && addr < r->end) { | |
934 | addr = round_up(r->end, align); | |
935 | size = last - addr; | |
936 | changed = 1; | |
937 | goto again; | |
938 | } | |
939 | if (last <= r->end && addr >= r->start) { | |
940 | (*sizep)++; | |
941 | return 0; | |
942 | } | |
943 | } | |
944 | if (changed) { | |
945 | *addrp = addr; | |
946 | *sizep = size; | |
947 | } | |
948 | return changed; | |
949 | } | |
950 | ||
951 | /* | |
952 | * Find a free area with specified alignment in a specific range. | |
953 | */ | |
954 | u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align) | |
955 | { | |
956 | int i; | |
957 | ||
958 | for (i = 0; i < e820.nr_map; i++) { | |
959 | struct e820entry *ei = &e820.map[i]; | |
960 | u64 addr, last; | |
961 | u64 ei_last; | |
962 | ||
963 | if (ei->type != E820_RAM) | |
964 | continue; | |
965 | addr = round_up(ei->addr, align); | |
966 | ei_last = ei->addr + ei->size; | |
967 | if (addr < start) | |
968 | addr = round_up(start, align); | |
969 | if (addr >= ei_last) | |
970 | continue; | |
971 | while (bad_addr(&addr, size, align) && addr+size <= ei_last) | |
972 | ; | |
973 | last = addr + size; | |
974 | if (last > ei_last) | |
975 | continue; | |
976 | if (last > end) | |
977 | continue; | |
978 | return addr; | |
979 | } | |
980 | return -1ULL; | |
981 | } | |
982 | ||
983 | /* | |
984 | * Find next free range after *start | |
985 | */ | |
986 | u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align) | |
987 | { | |
988 | int i; | |
989 | ||
990 | for (i = 0; i < e820.nr_map; i++) { | |
991 | struct e820entry *ei = &e820.map[i]; | |
992 | u64 addr, last; | |
993 | u64 ei_last; | |
994 | ||
995 | if (ei->type != E820_RAM) | |
996 | continue; | |
997 | addr = round_up(ei->addr, align); | |
998 | ei_last = ei->addr + ei->size; | |
999 | if (addr < start) | |
1000 | addr = round_up(start, align); | |
1001 | if (addr >= ei_last) | |
1002 | continue; | |
1003 | *sizep = ei_last - addr; | |
1004 | while (bad_addr_size(&addr, sizep, align) && | |
1005 | addr + *sizep <= ei_last) | |
1006 | ; | |
1007 | last = addr + *sizep; | |
1008 | if (last > ei_last) | |
1009 | continue; | |
1010 | return addr; | |
1011 | } | |
1012 | return -1UL; | |
1013 | ||
1014 | } | |
2944e16b YL |
1015 | |
1016 | /* | |
1017 | * pre allocated 4k and reserved it in e820 | |
1018 | */ | |
1019 | u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align) | |
1020 | { | |
1021 | u64 size = 0; | |
1022 | u64 addr; | |
1023 | u64 start; | |
1024 | ||
1025 | start = startt; | |
1026 | while (size < sizet) | |
1027 | start = find_e820_area_size(start, &size, align); | |
1028 | ||
1029 | if (size < sizet) | |
1030 | return 0; | |
1031 | ||
1032 | addr = round_down(start + size - sizet, align); | |
d0be6bde | 1033 | e820_update_range(addr, sizet, E820_RAM, E820_RESERVED); |
fc9036ea | 1034 | e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED); |
2944e16b YL |
1035 | printk(KERN_INFO "update e820 for early_reserve_e820\n"); |
1036 | update_e820(); | |
fc9036ea | 1037 | update_e820_saved(); |
2944e16b YL |
1038 | |
1039 | return addr; | |
1040 | } | |
1041 | ||
ee0c80fa YL |
/*
 * Highest page frame number the architecture can address.
 * Every expansion is fully parenthesized so the macro can be used safely
 * inside larger expressions.
 */
#ifdef CONFIG_X86_32
# ifdef CONFIG_X86_PAE
#  define MAX_ARCH_PFN		(1ULL<<(36-PAGE_SHIFT))
# else
#  define MAX_ARCH_PFN		(1ULL<<(32-PAGE_SHIFT))
# endif
#else /* CONFIG_X86_32 */
# define MAX_ARCH_PFN		(MAXMEM >> PAGE_SHIFT)
#endif
1051 | ||
ee0c80fa YL |
1052 | /* |
1053 | * Find the highest page frame number we have available | |
1054 | */ | |
f361a450 | 1055 | static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type) |
ee0c80fa | 1056 | { |
2dc807b3 YL |
1057 | int i; |
1058 | unsigned long last_pfn = 0; | |
ee0c80fa YL |
1059 | unsigned long max_arch_pfn = MAX_ARCH_PFN; |
1060 | ||
2dc807b3 YL |
1061 | for (i = 0; i < e820.nr_map; i++) { |
1062 | struct e820entry *ei = &e820.map[i]; | |
f361a450 | 1063 | unsigned long start_pfn; |
2dc807b3 YL |
1064 | unsigned long end_pfn; |
1065 | ||
f361a450 | 1066 | if (ei->type != type) |
c22d4c18 | 1067 | continue; |
c22d4c18 | 1068 | |
f361a450 | 1069 | start_pfn = ei->addr >> PAGE_SHIFT; |
2dc807b3 | 1070 | end_pfn = (ei->addr + ei->size) >> PAGE_SHIFT; |
f361a450 YL |
1071 | |
1072 | if (start_pfn >= limit_pfn) | |
1073 | continue; | |
1074 | if (end_pfn > limit_pfn) { | |
1075 | last_pfn = limit_pfn; | |
1076 | break; | |
1077 | } | |
2dc807b3 YL |
1078 | if (end_pfn > last_pfn) |
1079 | last_pfn = end_pfn; | |
1080 | } | |
ee0c80fa YL |
1081 | |
1082 | if (last_pfn > max_arch_pfn) | |
1083 | last_pfn = max_arch_pfn; | |
ee0c80fa | 1084 | |
5dab8ec1 | 1085 | printk(KERN_INFO "last_pfn = %#lx max_arch_pfn = %#lx\n", |
ee0c80fa YL |
1086 | last_pfn, max_arch_pfn); |
1087 | return last_pfn; | |
1088 | } | |
f361a450 YL |
1089 | unsigned long __init e820_end_of_ram_pfn(void) |
1090 | { | |
1091 | return e820_end_pfn(MAX_ARCH_PFN, E820_RAM); | |
1092 | } | |
ee0c80fa | 1093 | |
f361a450 YL |
1094 | unsigned long __init e820_end_of_low_ram_pfn(void) |
1095 | { | |
1096 | return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM); | |
1097 | } | |
ee0c80fa YL |
1098 | /* |
1099 | * Finds an active region in the address range from start_pfn to last_pfn and | |
1100 | * returns its range in ei_startpfn and ei_endpfn for the e820 entry. | |
1101 | */ | |
1102 | int __init e820_find_active_region(const struct e820entry *ei, | |
1103 | unsigned long start_pfn, | |
1104 | unsigned long last_pfn, | |
1105 | unsigned long *ei_startpfn, | |
1106 | unsigned long *ei_endpfn) | |
1107 | { | |
1108 | u64 align = PAGE_SIZE; | |
1109 | ||
1110 | *ei_startpfn = round_up(ei->addr, align) >> PAGE_SHIFT; | |
1111 | *ei_endpfn = round_down(ei->addr + ei->size, align) >> PAGE_SHIFT; | |
1112 | ||
1113 | /* Skip map entries smaller than a page */ | |
1114 | if (*ei_startpfn >= *ei_endpfn) | |
1115 | return 0; | |
1116 | ||
1117 | /* Skip if map is outside the node */ | |
1118 | if (ei->type != E820_RAM || *ei_endpfn <= start_pfn || | |
1119 | *ei_startpfn >= last_pfn) | |
1120 | return 0; | |
1121 | ||
1122 | /* Check for overlaps */ | |
1123 | if (*ei_startpfn < start_pfn) | |
1124 | *ei_startpfn = start_pfn; | |
1125 | if (*ei_endpfn > last_pfn) | |
1126 | *ei_endpfn = last_pfn; | |
1127 | ||
ee0c80fa YL |
1128 | return 1; |
1129 | } | |
1130 | ||
1131 | /* Walk the e820 map and register active regions within a node */ | |
1132 | void __init e820_register_active_regions(int nid, unsigned long start_pfn, | |
1133 | unsigned long last_pfn) | |
1134 | { | |
1135 | unsigned long ei_startpfn; | |
1136 | unsigned long ei_endpfn; | |
1137 | int i; | |
1138 | ||
1139 | for (i = 0; i < e820.nr_map; i++) | |
1140 | if (e820_find_active_region(&e820.map[i], | |
1141 | start_pfn, last_pfn, | |
1142 | &ei_startpfn, &ei_endpfn)) | |
1143 | add_active_range(nid, ei_startpfn, ei_endpfn); | |
1144 | } | |
1145 | ||
1146 | /* | |
1147 | * Find the hole size (in bytes) in the memory range. | |
1148 | * @start: starting address of the memory range to scan | |
1149 | * @end: ending address of the memory range to scan | |
1150 | */ | |
1151 | u64 __init e820_hole_size(u64 start, u64 end) | |
1152 | { | |
1153 | unsigned long start_pfn = start >> PAGE_SHIFT; | |
1154 | unsigned long last_pfn = end >> PAGE_SHIFT; | |
1155 | unsigned long ei_startpfn, ei_endpfn, ram = 0; | |
1156 | int i; | |
1157 | ||
1158 | for (i = 0; i < e820.nr_map; i++) { | |
1159 | if (e820_find_active_region(&e820.map[i], | |
1160 | start_pfn, last_pfn, | |
1161 | &ei_startpfn, &ei_endpfn)) | |
1162 | ram += ei_endpfn - ei_startpfn; | |
1163 | } | |
1164 | return end - start - ((u64)ram << PAGE_SHIFT); | |
1165 | } | |
ab4a465e YL |
1166 | |
/*
 * Report a fatal error on the early console and then panic.
 *
 * @msg is printed verbatim: it is passed as an argument to a "%s" format
 * rather than being used as the format string itself, so a '%' in the
 * message cannot be misinterpreted as a conversion specifier.
 */
static void early_panic(char *msg)
{
	early_printk("%s", msg);
	panic("%s", msg);
}
1172 | ||
69a7704d YL |
1173 | static int userdef __initdata; |
1174 | ||
ab4a465e YL |
1175 | /* "mem=nopentium" disables the 4MB page tables. */ |
1176 | static int __init parse_memopt(char *p) | |
1177 | { | |
1178 | u64 mem_size; | |
1179 | ||
1180 | if (!p) | |
1181 | return -EINVAL; | |
1182 | ||
1183 | #ifdef CONFIG_X86_32 | |
1184 | if (!strcmp(p, "nopentium")) { | |
1185 | setup_clear_cpu_cap(X86_FEATURE_PSE); | |
1186 | return 0; | |
1187 | } | |
1188 | #endif | |
1189 | ||
69a7704d | 1190 | userdef = 1; |
ab4a465e | 1191 | mem_size = memparse(p, &p); |
69a7704d | 1192 | e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1); |
611dfd78 | 1193 | |
ab4a465e YL |
1194 | return 0; |
1195 | } | |
1196 | early_param("mem", parse_memopt); | |
1197 | ||
ab4a465e YL |
1198 | static int __init parse_memmap_opt(char *p) |
1199 | { | |
1200 | char *oldp; | |
1201 | u64 start_at, mem_size; | |
1202 | ||
a737abd1 CG |
1203 | if (!p) |
1204 | return -EINVAL; | |
1205 | ||
ab4a465e YL |
1206 | if (!strcmp(p, "exactmap")) { |
1207 | #ifdef CONFIG_CRASH_DUMP | |
1208 | /* | |
1209 | * If we are doing a crash dump, we still need to know | |
1210 | * the real mem size before original memory map is | |
1211 | * reset. | |
1212 | */ | |
f361a450 | 1213 | saved_max_pfn = e820_end_of_ram_pfn(); |
ab4a465e YL |
1214 | #endif |
1215 | e820.nr_map = 0; | |
1216 | userdef = 1; | |
1217 | return 0; | |
1218 | } | |
1219 | ||
1220 | oldp = p; | |
1221 | mem_size = memparse(p, &p); | |
1222 | if (p == oldp) | |
1223 | return -EINVAL; | |
1224 | ||
1225 | userdef = 1; | |
1226 | if (*p == '@') { | |
1227 | start_at = memparse(p+1, &p); | |
d0be6bde | 1228 | e820_add_region(start_at, mem_size, E820_RAM); |
ab4a465e YL |
1229 | } else if (*p == '#') { |
1230 | start_at = memparse(p+1, &p); | |
d0be6bde | 1231 | e820_add_region(start_at, mem_size, E820_ACPI); |
ab4a465e YL |
1232 | } else if (*p == '$') { |
1233 | start_at = memparse(p+1, &p); | |
d0be6bde | 1234 | e820_add_region(start_at, mem_size, E820_RESERVED); |
7b479bec | 1235 | } else |
69a7704d | 1236 | e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1); |
7b479bec | 1237 | |
ab4a465e YL |
1238 | return *p == '\0' ? 0 : -EINVAL; |
1239 | } | |
1240 | early_param("memmap", parse_memmap_opt); | |
1241 | ||
1242 | void __init finish_e820_parsing(void) | |
1243 | { | |
1244 | if (userdef) { | |
1245 | int nr = e820.nr_map; | |
1246 | ||
1247 | if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0) | |
1248 | early_panic("Invalid user supplied memory map"); | |
1249 | e820.nr_map = nr; | |
1250 | ||
1251 | printk(KERN_INFO "user-defined physical RAM map:\n"); | |
1252 | e820_print_map("user"); | |
1253 | } | |
1254 | } | |
41c094fd | 1255 | |
5dfcf14d BW |
1256 | static inline const char *e820_type_to_string(int e820_type) |
1257 | { | |
1258 | switch (e820_type) { | |
1259 | case E820_RESERVED_KERN: | |
1260 | case E820_RAM: return "System RAM"; | |
1261 | case E820_ACPI: return "ACPI Tables"; | |
1262 | case E820_NVS: return "ACPI Non-volatile Storage"; | |
1263 | default: return "reserved"; | |
1264 | } | |
1265 | } | |
1266 | ||
41c094fd YL |
1267 | /* |
1268 | * Mark e820 reserved areas as busy for the resource manager. | |
1269 | */ | |
1270 | void __init e820_reserve_resources(void) | |
1271 | { | |
1272 | int i; | |
1273 | struct resource *res; | |
b4df32f4 | 1274 | u64 end; |
41c094fd YL |
1275 | |
1276 | res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map); | |
1277 | for (i = 0; i < e820.nr_map; i++) { | |
b4df32f4 | 1278 | end = e820.map[i].addr + e820.map[i].size - 1; |
41c094fd | 1279 | #ifndef CONFIG_RESOURCES_64BIT |
b4df32f4 | 1280 | if (end > 0x100000000ULL) { |
41c094fd YL |
1281 | res++; |
1282 | continue; | |
1283 | } | |
1284 | #endif | |
5dfcf14d | 1285 | res->name = e820_type_to_string(e820.map[i].type); |
b4df32f4 YL |
1286 | res->start = e820.map[i].addr; |
1287 | res->end = end; | |
1288 | ||
41c094fd YL |
1289 | res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; |
1290 | insert_resource(&iomem_resource, res); | |
1291 | res++; | |
1292 | } | |
5dfcf14d BW |
1293 | |
1294 | for (i = 0; i < e820_saved.nr_map; i++) { | |
1295 | struct e820entry *entry = &e820_saved.map[i]; | |
1296 | firmware_map_add_early(entry->addr, | |
1297 | entry->addr + entry->size - 1, | |
1298 | e820_type_to_string(entry->type)); | |
1299 | } | |
41c094fd YL |
1300 | } |
1301 | ||
95a71a45 | 1302 | char *__init default_machine_specific_memory_setup(void) |
064d25f1 YL |
1303 | { |
1304 | char *who = "BIOS-e820"; | |
1305 | int new_nr; | |
1306 | /* | |
1307 | * Try to copy the BIOS-supplied E820-map. | |
1308 | * | |
1309 | * Otherwise fake a memory map; one section from 0k->640k, | |
1310 | * the next section from 1mb->appropriate_mem_k | |
1311 | */ | |
1312 | new_nr = boot_params.e820_entries; | |
1313 | sanitize_e820_map(boot_params.e820_map, | |
1314 | ARRAY_SIZE(boot_params.e820_map), | |
1315 | &new_nr); | |
1316 | boot_params.e820_entries = new_nr; | |
dc8e8120 YL |
1317 | if (append_e820_map(boot_params.e820_map, boot_params.e820_entries) |
1318 | < 0) { | |
95a71a45 | 1319 | u64 mem_size; |
064d25f1 YL |
1320 | |
1321 | /* compare results from other methods and take the greater */ | |
1322 | if (boot_params.alt_mem_k | |
1323 | < boot_params.screen_info.ext_mem_k) { | |
1324 | mem_size = boot_params.screen_info.ext_mem_k; | |
1325 | who = "BIOS-88"; | |
1326 | } else { | |
1327 | mem_size = boot_params.alt_mem_k; | |
1328 | who = "BIOS-e801"; | |
1329 | } | |
1330 | ||
1331 | e820.nr_map = 0; | |
1332 | e820_add_region(0, LOWMEMSIZE(), E820_RAM); | |
1333 | e820_add_region(HIGH_MEMORY, mem_size << 10, E820_RAM); | |
064d25f1 YL |
1334 | } |
1335 | ||
1336 | /* In case someone cares... */ | |
1337 | return who; | |
1338 | } | |
1339 | ||
95a71a45 YL |
1340 | char *__init __attribute__((weak)) machine_specific_memory_setup(void) |
1341 | { | |
3c9cb6de YL |
1342 | if (x86_quirks->arch_memory_setup) { |
1343 | char *who = x86_quirks->arch_memory_setup(); | |
3b33553b IM |
1344 | |
1345 | if (who) | |
1346 | return who; | |
1347 | } | |
95a71a45 YL |
1348 | return default_machine_specific_memory_setup(); |
1349 | } | |
1350 | ||
064d25f1 YL |
1351 | /* Overridden in paravirt.c if CONFIG_PARAVIRT */ |
1352 | char * __init __attribute__((weak)) memory_setup(void) | |
1353 | { | |
1354 | return machine_specific_memory_setup(); | |
1355 | } | |
1356 | ||
1357 | void __init setup_memory_map(void) | |
1358 | { | |
0be15526 YL |
1359 | char *who; |
1360 | ||
1361 | who = memory_setup(); | |
1362 | memcpy(&e820_saved, &e820, sizeof(struct e820map)); | |
064d25f1 | 1363 | printk(KERN_INFO "BIOS-provided physical RAM map:\n"); |
0be15526 | 1364 | e820_print_map(who); |
064d25f1 | 1365 | } |