Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * acpi_numa.c - ACPI NUMA support | |
3 | * | |
4 | * Copyright (C) 2002 Takayoshi Kochi <t-kochi@bq.jp.nec.com> | |
5 | * | |
6 | * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
7 | * | |
8 | * This program is free software; you can redistribute it and/or modify | |
9 | * it under the terms of the GNU General Public License as published by | |
10 | * the Free Software Foundation; either version 2 of the License, or | |
11 | * (at your option) any later version. | |
12 | * | |
13 | * This program is distributed in the hope that it will be useful, | |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | * GNU General Public License for more details. | |
17 | * | |
1da177e4 LT |
18 | * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
19 | * | |
20 | */ | |
21 | #include <linux/module.h> | |
1da177e4 LT |
22 | #include <linux/init.h> |
23 | #include <linux/kernel.h> | |
24 | #include <linux/types.h> | |
25 | #include <linux/errno.h> | |
26 | #include <linux/acpi.h> | |
b552a8c5 | 27 | #include <linux/numa.h> |
99759869 TK |
28 | #include <linux/nodemask.h> |
29 | #include <linux/topology.h> | |
1da177e4 | 30 | |
a192a958 LB |
31 | #define PREFIX "ACPI: " |
32 | ||
1da177e4 LT |
33 | #define ACPI_NUMA 0x80000000 |
34 | #define _COMPONENT ACPI_NUMA | |
f52fd66d | 35 | ACPI_MODULE_NAME("numa"); |
1da177e4 | 36 | |
762834e8 | 37 | static nodemask_t nodes_found_map = NODE_MASK_NONE; |
762834e8 YG |
38 | |
39 | /* maps to convert between proximity domain and logical node ID */ | |
ffada891 | 40 | static int pxm_to_node_map[MAX_PXM_DOMAINS] |
b552a8c5 | 41 | = { [0 ... MAX_PXM_DOMAINS - 1] = NUMA_NO_NODE }; |
ffada891 | 42 | static int node_to_pxm_map[MAX_NUMNODES] |
b552a8c5 | 43 | = { [0 ... MAX_NUMNODES - 1] = PXM_INVAL }; |
762834e8 | 44 | |
8df0eb7c KG |
45 | unsigned char acpi_srat_revision __initdata; |
46 | ||
f363d16f | 47 | int pxm_to_node(int pxm) |
762834e8 YG |
48 | { |
49 | if (pxm < 0) | |
b552a8c5 | 50 | return NUMA_NO_NODE; |
762834e8 YG |
51 | return pxm_to_node_map[pxm]; |
52 | } | |
53 | ||
f363d16f | 54 | int node_to_pxm(int node) |
762834e8 YG |
55 | { |
56 | if (node < 0) | |
57 | return PXM_INVAL; | |
58 | return node_to_pxm_map[node]; | |
59 | } | |
60 | ||
d79ed248 | 61 | static void __acpi_map_pxm_to_node(int pxm, int node) |
3484d798 | 62 | { |
0f9b75ef DR |
63 | if (pxm_to_node_map[pxm] == NUMA_NO_NODE || node < pxm_to_node_map[pxm]) |
64 | pxm_to_node_map[pxm] = node; | |
65 | if (node_to_pxm_map[node] == PXM_INVAL || pxm < node_to_pxm_map[node]) | |
66 | node_to_pxm_map[node] = pxm; | |
3484d798 DR |
67 | } |
68 | ||
8ff6f48d | 69 | int acpi_map_pxm_to_node(int pxm) |
762834e8 | 70 | { |
99759869 TK |
71 | int node; |
72 | ||
73 | if (pxm < 0 || pxm >= MAX_PXM_DOMAINS) | |
74 | return NUMA_NO_NODE; | |
75 | ||
76 | node = pxm_to_node_map[pxm]; | |
762834e8 | 77 | |
1bb25df0 | 78 | if (node == NUMA_NO_NODE) { |
762834e8 | 79 | if (nodes_weight(nodes_found_map) >= MAX_NUMNODES) |
b552a8c5 | 80 | return NUMA_NO_NODE; |
762834e8 | 81 | node = first_unset_node(nodes_found_map); |
3484d798 | 82 | __acpi_map_pxm_to_node(pxm, node); |
762834e8 YG |
83 | node_set(node, nodes_found_map); |
84 | } | |
85 | ||
86 | return node; | |
87 | } | |
88 | ||
99759869 TK |
89 | /** |
90 | * acpi_map_pxm_to_online_node - Map proximity ID to online node | |
91 | * @pxm: ACPI proximity ID | |
92 | * | |
93 | * This is similar to acpi_map_pxm_to_node(), but always returns an online | |
94 | * node. When the mapped node from a given proximity ID is offline, it | |
95 | * looks up the node distance table and returns the nearest online node. | |
96 | * | |
97 | * ACPI device drivers, which are called after the NUMA initialization has | |
98 | * completed in the kernel, can call this interface to obtain their device | |
99 | * NUMA topology from ACPI tables. Such drivers do not have to deal with | |
100 | * offline nodes. A node may be offline when a device proximity ID is | |
101 | * unique, SRAT memory entry does not exist, or NUMA is disabled, ex. | |
102 | * "numa=off" on x86. | |
103 | */ | |
104 | int acpi_map_pxm_to_online_node(int pxm) | |
105 | { | |
106 | int node, n, dist, min_dist; | |
107 | ||
108 | node = acpi_map_pxm_to_node(pxm); | |
109 | ||
110 | if (node == NUMA_NO_NODE) | |
111 | node = 0; | |
112 | ||
113 | if (!node_online(node)) { | |
114 | min_dist = INT_MAX; | |
115 | for_each_online_node(n) { | |
116 | dist = node_distance(node, n); | |
117 | if (dist < min_dist) { | |
118 | min_dist = dist; | |
119 | node = n; | |
120 | } | |
121 | } | |
122 | } | |
123 | ||
124 | return node; | |
125 | } | |
126 | EXPORT_SYMBOL(acpi_map_pxm_to_online_node); | |
127 | ||
ae2c6dcf DR |
128 | static void __init |
129 | acpi_table_print_srat_entry(struct acpi_subtable_header *header) | |
1da177e4 LT |
130 | { |
131 | ||
4be44fcd | 132 | ACPI_FUNCTION_NAME("acpi_table_print_srat_entry"); |
1da177e4 LT |
133 | |
134 | if (!header) | |
135 | return; | |
136 | ||
137 | switch (header->type) { | |
138 | ||
15a58ed1 | 139 | case ACPI_SRAT_TYPE_CPU_AFFINITY: |
1da177e4 | 140 | #ifdef ACPI_DEBUG_OUTPUT |
4be44fcd | 141 | { |
15a58ed1 AS |
142 | struct acpi_srat_cpu_affinity *p = |
143 | (struct acpi_srat_cpu_affinity *)header; | |
4be44fcd LB |
144 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, |
145 | "SRAT Processor (id[0x%02x] eid[0x%02x]) in proximity domain %d %s\n", | |
15a58ed1 AS |
146 | p->apic_id, p->local_sapic_eid, |
147 | p->proximity_domain_lo, | |
148 | (p->flags & ACPI_SRAT_CPU_ENABLED)? | |
149 | "enabled" : "disabled")); | |
4be44fcd LB |
150 | } |
151 | #endif /* ACPI_DEBUG_OUTPUT */ | |
1da177e4 LT |
152 | break; |
153 | ||
15a58ed1 | 154 | case ACPI_SRAT_TYPE_MEMORY_AFFINITY: |
1da177e4 | 155 | #ifdef ACPI_DEBUG_OUTPUT |
4be44fcd | 156 | { |
15a58ed1 AS |
157 | struct acpi_srat_mem_affinity *p = |
158 | (struct acpi_srat_mem_affinity *)header; | |
4be44fcd | 159 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, |
208f6cc9 | 160 | "SRAT Memory (0x%lx length 0x%lx) in proximity domain %d %s%s%s\n", |
15a58ed1 AS |
161 | (unsigned long)p->base_address, |
162 | (unsigned long)p->length, | |
19d0cfe9 | 163 | p->proximity_domain, |
15a58ed1 AS |
164 | (p->flags & ACPI_SRAT_MEM_ENABLED)? |
165 | "enabled" : "disabled", | |
166 | (p->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)? | |
208f6cc9 DB |
167 | " hot-pluggable" : "", |
168 | (p->flags & ACPI_SRAT_MEM_NON_VOLATILE)? | |
169 | " non-volatile" : "")); | |
4be44fcd LB |
170 | } |
171 | #endif /* ACPI_DEBUG_OUTPUT */ | |
1da177e4 LT |
172 | break; |
173 | ||
7237d3de SS |
174 | case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY: |
175 | #ifdef ACPI_DEBUG_OUTPUT | |
176 | { | |
177 | struct acpi_srat_x2apic_cpu_affinity *p = | |
178 | (struct acpi_srat_x2apic_cpu_affinity *)header; | |
179 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, | |
180 | "SRAT Processor (x2apicid[0x%08x]) in" | |
181 | " proximity domain %d %s\n", | |
182 | p->apic_id, | |
183 | p->proximity_domain, | |
184 | (p->flags & ACPI_SRAT_CPU_ENABLED) ? | |
185 | "enabled" : "disabled")); | |
186 | } | |
187 | #endif /* ACPI_DEBUG_OUTPUT */ | |
188 | break; | |
1da177e4 | 189 | default: |
4be44fcd LB |
190 | printk(KERN_WARNING PREFIX |
191 | "Found unsupported SRAT entry (type = 0x%x)\n", | |
192 | header->type); | |
1da177e4 LT |
193 | break; |
194 | } | |
195 | } | |
196 | ||
39b8931b FY |
197 | /* |
198 | * A lot of BIOS fill in 10 (= no distance) everywhere. This messes | |
199 | * up the NUMA heuristics which wants the local node to have a smaller | |
200 | * distance than the others. | |
201 | * Do some quick checks here and only use the SLIT if it passes. | |
202 | */ | |
40e31856 | 203 | static int __init slit_valid(struct acpi_table_slit *slit) |
39b8931b FY |
204 | { |
205 | int i, j; | |
206 | int d = slit->locality_count; | |
207 | for (i = 0; i < d; i++) { | |
208 | for (j = 0; j < d; j++) { | |
209 | u8 val = slit->entry[d*i + j]; | |
210 | if (i == j) { | |
211 | if (val != LOCAL_DISTANCE) | |
212 | return 0; | |
213 | } else if (val <= LOCAL_DISTANCE) | |
214 | return 0; | |
215 | } | |
216 | } | |
217 | return 1; | |
218 | } | |
219 | ||
15a58ed1 | 220 | static int __init acpi_parse_slit(struct acpi_table_header *table) |
1da177e4 | 221 | { |
2fad9308 | 222 | struct acpi_table_slit *slit = (struct acpi_table_slit *)table; |
1da177e4 | 223 | |
39b8931b FY |
224 | if (!slit_valid(slit)) { |
225 | printk(KERN_INFO "ACPI: SLIT table looks invalid. Not used.\n"); | |
226 | return -EINVAL; | |
227 | } | |
1da177e4 LT |
228 | acpi_numa_slit_init(slit); |
229 | ||
230 | return 0; | |
231 | } | |
232 | ||
beffbe54 | 233 | void __init __weak |
7237d3de SS |
234 | acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa) |
235 | { | |
236 | printk(KERN_WARNING PREFIX | |
237 | "Found unsupported x2apic [0x%08x] SRAT entry\n", pa->apic_id); | |
238 | return; | |
239 | } | |
240 | ||
241 | ||
242 | static int __init | |
243 | acpi_parse_x2apic_affinity(struct acpi_subtable_header *header, | |
244 | const unsigned long end) | |
245 | { | |
246 | struct acpi_srat_x2apic_cpu_affinity *processor_affinity; | |
247 | ||
248 | processor_affinity = (struct acpi_srat_x2apic_cpu_affinity *)header; | |
249 | if (!processor_affinity) | |
250 | return -EINVAL; | |
251 | ||
252 | acpi_table_print_srat_entry(header); | |
253 | ||
254 | /* let architecture-dependent part to do it */ | |
255 | acpi_numa_x2apic_affinity_init(processor_affinity); | |
256 | ||
257 | return 0; | |
258 | } | |
259 | ||
1da177e4 | 260 | static int __init |
7237d3de | 261 | acpi_parse_processor_affinity(struct acpi_subtable_header *header, |
4be44fcd | 262 | const unsigned long end) |
1da177e4 | 263 | { |
15a58ed1 | 264 | struct acpi_srat_cpu_affinity *processor_affinity; |
1da177e4 | 265 | |
15a58ed1 | 266 | processor_affinity = (struct acpi_srat_cpu_affinity *)header; |
1da177e4 LT |
267 | if (!processor_affinity) |
268 | return -EINVAL; | |
269 | ||
270 | acpi_table_print_srat_entry(header); | |
271 | ||
272 | /* let architecture-dependent part to do it */ | |
273 | acpi_numa_processor_affinity_init(processor_affinity); | |
274 | ||
275 | return 0; | |
276 | } | |
277 | ||
095adbb6 TR |
278 | static int __initdata parsed_numa_memblks; |
279 | ||
1da177e4 | 280 | static int __init |
15a58ed1 | 281 | acpi_parse_memory_affinity(struct acpi_subtable_header * header, |
4be44fcd | 282 | const unsigned long end) |
1da177e4 | 283 | { |
15a58ed1 | 284 | struct acpi_srat_mem_affinity *memory_affinity; |
1da177e4 | 285 | |
15a58ed1 | 286 | memory_affinity = (struct acpi_srat_mem_affinity *)header; |
1da177e4 LT |
287 | if (!memory_affinity) |
288 | return -EINVAL; | |
289 | ||
290 | acpi_table_print_srat_entry(header); | |
291 | ||
292 | /* let architecture-dependent part to do it */ | |
095adbb6 TR |
293 | if (!acpi_numa_memory_affinity_init(memory_affinity)) |
294 | parsed_numa_memblks++; | |
1da177e4 LT |
295 | return 0; |
296 | } | |
297 | ||
15a58ed1 | 298 | static int __init acpi_parse_srat(struct acpi_table_header *table) |
1da177e4 | 299 | { |
2fad9308 | 300 | struct acpi_table_srat *srat = (struct acpi_table_srat *)table; |
1da177e4 | 301 | |
8df0eb7c KG |
302 | acpi_srat_revision = srat->header.revision; |
303 | ||
cfa806f0 | 304 | /* Real work done in acpi_table_parse_srat below. */ |
1da177e4 LT |
305 | |
306 | return 0; | |
307 | } | |
308 | ||
ae2c6dcf | 309 | static int __init |
15a58ed1 | 310 | acpi_table_parse_srat(enum acpi_srat_type id, |
b43e1065 | 311 | acpi_tbl_entry_handler handler, unsigned int max_entries) |
1da177e4 | 312 | { |
6eb87fed | 313 | return acpi_table_parse_entries(ACPI_SIG_SRAT, |
4be44fcd LB |
314 | sizeof(struct acpi_table_srat), id, |
315 | handler, max_entries); | |
1da177e4 LT |
316 | } |
317 | ||
20e6926d | 318 | int __init acpi_numa_init(void) |
e8d19552 | 319 | { |
20e6926d YL |
320 | int cnt = 0; |
321 | ||
d3bd0588 YL |
322 | /* |
323 | * Should not limit number with cpu num that is from NR_CPUS or nr_cpus= | |
324 | * SRAT cpu entries could have different order with that in MADT. | |
325 | * So go over all cpu entries in SRAT to get apicid to node mapping. | |
326 | */ | |
8716273c | 327 | |
1da177e4 | 328 | /* SRAT: Static Resource Affinity Table */ |
7f8f97c3 | 329 | if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) { |
702b07fc LA |
330 | struct acpi_subtable_proc srat_proc[2]; |
331 | ||
332 | memset(srat_proc, 0, sizeof(srat_proc)); | |
333 | srat_proc[0].id = ACPI_SRAT_TYPE_CPU_AFFINITY; | |
334 | srat_proc[0].handler = acpi_parse_processor_affinity; | |
335 | srat_proc[1].id = ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY; | |
336 | srat_proc[1].handler = acpi_parse_x2apic_affinity; | |
337 | ||
338 | acpi_table_parse_entries_array(ACPI_SIG_SRAT, | |
339 | sizeof(struct acpi_table_srat), | |
340 | srat_proc, ARRAY_SIZE(srat_proc), 0); | |
341 | ||
20e6926d YL |
342 | cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY, |
343 | acpi_parse_memory_affinity, | |
344 | NR_NODE_MEMBLKS); | |
1da177e4 LT |
345 | } |
346 | ||
347 | /* SLIT: System Locality Information Table */ | |
7f8f97c3 | 348 | acpi_table_parse(ACPI_SIG_SLIT, acpi_parse_slit); |
1da177e4 LT |
349 | |
350 | acpi_numa_arch_fixup(); | |
940fed2e | 351 | |
20e6926d YL |
352 | if (cnt < 0) |
353 | return cnt; | |
095adbb6 | 354 | else if (!parsed_numa_memblks) |
f3946fb6 | 355 | return -ENOENT; |
940fed2e | 356 | return 0; |
1da177e4 LT |
357 | } |
358 | ||
d79ed248 | 359 | static int acpi_get_pxm(acpi_handle h) |
1da177e4 | 360 | { |
27663c58 | 361 | unsigned long long pxm; |
1da177e4 LT |
362 | acpi_status status; |
363 | acpi_handle handle; | |
364 | acpi_handle phandle = h; | |
365 | ||
366 | do { | |
367 | handle = phandle; | |
368 | status = acpi_evaluate_integer(handle, "_PXM", NULL, &pxm); | |
369 | if (ACPI_SUCCESS(status)) | |
50dd0969 | 370 | return pxm; |
1da177e4 | 371 | status = acpi_get_parent(handle, &phandle); |
4be44fcd | 372 | } while (ACPI_SUCCESS(status)); |
1da177e4 LT |
373 | return -1; |
374 | } | |
1e3590e2 | 375 | |
486c79b5 | 376 | int acpi_get_node(acpi_handle handle) |
1e3590e2 | 377 | { |
962fe9c9 | 378 | int pxm; |
1e3590e2 | 379 | |
1e3590e2 | 380 | pxm = acpi_get_pxm(handle); |
1e3590e2 | 381 | |
962fe9c9 | 382 | return acpi_map_pxm_to_node(pxm); |
1e3590e2 YG |
383 | } |
384 | EXPORT_SYMBOL(acpi_get_node); |