Commit | Line | Data |
---|---|---|
0332c2d4 ME |
1 | /* |
2 | * pseries CPU Hotplug infrastructure. | |
3 | * | |
413f7c40 ME |
4 | * Split out from arch/powerpc/platforms/pseries/setup.c |
5 | * arch/powerpc/kernel/rtas.c, and arch/powerpc/platforms/pseries/smp.c | |
0332c2d4 ME |
6 | * |
7 | * Peter Bergner, IBM March 2001. | |
8 | * Copyright (C) 2001 IBM. | |
413f7c40 ME |
9 | * Dave Engebretsen, Peter Bergner, and |
10 | * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com | |
11 | * Plus various changes from other IBM teams... | |
0332c2d4 ME |
12 | * |
13 | * Copyright (C) 2006 Michael Ellerman, IBM Corporation | |
14 | * | |
15 | * This program is free software; you can redistribute it and/or | |
16 | * modify it under the terms of the GNU General Public License | |
17 | * as published by the Free Software Foundation; either version | |
18 | * 2 of the License, or (at your option) any later version. | |
19 | */ | |
20 | ||
21 | #include <linux/kernel.h> | |
0b05ac6e | 22 | #include <linux/interrupt.h> |
0332c2d4 | 23 | #include <linux/delay.h> |
62fe91bb | 24 | #include <linux/sched.h> /* for idle_task_exit */ |
0332c2d4 | 25 | #include <linux/cpu.h> |
1cf3d8b3 | 26 | #include <linux/of.h> |
0332c2d4 ME |
27 | #include <asm/prom.h> |
28 | #include <asm/rtas.h> | |
29 | #include <asm/firmware.h> | |
30 | #include <asm/machdep.h> | |
31 | #include <asm/vdso_datapage.h> | |
0b05ac6e | 32 | #include <asm/xics.h> |
212bebb4 DD |
33 | #include <asm/plpar_wrappers.h> |
34 | ||
3aa565f5 | 35 | #include "offline_states.h" |
0332c2d4 ME |
36 | |
37 | /* This version can't take the spinlock, because it never returns */ | |
41dd03a9 | 38 | static int rtas_stop_self_token = RTAS_UNKNOWN_SERVICE; |
0332c2d4 | 39 | |
3aa565f5 GS |
40 | static DEFINE_PER_CPU(enum cpu_state_vals, preferred_offline_state) = |
41 | CPU_STATE_OFFLINE; | |
42 | static DEFINE_PER_CPU(enum cpu_state_vals, current_state) = CPU_STATE_OFFLINE; | |
43 | ||
44 | static enum cpu_state_vals default_offline_state = CPU_STATE_OFFLINE; | |
45 | ||
46 | static int cede_offline_enabled __read_mostly = 1; | |
47 | ||
48 | /* | |
49 | * Enable/disable cede_offline when available. | |
50 | */ | |
51 | static int __init setup_cede_offline(char *str) | |
52 | { | |
53 | if (!strcmp(str, "off")) | |
54 | cede_offline_enabled = 0; | |
55 | else if (!strcmp(str, "on")) | |
56 | cede_offline_enabled = 1; | |
57 | else | |
58 | return 0; | |
59 | return 1; | |
60 | } | |
61 | ||
62 | __setup("cede_offline=", setup_cede_offline); | |
63 | ||
64 | enum cpu_state_vals get_cpu_current_state(int cpu) | |
65 | { | |
66 | return per_cpu(current_state, cpu); | |
67 | } | |
68 | ||
69 | void set_cpu_current_state(int cpu, enum cpu_state_vals state) | |
70 | { | |
71 | per_cpu(current_state, cpu) = state; | |
72 | } | |
73 | ||
74 | enum cpu_state_vals get_preferred_offline_state(int cpu) | |
75 | { | |
76 | return per_cpu(preferred_offline_state, cpu); | |
77 | } | |
78 | ||
79 | void set_preferred_offline_state(int cpu, enum cpu_state_vals state) | |
80 | { | |
81 | per_cpu(preferred_offline_state, cpu) = state; | |
82 | } | |
83 | ||
84 | void set_default_offline_state(int cpu) | |
85 | { | |
86 | per_cpu(preferred_offline_state, cpu) = default_offline_state; | |
87 | } | |
88 | ||
04da6af9 | 89 | static void rtas_stop_self(void) |
0332c2d4 | 90 | { |
4fb8d027 | 91 | static struct rtas_args args = { |
41dd03a9 | 92 | .nargs = 0, |
e36d1227 | 93 | .nret = cpu_to_be32(1), |
41dd03a9 TB |
94 | .rets = &args.args[0], |
95 | }; | |
0332c2d4 | 96 | |
4fb8d027 LZ |
97 | args.token = cpu_to_be32(rtas_stop_self_token); |
98 | ||
0332c2d4 ME |
99 | local_irq_disable(); |
100 | ||
41dd03a9 | 101 | BUG_ON(rtas_stop_self_token == RTAS_UNKNOWN_SERVICE); |
0332c2d4 ME |
102 | |
103 | printk("cpu %u (hwid %u) Ready to die...\n", | |
104 | smp_processor_id(), hard_smp_processor_id()); | |
41dd03a9 | 105 | enter_rtas(__pa(&args)); |
0332c2d4 ME |
106 | |
107 | panic("Alas, I survived.\n"); | |
108 | } | |
109 | ||
06ba30b6 | 110 | static void pseries_mach_cpu_die(void) |
04da6af9 | 111 | { |
3aa565f5 GS |
112 | unsigned int cpu = smp_processor_id(); |
113 | unsigned int hwcpu = hard_smp_processor_id(); | |
114 | u8 cede_latency_hint = 0; | |
115 | ||
04da6af9 ME |
116 | local_irq_disable(); |
117 | idle_task_exit(); | |
c3e8506c | 118 | xics_teardown_cpu(); |
3aa565f5 GS |
119 | |
120 | if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) { | |
121 | set_cpu_current_state(cpu, CPU_STATE_INACTIVE); | |
32d8ad4e BK |
122 | if (ppc_md.suspend_disable_cpu) |
123 | ppc_md.suspend_disable_cpu(); | |
124 | ||
3aa565f5 GS |
125 | cede_latency_hint = 2; |
126 | ||
127 | get_lppaca()->idle = 1; | |
f13c13a0 | 128 | if (!lppaca_shared_proc(get_lppaca())) |
3aa565f5 GS |
129 | get_lppaca()->donate_dedicated_cpu = 1; |
130 | ||
3aa565f5 | 131 | while (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) { |
fb912568 LZ |
132 | while (!prep_irq_for_idle()) { |
133 | local_irq_enable(); | |
134 | local_irq_disable(); | |
135 | } | |
136 | ||
3aa565f5 | 137 | extended_cede_processor(cede_latency_hint); |
3aa565f5 GS |
138 | } |
139 | ||
fb912568 LZ |
140 | local_irq_disable(); |
141 | ||
f13c13a0 | 142 | if (!lppaca_shared_proc(get_lppaca())) |
3aa565f5 GS |
143 | get_lppaca()->donate_dedicated_cpu = 0; |
144 | get_lppaca()->idle = 0; | |
3aa565f5 | 145 | |
0212f260 | 146 | if (get_preferred_offline_state(cpu) == CPU_STATE_ONLINE) { |
598c8231 | 147 | unregister_slb_shadow(hwcpu); |
3aa565f5 | 148 | |
fb912568 | 149 | hard_irq_disable(); |
0212f260 VS |
150 | /* |
151 | * Call to start_secondary_resume() will not return. | |
152 | * Kernel stack will be reset and start_secondary() | |
153 | * will be called to continue the online operation. | |
154 | */ | |
155 | start_secondary_resume(); | |
156 | } | |
157 | } | |
3aa565f5 | 158 | |
0212f260 VS |
159 | /* Requested state is CPU_STATE_OFFLINE at this point */ |
160 | WARN_ON(get_preferred_offline_state(cpu) != CPU_STATE_OFFLINE); | |
3aa565f5 | 161 | |
0212f260 | 162 | set_cpu_current_state(cpu, CPU_STATE_OFFLINE); |
598c8231 | 163 | unregister_slb_shadow(hwcpu); |
0212f260 | 164 | rtas_stop_self(); |
3aa565f5 | 165 | |
04da6af9 ME |
166 | /* Should never get here... */ |
167 | BUG(); | |
168 | for(;;); | |
169 | } | |
170 | ||
06ba30b6 | 171 | static int pseries_cpu_disable(void) |
413f7c40 ME |
172 | { |
173 | int cpu = smp_processor_id(); | |
174 | ||
ea0f1cab | 175 | set_cpu_online(cpu, false); |
413f7c40 ME |
176 | vdso_data->processorCount--; |
177 | ||
178 | /*fix boot_cpuid here*/ | |
179 | if (cpu == boot_cpuid) | |
8729faaa | 180 | boot_cpuid = cpumask_any(cpu_online_mask); |
413f7c40 ME |
181 | |
182 | /* FIXME: abstract this to not be platform specific later on */ | |
183 | xics_migrate_irqs_away(); | |
184 | return 0; | |
185 | } | |
186 | ||
3aa565f5 GS |
187 | /* |
188 | * pseries_cpu_die: Wait for the cpu to die. | |
189 | * @cpu: logical processor id of the CPU whose death we're awaiting. | |
190 | * | |
191 | * This function is called from the context of the thread which is performing | |
192 | * the cpu-offline. Here we wait for long enough to allow the cpu in question | |
193 | * to self-destroy so that the cpu-offline thread can send the CPU_DEAD | |
194 | * notifications. | |
195 | * | |
196 | * OTOH, pseries_mach_cpu_die() is called by the @cpu when it wants to | |
197 | * self-destruct. | |
198 | */ | |
06ba30b6 | 199 | static void pseries_cpu_die(unsigned int cpu) |
413f7c40 ME |
200 | { |
201 | int tries; | |
3aa565f5 | 202 | int cpu_status = 1; |
413f7c40 ME |
203 | unsigned int pcpu = get_hard_smp_processor_id(cpu); |
204 | ||
3aa565f5 GS |
205 | if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) { |
206 | cpu_status = 1; | |
940ce422 | 207 | for (tries = 0; tries < 5000; tries++) { |
3aa565f5 GS |
208 | if (get_cpu_current_state(cpu) == CPU_STATE_INACTIVE) { |
209 | cpu_status = 0; | |
210 | break; | |
211 | } | |
940ce422 | 212 | msleep(1); |
3aa565f5 GS |
213 | } |
214 | } else if (get_preferred_offline_state(cpu) == CPU_STATE_OFFLINE) { | |
215 | ||
216 | for (tries = 0; tries < 25; tries++) { | |
f8b67691 MN |
217 | cpu_status = smp_query_cpu_stopped(pcpu); |
218 | if (cpu_status == QCSS_STOPPED || | |
219 | cpu_status == QCSS_HARDWARE_ERROR) | |
3aa565f5 GS |
220 | break; |
221 | cpu_relax(); | |
222 | } | |
413f7c40 | 223 | } |
3aa565f5 | 224 | |
413f7c40 ME |
225 | if (cpu_status != 0) { |
226 | printk("Querying DEAD? cpu %i (%i) shows %i\n", | |
227 | cpu, pcpu, cpu_status); | |
228 | } | |
229 | ||
25985edc | 230 | /* Isolation and deallocation are definitely done by |
413f7c40 ME |
231 | * drslot_chrp_cpu. If they were not they would be |
232 | * done here. Change isolate state to Isolate and | |
233 | * change allocation-state to Unusable. | |
234 | */ | |
235 | paca[cpu].cpu_start = 0; | |
236 | } | |
237 | ||
238 | /* | |
828a6986 | 239 | * Update cpu_present_mask and paca(s) for a new cpu node. The wrinkle |
413f7c40 ME |
240 | * here is that a cpu device node may represent up to two logical cpus |
241 | * in the SMT case. We must honor the assumption in other code that | |
242 | * the logical ids for sibling SMT threads x and y are adjacent, such | |
243 | * that x^1 == y and y^1 == x. | |
244 | */ | |
06ba30b6 | 245 | static int pseries_add_processor(struct device_node *np) |
413f7c40 ME |
246 | { |
247 | unsigned int cpu; | |
8729faaa | 248 | cpumask_var_t candidate_mask, tmp; |
413f7c40 | 249 | int err = -ENOSPC, len, nthreads, i; |
d6f1e7ab | 250 | const __be32 *intserv; |
413f7c40 | 251 | |
e2eb6392 | 252 | intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", &len); |
413f7c40 ME |
253 | if (!intserv) |
254 | return 0; | |
255 | ||
8729faaa AB |
256 | zalloc_cpumask_var(&candidate_mask, GFP_KERNEL); |
257 | zalloc_cpumask_var(&tmp, GFP_KERNEL); | |
258 | ||
413f7c40 ME |
259 | nthreads = len / sizeof(u32); |
260 | for (i = 0; i < nthreads; i++) | |
8729faaa | 261 | cpumask_set_cpu(i, tmp); |
413f7c40 | 262 | |
86ef5c9a | 263 | cpu_maps_update_begin(); |
413f7c40 | 264 | |
8729faaa | 265 | BUG_ON(!cpumask_subset(cpu_present_mask, cpu_possible_mask)); |
413f7c40 ME |
266 | |
267 | /* Get a bitmap of unoccupied slots. */ | |
8729faaa AB |
268 | cpumask_xor(candidate_mask, cpu_possible_mask, cpu_present_mask); |
269 | if (cpumask_empty(candidate_mask)) { | |
413f7c40 ME |
270 | /* If we get here, it most likely means that NR_CPUS is |
271 | * less than the partition's max processors setting. | |
272 | */ | |
273 | printk(KERN_ERR "Cannot add cpu %s; this system configuration" | |
274 | " supports %d logical cpus.\n", np->full_name, | |
8729faaa | 275 | cpumask_weight(cpu_possible_mask)); |
413f7c40 ME |
276 | goto out_unlock; |
277 | } | |
278 | ||
8729faaa AB |
279 | while (!cpumask_empty(tmp)) |
280 | if (cpumask_subset(tmp, candidate_mask)) | |
413f7c40 ME |
281 | /* Found a range where we can insert the new cpu(s) */ |
282 | break; | |
283 | else | |
8729faaa | 284 | cpumask_shift_left(tmp, tmp, nthreads); |
413f7c40 | 285 | |
8729faaa | 286 | if (cpumask_empty(tmp)) { |
828a6986 | 287 | printk(KERN_ERR "Unable to find space in cpu_present_mask for" |
413f7c40 ME |
288 | " processor %s with %d thread(s)\n", np->name, |
289 | nthreads); | |
290 | goto out_unlock; | |
291 | } | |
292 | ||
8729faaa | 293 | for_each_cpu(cpu, tmp) { |
104699c0 | 294 | BUG_ON(cpu_present(cpu)); |
ea0f1cab | 295 | set_cpu_present(cpu, true); |
d6f1e7ab | 296 | set_hard_smp_processor_id(cpu, be32_to_cpu(*intserv++)); |
413f7c40 ME |
297 | } |
298 | err = 0; | |
299 | out_unlock: | |
86ef5c9a | 300 | cpu_maps_update_done(); |
8729faaa AB |
301 | free_cpumask_var(candidate_mask); |
302 | free_cpumask_var(tmp); | |
413f7c40 ME |
303 | return err; |
304 | } | |
305 | ||
306 | /* | |
307 | * Update the present map for a cpu node which is going away, and set | |
308 | * the hard id in the paca(s) to -1 to be consistent with boot time | |
309 | * convention for non-present cpus. | |
310 | */ | |
06ba30b6 | 311 | static void pseries_remove_processor(struct device_node *np) |
413f7c40 ME |
312 | { |
313 | unsigned int cpu; | |
314 | int len, nthreads, i; | |
e36d1227 TF |
315 | const __be32 *intserv; |
316 | u32 thread; | |
413f7c40 | 317 | |
e2eb6392 | 318 | intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", &len); |
413f7c40 ME |
319 | if (!intserv) |
320 | return; | |
321 | ||
322 | nthreads = len / sizeof(u32); | |
323 | ||
86ef5c9a | 324 | cpu_maps_update_begin(); |
413f7c40 | 325 | for (i = 0; i < nthreads; i++) { |
e36d1227 | 326 | thread = be32_to_cpu(intserv[i]); |
413f7c40 | 327 | for_each_present_cpu(cpu) { |
e36d1227 | 328 | if (get_hard_smp_processor_id(cpu) != thread) |
413f7c40 ME |
329 | continue; |
330 | BUG_ON(cpu_online(cpu)); | |
ea0f1cab | 331 | set_cpu_present(cpu, false); |
413f7c40 ME |
332 | set_hard_smp_processor_id(cpu, -1); |
333 | break; | |
334 | } | |
8729faaa | 335 | if (cpu >= nr_cpu_ids) |
413f7c40 | 336 | printk(KERN_WARNING "Could not find cpu to remove " |
e36d1227 | 337 | "with physical id 0x%x\n", thread); |
413f7c40 | 338 | } |
86ef5c9a | 339 | cpu_maps_update_done(); |
413f7c40 ME |
340 | } |
341 | ||
06ba30b6 ME |
342 | static int pseries_smp_notifier(struct notifier_block *nb, |
343 | unsigned long action, void *node) | |
413f7c40 | 344 | { |
de2780a3 | 345 | int err = 0; |
413f7c40 ME |
346 | |
347 | switch (action) { | |
1cf3d8b3 | 348 | case OF_RECONFIG_ATTACH_NODE: |
de2780a3 | 349 | err = pseries_add_processor(node); |
413f7c40 | 350 | break; |
1cf3d8b3 | 351 | case OF_RECONFIG_DETACH_NODE: |
06ba30b6 | 352 | pseries_remove_processor(node); |
413f7c40 | 353 | break; |
413f7c40 | 354 | } |
de2780a3 | 355 | return notifier_from_errno(err); |
413f7c40 ME |
356 | } |
357 | ||
06ba30b6 ME |
358 | static struct notifier_block pseries_smp_nb = { |
359 | .notifier_call = pseries_smp_notifier, | |
413f7c40 ME |
360 | }; |
361 | ||
3aa565f5 GS |
362 | #define MAX_CEDE_LATENCY_LEVELS 4 |
363 | #define CEDE_LATENCY_PARAM_LENGTH 10 | |
364 | #define CEDE_LATENCY_PARAM_MAX_LENGTH \ | |
365 | (MAX_CEDE_LATENCY_LEVELS * CEDE_LATENCY_PARAM_LENGTH * sizeof(char)) | |
366 | #define CEDE_LATENCY_TOKEN 45 | |
367 | ||
368 | static char cede_parameters[CEDE_LATENCY_PARAM_MAX_LENGTH]; | |
369 | ||
370 | static int parse_cede_parameters(void) | |
371 | { | |
3aa565f5 | 372 | memset(cede_parameters, 0, CEDE_LATENCY_PARAM_MAX_LENGTH); |
20a8ab97 AB |
373 | return rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1, |
374 | NULL, | |
375 | CEDE_LATENCY_TOKEN, | |
376 | __pa(cede_parameters), | |
377 | CEDE_LATENCY_PARAM_MAX_LENGTH); | |
3aa565f5 GS |
378 | } |
379 | ||
0332c2d4 ME |
380 | static int __init pseries_cpu_hotplug_init(void) |
381 | { | |
64f27585 OJ |
382 | struct device_node *np; |
383 | const char *typep; | |
3aa565f5 | 384 | int cpu; |
f8b67691 | 385 | int qcss_tok; |
64f27585 OJ |
386 | |
387 | for_each_node_by_name(np, "interrupt-controller") { | |
388 | typep = of_get_property(np, "compatible", NULL); | |
389 | if (strstr(typep, "open-pic")) { | |
390 | of_node_put(np); | |
391 | ||
392 | printk(KERN_INFO "CPU Hotplug not supported on " | |
393 | "systems using MPIC\n"); | |
394 | return 0; | |
395 | } | |
396 | } | |
397 | ||
41dd03a9 | 398 | rtas_stop_self_token = rtas_token("stop-self"); |
674fa677 | 399 | qcss_tok = rtas_token("query-cpu-stopped-state"); |
0332c2d4 | 400 | |
41dd03a9 | 401 | if (rtas_stop_self_token == RTAS_UNKNOWN_SERVICE || |
674fa677 ME |
402 | qcss_tok == RTAS_UNKNOWN_SERVICE) { |
403 | printk(KERN_INFO "CPU Hotplug not supported by firmware " | |
404 | "- disabling.\n"); | |
405 | return 0; | |
406 | } | |
04da6af9 | 407 | |
06ba30b6 ME |
408 | ppc_md.cpu_die = pseries_mach_cpu_die; |
409 | smp_ops->cpu_disable = pseries_cpu_disable; | |
410 | smp_ops->cpu_die = pseries_cpu_die; | |
413f7c40 ME |
411 | |
412 | /* Processors can be added/removed only on LPAR */ | |
3aa565f5 | 413 | if (firmware_has_feature(FW_FEATURE_LPAR)) { |
1cf3d8b3 | 414 | of_reconfig_notifier_register(&pseries_smp_nb); |
3aa565f5 GS |
415 | cpu_maps_update_begin(); |
416 | if (cede_offline_enabled && parse_cede_parameters() == 0) { | |
417 | default_offline_state = CPU_STATE_INACTIVE; | |
418 | for_each_online_cpu(cpu) | |
419 | set_default_offline_state(cpu); | |
420 | } | |
421 | cpu_maps_update_done(); | |
422 | } | |
413f7c40 | 423 | |
0332c2d4 ME |
424 | return 0; |
425 | } | |
d2a36071 | 426 | machine_arch_initcall(pseries, pseries_cpu_hotplug_init); |