Commit | Line | Data |
---|---|---|
55190f88 BH |
1 | /* |
2 | * PowerNV setup code. | |
3 | * | |
4 | * Copyright 2011 IBM Corp. | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU General Public License | |
8 | * as published by the Free Software Foundation; either version | |
9 | * 2 of the License, or (at your option) any later version. | |
10 | */ | |
11 | ||
12 | #undef DEBUG | |
13 | ||
14 | #include <linux/cpu.h> | |
15 | #include <linux/errno.h> | |
16 | #include <linux/sched.h> | |
17 | #include <linux/kernel.h> | |
18 | #include <linux/tty.h> | |
19 | #include <linux/reboot.h> | |
20 | #include <linux/init.h> | |
21 | #include <linux/console.h> | |
22 | #include <linux/delay.h> | |
23 | #include <linux/irq.h> | |
24 | #include <linux/seq_file.h> | |
25 | #include <linux/of.h> | |
26a2056e | 26 | #include <linux/of_fdt.h> |
55190f88 BH |
27 | #include <linux/interrupt.h> |
28 | #include <linux/bug.h> | |
cd15b048 | 29 | #include <linux/pci.h> |
fb5153d0 | 30 | #include <linux/cpufreq.h> |
55190f88 BH |
31 | |
32 | #include <asm/machdep.h> | |
33 | #include <asm/firmware.h> | |
34 | #include <asm/xics.h> | |
628daa8d | 35 | #include <asm/rtas.h> |
daea1175 | 36 | #include <asm/opal.h> |
13906db6 | 37 | #include <asm/kexec.h> |
b2a80878 | 38 | #include <asm/smp.h> |
7cba160a SP |
39 | #include <asm/cputhreads.h> |
40 | #include <asm/cpuidle.h> | |
41 | #include <asm/code-patching.h> | |
55190f88 BH |
42 | |
43 | #include "powernv.h" | |
44 | ||
45 | static void __init pnv_setup_arch(void) | |
46 | { | |
4817fc32 AB |
47 | set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); |
48 | ||
55190f88 BH |
49 | /* Initialize SMP */ |
50 | pnv_smp_init(); | |
51 | ||
61305a96 BH |
52 | /* Setup PCI */ |
53 | pnv_pci_init(); | |
55190f88 | 54 | |
628daa8d BH |
55 | /* Setup RTC and NVRAM callbacks */ |
56 | if (firmware_has_feature(FW_FEATURE_OPAL)) | |
57 | opal_nvram_init(); | |
55190f88 BH |
58 | |
59 | /* Enable NAP mode */ | |
60 | powersave_nap = 1; | |
61 | ||
62 | /* XXX PMCS */ | |
63 | } | |
64 | ||
65 | static void __init pnv_init_early(void) | |
66 | { | |
3fafe9c2 BH |
67 | /* |
68 | * Initialize the LPC bus now so that legacy serial | |
69 | * ports can be found on it | |
70 | */ | |
71 | opal_lpc_init(); | |
72 | ||
daea1175 BH |
73 | #ifdef CONFIG_HVC_OPAL |
74 | if (firmware_has_feature(FW_FEATURE_OPAL)) | |
75 | hvc_opal_init_early(); | |
76 | else | |
77 | #endif | |
78 | add_preferred_console("hvc", 0, NULL); | |
55190f88 BH |
79 | } |
80 | ||
81 | static void __init pnv_init_IRQ(void) | |
82 | { | |
83 | xics_init(); | |
84 | ||
85 | WARN_ON(!ppc_md.get_irq); | |
86 | } | |
87 | ||
88 | static void pnv_show_cpuinfo(struct seq_file *m) | |
89 | { | |
90 | struct device_node *root; | |
91 | const char *model = ""; | |
92 | ||
93 | root = of_find_node_by_path("/"); | |
94 | if (root) | |
95 | model = of_get_property(root, "model", NULL); | |
96 | seq_printf(m, "machine\t\t: PowerNV %s\n", model); | |
75b93da4 BH |
97 | if (firmware_has_feature(FW_FEATURE_OPALv3)) |
98 | seq_printf(m, "firmware\t: OPAL v3\n"); | |
99 | else if (firmware_has_feature(FW_FEATURE_OPALv2)) | |
14a43e69 BH |
100 | seq_printf(m, "firmware\t: OPAL v2\n"); |
101 | else if (firmware_has_feature(FW_FEATURE_OPAL)) | |
102 | seq_printf(m, "firmware\t: OPAL v1\n"); | |
103 | else | |
104 | seq_printf(m, "firmware\t: BML\n"); | |
55190f88 BH |
105 | of_node_put(root); |
106 | } | |
107 | ||
2196c6f1 VH |
/*
 * Common preparation shared by the restart and power-off paths.
 * The three steps below are executed in this exact order.
 */
static void pnv_prepare_going_down(void)
{
	/*
	 * Interrupts can no longer be serviced from here on, so turn
	 * off every notifier coming from OPAL.
	 */
	opal_notifier_disable();

	/* Soft-disable interrupts on this CPU */
	local_irq_disable();

	/*
	 * If a flash update is pending, secondary CPUs are handed back
	 * to firmware here; otherwise all sorts of error messages about
	 * CPUs being stuck would show up. As a side effect interrupts
	 * get hard disabled, so beyond this point the kernel is
	 * effectively dead.
	 */
	opal_flash_term_callback();
}
128 | ||
ec27329f | 129 | static void __noreturn pnv_restart(char *cmd) |
55190f88 | 130 | { |
ec27329f BH |
131 | long rc = OPAL_BUSY; |
132 | ||
2196c6f1 | 133 | pnv_prepare_going_down(); |
e8e71fa4 | 134 | |
ec27329f BH |
135 | while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { |
136 | rc = opal_cec_reboot(); | |
137 | if (rc == OPAL_BUSY_EVENT) | |
138 | opal_poll_events(NULL); | |
139 | else | |
140 | mdelay(10); | |
141 | } | |
142 | for (;;) | |
143 | opal_poll_events(NULL); | |
55190f88 BH |
144 | } |
145 | ||
ec27329f | 146 | static void __noreturn pnv_power_off(void) |
55190f88 | 147 | { |
ec27329f BH |
148 | long rc = OPAL_BUSY; |
149 | ||
2196c6f1 | 150 | pnv_prepare_going_down(); |
e8e71fa4 | 151 | |
ec27329f BH |
152 | while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { |
153 | rc = opal_cec_power_down(0); | |
154 | if (rc == OPAL_BUSY_EVENT) | |
155 | opal_poll_events(NULL); | |
156 | else | |
157 | mdelay(10); | |
158 | } | |
159 | for (;;) | |
160 | opal_poll_events(NULL); | |
55190f88 BH |
161 | } |
162 | ||
ec27329f | 163 | static void __noreturn pnv_halt(void) |
55190f88 | 164 | { |
ec27329f | 165 | pnv_power_off(); |
55190f88 BH |
166 | } |
167 | ||
/* Boot progress hook: intentionally a no-op, both arguments are ignored. */
static void pnv_progress(char *s, unsigned short hex)
{
}
171 | ||
cd15b048 BH |
172 | static int pnv_dma_set_mask(struct device *dev, u64 dma_mask) |
173 | { | |
174 | if (dev_is_pci(dev)) | |
175 | return pnv_pci_dma_set_mask(to_pci_dev(dev), dma_mask); | |
176 | return __dma_set_mask(dev, dma_mask); | |
177 | } | |
178 | ||
fe7e85c6 GS |
179 | static u64 pnv_dma_get_required_mask(struct device *dev) |
180 | { | |
181 | if (dev_is_pci(dev)) | |
182 | return pnv_pci_dma_get_required_mask(to_pci_dev(dev)); | |
183 | ||
184 | return __dma_get_required_mask(dev); | |
185 | } | |
186 | ||
73ed148a BH |
/*
 * Machine shutdown hook: quiesces PCI first, then OPAL.
 */
static void pnv_shutdown(void)
{
	/* Give the PCI code a chance to clean up its IODA tables */
	pnv_pci_shutdown();

	/*
	 * Quiesce OPAL: all OPAL interrupts get unregistered so that
	 * none fires during kexec, and any operation that may still be
	 * doing DMA (dump retrieval for instance) is completed.
	 */
	opal_shutdown();
}
199 | ||
628daa8d | 200 | #ifdef CONFIG_KEXEC |
298b34d7 BH |
201 | static void pnv_kexec_wait_secondaries_down(void) |
202 | { | |
203 | int my_cpu, i, notified = -1; | |
204 | ||
205 | my_cpu = get_cpu(); | |
206 | ||
207 | for_each_online_cpu(i) { | |
208 | uint8_t status; | |
209 | int64_t rc; | |
210 | ||
211 | if (i == my_cpu) | |
212 | continue; | |
213 | ||
214 | for (;;) { | |
215 | rc = opal_query_cpu_status(get_hard_smp_processor_id(i), | |
216 | &status); | |
217 | if (rc != OPAL_SUCCESS || status != OPAL_THREAD_STARTED) | |
218 | break; | |
219 | barrier(); | |
220 | if (i != notified) { | |
221 | printk(KERN_INFO "kexec: waiting for cpu %d " | |
222 | "(physical %d) to enter OPAL\n", | |
223 | i, paca[i].hw_cpu_id); | |
224 | notified = i; | |
225 | } | |
226 | } | |
227 | } | |
228 | } | |
229 | ||
628daa8d | 230 | static void pnv_kexec_cpu_down(int crash_shutdown, int secondary) |
55190f88 | 231 | { |
628daa8d | 232 | xics_kexec_teardown_cpu(secondary); |
13906db6 | 233 | |
298b34d7 BH |
234 | /* On OPAL v3, we return all CPUs to firmware */ |
235 | ||
236 | if (!firmware_has_feature(FW_FEATURE_OPALv3)) | |
237 | return; | |
238 | ||
239 | if (secondary) { | |
240 | /* Return secondary CPUs to firmware on OPAL v3 */ | |
13906db6 BH |
241 | mb(); |
242 | get_paca()->kexec_state = KEXEC_STATE_REAL_MODE; | |
243 | mb(); | |
244 | ||
245 | /* Return the CPU to OPAL */ | |
246 | opal_return_cpu(); | |
298b34d7 BH |
247 | } else if (crash_shutdown) { |
248 | /* | |
249 | * On crash, we don't wait for secondaries to go | |
250 | * down as they might be unreachable or hung, so | |
251 | * instead we just wait a bit and move on. | |
252 | */ | |
253 | mdelay(1); | |
254 | } else { | |
255 | /* Primary waits for the secondaries to have reached OPAL */ | |
256 | pnv_kexec_wait_secondaries_down(); | |
13906db6 | 257 | } |
55190f88 | 258 | } |
628daa8d | 259 | #endif /* CONFIG_KEXEC */ |
55190f88 | 260 | |
6d97d7a2 AB |
261 | #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE |
/* Memory hotplug block size used on PowerNV: 256 MiB. */
static unsigned long pnv_memory_block_size(void)
{
	return 256UL << 20;
}
266 | #endif | |
267 | ||
628daa8d | 268 | static void __init pnv_setup_machdep_opal(void) |
55190f88 | 269 | { |
628daa8d | 270 | ppc_md.get_boot_time = opal_get_boot_time; |
628daa8d | 271 | ppc_md.restart = pnv_restart; |
9178ba29 | 272 | pm_power_off = pnv_power_off; |
628daa8d | 273 | ppc_md.halt = pnv_halt; |
ed79ba9e | 274 | ppc_md.machine_check_exception = opal_machine_check; |
55672ecf | 275 | ppc_md.mce_check_early_recovery = opal_mce_check_early_recovery; |
0869b6fd MS |
276 | ppc_md.hmi_exception_early = opal_hmi_exception_early; |
277 | ppc_md.handle_hmi_exception = opal_handle_hmi_exception; | |
55190f88 BH |
278 | } |
279 | ||
628daa8d BH |
280 | #ifdef CONFIG_PPC_POWERNV_RTAS |
281 | static void __init pnv_setup_machdep_rtas(void) | |
55190f88 | 282 | { |
628daa8d BH |
283 | if (rtas_token("get-time-of-day") != RTAS_UNKNOWN_SERVICE) { |
284 | ppc_md.get_boot_time = rtas_get_boot_time; | |
285 | ppc_md.get_rtc_time = rtas_get_rtc_time; | |
286 | ppc_md.set_rtc_time = rtas_set_rtc_time; | |
287 | } | |
288 | ppc_md.restart = rtas_restart; | |
9178ba29 | 289 | pm_power_off = rtas_power_off; |
628daa8d | 290 | ppc_md.halt = rtas_halt; |
55190f88 | 291 | } |
628daa8d | 292 | #endif /* CONFIG_PPC_POWERNV_RTAS */ |
55190f88 | 293 | |
8eb8ac89 SP |
294 | static u32 supported_cpuidle_states; |
295 | ||
7cba160a SP |
296 | static void pnv_alloc_idle_core_states(void) |
297 | { | |
298 | int i, j; | |
299 | int nr_cores = cpu_nr_cores(); | |
300 | u32 *core_idle_state; | |
301 | ||
302 | /* | |
303 | * core_idle_state - First 8 bits track the idle state of each thread | |
304 | * of the core. The 8th bit is the lock bit. Initially all thread bits | |
305 | * are set. They are cleared when the thread enters deep idle state | |
306 | * like sleep and winkle. Initially the lock bit is cleared. | |
307 | * The lock bit has 2 purposes | |
308 | * a. While the first thread is restoring core state, it prevents | |
309 | * other threads in the core from switching to process context. | |
310 | * b. While the last thread in the core is saving the core state, it | |
311 | * prevents a different thread from waking up. | |
312 | */ | |
313 | for (i = 0; i < nr_cores; i++) { | |
314 | int first_cpu = i * threads_per_core; | |
315 | int node = cpu_to_node(first_cpu); | |
316 | ||
317 | core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node); | |
318 | *core_idle_state = PNV_CORE_IDLE_THREAD_BITS; | |
319 | ||
320 | for (j = 0; j < threads_per_core; j++) { | |
321 | int cpu = first_cpu + j; | |
322 | ||
323 | paca[cpu].core_idle_state_ptr = core_idle_state; | |
324 | paca[cpu].thread_idle_state = PNV_THREAD_RUNNING; | |
325 | paca[cpu].thread_mask = 1 << j; | |
326 | } | |
327 | } | |
328 | } | |
329 | ||
8eb8ac89 SP |
330 | u32 pnv_get_supported_cpuidle_states(void) |
331 | { | |
332 | return supported_cpuidle_states; | |
333 | } | |
7cba160a | 334 | EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states); |
8eb8ac89 SP |
335 | |
336 | static int __init pnv_init_idle_states(void) | |
337 | { | |
338 | struct device_node *power_mgt; | |
339 | int dt_idle_states; | |
340 | const __be32 *idle_state_flags; | |
341 | u32 len_flags, flags; | |
342 | int i; | |
343 | ||
344 | supported_cpuidle_states = 0; | |
345 | ||
346 | if (cpuidle_disable != IDLE_NO_OVERRIDE) | |
347 | return 0; | |
348 | ||
349 | if (!firmware_has_feature(FW_FEATURE_OPALv3)) | |
350 | return 0; | |
351 | ||
352 | power_mgt = of_find_node_by_path("/ibm,opal/power-mgt"); | |
353 | if (!power_mgt) { | |
354 | pr_warn("opal: PowerMgmt Node not found\n"); | |
355 | return 0; | |
356 | } | |
357 | ||
358 | idle_state_flags = of_get_property(power_mgt, | |
359 | "ibm,cpu-idle-state-flags", &len_flags); | |
360 | if (!idle_state_flags) { | |
361 | pr_warn("DT-PowerMgmt: missing ibm,cpu-idle-state-flags\n"); | |
362 | return 0; | |
363 | } | |
364 | ||
365 | dt_idle_states = len_flags / sizeof(u32); | |
366 | ||
367 | for (i = 0; i < dt_idle_states; i++) { | |
368 | flags = be32_to_cpu(idle_state_flags[i]); | |
369 | supported_cpuidle_states |= flags; | |
370 | } | |
7cba160a SP |
371 | if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) { |
372 | patch_instruction( | |
373 | (unsigned int *)pnv_fastsleep_workaround_at_entry, | |
374 | PPC_INST_NOP); | |
375 | patch_instruction( | |
376 | (unsigned int *)pnv_fastsleep_workaround_at_exit, | |
377 | PPC_INST_NOP); | |
378 | } | |
379 | pnv_alloc_idle_core_states(); | |
8eb8ac89 SP |
380 | return 0; |
381 | } | |
382 | ||
383 | subsys_initcall(pnv_init_idle_states); | |
384 | ||
55190f88 BH |
385 | static int __init pnv_probe(void) |
386 | { | |
387 | unsigned long root = of_get_flat_dt_root(); | |
388 | ||
389 | if (!of_flat_dt_is_compatible(root, "ibm,powernv")) | |
390 | return 0; | |
391 | ||
392 | hpte_init_native(); | |
393 | ||
628daa8d BH |
394 | if (firmware_has_feature(FW_FEATURE_OPAL)) |
395 | pnv_setup_machdep_opal(); | |
396 | #ifdef CONFIG_PPC_POWERNV_RTAS | |
397 | else if (rtas.base) | |
398 | pnv_setup_machdep_rtas(); | |
399 | #endif /* CONFIG_PPC_POWERNV_RTAS */ | |
400 | ||
55190f88 BH |
401 | pr_debug("PowerNV detected !\n"); |
402 | ||
403 | return 1; | |
404 | } | |
405 | ||
fb5153d0 GS |
406 | /* |
407 | * Returns the cpu frequency for 'cpu' in Hz. This is used by | |
408 | * /proc/cpuinfo | |
409 | */ | |
e51df2c1 | 410 | static unsigned long pnv_get_proc_freq(unsigned int cpu) |
fb5153d0 GS |
411 | { |
412 | unsigned long ret_freq; | |
413 | ||
414 | ret_freq = cpufreq_quick_get(cpu) * 1000ul; | |
415 | ||
416 | /* | |
417 | * If the backend cpufreq driver does not exist, | |
418 | * then fallback to old way of reporting the clockrate. | |
419 | */ | |
420 | if (!ret_freq) | |
421 | ret_freq = ppc_proc_freq; | |
422 | return ret_freq; | |
423 | } | |
424 | ||
55190f88 BH |
425 | define_machine(powernv) { |
426 | .name = "PowerNV", | |
427 | .probe = pnv_probe, | |
55190f88 | 428 | .init_early = pnv_init_early, |
628daa8d | 429 | .setup_arch = pnv_setup_arch, |
55190f88 BH |
430 | .init_IRQ = pnv_init_IRQ, |
431 | .show_cpuinfo = pnv_show_cpuinfo, | |
fb5153d0 | 432 | .get_proc_freq = pnv_get_proc_freq, |
55190f88 | 433 | .progress = pnv_progress, |
73ed148a | 434 | .machine_shutdown = pnv_shutdown, |
591ac0cb | 435 | .power_save = power7_idle, |
55190f88 | 436 | .calibrate_decr = generic_calibrate_decr, |
cd15b048 | 437 | .dma_set_mask = pnv_dma_set_mask, |
fe7e85c6 | 438 | .dma_get_required_mask = pnv_dma_get_required_mask, |
55190f88 BH |
439 | #ifdef CONFIG_KEXEC |
440 | .kexec_cpu_down = pnv_kexec_cpu_down, | |
441 | #endif | |
6d97d7a2 AB |
442 | #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE |
443 | .memory_block_size = pnv_memory_block_size, | |
444 | #endif | |
55190f88 | 445 | }; |