[PATCH] x86: all cpu backtrace
[deliverable/linux.git] arch/i386/kernel/nmi.c
1 /*
2 * linux/arch/i386/nmi.c
3 *
4 * NMI watchdog support on APIC systems
5 *
6 * Started by Ingo Molnar <mingo@redhat.com>
7 *
8 * Fixes:
9 * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog.
10 * Mikael Pettersson : Power Management for local APIC NMI watchdog.
11 * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog.
12 * Pavel Machek and
13 * Mikael Pettersson : PM converted to driver model. Disable/enable API.
14 */
15
16 #include <linux/delay.h>
17 #include <linux/interrupt.h>
18 #include <linux/module.h>
19 #include <linux/nmi.h>
20 #include <linux/sysdev.h>
21 #include <linux/sysctl.h>
22 #include <linux/percpu.h>
23 #include <linux/dmi.h>
24 #include <linux/kprobes.h>
25 #include <linux/cpumask.h>
26
27 #include <asm/smp.h>
28 #include <asm/nmi.h>
29 #include <asm/kdebug.h>
30 #include <asm/intel_arch_perfmon.h>
31
32 #include "mach_traps.h"
33
34 int unknown_nmi_panic;
35 int nmi_watchdog_enabled;
36
37 /* perfctr_nmi_owner tracks the ownership of the perfctr registers;
38 * evntsel_nmi_owner tracks the ownership of the event selection MSRs.
39 * Different performance counters / event selection MSRs may be reserved
40 * by different subsystems; this reservation system just tries to
41 * coordinate things a little.
42 */
43 static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner);
44 static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[3]);
45
46 static cpumask_t backtrace_mask = CPU_MASK_NONE;
47
48 /* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its
49 * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now)
50 */
51 #define NMI_MAX_COUNTER_BITS 66
52
53 /* nmi_active:
54 * >0: the lapic NMI watchdog is active, but can be disabled
55 * <0: the lapic NMI watchdog has not been set up, and cannot
56 * be enabled
57 * 0: the lapic NMI watchdog is disabled, but can be enabled
58 */
59 atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */
60
61 unsigned int nmi_watchdog = NMI_DEFAULT;
62 static unsigned int nmi_hz = HZ;
63
64 struct nmi_watchdog_ctlblk {
65 int enabled;
66 u64 check_bit;
67 unsigned int cccr_msr;
68 unsigned int perfctr_msr; /* the MSR to reset in NMI handler */
69 unsigned int evntsel_msr; /* the MSR to select the events to handle */
70 };
71 static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
72
73 /* local prototypes */
74 static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);
75
76 extern void show_registers(struct pt_regs *regs);
77 extern int unknown_nmi_panic;
78
79 /* converts an msr to an appropriate reservation bit */
80 static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
81 {
82 /* returns the bit offset of the performance counter register */
83 switch (boot_cpu_data.x86_vendor) {
84 case X86_VENDOR_AMD:
85 return (msr - MSR_K7_PERFCTR0);
86 case X86_VENDOR_INTEL:
87 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
88 return (msr - MSR_ARCH_PERFMON_PERFCTR0);
89
90 switch (boot_cpu_data.x86) {
91 case 6:
92 return (msr - MSR_P6_PERFCTR0);
93 case 15:
94 return (msr - MSR_P4_BPU_PERFCTR0);
95 }
96 }
97 return 0;
98 }
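/* Illustrative example (editor's note, not in the original source): on an
 * AMD K7 the four performance counter MSRs are contiguous, so an MSR of
 * MSR_K7_PERFCTR0 + 1 maps to bit 1 here, and reserving it sets bit 1 of
 * this CPU's perfctr_nmi_owner bitmap. */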
99
100 /* converts an msr to an appropriate reservation bit */
101 static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
102 {
103 /* returns the bit offset of the event selection register */
104 switch (boot_cpu_data.x86_vendor) {
105 case X86_VENDOR_AMD:
106 return (msr - MSR_K7_EVNTSEL0);
107 case X86_VENDOR_INTEL:
108 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
109 return (msr - MSR_ARCH_PERFMON_EVENTSEL0);
110
111 switch (boot_cpu_data.x86) {
112 case 6:
113 return (msr - MSR_P6_EVNTSEL0);
114 case 15:
115 return (msr - MSR_P4_BSU_ESCR0);
116 }
117 }
118 return 0;
119 }
120
121 /* checks a counter bit for availability (hack for oprofile) */
122 int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
123 {
124 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
125
126 return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
127 }
128
129 /* checks an msr for availability */
130 int avail_to_resrv_perfctr_nmi(unsigned int msr)
131 {
132 unsigned int counter;
133
134 counter = nmi_perfctr_msr_to_bit(msr);
135 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
136
137 return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
138 }
139
140 int reserve_perfctr_nmi(unsigned int msr)
141 {
142 unsigned int counter;
143
144 counter = nmi_perfctr_msr_to_bit(msr);
145 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
146
147 if (!test_and_set_bit(counter, &__get_cpu_var(perfctr_nmi_owner)))
148 return 1;
149 return 0;
150 }
151
152 void release_perfctr_nmi(unsigned int msr)
153 {
154 unsigned int counter;
155
156 counter = nmi_perfctr_msr_to_bit(msr);
157 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
158
159 clear_bit(counter, &__get_cpu_var(perfctr_nmi_owner));
160 }
161
162 int reserve_evntsel_nmi(unsigned int msr)
163 {
164 unsigned int counter;
165
166 counter = nmi_evntsel_msr_to_bit(msr);
167 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
168
169 if (!test_and_set_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]))
170 return 1;
171 return 0;
172 }
173
174 void release_evntsel_nmi(unsigned int msr)
175 {
176 unsigned int counter;
177
178 counter = nmi_evntsel_msr_to_bit(msr);
179 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
180
181 clear_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]);
182 }
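/* Sketch of the expected reserve/release pairing for a subsystem such as
 * oprofile (editor's illustration, not code from this file):
 *
 *	if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0))
 *		return -EBUSY;
 *	if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0)) {
 *		release_perfctr_nmi(MSR_K7_PERFCTR0);
 *		return -EBUSY;
 *	}
 *	... program and use the counter ...
 *	release_evntsel_nmi(MSR_K7_EVNTSEL0);
 *	release_perfctr_nmi(MSR_K7_PERFCTR0);
 */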
183
184 static __cpuinit inline int nmi_known_cpu(void)
185 {
186 switch (boot_cpu_data.x86_vendor) {
187 case X86_VENDOR_AMD:
188 return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6));
189 case X86_VENDOR_INTEL:
190 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
191 return 1;
192 else
193 return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6));
194 }
195 return 0;
196 }
197
198 #ifdef CONFIG_SMP
199 /* The performance counters used by NMI_LOCAL_APIC don't trigger when
200 * the CPU is idle. To make sure the NMI watchdog really ticks on all
201 * CPUs during the test make them busy.
202 */
203 static __init void nmi_cpu_busy(void *data)
204 {
205 volatile int *endflag = data;
206 local_irq_enable_in_hardirq();
207 /* Intentionally don't use cpu_relax here. This is
208 to make sure that the performance counter really ticks,
209 even if there is a simulator or similar that catches the
210 pause instruction. On a real HT machine this is fine because
211 all other CPUs are busy with "useless" delay loops and don't
212 care if they get somewhat fewer cycles. */
213 while (*endflag == 0)
214 barrier();
215 }
216 #endif
217
218 static int __init check_nmi_watchdog(void)
219 {
220 volatile int endflag = 0;
221 unsigned int *prev_nmi_count;
222 int cpu;
223
224 /* Enable NMI watchdog for newer systems.
225 Probably safe on most older systems too, but let's be careful.
226 IBM ThinkPads use INT10 inside SMM, and that allows an early NMI inside SMM,
227 which hangs the system. Disable the watchdog for all ThinkPads. */
228 if (nmi_watchdog == NMI_DEFAULT && dmi_get_year(DMI_BIOS_DATE) >= 2004 &&
229 !dmi_name_in_vendors("ThinkPad"))
230 nmi_watchdog = NMI_LOCAL_APIC;
231
232 if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DEFAULT))
233 return 0;
234
235 if (!atomic_read(&nmi_active))
236 return 0;
237
238 prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
239 if (!prev_nmi_count)
240 return -1;
241
242 printk(KERN_INFO "Testing NMI watchdog ... ");
243
244 if (nmi_watchdog == NMI_LOCAL_APIC)
245 smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);
246
247 for_each_possible_cpu(cpu)
248 prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count;
249 local_irq_enable();
250 mdelay((10*1000)/nmi_hz); /* wait 10 ticks */
251
252 for_each_possible_cpu(cpu) {
253 #ifdef CONFIG_SMP
254 /* Check cpu_callin_map here because that is set
255 after the timer is started. */
256 if (!cpu_isset(cpu, cpu_callin_map))
257 continue;
258 #endif
259 if (!per_cpu(nmi_watchdog_ctlblk, cpu).enabled)
260 continue;
261 if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
262 printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
263 cpu,
264 prev_nmi_count[cpu],
265 nmi_count(cpu));
266 per_cpu(nmi_watchdog_ctlblk, cpu).enabled = 0;
267 atomic_dec(&nmi_active);
268 }
269 }
270 if (!atomic_read(&nmi_active)) {
271 kfree(prev_nmi_count);
272 atomic_set(&nmi_active, -1);
273 return -1;
274 }
275 endflag = 1;
276 printk("OK.\n");
277
278 /* now that we know it works we can reduce NMI frequency to
279 something more reasonable; makes a difference in some configs */
280 if (nmi_watchdog == NMI_LOCAL_APIC) {
281 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
282
283 nmi_hz = 1;
284 /*
285 * On Intel CPUs with ARCH_PERFMON only 32 bits in the counter
286 * are writable, with higher bits sign extending from bit 31.
287 * So we can only program the counter with 31-bit values, and
288 * bit 31 must be set so that bits 32..63 sign-extend to 1.
289 * Find an nmi_hz for which the programmed count fits in 31 bits.
290 */
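/* Worked example (editor's note, hypothetical figures): on a 3 GHz CPU,
 * cpu_khz * 1000 == 3,000,000,000 > 0x7fffffff, so do_div() below yields
 * 1 and nmi_hz becomes 2; the counter is then reprogrammed twice per
 * second with a period that fits in 31 bits. */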
291 if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0 &&
292 ((u64)cpu_khz * 1000) > 0x7fffffffULL) {
293 u64 count = (u64)cpu_khz * 1000;
294 do_div(count, 0x7fffffffUL);
295 nmi_hz = count + 1;
296 }
297 }
298
299 kfree(prev_nmi_count);
300 return 0;
301 }
302 /* This needs to happen later in boot so counters are working */
303 late_initcall(check_nmi_watchdog);
304
305 static int __init setup_nmi_watchdog(char *str)
306 {
307 int nmi;
308
309 get_option(&str, &nmi);
310
311 if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE))
312 return 0;
313 /*
314 * If any other x86 CPU has a local APIC, then
315 * please test the NMI stuff there and send me the
316 * missing bits. Right now Intel P6/P4 and AMD K7 only.
317 */
318 if ((nmi == NMI_LOCAL_APIC) && (nmi_known_cpu() == 0))
319 return 0; /* no lapic support */
320 nmi_watchdog = nmi;
321 return 1;
322 }
323
324 __setup("nmi_watchdog=", setup_nmi_watchdog);
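/* Usage sketch (editor's note, assuming the usual i386 asm/nmi.h values
 * where NMI_IO_APIC == 1 and NMI_LOCAL_APIC == 2): booting with
 * "nmi_watchdog=2" requests the local-APIC perfctr watchdog, while
 * "nmi_watchdog=1" requests the IO-APIC timer-based one; values outside
 * the NMI_NONE..NMI_INVALID range are ignored by the parser above. */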
325
326 static void disable_lapic_nmi_watchdog(void)
327 {
328 BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
329
330 if (atomic_read(&nmi_active) <= 0)
331 return;
332
333 on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
334
335 BUG_ON(atomic_read(&nmi_active) != 0);
336 }
337
338 static void enable_lapic_nmi_watchdog(void)
339 {
340 BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
341
342 /* are we already enabled */
343 if (atomic_read(&nmi_active) != 0)
344 return;
345
346 /* are we lapic aware */
347 if (nmi_known_cpu() <= 0)
348 return;
349
350 on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
351 touch_nmi_watchdog();
352 }
353
354 void disable_timer_nmi_watchdog(void)
355 {
356 BUG_ON(nmi_watchdog != NMI_IO_APIC);
357
358 if (atomic_read(&nmi_active) <= 0)
359 return;
360
361 disable_irq(0);
362 on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
363
364 BUG_ON(atomic_read(&nmi_active) != 0);
365 }
366
367 void enable_timer_nmi_watchdog(void)
368 {
369 BUG_ON(nmi_watchdog != NMI_IO_APIC);
370
371 if (atomic_read(&nmi_active) == 0) {
372 touch_nmi_watchdog();
373 on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
374 enable_irq(0);
375 }
376 }
377
378 #ifdef CONFIG_PM
379
380 static int nmi_pm_active; /* nmi_active before suspend */
381
382 static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
383 {
384 /* only CPU0 goes here, other CPUs should be offline */
385 nmi_pm_active = atomic_read(&nmi_active);
386 stop_apic_nmi_watchdog(NULL);
387 BUG_ON(atomic_read(&nmi_active) != 0);
388 return 0;
389 }
390
391 static int lapic_nmi_resume(struct sys_device *dev)
392 {
393 /* only CPU0 goes here, other CPUs should be offline */
394 if (nmi_pm_active > 0) {
395 setup_apic_nmi_watchdog(NULL);
396 touch_nmi_watchdog();
397 }
398 return 0;
399 }
400
401
402 static struct sysdev_class nmi_sysclass = {
403 set_kset_name("lapic_nmi"),
404 .resume = lapic_nmi_resume,
405 .suspend = lapic_nmi_suspend,
406 };
407
408 static struct sys_device device_lapic_nmi = {
409 .id = 0,
410 .cls = &nmi_sysclass,
411 };
412
413 static int __init init_lapic_nmi_sysfs(void)
414 {
415 int error;
416
417 /* should really be a BUG_ON, but because this is an
418 * init call, it just doesn't work. -dcz
419 */
420 if (nmi_watchdog != NMI_LOCAL_APIC)
421 return 0;
422
423 if ( atomic_read(&nmi_active) < 0 )
424 return 0;
425
426 error = sysdev_class_register(&nmi_sysclass);
427 if (!error)
428 error = sysdev_register(&device_lapic_nmi);
429 return error;
430 }
431 /* must come after the local APIC's device_initcall() */
432 late_initcall(init_lapic_nmi_sysfs);
433
434 #endif /* CONFIG_PM */
435
436 /*
437 * Activate the NMI watchdog via the local APIC.
438 * Original code written by Keith Owens.
439 */
440
441 static void write_watchdog_counter(unsigned int perfctr_msr, const char *descr)
442 {
443 u64 count = (u64)cpu_khz * 1000;
444
445 do_div(count, nmi_hz);
446 if(descr)
447 Dprintk("setting %s to -0x%08Lx\n", descr, count);
448 wrmsrl(perfctr_msr, 0 - count);
449 }
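/* Worked example (editor's note, hypothetical figures): with
 * cpu_khz == 2000000 (a 2 GHz CPU) and nmi_hz == 1, count is
 * 2,000,000,000, so the perfctr is written with -2000000000; it then
 * counts up and overflows, raising the watchdog NMI, about once per
 * second. */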
450
451 /* Note that these events don't tick when the CPU idles. This means
452 the frequency varies with CPU load. */
453
454 #define K7_EVNTSEL_ENABLE (1 << 22)
455 #define K7_EVNTSEL_INT (1 << 20)
456 #define K7_EVNTSEL_OS (1 << 17)
457 #define K7_EVNTSEL_USR (1 << 16)
458 #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
459 #define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
460
461 static int setup_k7_watchdog(void)
462 {
463 unsigned int perfctr_msr, evntsel_msr;
464 unsigned int evntsel;
465 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
466
467 perfctr_msr = MSR_K7_PERFCTR0;
468 evntsel_msr = MSR_K7_EVNTSEL0;
469 if (!reserve_perfctr_nmi(perfctr_msr))
470 goto fail;
471
472 if (!reserve_evntsel_nmi(evntsel_msr))
473 goto fail1;
474
475 wrmsrl(perfctr_msr, 0UL);
476
477 evntsel = K7_EVNTSEL_INT
478 | K7_EVNTSEL_OS
479 | K7_EVNTSEL_USR
480 | K7_NMI_EVENT;
481
482 /* setup the timer */
483 wrmsr(evntsel_msr, evntsel, 0);
484 write_watchdog_counter(perfctr_msr, "K7_PERFCTR0");
485 apic_write(APIC_LVTPC, APIC_DM_NMI);
486 evntsel |= K7_EVNTSEL_ENABLE;
487 wrmsr(evntsel_msr, evntsel, 0);
488
489 wd->perfctr_msr = perfctr_msr;
490 wd->evntsel_msr = evntsel_msr;
491 wd->cccr_msr = 0; /* unused */
492 wd->check_bit = 1ULL<<63;
493 return 1;
494 fail1:
495 release_perfctr_nmi(perfctr_msr);
496 fail:
497 return 0;
498 }
499
500 static void stop_k7_watchdog(void)
501 {
502 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
503
504 wrmsr(wd->evntsel_msr, 0, 0);
505
506 release_evntsel_nmi(wd->evntsel_msr);
507 release_perfctr_nmi(wd->perfctr_msr);
508 }
509
510 #define P6_EVNTSEL0_ENABLE (1 << 22)
511 #define P6_EVNTSEL_INT (1 << 20)
512 #define P6_EVNTSEL_OS (1 << 17)
513 #define P6_EVNTSEL_USR (1 << 16)
514 #define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79
515 #define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED
516
517 static int setup_p6_watchdog(void)
518 {
519 unsigned int perfctr_msr, evntsel_msr;
520 unsigned int evntsel;
521 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
522
523 perfctr_msr = MSR_P6_PERFCTR0;
524 evntsel_msr = MSR_P6_EVNTSEL0;
525 if (!reserve_perfctr_nmi(perfctr_msr))
526 goto fail;
527
528 if (!reserve_evntsel_nmi(evntsel_msr))
529 goto fail1;
530
531 wrmsrl(perfctr_msr, 0UL);
532
533 evntsel = P6_EVNTSEL_INT
534 | P6_EVNTSEL_OS
535 | P6_EVNTSEL_USR
536 | P6_NMI_EVENT;
537
538 /* setup the timer */
539 wrmsr(evntsel_msr, evntsel, 0);
540 write_watchdog_counter(perfctr_msr, "P6_PERFCTR0");
541 apic_write(APIC_LVTPC, APIC_DM_NMI);
542 evntsel |= P6_EVNTSEL0_ENABLE;
543 wrmsr(evntsel_msr, evntsel, 0);
544
545 wd->perfctr_msr = perfctr_msr;
546 wd->evntsel_msr = evntsel_msr;
547 wd->cccr_msr = 0; /* unused */
548 wd->check_bit = 1ULL<<39;
549 return 1;
550 fail1:
551 release_perfctr_nmi(perfctr_msr);
552 fail:
553 return 0;
554 }
555
556 static void stop_p6_watchdog(void)
557 {
558 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
559
560 wrmsr(wd->evntsel_msr, 0, 0);
561
562 release_evntsel_nmi(wd->evntsel_msr);
563 release_perfctr_nmi(wd->perfctr_msr);
564 }
565
566 /* Note that these events don't tick when the CPU idles. This means
567 the frequency varies with CPU load. */
568
569 #define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7)
570 #define P4_ESCR_EVENT_SELECT(N) ((N)<<25)
571 #define P4_ESCR_OS (1<<3)
572 #define P4_ESCR_USR (1<<2)
573 #define P4_CCCR_OVF_PMI0 (1<<26)
574 #define P4_CCCR_OVF_PMI1 (1<<27)
575 #define P4_CCCR_THRESHOLD(N) ((N)<<20)
576 #define P4_CCCR_COMPLEMENT (1<<19)
577 #define P4_CCCR_COMPARE (1<<18)
578 #define P4_CCCR_REQUIRED (3<<16)
579 #define P4_CCCR_ESCR_SELECT(N) ((N)<<13)
580 #define P4_CCCR_ENABLE (1<<12)
581 #define P4_CCCR_OVF (1<<31)
582 /* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
583 CRU_ESCR0 (with any non-null event selector) through a complemented
584 max threshold. [IA32-Vol3, Section 14.9.9] */
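/* Editor's gloss on the trick above (paraphrasing the SDM as I understand
 * it): with COMPARE, COMPLEMENT and THRESHOLD(15) set in the CCCR, the
 * threshold comparison passes on every cycle no matter how many events
 * CRU_ESCR0 actually reports, so IQ_COUNTER0 increments once per clock
 * and behaves like a free-running cycle counter that overflows at the
 * rate programmed by write_watchdog_counter(). */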
585
586 static int setup_p4_watchdog(void)
587 {
588 unsigned int perfctr_msr, evntsel_msr, cccr_msr;
589 unsigned int evntsel, cccr_val;
590 unsigned int misc_enable, dummy;
591 unsigned int ht_num;
592 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
593
594 rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
595 if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
596 return 0;
597
598 #ifdef CONFIG_SMP
599 /* detect which hyperthread we are on */
600 if (smp_num_siblings == 2) {
601 unsigned int ebx, apicid;
602
603 ebx = cpuid_ebx(1);
604 apicid = (ebx >> 24) & 0xff;
605 ht_num = apicid & 1;
606 } else
607 #endif
608 ht_num = 0;
609
610 /* performance counters are shared resources
611 * assign each hyperthread its own set
612 * (re-use the ESCR0 register, seems safe
613 * and keeps the cccr_val the same)
614 */
615 if (!ht_num) {
616 /* logical cpu 0 */
617 perfctr_msr = MSR_P4_IQ_PERFCTR0;
618 evntsel_msr = MSR_P4_CRU_ESCR0;
619 cccr_msr = MSR_P4_IQ_CCCR0;
620 cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
621 } else {
622 /* logical cpu 1 */
623 perfctr_msr = MSR_P4_IQ_PERFCTR1;
624 evntsel_msr = MSR_P4_CRU_ESCR0;
625 cccr_msr = MSR_P4_IQ_CCCR1;
626 cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
627 }
628
629 if (!reserve_perfctr_nmi(perfctr_msr))
630 goto fail;
631
632 if (!reserve_evntsel_nmi(evntsel_msr))
633 goto fail1;
634
635 evntsel = P4_ESCR_EVENT_SELECT(0x3F)
636 | P4_ESCR_OS
637 | P4_ESCR_USR;
638
639 cccr_val |= P4_CCCR_THRESHOLD(15)
640 | P4_CCCR_COMPLEMENT
641 | P4_CCCR_COMPARE
642 | P4_CCCR_REQUIRED;
643
644 wrmsr(evntsel_msr, evntsel, 0);
645 wrmsr(cccr_msr, cccr_val, 0);
646 write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0");
647 apic_write(APIC_LVTPC, APIC_DM_NMI);
648 cccr_val |= P4_CCCR_ENABLE;
649 wrmsr(cccr_msr, cccr_val, 0);
650 wd->perfctr_msr = perfctr_msr;
651 wd->evntsel_msr = evntsel_msr;
652 wd->cccr_msr = cccr_msr;
653 wd->check_bit = 1ULL<<39;
654 return 1;
655 fail1:
656 release_perfctr_nmi(perfctr_msr);
657 fail:
658 return 0;
659 }
660
661 static void stop_p4_watchdog(void)
662 {
663 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
664
665 wrmsr(wd->cccr_msr, 0, 0);
666 wrmsr(wd->evntsel_msr, 0, 0);
667
668 release_evntsel_nmi(wd->evntsel_msr);
669 release_perfctr_nmi(wd->perfctr_msr);
670 }
671
672 #define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
673 #define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
674
675 static int setup_intel_arch_watchdog(void)
676 {
677 unsigned int ebx;
678 union cpuid10_eax eax;
679 unsigned int unused;
680 unsigned int perfctr_msr, evntsel_msr;
681 unsigned int evntsel;
682 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
683
684 /*
685 * Check whether the Architectural PerfMon supports
686 * Unhalted Core Cycles Event or not.
687 * NOTE: Corresponding bit = 0 in ebx indicates event present.
688 */
689 cpuid(10, &(eax.full), &ebx, &unused, &unused);
690 if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
691 (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
692 goto fail;
693
694 perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
695 evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0;
696
697 if (!reserve_perfctr_nmi(perfctr_msr))
698 goto fail;
699
700 if (!reserve_evntsel_nmi(evntsel_msr))
701 goto fail1;
702
703 wrmsrl(perfctr_msr, 0UL);
704
705 evntsel = ARCH_PERFMON_EVENTSEL_INT
706 | ARCH_PERFMON_EVENTSEL_OS
707 | ARCH_PERFMON_EVENTSEL_USR
708 | ARCH_PERFMON_NMI_EVENT_SEL
709 | ARCH_PERFMON_NMI_EVENT_UMASK;
710
711 /* setup the timer */
712 wrmsr(evntsel_msr, evntsel, 0);
713 write_watchdog_counter(perfctr_msr, "INTEL_ARCH_PERFCTR0");
714 apic_write(APIC_LVTPC, APIC_DM_NMI);
715 evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
716 wrmsr(evntsel_msr, evntsel, 0);
717
718 wd->perfctr_msr = perfctr_msr;
719 wd->evntsel_msr = evntsel_msr;
720 wd->cccr_msr = 0; /* unused */
721 wd->check_bit = 1ULL << (eax.split.bit_width - 1);
722 return 1;
723 fail1:
724 release_perfctr_nmi(perfctr_msr);
725 fail:
726 return 0;
727 }
728
729 static void stop_intel_arch_watchdog(void)
730 {
731 unsigned int ebx;
732 union cpuid10_eax eax;
733 unsigned int unused;
734 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
735
736 /*
737 * Check whether the Architectural PerfMon supports
738 * Unhalted Core Cycles Event or not.
739 * NOTE: Corresponding bit = 0 in ebx indicates event present.
740 */
741 cpuid(10, &(eax.full), &ebx, &unused, &unused);
742 if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
743 (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
744 return;
745
746 wrmsr(wd->evntsel_msr, 0, 0);
747 release_evntsel_nmi(wd->evntsel_msr);
748 release_perfctr_nmi(wd->perfctr_msr);
749 }
750
751 void setup_apic_nmi_watchdog (void *unused)
752 {
753 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
754
755 /* only support LOCAL and IO APICs for now */
756 if ((nmi_watchdog != NMI_LOCAL_APIC) &&
757 (nmi_watchdog != NMI_IO_APIC))
758 return;
759
760 if (wd->enabled == 1)
761 return;
762
763 /* cheap hack to support suspend/resume */
764 /* if cpu0 is not active, the other cpus should not be either */
765 if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0))
766 return;
767
768 if (nmi_watchdog == NMI_LOCAL_APIC) {
769 switch (boot_cpu_data.x86_vendor) {
770 case X86_VENDOR_AMD:
771 if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15)
772 return;
773 if (!setup_k7_watchdog())
774 return;
775 break;
776 case X86_VENDOR_INTEL:
777 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
778 if (!setup_intel_arch_watchdog())
779 return;
780 break;
781 }
782 switch (boot_cpu_data.x86) {
783 case 6:
784 if (boot_cpu_data.x86_model > 0xd)
785 return;
786
787 if (!setup_p6_watchdog())
788 return;
789 break;
790 case 15:
791 if (boot_cpu_data.x86_model > 0x4)
792 return;
793
794 if (!setup_p4_watchdog())
795 return;
796 break;
797 default:
798 return;
799 }
800 break;
801 default:
802 return;
803 }
804 }
805 wd->enabled = 1;
806 atomic_inc(&nmi_active);
807 }
808
809 void stop_apic_nmi_watchdog(void *unused)
810 {
811 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
812
813 /* only support LOCAL and IO APICs for now */
814 if ((nmi_watchdog != NMI_LOCAL_APIC) &&
815 (nmi_watchdog != NMI_IO_APIC))
816 return;
817
818 if (wd->enabled == 0)
819 return;
820
821 if (nmi_watchdog == NMI_LOCAL_APIC) {
822 switch (boot_cpu_data.x86_vendor) {
823 case X86_VENDOR_AMD:
824 stop_k7_watchdog();
825 break;
826 case X86_VENDOR_INTEL:
827 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
828 stop_intel_arch_watchdog();
829 break;
830 }
831 switch (boot_cpu_data.x86) {
832 case 6:
833 if (boot_cpu_data.x86_model > 0xd)
834 break;
835 stop_p6_watchdog();
836 break;
837 case 15:
838 if (boot_cpu_data.x86_model > 0x4)
839 break;
840 stop_p4_watchdog();
841 break;
842 }
843 break;
844 default:
845 return;
846 }
847 }
848 wd->enabled = 0;
849 atomic_dec(&nmi_active);
850 }
851
852 /*
853 * the best way to detect whether a CPU has a 'hard lockup' problem
854 * is to check its local APIC timer IRQ counts. If they are not
855 * changing then that CPU has some problem.
856 *
857 * as these watchdog NMI IRQs are generated on every CPU, we only
858 * have to check the current processor.
859 *
860 * since NMIs don't listen to _any_ locks, we have to be extremely
861 * careful not to rely on unsafe variables. The printk might lock
862 * up though, so we have to break up any console locks first ...
863 * [when more tty-related locks are added, break them up
864 * here too!]
865 */
866
867 static unsigned int
868 last_irq_sums [NR_CPUS],
869 alert_counter [NR_CPUS];
870
871 void touch_nmi_watchdog (void)
872 {
873 int i;
874
875 /*
876 * Just reset the alert counters, (other CPUs might be
877 * spinning on locks we hold):
878 */
879 for_each_possible_cpu(i)
880 alert_counter[i] = 0;
881
882 /*
883 * Tickle the softlockup detector too:
884 */
885 touch_softlockup_watchdog();
886 }
887 EXPORT_SYMBOL(touch_nmi_watchdog);
888
889 extern void die_nmi(struct pt_regs *, const char *msg);
890
891 __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
892 {
893
894 /*
895 * Since current_thread_info()-> is always on the stack, and we
896 * always switch the stack NMI-atomically, it's safe to use
897 * smp_processor_id().
898 */
899 unsigned int sum;
900 int touched = 0;
901 int cpu = smp_processor_id();
902 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
903 u64 dummy;
904 int rc=0;
905
906 /* check for other users first */
907 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
908 == NOTIFY_STOP) {
909 rc = 1;
910 touched = 1;
911 }
912
913 if (cpu_isset(cpu, backtrace_mask)) {
914 static DEFINE_SPINLOCK(lock); /* Serialise the printks */
915
916 spin_lock(&lock);
917 printk("NMI backtrace for cpu %d\n", cpu);
918 dump_stack();
919 spin_unlock(&lock);
920 cpu_clear(cpu, backtrace_mask);
921 }
922
923 sum = per_cpu(irq_stat, cpu).apic_timer_irqs;
924
925 /* if the apic timer isn't firing, this cpu isn't doing much */
926 if (!touched && last_irq_sums[cpu] == sum) {
927 /*
928 * Ayiee, looks like this CPU is stuck ...
929 * wait a few IRQs (5 seconds) before doing the oops ...
930 */
931 alert_counter[cpu]++;
932 if (alert_counter[cpu] == 5*nmi_hz)
933 /*
934 * die_nmi will return ONLY if NOTIFY_STOP happens..
935 */
936 die_nmi(regs, "BUG: NMI Watchdog detected LOCKUP");
937 } else {
938 last_irq_sums[cpu] = sum;
939 alert_counter[cpu] = 0;
940 }
941 /* see if the nmi watchdog went off */
942 if (wd->enabled) {
943 if (nmi_watchdog == NMI_LOCAL_APIC) {
944 rdmsrl(wd->perfctr_msr, dummy);
945 if (dummy & wd->check_bit){
946 /* this wasn't a watchdog timer interrupt */
947 goto done;
948 }
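			/* Editor's note on the check above: the perfctr was programmed
			 * with a negative value, so while it is still counting up
			 * towards overflow its top bit (wd->check_bit) reads as 1;
			 * after the overflow that raised this NMI it wraps to a small
			 * positive value and the bit reads 0. A set bit therefore
			 * means some other NMI source fired. */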
949
950 /* only Intel P4 uses the cccr msr */
951 if (wd->cccr_msr != 0) {
952 /*
953 * P4 quirks:
954 * - An overflowed perfctr will assert its interrupt
955 * until the OVF flag in its CCCR is cleared.
956 * - LVTPC is masked on interrupt and must be
957 * unmasked by the LVTPC handler.
958 */
959 rdmsrl(wd->cccr_msr, dummy);
960 dummy &= ~P4_CCCR_OVF;
961 wrmsrl(wd->cccr_msr, dummy);
962 apic_write(APIC_LVTPC, APIC_DM_NMI);
963 }
964 else if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
965 wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
966 /* P6-based Pentium M needs to re-unmask
967 * the apic vector, but it doesn't hurt
968 * other P6 variants.
969 * ArchPerfmon/Core Duo also needs this. */
970 apic_write(APIC_LVTPC, APIC_DM_NMI);
971 }
972 /* start the cycle over again */
973 write_watchdog_counter(wd->perfctr_msr, NULL);
974 rc = 1;
975 } else if (nmi_watchdog == NMI_IO_APIC) {
976 /* We don't know how to accurately check for this,
977 * so just assume it was a watchdog timer interrupt.
978 * This matches the old behaviour.
979 */
980 rc = 1;
981 }
982 }
983 done:
984 return rc;
985 }
986
987 int do_nmi_callback(struct pt_regs * regs, int cpu)
988 {
989 #ifdef CONFIG_SYSCTL
990 if (unknown_nmi_panic)
991 return unknown_nmi_panic_callback(regs, cpu);
992 #endif
993 return 0;
994 }
995
996 #ifdef CONFIG_SYSCTL
997
998 static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
999 {
1000 unsigned char reason = get_nmi_reason();
1001 char buf[64];
1002
1003 sprintf(buf, "NMI received for unknown reason %02x\n", reason);
1004 die_nmi(regs, buf);
1005 return 0;
1006 }
1007
1008 /*
1009 * proc handler for /proc/sys/kernel/nmi
1010 */
1011 int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
1012 void __user *buffer, size_t *length, loff_t *ppos)
1013 {
1014 int old_state;
1015
1016 nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
1017 old_state = nmi_watchdog_enabled;
1018 proc_dointvec(table, write, file, buffer, length, ppos);
1019 if (!!old_state == !!nmi_watchdog_enabled)
1020 return 0;
1021
1022 if (atomic_read(&nmi_active) < 0) {
1023 printk( KERN_WARNING "NMI watchdog is permanently disabled\n");
1024 return -EIO;
1025 }
1026
1027 if (nmi_watchdog == NMI_DEFAULT) {
1028 if (nmi_known_cpu() > 0)
1029 nmi_watchdog = NMI_LOCAL_APIC;
1030 else
1031 nmi_watchdog = NMI_IO_APIC;
1032 }
1033
1034 if (nmi_watchdog == NMI_LOCAL_APIC) {
1035 if (nmi_watchdog_enabled)
1036 enable_lapic_nmi_watchdog();
1037 else
1038 disable_lapic_nmi_watchdog();
1039 } else {
1040 printk( KERN_WARNING
1041 "NMI watchdog doesn't know what hardware to touch\n");
1042 return -EIO;
1043 }
1044 return 0;
1045 }
1046
1047 #endif
1048
1049 void __trigger_all_cpu_backtrace(void)
1050 {
1051 int i;
1052
1053 backtrace_mask = cpu_online_map;
1054 /* Wait for up to 10 seconds for all CPUs to do the backtrace */
1055 for (i = 0; i < 10 * 1000; i++) {
1056 if (cpus_empty(backtrace_mask))
1057 break;
1058 mdelay(1);
1059 }
1060 }
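/* Editor's sketch of the intended flow, as I read this patch: a caller
 * (e.g. the sysrq backtrace handler, via a trigger_all_cpu_backtrace()
 * wrapper) invokes the function above, which copies cpu_online_map into
 * backtrace_mask; each CPU's next watchdog NMI then finds its bit set in
 * nmi_watchdog_tick(), prints its stack under the spinlock, and clears
 * the bit, so the mdelay() loop exits once every CPU has reported or
 * after roughly 10 seconds. */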
1061
1062 EXPORT_SYMBOL(nmi_active);
1063 EXPORT_SYMBOL(nmi_watchdog);
1064 EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
1065 EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
1066 EXPORT_SYMBOL(reserve_perfctr_nmi);
1067 EXPORT_SYMBOL(release_perfctr_nmi);
1068 EXPORT_SYMBOL(reserve_evntsel_nmi);
1069 EXPORT_SYMBOL(release_evntsel_nmi);
1070 EXPORT_SYMBOL(disable_timer_nmi_watchdog);
1071 EXPORT_SYMBOL(enable_timer_nmi_watchdog);