x86, mce: add MSR read wrappers for easier error injection
arch/x86/kernel/cpu/mcheck/mce.c
/*
 * Machine check handler.
 *
 * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs.
 * Rest from unknown author(s).
 * 2004 Andi Kleen. Rewrote most of it.
 * Copyright 2008 Intel Corporation
 * Author: Andi Kleen
 */
#include <linux/thread_info.h>
#include <linux/capability.h>
#include <linux/miscdevice.h>
#include <linux/ratelimit.h>
#include <linux/kallsyms.h>
#include <linux/rcupdate.h>
#include <linux/smp_lock.h>
#include <linux/kobject.h>
#include <linux/kdebug.h>
#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/string.h>
#include <linux/sysdev.h>
#include <linux/ctype.h>
#include <linux/sched.h>
#include <linux/sysfs.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/poll.h>
#include <linux/cpu.h>
#include <linux/fs.h>

#include <asm/processor.h>
#include <asm/uaccess.h>
#include <asm/idle.h>
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/smp.h>

#include "mce.h"

/* Handle unconfigured int18 (should never happen) */
static void unexpected_machine_check(struct pt_regs *regs, long error_code)
{
	printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n",
	       smp_processor_id());
}

/* Call the installed machine check handler for this CPU setup. */
void (*machine_check_vector)(struct pt_regs *, long error_code) =
						unexpected_machine_check;

int mce_disabled;

#ifdef CONFIG_X86_NEW_MCE

#define MISC_MCELOG_MINOR	227

atomic_t mce_entry;

/*
 * Tolerant levels:
 *   0: always panic on uncorrected errors, log corrected errors
 *   1: panic or SIGBUS on uncorrected errors, log corrected errors
 *   2: SIGBUS or log uncorrected errors (if possible), log corrected errors
 *   3: never panic or SIGBUS, log all errors (for testing only)
 */
static int		tolerant = 1;
static int		banks;
static u64		*bank;
static unsigned long	notify_user;
static int		rip_msr;
static int		mce_bootlog = -1;
static atomic_t		mce_events;

static char		trigger[128];
static char		*trigger_argv[2] = { trigger, NULL };

static unsigned long	dont_init_banks;

static DECLARE_WAIT_QUEUE_HEAD(mce_wait);

/* MCA banks polled by the periodic polling timer for corrected events */
DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
	[0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
};

static inline int skip_bank_init(int i)
{
	return i < BITS_PER_LONG && test_bit(i, &dont_init_banks);
}

/* Do initial initialization of a struct mce */
void mce_setup(struct mce *m)
{
	memset(m, 0, sizeof(struct mce));
	m->cpu = smp_processor_id();
	rdtscll(m->tsc);
}

/*
 * Lockless MCE logging infrastructure.
 * This avoids deadlocks on printk locks without having to break locks. It
 * also keeps MCEs separate from kernel messages to avoid bogus bug reports.
 */

static struct mce_log mcelog = {
	MCE_LOG_SIGNATURE,
	MCE_LOG_LEN,
};

void mce_log(struct mce *mce)
{
	unsigned next, entry;

	atomic_inc(&mce_events);
	mce->finished = 0;
	wmb();
	for (;;) {
		entry = rcu_dereference(mcelog.next);
		for (;;) {
			/*
			 * When the buffer fills up discard new entries.
			 * Assume that the earlier errors are the more
			 * interesting ones:
			 */
			if (entry >= MCE_LOG_LEN) {
				set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags);
				return;
			}
			/* Old left over entry. Skip: */
			if (mcelog.entry[entry].finished) {
				entry++;
				continue;
			}
			break;
		}
		smp_rmb();
		next = entry + 1;
		if (cmpxchg(&mcelog.next, entry, next) == entry)
			break;
	}
	memcpy(mcelog.entry + entry, mce, sizeof(struct mce));
	wmb();
	mcelog.entry[entry].finished = 1;
	wmb();

	set_bit(0, &notify_user);
}

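/*
 * Illustrative note on the ordering above (the reader side lives in
 * mce_read() further below):
 *
 *	writer (mce_log)		reader (mce_read)
 *	------------------------	------------------------
 *	memcpy(entry, mce, ...);
 *	wmb();
 *	entry->finished = 1;		while (!entry->finished)
 *						cpu_relax();
 *					smp_rmb();
 *					copy_to_user(..., entry, ...);
 *
 * A reader that observes ->finished == 1 and then executes its read
 * barrier is guaranteed to see the fully copied record.
 */
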
static void print_mce(struct mce *m)
{
	printk(KERN_EMERG "\n"
	       KERN_EMERG "HARDWARE ERROR\n"
	       KERN_EMERG
	       "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
	       m->cpu, m->mcgstatus, m->bank, m->status);
	if (m->ip) {
		printk(KERN_EMERG "RIP%s %02x:<%016Lx> ",
		       !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
		       m->cs, m->ip);
		if (m->cs == __KERNEL_CS)
			print_symbol("{%s}", m->ip);
		printk("\n");
	}
	printk(KERN_EMERG "TSC %llx ", m->tsc);
	if (m->addr)
		printk("ADDR %llx ", m->addr);
	if (m->misc)
		printk("MISC %llx ", m->misc);
	printk("\n");
	printk(KERN_EMERG "This is not a software problem!\n");
	printk(KERN_EMERG "Run through mcelog --ascii to decode "
	       "and contact your hardware vendor\n");
}

static void mce_panic(char *msg, struct mce *backup, u64 start)
{
	int i;

	bust_spinlocks(1);
	console_verbose();
	for (i = 0; i < MCE_LOG_LEN; i++) {
		u64 tsc = mcelog.entry[i].tsc;

		if ((s64)(tsc - start) < 0)
			continue;
		print_mce(&mcelog.entry[i]);
		if (backup && mcelog.entry[i].tsc == backup->tsc)
			backup = NULL;
	}
	if (backup)
		print_mce(backup);
	panic(msg);
}

/* MSR access wrappers used for error injection */
static u64 mce_rdmsrl(u32 msr)
{
	u64 v;

	rdmsrl(msr, v);
	return v;
}

static void mce_wrmsrl(u32 msr, u64 v)
{
	wrmsrl(msr, v);
}

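/*
 * Illustrative sketch: routing every MSR access through the two
 * wrappers above gives an error-injection facility a single point to
 * interpose on. A hypothetical hook (the name is made up here for
 * illustration, not an existing kernel API) might look like:
 *
 *	static u64 (*rdmsrl_inject_hook)(u32 msr);
 *
 *	static u64 mce_rdmsrl(u32 msr)
 *	{
 *		u64 v;
 *
 *		if (rdmsrl_inject_hook)
 *			return rdmsrl_inject_hook(msr);
 *		rdmsrl(msr, v);
 *		return v;
 *	}
 *
 * letting a test module feed fake bank status values into
 * machine_check_poll() and do_machine_check() without touching the
 * real hardware MSRs.
 */
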
int mce_available(struct cpuinfo_x86 *c)
{
	if (mce_disabled)
		return 0;
	return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
}

static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
{
	if (regs && (m->mcgstatus & MCG_STATUS_RIPV)) {
		m->ip = regs->ip;
		m->cs = regs->cs;
	} else {
		m->ip = 0;
		m->cs = 0;
	}
	if (rip_msr) {
		/* Assume the RIP in the MSR is exact. Is this true? */
		m->mcgstatus |= MCG_STATUS_EIPV;
		m->ip = mce_rdmsrl(rip_msr);
		m->cs = 0;
	}
}

/*
 * Poll for corrected events or events that happened before reset.
 * Those are just logged through /dev/mcelog.
 *
 * This is executed in standard interrupt context.
 */
void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
{
	struct mce m;
	int i;

	mce_setup(&m);

	m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
	for (i = 0; i < banks; i++) {
		if (!bank[i] || !test_bit(i, *b))
			continue;

		m.misc = 0;
		m.addr = 0;
		m.bank = i;
		m.tsc = 0;

		barrier();
		m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4);
		if (!(m.status & MCI_STATUS_VAL))
			continue;

		/*
		 * Uncorrected events are handled by the exception handler
		 * when it is enabled. But when the exception is disabled log
		 * everything.
		 *
		 * TBD do the same check for MCI_STATUS_EN here?
		 */
		if ((m.status & MCI_STATUS_UC) && !(flags & MCP_UC))
			continue;

		if (m.status & MCI_STATUS_MISCV)
			m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4);
		if (m.status & MCI_STATUS_ADDRV)
			m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4);

		if (!(flags & MCP_TIMESTAMP))
			m.tsc = 0;
		/*
		 * Don't get the IP here because it's unlikely to
		 * have anything to do with the actual error location.
		 */
		if (!(flags & MCP_DONTLOG)) {
			mce_log(&m);
			add_taint(TAINT_MACHINE_CHECK);
		}

		/*
		 * Clear state for this bank.
		 */
		mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
	}

	/*
	 * Don't clear MCG_STATUS here because it's only defined for
	 * exceptions.
	 */
}

/*
 * The actual machine check handler. This only handles real
 * exceptions when something got corrupted coming in through int 18.
 *
 * This is executed in NMI context not subject to normal locking rules. This
 * implies that most kernel services cannot be safely used. Don't even
 * think about putting a printk in there!
 */
void do_machine_check(struct pt_regs *regs, long error_code)
{
	struct mce m, panicm;
	int panicm_found = 0;
	u64 mcestart = 0;
	int i;
	/*
	 * If no_way_out gets set, there is no safe way to recover from this
	 * MCE. If tolerant is cranked up, we'll try anyway.
	 */
	int no_way_out = 0;
	/*
	 * If kill_it gets set, there might be a way to recover from this
	 * error.
	 */
	int kill_it = 0;
	DECLARE_BITMAP(toclear, MAX_NR_BANKS);

	atomic_inc(&mce_entry);

	if (notify_die(DIE_NMI, "machine check", regs, error_code,
			   18, SIGKILL) == NOTIFY_STOP)
		goto out2;
	if (!banks)
		goto out2;

	mce_setup(&m);

	m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);

	/* if the restart IP is not valid, we're done for */
	if (!(m.mcgstatus & MCG_STATUS_RIPV))
		no_way_out = 1;

	rdtscll(mcestart);
	barrier();

	for (i = 0; i < banks; i++) {
		__clear_bit(i, toclear);
		if (!bank[i])
			continue;

		m.misc = 0;
		m.addr = 0;
		m.bank = i;

		m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4);
		if ((m.status & MCI_STATUS_VAL) == 0)
			continue;

		/*
		 * Non-uncorrected errors are handled by machine_check_poll().
		 * Leave them alone.
		 */
		if ((m.status & MCI_STATUS_UC) == 0)
			continue;

		/*
		 * Set taint even when machine check was not enabled.
		 */
		add_taint(TAINT_MACHINE_CHECK);

		__set_bit(i, toclear);

		if (m.status & MCI_STATUS_EN) {
			/* if PCC was set, there's no way out */
			no_way_out |= !!(m.status & MCI_STATUS_PCC);
			/*
			 * If this error was uncorrectable and there was
			 * an overflow, we're in trouble. If no overflow,
			 * we might get away with just killing a task.
			 */
			if (m.status & MCI_STATUS_UC) {
				if (tolerant < 1 || m.status & MCI_STATUS_OVER)
					no_way_out = 1;
				kill_it = 1;
			}
		} else {
			/*
			 * Machine check event was not enabled. Clear, but
			 * ignore.
			 */
			continue;
		}

		if (m.status & MCI_STATUS_MISCV)
			m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4);
		if (m.status & MCI_STATUS_ADDRV)
			m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4);

		mce_get_rip(&m, regs);
		mce_log(&m);

		/*
		 * Did this bank cause the exception?
		 *
		 * Assume that the bank with uncorrectable errors did it,
		 * and that there is only a single one:
		 */
		if ((m.status & MCI_STATUS_UC) &&
					(m.status & MCI_STATUS_EN)) {
			panicm = m;
			panicm_found = 1;
		}
	}

	/*
	 * If we didn't find an uncorrectable error, pick
	 * the last one (shouldn't happen, just being safe).
	 */
	if (!panicm_found)
		panicm = m;

	/*
	 * If we have decided that we just CAN'T continue, and the user
	 * has not set tolerant to an insane level, give up and die.
	 */
	if (no_way_out && tolerant < 3)
		mce_panic("Machine check", &panicm, mcestart);

	/*
	 * If the error seems to be unrecoverable, something should be
	 * done. Try to kill as little as possible. If we can kill just
	 * one task, do that. If the user has set the tolerance very
	 * high, don't try to do anything at all.
	 */
	if (kill_it && tolerant < 3) {
		int user_space = 0;

		/*
		 * If the EIPV bit is set, it means the saved IP is the
		 * instruction which caused the MCE.
		 */
		if (m.mcgstatus & MCG_STATUS_EIPV)
			user_space = panicm.ip && (panicm.cs & 3);

		/*
		 * If we know that the error was in user space, send a
		 * SIGBUS. Otherwise, panic if tolerance is low.
		 *
		 * force_sig() takes an awful lot of locks and has a slight
		 * risk of deadlocking.
		 */
		if (user_space) {
			force_sig(SIGBUS, current);
		} else if (panic_on_oops || tolerant < 2) {
			mce_panic("Uncorrected machine check",
				&panicm, mcestart);
		}
	}

	/* notify userspace ASAP */
	set_thread_flag(TIF_MCE_NOTIFY);

	/* the last thing we do is clear state */
	for (i = 0; i < banks; i++) {
		if (test_bit(i, toclear))
			mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
	}
	mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
 out2:
	atomic_dec(&mce_entry);
}

#ifdef CONFIG_X86_MCE_INTEL
/**
 * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog
 * @status: Event status information
 *
 * This function should be called by the thermal interrupt after the
 * event has been processed and the decision was made to log the event
 * further.
 *
 * The status parameter will be saved to the 'status' field of 'struct mce'
 * and historically has been the register value of the
 * MSR_IA32_THERMAL_STATUS (Intel) msr.
 */
void mce_log_therm_throt_event(__u64 status)
{
	struct mce m;

	mce_setup(&m);
	m.bank = MCE_THERMAL_BANK;
	m.status = status;
	mce_log(&m);
}
#endif /* CONFIG_X86_MCE_INTEL */

/*
 * Periodic polling timer for "silent" machine check errors. If the
 * poller finds an MCE, poll 2x faster. When the poller finds no more
 * errors, poll 2x slower (up to check_interval seconds).
 */
static int check_interval = 5 * 60; /* 5 minutes */

static DEFINE_PER_CPU(int, next_interval); /* in jiffies */
static DEFINE_PER_CPU(struct timer_list, mce_timer);

static void mcheck_timer(unsigned long data)
{
	struct timer_list *t = &per_cpu(mce_timer, data);
	int *n;

	WARN_ON(smp_processor_id() != data);

	if (mce_available(&current_cpu_data)) {
		machine_check_poll(MCP_TIMESTAMP,
				&__get_cpu_var(mce_poll_banks));
	}

	/*
	 * Alert userspace if needed. If we logged an MCE, reduce the
	 * polling interval, otherwise increase the polling interval.
	 */
	n = &__get_cpu_var(next_interval);
	if (mce_notify_user()) {
		*n = max(*n/2, HZ/100);
	} else {
		*n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ));
	}

	t->expires = jiffies + *n;
	add_timer(t);
}

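/*
 * Worked example for the adaptive interval above, assuming HZ=1000 and
 * the default check_interval of 5*60 seconds: polling starts at 300000
 * jiffies (see mce_init_timer() below); every poll that logs an event
 * halves the interval, down to the HZ/100 = 10 jiffy (10ms) floor, and
 * every quiet poll doubles it again, up to the
 * round_jiffies_relative(check_interval*HZ) ceiling.
 */
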
static void mce_do_trigger(struct work_struct *work)
{
	call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT);
}

static DECLARE_WORK(mce_trigger_work, mce_do_trigger);

/*
 * Notify the user(s) about new machine check events.
 * Can be called from interrupt context, but not from machine check/NMI
 * context.
 */
int mce_notify_user(void)
{
	/* Not more than two messages every minute */
	static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);

	clear_thread_flag(TIF_MCE_NOTIFY);

	if (test_and_clear_bit(0, &notify_user)) {
		wake_up_interruptible(&mce_wait);

		/*
		 * There is no risk of missing notifications because
		 * work_pending is always cleared before the function is
		 * executed.
		 */
		if (trigger[0] && !work_pending(&mce_trigger_work))
			schedule_work(&mce_trigger_work);

		if (__ratelimit(&ratelimit))
			printk(KERN_INFO "Machine check events logged\n");

		return 1;
	}
	return 0;
}

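/*
 * Usage sketch (how a consumer such as mcelog is typically expected to
 * use this, stated here for illustration): it either blocks in poll()
 * on /dev/mcelog, is woken through mce_wait above, and read()s the new
 * records, or it registers a helper program in the sysfs "trigger"
 * attribute, which mce_do_trigger() runs via call_usermodehelper()
 * when new events arrive.
 */
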
/*
 * Initialize Machine Checks for a CPU.
 */
static int mce_cap_init(void)
{
	unsigned b;
	u64 cap;

	rdmsrl(MSR_IA32_MCG_CAP, cap);

	b = cap & MCG_BANKCNT_MASK;
	printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b);

	if (b > MAX_NR_BANKS) {
		printk(KERN_WARNING
		       "MCE: Using only %u machine check banks out of %u\n",
		       MAX_NR_BANKS, b);
		b = MAX_NR_BANKS;
	}

	/* Don't support asymmetric configurations today */
	WARN_ON(banks != 0 && b != banks);
	banks = b;
	if (!bank) {
		bank = kmalloc(banks * sizeof(u64), GFP_KERNEL);
		if (!bank)
			return -ENOMEM;
		memset(bank, 0xff, banks * sizeof(u64));
	}

	/* Use accurate RIP reporting if available. */
	if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9)
		rip_msr = MSR_IA32_MCG_EIP;

	return 0;
}

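/*
 * MCG_CAP layout relied on above, per the Intel SDM (summarized here
 * for reference): bits 7:0 hold the bank count (MCG_BANKCNT_MASK),
 * bit 8 (MCG_CTL_P) advertises the global MCG_CTL register, and when
 * bit 9 (MCG_EXT_P) is set, bits 23:16 (MCG_EXT_CNT) give the number
 * of extended state registers; a count of at least 9 implies
 * MSR_IA32_MCG_EIP exists for accurate RIP reporting.
 */
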
static void mce_init(void *dummy)
{
	mce_banks_t all_banks;
	u64 cap;
	int i;

	/*
	 * Log the machine checks left over from the previous reset.
	 */
	bitmap_fill(all_banks, MAX_NR_BANKS);
	machine_check_poll(MCP_UC|(!mce_bootlog ? MCP_DONTLOG : 0), &all_banks);

	set_in_cr4(X86_CR4_MCE);

	rdmsrl(MSR_IA32_MCG_CAP, cap);
	if (cap & MCG_CTL_P)
		wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);

	for (i = 0; i < banks; i++) {
		if (skip_bank_init(i))
			continue;
		wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
		wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
	}
}

/* Add per CPU specific workarounds here */
static void mce_cpu_quirks(struct cpuinfo_x86 *c)
{
	/* This should be disabled by the BIOS, but isn't always */
	if (c->x86_vendor == X86_VENDOR_AMD) {
		if (c->x86 == 15 && banks > 4) {
			/*
			 * disable GART TBL walk error reporting, which
			 * trips off incorrectly with the IOMMU & 3ware
			 * & Cerberus:
			 */
			clear_bit(10, (unsigned long *)&bank[4]);
		}
		if (c->x86 <= 17 && mce_bootlog < 0) {
			/*
			 * Lots of broken BIOS around that don't clear them
			 * by default and leave crap in there. Don't log:
			 */
			mce_bootlog = 0;
		}
		/*
		 * Various K7s with broken bank 0 around. Always disable
		 * by default.
		 */
		if (c->x86 == 6)
			bank[0] = 0;
	}

	if (c->x86_vendor == X86_VENDOR_INTEL) {
		/*
		 * SDM documents that on family 6 bank 0 should not be written
		 * because it aliases to another special BIOS controlled
		 * register.
		 * But it's not aliased anymore on model 0x1a+
		 * Don't ignore bank 0 completely because there could be a
		 * valid event later, merely don't write CTL0.
		 */

		if (c->x86 == 6 && c->x86_model < 0x1A)
			__set_bit(0, &dont_init_banks);
	}
}

static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c)
{
	if (c->x86 != 5)
		return;
	switch (c->x86_vendor) {
	case X86_VENDOR_INTEL:
		if (mce_p5_enabled())
			intel_p5_mcheck_init(c);
		break;
	case X86_VENDOR_CENTAUR:
		winchip_mcheck_init(c);
		break;
	}
}

static void mce_cpu_features(struct cpuinfo_x86 *c)
{
	switch (c->x86_vendor) {
	case X86_VENDOR_INTEL:
		mce_intel_feature_init(c);
		break;
	case X86_VENDOR_AMD:
		mce_amd_feature_init(c);
		break;
	default:
		break;
	}
}

static void mce_init_timer(void)
{
	struct timer_list *t = &__get_cpu_var(mce_timer);
	int *n = &__get_cpu_var(next_interval);

	*n = check_interval * HZ;
	if (!*n)
		return;
	setup_timer(t, mcheck_timer, smp_processor_id());
	t->expires = round_jiffies(jiffies + *n);
	add_timer(t);
}

/*
 * Called for each booted CPU to set up machine checks.
 * Must be called with preempt off:
 */
void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
{
	if (mce_disabled)
		return;

	mce_ancient_init(c);

	if (!mce_available(c))
		return;

	if (mce_cap_init() < 0) {
		mce_disabled = 1;
		return;
	}
	mce_cpu_quirks(c);

	machine_check_vector = do_machine_check;

	mce_init(NULL);
	mce_cpu_features(c);
	mce_init_timer();
}

/*
 * Character device to read and clear the MCE log.
 */

static DEFINE_SPINLOCK(mce_state_lock);
static int	open_count;	/* #times opened */
static int	open_exclu;	/* already open exclusive? */

static int mce_open(struct inode *inode, struct file *file)
{
	lock_kernel();
	spin_lock(&mce_state_lock);

	if (open_exclu || (open_count && (file->f_flags & O_EXCL))) {
		spin_unlock(&mce_state_lock);
		unlock_kernel();

		return -EBUSY;
	}

	if (file->f_flags & O_EXCL)
		open_exclu = 1;
	open_count++;

	spin_unlock(&mce_state_lock);
	unlock_kernel();

	return nonseekable_open(inode, file);
}

static int mce_release(struct inode *inode, struct file *file)
{
	spin_lock(&mce_state_lock);

	open_count--;
	open_exclu = 0;

	spin_unlock(&mce_state_lock);

	return 0;
}

static void collect_tscs(void *data)
{
	unsigned long *cpu_tsc = (unsigned long *)data;

	rdtscll(cpu_tsc[smp_processor_id()]);
}

static DEFINE_MUTEX(mce_read_mutex);

static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
			loff_t *off)
{
	char __user *buf = ubuf;
	unsigned long *cpu_tsc;
	unsigned prev, next;
	int i, err;

	cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL);
	if (!cpu_tsc)
		return -ENOMEM;

	mutex_lock(&mce_read_mutex);
	next = rcu_dereference(mcelog.next);

	/* Only supports full reads right now */
	if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) {
		mutex_unlock(&mce_read_mutex);
		kfree(cpu_tsc);

		return -EINVAL;
	}

	err = 0;
	prev = 0;
	do {
		for (i = prev; i < next; i++) {
			unsigned long start = jiffies;

			while (!mcelog.entry[i].finished) {
				if (time_after_eq(jiffies, start + 2)) {
					memset(mcelog.entry + i, 0,
					       sizeof(struct mce));
					goto timeout;
				}
				cpu_relax();
			}
			smp_rmb();
			err |= copy_to_user(buf, mcelog.entry + i,
					    sizeof(struct mce));
			buf += sizeof(struct mce);
timeout:
			;
		}

		memset(mcelog.entry + prev, 0,
		       (next - prev) * sizeof(struct mce));
		prev = next;
		next = cmpxchg(&mcelog.next, prev, 0);
	} while (next != prev);

	synchronize_sched();

	/*
	 * Collect entries that were still getting written before the
	 * synchronize.
	 */
	on_each_cpu(collect_tscs, cpu_tsc, 1);

	for (i = next; i < MCE_LOG_LEN; i++) {
		if (mcelog.entry[i].finished &&
		    mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) {
			err |= copy_to_user(buf, mcelog.entry+i,
					    sizeof(struct mce));
			smp_rmb();
			buf += sizeof(struct mce);
			memset(&mcelog.entry[i], 0, sizeof(struct mce));
		}
	}
	mutex_unlock(&mce_read_mutex);
	kfree(cpu_tsc);

	return err ? -EFAULT : buf - ubuf;
}

static unsigned int mce_poll(struct file *file, poll_table *wait)
{
	poll_wait(file, &mce_wait, wait);
	if (rcu_dereference(mcelog.next))
		return POLLIN | POLLRDNORM;
	return 0;
}

static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
{
	int __user *p = (int __user *)arg;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	switch (cmd) {
	case MCE_GET_RECORD_LEN:
		return put_user(sizeof(struct mce), p);
	case MCE_GET_LOG_LEN:
		return put_user(MCE_LOG_LEN, p);
	case MCE_GETCLEAR_FLAGS: {
		unsigned flags;

		do {
			flags = mcelog.flags;
		} while (cmpxchg(&mcelog.flags, flags, 0) != flags);

		return put_user(flags, p);
	}
	default:
		return -ENOTTY;
	}
}

static const struct file_operations mce_chrdev_ops = {
	.open			= mce_open,
	.release		= mce_release,
	.read			= mce_read,
	.poll			= mce_poll,
	.unlocked_ioctl		= mce_ioctl,
};

static struct miscdevice mce_log_device = {
	MISC_MCELOG_MINOR,
	"mcelog",
	&mce_chrdev_ops,
};

/*
 * mce=off disables machine check
 * mce=TOLERANCELEVEL (number, see above)
 * mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
 * mce=nobootlog Don't log MCEs from before booting.
 */
static int __init mcheck_enable(char *str)
{
	if (*str == 0)
		enable_p5_mce();
	if (*str == '=')
		str++;
	if (!strcmp(str, "off"))
		mce_disabled = 1;
	else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
		mce_bootlog = (str[0] == 'b');
	else if (isdigit(str[0]))
		get_option(&str, &tolerant);
	else {
		printk(KERN_INFO "mce argument %s ignored. Please use /sys\n",
		       str);
		return 0;
	}
	return 1;
}
__setup("mce", mcheck_enable);

/*
 * Sysfs support
 */

/*
 * Disable machine checks on suspend and shutdown. We can't really handle
 * them later.
 */
static int mce_disable(void)
{
	int i;

	for (i = 0; i < banks; i++) {
		if (!skip_bank_init(i))
			wrmsrl(MSR_IA32_MC0_CTL + i*4, 0);
	}
	return 0;
}

static int mce_suspend(struct sys_device *dev, pm_message_t state)
{
	return mce_disable();
}

static int mce_shutdown(struct sys_device *dev)
{
	return mce_disable();
}

/*
 * On resume clear all MCE state. Don't want to see leftovers from the BIOS.
 * Only one CPU is active at this time, the others get re-added later using
 * CPU hotplug:
 */
static int mce_resume(struct sys_device *dev)
{
	mce_init(NULL);
	mce_cpu_features(&current_cpu_data);

	return 0;
}

static void mce_cpu_restart(void *data)
{
	del_timer_sync(&__get_cpu_var(mce_timer));
	if (mce_available(&current_cpu_data))
		mce_init(NULL);
	mce_init_timer();
}

/* Reinit MCEs after user configuration changes */
static void mce_restart(void)
{
	on_each_cpu(mce_cpu_restart, NULL, 1);
}

static struct sysdev_class mce_sysclass = {
	.suspend	= mce_suspend,
	.shutdown	= mce_shutdown,
	.resume		= mce_resume,
	.name		= "machinecheck",
};

DEFINE_PER_CPU(struct sys_device, mce_dev);

__cpuinitdata
void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);

/* Why are there no generic functions for this? */
#define ACCESSOR(name, var, start) \
	static ssize_t show_ ## name(struct sys_device *s,		\
				     struct sysdev_attribute *attr,	\
				     char *buf) {			\
		return sprintf(buf, "%Lx\n", (u64)var);			\
	}								\
	static ssize_t set_ ## name(struct sys_device *s,		\
				    struct sysdev_attribute *attr,	\
				    const char *buf, size_t siz) {	\
		char *end;						\
		u64 new = simple_strtoull(buf, &end, 0);		\
									\
		if (end == buf)						\
			return -EINVAL;					\
		var = new;						\
		start;							\
									\
		return end-buf;						\
	}								\
	static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name);

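/*
 * For illustration: the ACCESSOR(check_interval, check_interval,
 * mce_restart()) instance further below expands to a
 * show_check_interval()/set_check_interval() pair plus a 0644
 * "check_interval" sysdev attribute; a write parses the new value with
 * simple_strtoull() and then runs mce_restart() to reprogram the
 * per-CPU polling timers.
 */
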
static struct sysdev_attribute *bank_attrs;

static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr,
			 char *buf)
{
	u64 b = bank[attr - bank_attrs];

	return sprintf(buf, "%llx\n", b);
}

static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
			const char *buf, size_t siz)
{
	char *end;
	u64 new = simple_strtoull(buf, &end, 0);

	if (end == buf)
		return -EINVAL;

	bank[attr - bank_attrs] = new;
	mce_restart();

	return end-buf;
}

static ssize_t
show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf)
{
	strcpy(buf, trigger);
	strcat(buf, "\n");
	return strlen(trigger) + 1;
}

static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
			   const char *buf, size_t siz)
{
	char *p;
	int len;

	strncpy(trigger, buf, sizeof(trigger));
	trigger[sizeof(trigger)-1] = 0;
	len = strlen(trigger);
	p = strchr(trigger, '\n');

	/* strchr() returns NULL when there is no newline to strip */
	if (p)
		*p = 0;

	return len;
}

static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger);
static SYSDEV_INT_ATTR(tolerant, 0644, tolerant);

ACCESSOR(check_interval, check_interval, mce_restart())

static struct sysdev_attribute *mce_attrs[] = {
	&attr_tolerant.attr, &attr_check_interval, &attr_trigger,
	NULL
};

static cpumask_var_t mce_dev_initialized;

/* Per cpu sysdev init. All of the cpus still share the same ctrl bank: */
static __cpuinit int mce_create_device(unsigned int cpu)
{
	int err;
	int i;

	if (!mce_available(&boot_cpu_data))
		return -EIO;

	memset(&per_cpu(mce_dev, cpu).kobj, 0, sizeof(struct kobject));
	per_cpu(mce_dev, cpu).id	= cpu;
	per_cpu(mce_dev, cpu).cls	= &mce_sysclass;

	err = sysdev_register(&per_cpu(mce_dev, cpu));
	if (err)
		return err;

	for (i = 0; mce_attrs[i]; i++) {
		err = sysdev_create_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
		if (err)
			goto error;
	}
	for (i = 0; i < banks; i++) {
		err = sysdev_create_file(&per_cpu(mce_dev, cpu),
					 &bank_attrs[i]);
		if (err)
			goto error2;
	}
	cpumask_set_cpu(cpu, mce_dev_initialized);

	return 0;
error2:
	while (--i >= 0)
		sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]);
error:
	while (--i >= 0)
		sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);

	sysdev_unregister(&per_cpu(mce_dev, cpu));

	return err;
}

static __cpuinit void mce_remove_device(unsigned int cpu)
{
	int i;

	if (!cpumask_test_cpu(cpu, mce_dev_initialized))
		return;

	for (i = 0; mce_attrs[i]; i++)
		sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);

	for (i = 0; i < banks; i++)
		sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]);

	sysdev_unregister(&per_cpu(mce_dev, cpu));
	cpumask_clear_cpu(cpu, mce_dev_initialized);
}

/* Make sure there are no machine checks on offlined CPUs. */
static void mce_disable_cpu(void *h)
{
	unsigned long action = *(unsigned long *)h;
	int i;

	if (!mce_available(&current_cpu_data))
		return;
	if (!(action & CPU_TASKS_FROZEN))
		cmci_clear();
	for (i = 0; i < banks; i++) {
		if (!skip_bank_init(i))
			wrmsrl(MSR_IA32_MC0_CTL + i*4, 0);
	}
}

static void mce_reenable_cpu(void *h)
{
	unsigned long action = *(unsigned long *)h;
	int i;

	if (!mce_available(&current_cpu_data))
		return;

	if (!(action & CPU_TASKS_FROZEN))
		cmci_reenable();
	for (i = 0; i < banks; i++) {
		if (!skip_bank_init(i))
			wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]);
	}
}

/* Get notified when a cpu comes on/off. Be hotplug friendly. */
static int __cpuinit
mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;
	struct timer_list *t = &per_cpu(mce_timer, cpu);

	switch (action) {
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		mce_create_device(cpu);
		if (threshold_cpu_callback)
			threshold_cpu_callback(action, cpu);
		break;
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		if (threshold_cpu_callback)
			threshold_cpu_callback(action, cpu);
		mce_remove_device(cpu);
		break;
	case CPU_DOWN_PREPARE:
	case CPU_DOWN_PREPARE_FROZEN:
		del_timer_sync(t);
		smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
		break;
	case CPU_DOWN_FAILED:
	case CPU_DOWN_FAILED_FROZEN:
		t->expires = round_jiffies(jiffies +
						__get_cpu_var(next_interval));
		add_timer_on(t, cpu);
		smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
		break;
	case CPU_POST_DEAD:
		/* intentionally ignoring frozen here */
		cmci_rediscover(cpu);
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block mce_cpu_notifier __cpuinitdata = {
	.notifier_call = mce_cpu_callback,
};

static __init int mce_init_banks(void)
{
	int i;

	bank_attrs = kzalloc(sizeof(struct sysdev_attribute) * banks,
			     GFP_KERNEL);
	if (!bank_attrs)
		return -ENOMEM;

	for (i = 0; i < banks; i++) {
		struct sysdev_attribute *a = &bank_attrs[i];

		a->attr.name	= kasprintf(GFP_KERNEL, "bank%d", i);
		if (!a->attr.name)
			goto nomem;

		a->attr.mode	= 0644;
		a->show		= show_bank;
		a->store	= set_bank;
	}
	return 0;

nomem:
	while (--i >= 0)
		kfree(bank_attrs[i].attr.name);
	kfree(bank_attrs);
	bank_attrs = NULL;

	return -ENOMEM;
}

static __init int mce_init_device(void)
{
	int err;
	int i = 0;

	if (!mce_available(&boot_cpu_data))
		return -EIO;

	alloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL);

	err = mce_init_banks();
	if (err)
		return err;

	err = sysdev_class_register(&mce_sysclass);
	if (err)
		return err;

	for_each_online_cpu(i) {
		err = mce_create_device(i);
		if (err)
			return err;
	}

	register_hotcpu_notifier(&mce_cpu_notifier);
	misc_register(&mce_log_device);

	return err;
}

device_initcall(mce_init_device);

#else /* CONFIG_X86_OLD_MCE: */

int nr_mce_banks;
EXPORT_SYMBOL_GPL(nr_mce_banks);	/* non-fatal.o */

/* This has to be run for each processor */
void mcheck_init(struct cpuinfo_x86 *c)
{
	if (mce_disabled == 1)
		return;

	switch (c->x86_vendor) {
	case X86_VENDOR_AMD:
		amd_mcheck_init(c);
		break;

	case X86_VENDOR_INTEL:
		if (c->x86 == 5)
			intel_p5_mcheck_init(c);
		if (c->x86 == 6)
			intel_p6_mcheck_init(c);
		if (c->x86 == 15)
			intel_p4_mcheck_init(c);
		break;

	case X86_VENDOR_CENTAUR:
		if (c->x86 == 5)
			winchip_mcheck_init(c);
		break;

	default:
		break;
	}
	printk(KERN_INFO "mce: CPU supports %d MCE banks\n", nr_mce_banks);
}

static int __init mcheck_enable(char *str)
{
	mce_disabled = -1;
	return 1;
}

__setup("mce", mcheck_enable);

#endif /* CONFIG_X86_OLD_MCE */

/*
 * Old style boot options parsing. Only for compatibility.
 */
static int __init mcheck_disable(char *str)
{
	mce_disabled = 1;
	return 1;
}
__setup("nomce", mcheck_disable);