x86_64: O_EXCL on /dev/mcelog
arch/x86_64/kernel/mce.c
/*
 * Machine check handler.
 * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs.
 * Rest from unknown author(s).
 * 2004 Andi Kleen. Rewrote most of it.
 */

#include <linux/init.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/rcupdate.h>
#include <linux/kallsyms.h>
#include <linux/sysdev.h>
#include <linux/miscdevice.h>
#include <linux/fs.h>
#include <linux/capability.h>
#include <linux/cpu.h>
#include <linux/percpu.h>
#include <linux/ctype.h>
#include <linux/kmod.h>
#include <linux/kdebug.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/mce.h>
#include <asm/uaccess.h>
#include <asm/smp.h>

#define MISC_MCELOG_MINOR 227
#define NR_BANKS 6

atomic_t mce_entry;

static int mce_dont_init;

/* 0: always panic, 1: panic if deadlock possible, 2: try to avoid panic,
   3: never panic or exit (for testing only) */
static int tolerant = 1;
static int banks;
static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL };
static unsigned long console_logged;
static int notify_user;
static int rip_msr;
static int mce_bootlog = 1;
static atomic_t mce_events;

static char trigger[128];
static char *trigger_argv[2] = { trigger, NULL };

/*
 * Lockless MCE logging infrastructure.
 * This avoids deadlocks on printk locks without having to break locks. Also
 * separate MCEs from kernel messages to avoid bogus bug reports.
 */

struct mce_log mcelog = {
	MCE_LOG_SIGNATURE,
	MCE_LOG_LEN,
};

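/*
 * Sketch of the protocol, as implemented by mce_log() below and
 * mce_read() further down (a descriptive summary, not a new mechanism):
 * writers reserve a slot by advancing mcelog.next with cmpxchg, skipping
 * slots whose ->finished is still set from an earlier pass; the record
 * is copied in and ->finished is set last, fenced with wmb(), so a
 * reader that observes ->finished == 1 sees a fully written entry.
 */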
void mce_log(struct mce *mce)
{
	unsigned next, entry;
	atomic_inc(&mce_events);
	mce->finished = 0;
	wmb();
	for (;;) {
		entry = rcu_dereference(mcelog.next);
		/* The rmb forces the compiler to reload next in each
		   iteration */
		rmb();
		for (;;) {
			/* When the buffer fills up discard new entries. Assume
			   that the earlier errors are the more interesting. */
			if (entry >= MCE_LOG_LEN) {
				set_bit(MCE_OVERFLOW, &mcelog.flags);
				return;
			}
			/* Old left over entry. Skip. */
			if (mcelog.entry[entry].finished) {
				entry++;
				continue;
			}
			break;
		}
		smp_rmb();
		next = entry + 1;
		if (cmpxchg(&mcelog.next, entry, next) == entry)
			break;
	}
	memcpy(mcelog.entry + entry, mce, sizeof(struct mce));
	wmb();
	mcelog.entry[entry].finished = 1;
	wmb();

	if (!test_and_set_bit(0, &console_logged))
		notify_user = 1;
}

static void print_mce(struct mce *m)
{
	printk(KERN_EMERG "\n"
	       KERN_EMERG "HARDWARE ERROR\n"
	       KERN_EMERG
	       "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
	       m->cpu, m->mcgstatus, m->bank, m->status);
	if (m->rip) {
		printk(KERN_EMERG
		       "RIP%s %02x:<%016Lx> ",
		       !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
		       m->cs, m->rip);
		if (m->cs == __KERNEL_CS)
			print_symbol("{%s}", m->rip);
		printk("\n");
	}
	printk(KERN_EMERG "TSC %Lx ", m->tsc);
	if (m->addr)
		printk("ADDR %Lx ", m->addr);
	if (m->misc)
		printk("MISC %Lx ", m->misc);
	printk("\n");
	printk(KERN_EMERG "This is not a software problem!\n");
	printk(KERN_EMERG
	       "Run through mcelog --ascii to decode and contact your hardware vendor\n");
}

static void mce_panic(char *msg, struct mce *backup, unsigned long start)
{
	int i;
	oops_begin();
	for (i = 0; i < MCE_LOG_LEN; i++) {
		unsigned long tsc = mcelog.entry[i].tsc;
		if (time_before(tsc, start))
			continue;
		print_mce(&mcelog.entry[i]);
		if (backup && mcelog.entry[i].tsc == backup->tsc)
			backup = NULL;
	}
	if (backup)
		print_mce(backup);
	if (tolerant >= 3)
		printk("Fake panic: %s\n", msg);
	else
		panic(msg);
}

static int mce_available(struct cpuinfo_x86 *c)
{
	return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
}

static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
{
	if (regs && (m->mcgstatus & MCG_STATUS_RIPV)) {
		m->rip = regs->rip;
		m->cs = regs->cs;
	} else {
		m->rip = 0;
		m->cs = 0;
	}
	if (rip_msr) {
		/* Assume the RIP in the MSR is exact. Is this true? */
		m->mcgstatus |= MCG_STATUS_EIPV;
		rdmsrl(rip_msr, m->rip);
		m->cs = 0;
	}
}

static void do_mce_trigger(void)
{
	static atomic_t mce_logged;
	int events = atomic_read(&mce_events);
	if (events != atomic_read(&mce_logged) && trigger[0]) {
		/* Small race window, but should be harmless. */
		atomic_set(&mce_logged, events);
		call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT);
	}
}

/*
 * The actual machine check handler
 */

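/*
 * Calling convention, as inferred from the callers in this file:
 * regs != NULL means a real #MC exception; regs == NULL means the
 * polling timer or the boot-time scan in mce_init(). error_code == -1
 * requests logging of leftover boot errors, error_code == -2 clears
 * them silently; negative codes also skip the TSC timestamp below.
 */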
void do_machine_check(struct pt_regs * regs, long error_code)
{
	struct mce m, panicm;
	int nowayout = (tolerant < 1);
	int kill_it = 0;
	u64 mcestart = 0;
	int i;
	int panicm_found = 0;

	atomic_inc(&mce_entry);

	if (regs)
		notify_die(DIE_NMI, "machine check", regs, error_code, 18, SIGKILL);
	if (!banks)
		goto out2;

	memset(&m, 0, sizeof(struct mce));
	m.cpu = smp_processor_id();
	rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
	if (!(m.mcgstatus & MCG_STATUS_RIPV))
		kill_it = 1;

	rdtscll(mcestart);
	barrier();

	for (i = 0; i < banks; i++) {
		if (!bank[i])
			continue;

		m.misc = 0;
		m.addr = 0;
		m.bank = i;
		m.tsc = 0;

		rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status);
		if ((m.status & MCI_STATUS_VAL) == 0)
			continue;

		if (m.status & MCI_STATUS_EN) {
			/* In theory _OVER could be a nowayout too, but
			   assume any overflowed errors were not fatal. */
			nowayout |= !!(m.status & MCI_STATUS_PCC);
			kill_it |= !!(m.status & MCI_STATUS_UC);
		}

		if (m.status & MCI_STATUS_MISCV)
			rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc);
		if (m.status & MCI_STATUS_ADDRV)
			rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr);

		mce_get_rip(&m, regs);
		if (error_code >= 0)
			rdtscll(m.tsc);
		wrmsrl(MSR_IA32_MC0_STATUS + i*4, 0);
		if (error_code != -2)
			mce_log(&m);

		/* Did this bank cause the exception? */
		/* Assume that the bank with uncorrectable errors did it,
		   and that there is only a single one. */
		if ((m.status & MCI_STATUS_UC) && (m.status & MCI_STATUS_EN)) {
			panicm = m;
			panicm_found = 1;
		}

		add_taint(TAINT_MACHINE_CHECK);
	}

	/* Never do anything final in the polling timer */
	if (!regs) {
		/* Normal interrupt context here. Call trigger for any new
		   events. */
		do_mce_trigger();
		goto out;
	}

	/* If we didn't find an uncorrectable error, pick
	   the last one (shouldn't happen, just being safe). */
	if (!panicm_found)
		panicm = m;
	if (nowayout)
		mce_panic("Machine check", &panicm, mcestart);
	if (kill_it) {
		int user_space = 0;

		if (m.mcgstatus & MCG_STATUS_RIPV)
			user_space = panicm.rip && (panicm.cs & 3);

		/* When the machine was in user space and the CPU didn't get
		   confused it's normally not necessary to panic, unless you
		   are paranoid (tolerant == 0)

		   RED-PEN could be more tolerant for MCEs in idle,
		   but most likely they occur at boot anyways, where
		   it is best to just halt the machine. */
		if ((!user_space && (panic_on_oops || tolerant < 2)) ||
		    (unsigned)current->pid <= 1)
			mce_panic("Uncorrected machine check", &panicm, mcestart);

		/* do_exit takes an awful lot of locks and has a
		   slight risk of deadlocking. If you don't want that
		   don't set tolerant >= 2 */
		if (tolerant < 3)
			do_exit(SIGBUS);
	}

 out:
	/* Last thing done in the machine check exception to clear state. */
	wrmsrl(MSR_IA32_MCG_STATUS, 0);
 out2:
	atomic_dec(&mce_entry);
}

#ifdef CONFIG_X86_MCE_INTEL
/**
 * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog
 * @cpu: The CPU on which the event occurred.
 * @status: Event status information
 *
 * This function should be called by the thermal interrupt after the
 * event has been processed and the decision was made to log the event
 * further.
 *
 * The status parameter will be saved to the 'status' field of 'struct mce'
 * and historically has been the register value of the
 * MSR_IA32_THERMAL_STATUS (Intel) msr.
 */
void mce_log_therm_throt_event(unsigned int cpu, __u64 status)
{
	struct mce m;

	memset(&m, 0, sizeof(m));
	m.cpu = cpu;
	m.bank = MCE_THERMAL_BANK;
	m.status = status;
	rdtscll(m.tsc);
	mce_log(&m);
}
#endif /* CONFIG_X86_MCE_INTEL */

/*
 * Periodic polling timer for "silent" machine check errors. If the
 * poller finds an MCE, poll 2x faster. When the poller finds no more
 * errors, poll 2x slower (up to check_interval seconds).
 */

static int check_interval = 5 * 60; /* 5 minutes */
static int next_interval; /* in jiffies */
static void mcheck_timer(struct work_struct *work);
static DECLARE_DELAYED_WORK(mcheck_work, mcheck_timer);

static void mcheck_check_cpu(void *info)
{
	if (mce_available(&current_cpu_data))
		do_machine_check(NULL, 0);
}

static void mcheck_timer(struct work_struct *work)
{
	on_each_cpu(mcheck_check_cpu, NULL, 1, 1);

	/*
	 * It's ok to read stale data here for notify_user and
	 * console_logged as we'll simply get the updated versions
	 * on the next mcheck_timer execution and atomic operations
	 * on console_logged act as synchronization for notify_user
	 * writes.
	 */
	if (notify_user && console_logged) {
		static unsigned long last_print;
		unsigned long now = jiffies;

		/* if we logged an MCE, reduce the polling interval */
		next_interval = max(next_interval/2, HZ/100);
		notify_user = 0;
		clear_bit(0, &console_logged);
		if (time_after_eq(now, last_print + (check_interval*HZ))) {
			last_print = now;
			printk(KERN_INFO "Machine check events logged\n");
		}
	} else {
		next_interval = min(next_interval*2, check_interval*HZ);
	}

	schedule_delayed_work(&mcheck_work, next_interval);
}


static __init int periodic_mcheck_init(void)
{
	next_interval = check_interval * HZ;
	if (next_interval)
		schedule_delayed_work(&mcheck_work, next_interval);
	return 0;
}
__initcall(periodic_mcheck_init);


/*
 * Initialize Machine Checks for a CPU.
 */
static void mce_init(void *dummy)
{
	u64 cap;
	int i;

	rdmsrl(MSR_IA32_MCG_CAP, cap);
	banks = cap & 0xff;
	if (banks > NR_BANKS) {
		printk(KERN_INFO "MCE: warning: using only %d banks\n", banks);
		banks = NR_BANKS;
	}
	/* Use accurate RIP reporting if available. */
	if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9)
		rip_msr = MSR_IA32_MCG_EIP;

	/* Log the machine checks left over from the previous reset.
	   This also clears all registers */
	do_machine_check(NULL, mce_bootlog ? -1 : -2);

	set_in_cr4(X86_CR4_MCE);

	if (cap & MCG_CTL_P)
		wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);

	for (i = 0; i < banks; i++) {
		wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
		wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
	}
}

/* Add per CPU specific workarounds here */
static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
{
	/* This should be disabled by the BIOS, but isn't always */
	if (c->x86_vendor == X86_VENDOR_AMD && c->x86 == 15) {
		/* disable GART TBL walk error reporting, which trips off
		   incorrectly with the IOMMU & 3ware & Cerberus. */
		clear_bit(10, &bank[4]);
		/* Lots of broken BIOSes around that don't clear them
		   by default and leave crap in there. Don't log. */
		mce_bootlog = 0;
	}
}

static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c)
{
	switch (c->x86_vendor) {
	case X86_VENDOR_INTEL:
		mce_intel_feature_init(c);
		break;
	case X86_VENDOR_AMD:
		mce_amd_feature_init(c);
		break;
	default:
		break;
	}
}

/*
 * Called for each booted CPU to set up machine checks.
 * Must be called with preempt off.
 */
void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
{
	static cpumask_t mce_cpus = CPU_MASK_NONE;

	mce_cpu_quirks(c);

	if (mce_dont_init ||
	    cpu_test_and_set(smp_processor_id(), mce_cpus) ||
	    !mce_available(c))
		return;

	mce_init(NULL);
	mce_cpu_features(c);
}

/*
 * Character device to read and clear the MCE log.
 */

static DEFINE_SPINLOCK(mce_state_lock);
static int open_count;	/* #times opened */
static int open_exclu;	/* already open exclusive? */

static int mce_open(struct inode *inode, struct file *file)
{
	spin_lock(&mce_state_lock);

	if (open_exclu || (open_count && (file->f_flags & O_EXCL))) {
		spin_unlock(&mce_state_lock);
		return -EBUSY;
	}

	if (file->f_flags & O_EXCL)
		open_exclu = 1;
	open_count++;

	spin_unlock(&mce_state_lock);

	return 0;
}
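
/*
 * Hypothetical userspace sketch (not part of this file): a logging
 * daemon that wants to be the only reader can pass O_EXCL and treat
 * EBUSY as "another reader already has the device open":
 *
 *	int fd = open("/dev/mcelog", O_RDONLY | O_EXCL);
 *	if (fd < 0 && errno == EBUSY)
 *		... back off; an exclusive or prior reader is active ...
 */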

static int mce_release(struct inode *inode, struct file *file)
{
	spin_lock(&mce_state_lock);

	open_count--;
	open_exclu = 0;

	spin_unlock(&mce_state_lock);

	return 0;
}

static void collect_tscs(void *data)
{
	unsigned long *cpu_tsc = (unsigned long *)data;
	rdtscll(cpu_tsc[smp_processor_id()]);
}

static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, loff_t *off)
{
	unsigned long *cpu_tsc;
	static DECLARE_MUTEX(mce_read_sem);
	unsigned next;
	char __user *buf = ubuf;
	int i, err;

	cpu_tsc = kmalloc(NR_CPUS * sizeof(long), GFP_KERNEL);
	if (!cpu_tsc)
		return -ENOMEM;

	down(&mce_read_sem);
	next = rcu_dereference(mcelog.next);

	/* Only supports full reads right now */
	if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) {
		up(&mce_read_sem);
		kfree(cpu_tsc);
		return -EINVAL;
	}

	err = 0;
	for (i = 0; i < next; i++) {
		unsigned long start = jiffies;
		while (!mcelog.entry[i].finished) {
			if (time_after_eq(jiffies, start + 2)) {
				memset(mcelog.entry + i, 0, sizeof(struct mce));
				goto timeout;
			}
			cpu_relax();
		}
		smp_rmb();
		err |= copy_to_user(buf, mcelog.entry + i, sizeof(struct mce));
		buf += sizeof(struct mce);
 timeout:
		;
	}

	memset(mcelog.entry, 0, next * sizeof(struct mce));
	mcelog.next = 0;

	synchronize_sched();

	/* Collect entries that were still getting written before the synchronize. */
	on_each_cpu(collect_tscs, cpu_tsc, 1, 1);
	for (i = next; i < MCE_LOG_LEN; i++) {
		if (mcelog.entry[i].finished &&
		    mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) {
			err |= copy_to_user(buf, mcelog.entry+i, sizeof(struct mce));
			smp_rmb();
			buf += sizeof(struct mce);
			memset(&mcelog.entry[i], 0, sizeof(struct mce));
		}
	}
	up(&mce_read_sem);
	kfree(cpu_tsc);
	return err ? -EFAULT : buf - ubuf;
}

static int mce_ioctl(struct inode *i, struct file *f, unsigned int cmd, unsigned long arg)
{
	int __user *p = (int __user *)arg;
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;
	switch (cmd) {
	case MCE_GET_RECORD_LEN:
		return put_user(sizeof(struct mce), p);
	case MCE_GET_LOG_LEN:
		return put_user(MCE_LOG_LEN, p);
	case MCE_GETCLEAR_FLAGS: {
		unsigned flags;
		do {
			flags = mcelog.flags;
		} while (cmpxchg(&mcelog.flags, flags, 0) != flags);
		return put_user(flags, p);
	}
	default:
		return -ENOTTY;
	}
}
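
/*
 * Hypothetical userspace sketch (the ioctls require CAP_SYS_ADMIN):
 * query the record and buffer sizes, then issue one full-sized read,
 * since mce_read() above only accepts full reads:
 *
 *	int reclen, loglen;
 *	ioctl(fd, MCE_GET_RECORD_LEN, &reclen);
 *	ioctl(fd, MCE_GET_LOG_LEN, &loglen);
 *	char *buf = malloc(reclen * loglen);
 *	ssize_t n = read(fd, buf, reclen * loglen);
 */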

static const struct file_operations mce_chrdev_ops = {
	.open = mce_open,
	.release = mce_release,
	.read = mce_read,
	.ioctl = mce_ioctl,
};

static struct miscdevice mce_log_device = {
	MISC_MCELOG_MINOR,
	"mcelog",
	&mce_chrdev_ops,
};

/*
 * Old style boot options parsing. Only for compatibility.
 */

static int __init mcheck_disable(char *str)
{
	mce_dont_init = 1;
	return 1;
}

/* mce=off disables machine check. Note you can re-enable it later
   using sysfs.
   mce=TOLERANCELEVEL (number, see above)
   mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
   mce=nobootlog Don't log MCEs from before booting. */
static int __init mcheck_enable(char *str)
{
	if (*str == '=')
		str++;
	if (!strcmp(str, "off"))
		mce_dont_init = 1;
	else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
		mce_bootlog = str[0] == 'b';
	else if (isdigit(str[0]))
		get_option(&str, &tolerant);
	else
		printk("mce= argument %s ignored. Please use /sys\n", str);
	return 1;
}

__setup("nomce", mcheck_disable);
__setup("mce", mcheck_enable);

/*
 * Sysfs support
 */

/* On resume clear all MCE state. Don't want to see leftovers from the BIOS.
   Only one CPU is active at this time, the others get re-added later using
   CPU hotplug. */
static int mce_resume(struct sys_device *dev)
{
	mce_init(NULL);
	return 0;
}

/* Reinit MCEs after user configuration changes */
static void mce_restart(void)
{
	if (next_interval)
		cancel_delayed_work(&mcheck_work);
	/* Timer race is harmless here */
	on_each_cpu(mce_init, NULL, 1, 1);
	next_interval = check_interval * HZ;
	if (next_interval)
		schedule_delayed_work(&mcheck_work, next_interval);
}

static struct sysdev_class mce_sysclass = {
	.resume = mce_resume,
	set_kset_name("machinecheck"),
};

DEFINE_PER_CPU(struct sys_device, device_mce);

/* Why are there no generic functions for this? */
#define ACCESSOR(name, var, start) \
	static ssize_t show_ ## name(struct sys_device *s, char *buf) { \
		return sprintf(buf, "%lx\n", (unsigned long)var); \
	} \
	static ssize_t set_ ## name(struct sys_device *s, const char *buf, size_t siz) { \
		char *end; \
		unsigned long new = simple_strtoul(buf, &end, 0); \
		if (end == buf) return -EINVAL; \
		var = new; \
		start; \
		return end-buf; \
	} \
	static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name);

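/*
 * Sketch of the resulting sysfs interface (paths assume the
 * "machinecheck" sysdev class registered below; one machinecheckN
 * directory per CPU, though all CPUs share the same bank settings):
 *
 *	echo 0 > /sys/devices/system/machinecheck/machinecheck0/bank4ctl
 *	cat /sys/devices/system/machinecheck/machinecheck0/tolerant
 */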
/* TBD should generate these dynamically based on number of available banks */
ACCESSOR(bank0ctl,bank[0],mce_restart())
ACCESSOR(bank1ctl,bank[1],mce_restart())
ACCESSOR(bank2ctl,bank[2],mce_restart())
ACCESSOR(bank3ctl,bank[3],mce_restart())
ACCESSOR(bank4ctl,bank[4],mce_restart())
ACCESSOR(bank5ctl,bank[5],mce_restart())

static ssize_t show_trigger(struct sys_device *s, char *buf)
{
	strcpy(buf, trigger);
	strcat(buf, "\n");
	return strlen(trigger) + 1;
}

static ssize_t set_trigger(struct sys_device *s, const char *buf, size_t siz)
{
	char *p;
	int len;
	strncpy(trigger, buf, sizeof(trigger));
	trigger[sizeof(trigger)-1] = 0;
	len = strlen(trigger);
	p = strchr(trigger, '\n');
	/* Strip a trailing newline; p is NULL when there is none. */
	if (p) *p = 0;
	return len;
}

static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger);
ACCESSOR(tolerant,tolerant,)
ACCESSOR(check_interval,check_interval,mce_restart())
static struct sysdev_attribute *mce_attributes[] = {
	&attr_bank0ctl, &attr_bank1ctl, &attr_bank2ctl,
	&attr_bank3ctl, &attr_bank4ctl, &attr_bank5ctl,
	&attr_tolerant, &attr_check_interval, &attr_trigger,
	NULL
};

/* Per CPU sysdev init. All of the CPUs still share the same ctl bank */
static __cpuinit int mce_create_device(unsigned int cpu)
{
	int err;
	int i;
	if (!mce_available(&cpu_data[cpu]))
		return -EIO;

	per_cpu(device_mce,cpu).id = cpu;
	per_cpu(device_mce,cpu).cls = &mce_sysclass;

	err = sysdev_register(&per_cpu(device_mce,cpu));

	if (!err) {
		for (i = 0; mce_attributes[i]; i++)
			sysdev_create_file(&per_cpu(device_mce,cpu),
					   mce_attributes[i]);
	}
	return err;
}

static void mce_remove_device(unsigned int cpu)
{
	int i;

	for (i = 0; mce_attributes[i]; i++)
		sysdev_remove_file(&per_cpu(device_mce,cpu),
				   mce_attributes[i]);
	sysdev_unregister(&per_cpu(device_mce,cpu));
	memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject));
}

/* Get notified when a cpu comes on/off. Be hotplug friendly. */
static int
mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;

	switch (action) {
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		mce_create_device(cpu);
		break;
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		mce_remove_device(cpu);
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block mce_cpu_notifier = {
	.notifier_call = mce_cpu_callback,
};

static __init int mce_init_device(void)
{
	int err;
	int i = 0;

	if (!mce_available(&boot_cpu_data))
		return -EIO;
	err = sysdev_class_register(&mce_sysclass);

	for_each_online_cpu(i) {
		mce_create_device(i);
	}

	register_hotcpu_notifier(&mce_cpu_notifier);
	misc_register(&mce_log_device);
	return err;
}

device_initcall(mce_init_device);