arch/x86/kernel/cpu/mcheck/mce_amd.c
/*
 *  (c) 2005, 2006 Advanced Micro Devices, Inc.
 *  Your use of this code is subject to the terms and conditions of the
 *  GNU general public license version 2. See "COPYING" or
 *  http://www.gnu.org/licenses/gpl.html
 *
 *  Written by Jacob Shin - AMD, Inc.
 *
 *  Support : jacob.shin@amd.com
 *
 *  April 2006
 *     - added support for AMD Family 0x10 processors
 *
 *  All MC4_MISCi registers are shared between the cores on a node.
 */
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/kobject.h>
#include <linux/percpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/sysfs.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/smp.h>

#include <asm/amd_nb.h>
#include <asm/apic.h>
#include <asm/idle.h>
#include <asm/mce.h>
#include <asm/msr.h>

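/*
 * Overview of the masks below (summary added for readability; see the
 * AMD BKDG for the authoritative register layout): the *_HI masks decode
 * the high 32 bits of a thresholding MCi_MISC block (valid, counter
 * present, locked, LVT offset, counter enable, interrupt type, overflow
 * and the error count itself), while MASK_BLKPTR_LO decodes the low 32
 * bits of block 0, whose BLKPTR field points into the extended MSR block
 * area starting at MCG_XBLK_ADDR.
 */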
#define NR_BANKS          6
#define NR_BLOCKS         9
#define THRESHOLD_MAX     0xFFF
#define INT_TYPE_APIC     0x00020000
#define MASK_VALID_HI     0x80000000
#define MASK_CNTP_HI      0x40000000
#define MASK_LOCKED_HI    0x20000000
#define MASK_LVTOFF_HI    0x00F00000
#define MASK_COUNT_EN_HI  0x00080000
#define MASK_INT_TYPE_HI  0x00060000
#define MASK_OVERFLOW_HI  0x00010000
#define MASK_ERR_COUNT_HI 0x00000FFF
#define MASK_BLKPTR_LO    0xFF000000
#define MCG_XBLK_ADDR     0xC0000400

static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks);

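/*
 * shared_bank[] marks banks whose MISC registers are shared by all cores
 * on a node rather than being per-core; only bank 4 (the northbridge
 * bank) is shared on these processors.
 */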
static unsigned char shared_bank[NR_BANKS] = {
	0, 0, 0, 0, 1
};

static DEFINE_PER_CPU(unsigned char, bank_map);	/* see which banks are on */

static void amd_threshold_interrupt(void);

/*
 * CPU Initialization
 */

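/*
 * Arguments handed to threshold_restart_bank() via
 * smp_call_function_single(): the block to reprogram, whether to reset
 * the error counter, whether to (re)write the LVT offset, and the
 * previous threshold limit so the remaining count can be preserved.
 */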
struct thresh_restart {
	struct threshold_block	*b;
	int			reset;
	int			set_lvt_off;
	int			lvt_off;
	u16			old_limit;
};

static bool lvt_interrupt_supported(unsigned int bank, u32 msr_high_bits)
{
	/*
	 * bank 4 supports APIC LVT interrupts implicitly since forever.
	 */
	if (bank == 4)
		return true;

	/*
	 * IntP: interrupt present; if this bit is set, the thresholding
	 * bank can generate APIC LVT interrupts
	 */
	return msr_high_bits & BIT(28);
}

static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
{
	int msr = (hi & MASK_LVTOFF_HI) >> 20;

	if (apic < 0) {
		pr_err(FW_BUG "cpu %d, failed to setup threshold interrupt "
		       "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu,
		       b->bank, b->block, b->address, hi, lo);
		return 0;
	}

	if (apic != msr) {
		pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d "
		       "for bank %d, block %d (MSR%08X=0x%x%08x)\n",
		       b->cpu, apic, b->bank, b->block, b->address, hi, lo);
		return 0;
	}

	return 1;
};

/*
 * Called via smp_call_function_single(), must be called with correct
 * cpu affinity.
 */
static void threshold_restart_bank(void *_tr)
{
	struct thresh_restart *tr = _tr;
	u32 hi, lo;

	rdmsr(tr->b->address, lo, hi);

	if (tr->b->threshold_limit < (hi & THRESHOLD_MAX))
		tr->reset = 1;	/* limit cannot be lower than err count */

	if (tr->reset) {		/* reset err count and overflow bit */
		hi =
		    (hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
		    (THRESHOLD_MAX - tr->b->threshold_limit);
	} else if (tr->old_limit) {	/* change limit w/o reset */
		int new_count = (hi & THRESHOLD_MAX) +
		    (tr->old_limit - tr->b->threshold_limit);

		hi = (hi & ~MASK_ERR_COUNT_HI) |
		    (new_count & THRESHOLD_MAX);
	}

	/* clear IntType */
	hi &= ~MASK_INT_TYPE_HI;

	if (!tr->b->interrupt_capable)
		goto done;

	if (tr->set_lvt_off) {
		if (lvt_off_valid(tr->b, tr->lvt_off, lo, hi)) {
			/* set new lvt offset */
			hi &= ~MASK_LVTOFF_HI;
			hi |= tr->lvt_off << 20;
		}
	}

	if (tr->b->interrupt_enable)
		hi |= INT_TYPE_APIC;

done:

	hi |= MASK_COUNT_EN_HI;
	wrmsr(tr->b->address, lo, hi);
}

static void mce_threshold_block_init(struct threshold_block *b, int offset)
{
	struct thresh_restart tr = {
		.b		= b,
		.set_lvt_off	= 1,
		.lvt_off	= offset,
	};

	b->threshold_limit	= THRESHOLD_MAX;
	threshold_restart_bank(&tr);
};

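/*
 * Reserve the threshold LVT offset at most once: if no offset has been
 * reserved yet (reserved < 0) and programming the local APIC EILVT
 * entry for THRESHOLD_APIC_VECTOR succeeds, return the new offset;
 * otherwise keep the previously reserved one.
 */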
static int setup_APIC_mce(int reserved, int new)
{
	if (reserved < 0 && !setup_APIC_eilvt(new, THRESHOLD_APIC_VECTOR,
					      APIC_EILVT_MSG_FIX, 0))
		return new;

	return reserved;
}

/* cpu init entry point, called from mce.c with preempt off */
void mce_amd_feature_init(struct cpuinfo_x86 *c)
{
	struct threshold_block b;
	unsigned int cpu = smp_processor_id();
	u32 low = 0, high = 0, address = 0;
	unsigned int bank, block;
	int offset = -1;

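	/*
	 * Walk every block of every bank: block 0 lives at
	 * MSR_IA32_MC0_MISC + bank * 4, block 1 is found through the
	 * BLKPTR field of block 0 (relative to MCG_XBLK_ADDR), and any
	 * further blocks follow at consecutive MSR addresses.
	 */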
	for (bank = 0; bank < NR_BANKS; ++bank) {
		for (block = 0; block < NR_BLOCKS; ++block) {
			if (block == 0)
				address = MSR_IA32_MC0_MISC + bank * 4;
			else if (block == 1) {
				address = (low & MASK_BLKPTR_LO) >> 21;
				if (!address)
					break;

				address += MCG_XBLK_ADDR;
			} else
				++address;

			if (rdmsr_safe(address, &low, &high))
				break;

			if (!(high & MASK_VALID_HI))
				continue;

			if (!(high & MASK_CNTP_HI) ||
			     (high & MASK_LOCKED_HI))
				continue;

			if (!block)
				per_cpu(bank_map, cpu) |= (1 << bank);

			memset(&b, 0, sizeof(b));
			b.cpu			= cpu;
			b.bank			= bank;
			b.block			= block;
			b.address		= address;
			b.interrupt_capable	= lvt_interrupt_supported(bank, high);

			if (b.interrupt_capable) {
				int new = (high & MASK_LVTOFF_HI) >> 20;
				offset = setup_APIC_mce(offset, new);
			}

			mce_threshold_block_init(&b, offset);
			mce_threshold_vector = amd_threshold_interrupt;
		}
	}
}

/*
 * APIC Interrupt Handler
 */

/*
 * The threshold interrupt handler services THRESHOLD_APIC_VECTOR.
 * The interrupt fires when error_count reaches threshold_limit.
 * The handler simply logs an MCE record with a software-defined bank number.
 */
static void amd_threshold_interrupt(void)
{
	u32 low = 0, high = 0, address = 0;
	unsigned int bank, block;
	struct mce m;

	mce_setup(&m);

	/* assume first bank caused it */
	for (bank = 0; bank < NR_BANKS; ++bank) {
		if (!(per_cpu(bank_map, m.cpu) & (1 << bank)))
			continue;
		for (block = 0; block < NR_BLOCKS; ++block) {
			if (block == 0) {
				address = MSR_IA32_MC0_MISC + bank * 4;
			} else if (block == 1) {
				address = (low & MASK_BLKPTR_LO) >> 21;
				if (!address)
					break;
				address += MCG_XBLK_ADDR;
			} else {
				++address;
			}

			if (rdmsr_safe(address, &low, &high))
				break;

			if (!(high & MASK_VALID_HI)) {
				if (block)
					continue;
				else
					break;
			}

			if (!(high & MASK_CNTP_HI) ||
			     (high & MASK_LOCKED_HI))
				continue;

			/*
			 * Log the machine check that caused the threshold
			 * event.
			 */
			machine_check_poll(MCP_TIMESTAMP,
					   &__get_cpu_var(mce_poll_banks));

			if (high & MASK_OVERFLOW_HI) {
				rdmsrl(address, m.misc);
				rdmsrl(MSR_IA32_MC0_STATUS + bank * 4,
				       m.status);
				m.bank = K8_MCE_THRESHOLD_BASE
				       + bank * NR_BLOCKS
				       + block;
				mce_log(&m);
				return;
			}
		}
	}
}

/*
 * Sysfs Interface
 */

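/*
 * Roughly, the objects below end up under the per-CPU machinecheck
 * device, e.g. (paths assumed from the kobject names used here):
 *
 *   /sys/devices/system/machinecheck/machinecheck<cpu>/
 *       threshold_bank<N>/
 *           misc<B>/error_count
 *           misc<B>/threshold_limit
 *           misc<B>/interrupt_enable   (only if the block can interrupt)
 */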
struct threshold_attr {
	struct attribute attr;
	ssize_t (*show) (struct threshold_block *, char *);
	ssize_t (*store) (struct threshold_block *, const char *, size_t count);
};

#define SHOW_FIELDS(name)						\
static ssize_t show_ ## name(struct threshold_block *b, char *buf)	\
{									\
	return sprintf(buf, "%lx\n", (unsigned long) b->name);		\
}
SHOW_FIELDS(interrupt_enable)
SHOW_FIELDS(threshold_limit)

static ssize_t
store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size)
{
	struct thresh_restart tr;
	unsigned long new;

	if (!b->interrupt_capable)
		return -EINVAL;

	if (strict_strtoul(buf, 0, &new) < 0)
		return -EINVAL;

	b->interrupt_enable = !!new;

	memset(&tr, 0, sizeof(tr));
	tr.b = b;

	smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);

	return size;
}

static ssize_t
store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
{
	struct thresh_restart tr;
	unsigned long new;

	if (strict_strtoul(buf, 0, &new) < 0)
		return -EINVAL;

	if (new > THRESHOLD_MAX)
		new = THRESHOLD_MAX;
	if (new < 1)
		new = 1;

	memset(&tr, 0, sizeof(tr));
	tr.old_limit = b->threshold_limit;
	b->threshold_limit = new;
	tr.b = b;

	smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);

	return size;
}

struct threshold_block_cross_cpu {
	struct threshold_block	*tb;
	long			retval;
};

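/*
 * The raw MSR counter starts at (THRESHOLD_MAX - threshold_limit) and
 * counts up towards THRESHOLD_MAX, so the value reported to user space
 * is the counter minus that starting offset, i.e. the number of errors
 * seen since the limit was last programmed. It is read on the owning
 * CPU via smp_call_function_single().
 */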
static void local_error_count_handler(void *_tbcc)
{
	struct threshold_block_cross_cpu *tbcc = _tbcc;
	struct threshold_block *b = tbcc->tb;
	u32 low, high;

	rdmsr(b->address, low, high);
	tbcc->retval = (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit);
}

static ssize_t show_error_count(struct threshold_block *b, char *buf)
{
	struct threshold_block_cross_cpu tbcc = { .tb = b, };

	smp_call_function_single(b->cpu, local_error_count_handler, &tbcc, 1);
	return sprintf(buf, "%lx\n", tbcc.retval);
}

static ssize_t store_error_count(struct threshold_block *b,
				 const char *buf, size_t count)
{
	struct thresh_restart tr = { .b = b, .reset = 1, .old_limit = 0 };

	smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
	return 1;
}

#define RW_ATTR(val)							\
static struct threshold_attr val = {					\
	.attr	= {.name = __stringify(val), .mode = 0644 },		\
	.show	= show_## val,						\
	.store	= store_## val,						\
};

RW_ATTR(interrupt_enable);
RW_ATTR(threshold_limit);
RW_ATTR(error_count);

static struct attribute *default_attrs[] = {
	&threshold_limit.attr,
	&error_count.attr,
	NULL,	/* possibly interrupt_enable if supported, see below */
	NULL,
};

#define to_block(k) container_of(k, struct threshold_block, kobj)
#define to_attr(a) container_of(a, struct threshold_attr, attr)

static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
{
	struct threshold_block *b = to_block(kobj);
	struct threshold_attr *a = to_attr(attr);
	ssize_t ret;

	ret = a->show ? a->show(b, buf) : -EIO;

	return ret;
}

static ssize_t store(struct kobject *kobj, struct attribute *attr,
		     const char *buf, size_t count)
{
	struct threshold_block *b = to_block(kobj);
	struct threshold_attr *a = to_attr(attr);
	ssize_t ret;

	ret = a->store ? a->store(b, buf, count) : -EIO;

	return ret;
}

static const struct sysfs_ops threshold_ops = {
	.show	= show,
	.store	= store,
};

static struct kobj_type threshold_ktype = {
	.sysfs_ops	= &threshold_ops,
	.default_attrs	= default_attrs,
};

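/*
 * Discover and allocate all threshold blocks of one bank on @cpu,
 * starting at @address for @block, and create a "misc<block>" kobject
 * for each valid block. The function recurses to the next block:
 * block 0 chains to the extended block area through its BLKPTR field,
 * later blocks sit at consecutive MSR addresses.
 */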
static __cpuinit int allocate_threshold_blocks(unsigned int cpu,
					       unsigned int bank,
					       unsigned int block,
					       u32 address)
{
	struct threshold_block *b = NULL;
	u32 low, high;
	int err;

	if ((bank >= NR_BANKS) || (block >= NR_BLOCKS))
		return 0;

	if (rdmsr_safe_on_cpu(cpu, address, &low, &high))
		return 0;

	if (!(high & MASK_VALID_HI)) {
		if (block)
			goto recurse;
		else
			return 0;
	}

	if (!(high & MASK_CNTP_HI) ||
	     (high & MASK_LOCKED_HI))
		goto recurse;

	b = kzalloc(sizeof(struct threshold_block), GFP_KERNEL);
	if (!b)
		return -ENOMEM;

	b->block		= block;
	b->bank			= bank;
	b->cpu			= cpu;
	b->address		= address;
	b->interrupt_enable	= 0;
	b->interrupt_capable	= lvt_interrupt_supported(bank, high);
	b->threshold_limit	= THRESHOLD_MAX;

	if (b->interrupt_capable)
		threshold_ktype.default_attrs[2] = &interrupt_enable.attr;
	else
		threshold_ktype.default_attrs[2] = NULL;

	INIT_LIST_HEAD(&b->miscj);

	if (per_cpu(threshold_banks, cpu)[bank]->blocks) {
		list_add(&b->miscj,
			 &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj);
	} else {
		per_cpu(threshold_banks, cpu)[bank]->blocks = b;
	}

	err = kobject_init_and_add(&b->kobj, &threshold_ktype,
				   per_cpu(threshold_banks, cpu)[bank]->kobj,
				   "misc%i", block);
	if (err)
		goto out_free;
recurse:
	if (!block) {
		address = (low & MASK_BLKPTR_LO) >> 21;
		if (!address)
			return 0;
		address += MCG_XBLK_ADDR;
	} else {
		++address;
	}

	err = allocate_threshold_blocks(cpu, bank, ++block, address);
	if (err)
		goto out_free;

	if (b)
		kobject_uevent(&b->kobj, KOBJ_ADD);

	return err;

out_free:
	if (b) {
		kobject_put(&b->kobj);
		list_del(&b->miscj);
		kfree(b);
	}
	return err;
}

static __cpuinit int __threshold_add_blocks(struct threshold_bank *b)
{
	struct list_head *head = &b->blocks->miscj;
	struct threshold_block *pos = NULL;
	struct threshold_block *tmp = NULL;
	int err = 0;

	err = kobject_add(&b->blocks->kobj, b->kobj, b->blocks->kobj.name);
	if (err)
		return err;

	list_for_each_entry_safe(pos, tmp, head, miscj) {

		err = kobject_add(&pos->kobj, b->kobj, pos->kobj.name);
		if (err) {
			list_for_each_entry_safe_reverse(pos, tmp, head, miscj)
				kobject_del(&pos->kobj);

			return err;
		}
	}
	return err;
}

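/*
 * Create the sysfs directory for one bank on @cpu. Bank 4 is shared by
 * all cores on a node: the first core to get here allocates the
 * threshold_bank and stores it in the node's amd_northbridge, later
 * cores only add their own sysfs links to the already existing blocks.
 */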
static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
{
	struct device *dev = per_cpu(mce_device, cpu);
	struct amd_northbridge *nb = NULL;
	struct threshold_bank *b = NULL;
	char name[32];
	int err = 0;

	sprintf(name, "threshold_bank%i", bank);

	if (shared_bank[bank]) {

		nb = node_to_amd_nb(amd_get_nb_id(cpu));
		WARN_ON(!nb);

		/* threshold descriptor already initialized on this node? */
		if (nb->bank4) {
			/* yes, use it */
			b = nb->bank4;
			err = kobject_add(b->kobj, &dev->kobj, name);
			if (err)
				goto out;

			per_cpu(threshold_banks, cpu)[bank] = b;
			atomic_inc(&b->cpus);

			err = __threshold_add_blocks(b);

			goto out;
		}
	}

	b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL);
	if (!b) {
		err = -ENOMEM;
		goto out;
	}

	b->kobj = kobject_create_and_add(name, &dev->kobj);
	if (!b->kobj) {
		err = -EINVAL;
		goto out_free;
	}

	per_cpu(threshold_banks, cpu)[bank] = b;

	if (shared_bank[bank]) {
		atomic_set(&b->cpus, 1);

		/* nb is already initialized, see above */
		WARN_ON(nb->bank4);
		nb->bank4 = b;
	}

	err = allocate_threshold_blocks(cpu, bank, 0,
					MSR_IA32_MC0_MISC + bank * 4);
	if (!err)
		goto out;

out_free:
	kfree(b);

out:
	return err;
}

/* create dir/files for all valid threshold banks */
static __cpuinit int threshold_create_device(unsigned int cpu)
{
	unsigned int bank;
	int err = 0;

	for (bank = 0; bank < NR_BANKS; ++bank) {
		if (!(per_cpu(bank_map, cpu) & (1 << bank)))
			continue;
		err = threshold_create_bank(cpu, bank);
		if (err)
			return err;
	}

	return err;
}

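/*
 * Teardown mirrors creation: remove and free the per-block kobjects,
 * then the bank itself. For the shared bank the per-CPU sysfs links are
 * dropped first, and the bank is only destroyed when the last CPU on
 * the node stops using it.
 */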
static void deallocate_threshold_block(unsigned int cpu,
				       unsigned int bank)
{
	struct threshold_block *pos = NULL;
	struct threshold_block *tmp = NULL;
	struct threshold_bank *head = per_cpu(threshold_banks, cpu)[bank];

	if (!head)
		return;

	list_for_each_entry_safe(pos, tmp, &head->blocks->miscj, miscj) {
		kobject_put(&pos->kobj);
		list_del(&pos->miscj);
		kfree(pos);
	}

	kfree(per_cpu(threshold_banks, cpu)[bank]->blocks);
	per_cpu(threshold_banks, cpu)[bank]->blocks = NULL;
}

static void __threshold_remove_blocks(struct threshold_bank *b)
{
	struct threshold_block *pos = NULL;
	struct threshold_block *tmp = NULL;

	kobject_del(b->kobj);

	list_for_each_entry_safe(pos, tmp, &b->blocks->miscj, miscj)
		kobject_del(&pos->kobj);
}

static void threshold_remove_bank(unsigned int cpu, int bank)
{
	struct amd_northbridge *nb;
	struct threshold_bank *b;

	b = per_cpu(threshold_banks, cpu)[bank];
	if (!b)
		return;

	if (!b->blocks)
		goto free_out;

	if (shared_bank[bank]) {
		if (!atomic_dec_and_test(&b->cpus)) {
			__threshold_remove_blocks(b);
			per_cpu(threshold_banks, cpu)[bank] = NULL;
			return;
		} else {
			/*
			 * the last CPU on this node using the shared bank is
			 * going away, remove that bank now.
			 */
			nb = node_to_amd_nb(amd_get_nb_id(cpu));
			nb->bank4 = NULL;
		}
	}

	deallocate_threshold_block(cpu, bank);

free_out:
	kobject_del(b->kobj);
	kobject_put(b->kobj);
	kfree(b);
	per_cpu(threshold_banks, cpu)[bank] = NULL;
}

static void threshold_remove_device(unsigned int cpu)
{
	unsigned int bank;

	for (bank = 0; bank < NR_BANKS; ++bank) {
		if (!(per_cpu(bank_map, cpu) & (1 << bank)))
			continue;
		threshold_remove_bank(cpu, bank);
	}
}

/* get notified when a cpu comes on/off */
static void __cpuinit
amd_64_threshold_cpu_callback(unsigned long action, unsigned int cpu)
{
	switch (action) {
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		threshold_create_device(cpu);
		break;
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		threshold_remove_device(cpu);
		break;
	default:
		break;
	}
}

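/*
 * device_initcall: by the time this runs the boot CPUs are already
 * online, so create their sysfs devices directly, then register the
 * hotplug callback so CPUs coming up or going down later are handled
 * through amd_64_threshold_cpu_callback().
 */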
static __init int threshold_init_device(void)
{
	unsigned lcpu = 0;

	/* to hit CPUs online before the notifier is up */
	for_each_online_cpu(lcpu) {
		int err = threshold_create_device(lcpu);

		if (err)
			return err;
	}
	threshold_cpu_callback = amd_64_threshold_cpu_callback;

	return 0;
}
device_initcall(threshold_init_device);