| 1 | /* |
| 2 | * P4 specific Machine Check Exception Reporting |
| 3 | */ |
| 4 | |
| 5 | #include <linux/init.h> |
| 6 | #include <linux/types.h> |
| 7 | #include <linux/kernel.h> |
| 8 | #include <linux/interrupt.h> |
| 9 | #include <linux/smp.h> |
| 10 | |
| 11 | #include <asm/processor.h> |
| 12 | #include <asm/system.h> |
| 13 | #include <asm/msr.h> |
| 14 | #include <asm/apic.h> |
| 15 | |
| 16 | #include <asm/therm_throt.h> |
| 17 | |
| 18 | #include "mce.h" |
| 19 | |
| 20 | /* as supported by the P4/Xeon family */ |
| 21 | struct intel_mce_extended_msrs { |
| 22 | u32 eax; |
| 23 | u32 ebx; |
| 24 | u32 ecx; |
| 25 | u32 edx; |
| 26 | u32 esi; |
| 27 | u32 edi; |
| 28 | u32 ebp; |
| 29 | u32 esp; |
| 30 | u32 eflags; |
| 31 | u32 eip; |
| 32 | /* u32 *reserved[]; */ |
| 33 | }; |
| 34 | |
/*
 * Number of extended MCE MSRs reported in MCG_CAP; stays 0 (static storage
 * is zero-initialised) until intel_p4_mcheck_init() finds them.
 */
static int mce_num_extended_msrs;
| 36 | |
| 37 | |
| 38 | #ifdef CONFIG_X86_MCE_P4THERMAL |
| 39 | static void unexpected_thermal_interrupt(struct pt_regs *regs) |
| 40 | { |
| 41 | printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", |
| 42 | smp_processor_id()); |
| 43 | add_taint(TAINT_MACHINE_CHECK); |
| 44 | } |
| 45 | |
| 46 | /* P4/Xeon Thermal transition interrupt handler */ |
| 47 | static void intel_thermal_interrupt(struct pt_regs *regs) |
| 48 | { |
| 49 | __u64 msr_val; |
| 50 | |
| 51 | ack_APIC_irq(); |
| 52 | |
| 53 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); |
| 54 | therm_throt_process(msr_val & 0x1); |
| 55 | } |
| 56 | |
| 57 | /* Thermal interrupt handler for this CPU setup */ |
| 58 | static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_thermal_interrupt; |
| 59 | |
| 60 | fastcall void smp_thermal_interrupt(struct pt_regs *regs) |
| 61 | { |
| 62 | irq_enter(); |
| 63 | vendor_thermal_interrupt(regs); |
| 64 | irq_exit(); |
| 65 | } |
| 66 | |
| 67 | /* P4/Xeon Thermal regulation detect and init */ |
| 68 | static void intel_init_thermal(struct cpuinfo_x86 *c) |
| 69 | { |
| 70 | u32 l, h; |
| 71 | unsigned int cpu = smp_processor_id(); |
| 72 | |
| 73 | /* Thermal monitoring */ |
| 74 | if (!cpu_has(c, X86_FEATURE_ACPI)) |
| 75 | return; /* -ENODEV */ |
| 76 | |
| 77 | /* Clock modulation */ |
| 78 | if (!cpu_has(c, X86_FEATURE_ACC)) |
| 79 | return; /* -ENODEV */ |
| 80 | |
| 81 | /* first check if its enabled already, in which case there might |
| 82 | * be some SMM goo which handles it, so we can't even put a handler |
| 83 | * since it might be delivered via SMI already -zwanem. |
| 84 | */ |
| 85 | rdmsr (MSR_IA32_MISC_ENABLE, l, h); |
| 86 | h = apic_read(APIC_LVTTHMR); |
| 87 | if ((l & (1<<3)) && (h & APIC_DM_SMI)) { |
| 88 | printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", |
| 89 | cpu); |
| 90 | return; /* -EBUSY */ |
| 91 | } |
| 92 | |
| 93 | /* check whether a vector already exists, temporarily masked? */ |
| 94 | if (h & APIC_VECTOR_MASK) { |
| 95 | printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already " |
| 96 | "installed\n", |
| 97 | cpu, (h & APIC_VECTOR_MASK)); |
| 98 | return; /* -EBUSY */ |
| 99 | } |
| 100 | |
| 101 | /* The temperature transition interrupt handler setup */ |
| 102 | h = THERMAL_APIC_VECTOR; /* our delivery vector */ |
| 103 | h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */ |
| 104 | apic_write_around(APIC_LVTTHMR, h); |
| 105 | |
| 106 | rdmsr (MSR_IA32_THERM_INTERRUPT, l, h); |
| 107 | wrmsr (MSR_IA32_THERM_INTERRUPT, l | 0x03 , h); |
| 108 | |
| 109 | /* ok we're good to go... */ |
| 110 | vendor_thermal_interrupt = intel_thermal_interrupt; |
| 111 | |
| 112 | rdmsr (MSR_IA32_MISC_ENABLE, l, h); |
| 113 | wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h); |
| 114 | |
| 115 | l = apic_read (APIC_LVTTHMR); |
| 116 | apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED); |
| 117 | printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu); |
| 118 | |
| 119 | /* enable thermal throttle processing */ |
| 120 | atomic_set(&therm_throt_en, 1); |
| 121 | return; |
| 122 | } |
| 123 | #endif /* CONFIG_X86_MCE_P4THERMAL */ |
| 124 | |
| 125 | |
| 126 | /* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */ |
| 127 | static inline int intel_get_extended_msrs(struct intel_mce_extended_msrs *r) |
| 128 | { |
| 129 | u32 h; |
| 130 | |
| 131 | if (mce_num_extended_msrs == 0) |
| 132 | goto done; |
| 133 | |
| 134 | rdmsr (MSR_IA32_MCG_EAX, r->eax, h); |
| 135 | rdmsr (MSR_IA32_MCG_EBX, r->ebx, h); |
| 136 | rdmsr (MSR_IA32_MCG_ECX, r->ecx, h); |
| 137 | rdmsr (MSR_IA32_MCG_EDX, r->edx, h); |
| 138 | rdmsr (MSR_IA32_MCG_ESI, r->esi, h); |
| 139 | rdmsr (MSR_IA32_MCG_EDI, r->edi, h); |
| 140 | rdmsr (MSR_IA32_MCG_EBP, r->ebp, h); |
| 141 | rdmsr (MSR_IA32_MCG_ESP, r->esp, h); |
| 142 | rdmsr (MSR_IA32_MCG_EFLAGS, r->eflags, h); |
| 143 | rdmsr (MSR_IA32_MCG_EIP, r->eip, h); |
| 144 | |
| 145 | /* can we rely on kmalloc to do a dynamic |
| 146 | * allocation for the reserved registers? |
| 147 | */ |
| 148 | done: |
| 149 | return mce_num_extended_msrs; |
| 150 | } |
| 151 | |
| 152 | static fastcall void intel_machine_check(struct pt_regs * regs, long error_code) |
| 153 | { |
| 154 | int recover=1; |
| 155 | u32 alow, ahigh, high, low; |
| 156 | u32 mcgstl, mcgsth; |
| 157 | int i; |
| 158 | struct intel_mce_extended_msrs dbg; |
| 159 | |
| 160 | rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); |
| 161 | if (mcgstl & (1<<0)) /* Recoverable ? */ |
| 162 | recover=0; |
| 163 | |
| 164 | printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", |
| 165 | smp_processor_id(), mcgsth, mcgstl); |
| 166 | |
| 167 | if (intel_get_extended_msrs(&dbg)) { |
| 168 | printk (KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n", |
| 169 | smp_processor_id(), dbg.eip, dbg.eflags); |
| 170 | printk (KERN_DEBUG "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n", |
| 171 | dbg.eax, dbg.ebx, dbg.ecx, dbg.edx); |
| 172 | printk (KERN_DEBUG "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n", |
| 173 | dbg.esi, dbg.edi, dbg.ebp, dbg.esp); |
| 174 | } |
| 175 | |
| 176 | for (i=0; i<nr_mce_banks; i++) { |
| 177 | rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high); |
| 178 | if (high & (1<<31)) { |
| 179 | if (high & (1<<29)) |
| 180 | recover |= 1; |
| 181 | if (high & (1<<25)) |
| 182 | recover |= 2; |
| 183 | printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low); |
| 184 | high &= ~(1<<31); |
| 185 | if (high & (1<<27)) { |
| 186 | rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh); |
| 187 | printk ("[%08x%08x]", ahigh, alow); |
| 188 | } |
| 189 | if (high & (1<<26)) { |
| 190 | rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh); |
| 191 | printk (" at %08x%08x", ahigh, alow); |
| 192 | } |
| 193 | printk ("\n"); |
| 194 | } |
| 195 | } |
| 196 | |
| 197 | if (recover & 2) |
| 198 | panic ("CPU context corrupt"); |
| 199 | if (recover & 1) |
| 200 | panic ("Unable to continue"); |
| 201 | |
| 202 | printk(KERN_EMERG "Attempting to continue.\n"); |
| 203 | /* |
| 204 | * Do not clear the MSR_IA32_MCi_STATUS if the error is not |
| 205 | * recoverable/continuable.This will allow BIOS to look at the MSRs |
| 206 | * for errors if the OS could not log the error. |
| 207 | */ |
| 208 | for (i=0; i<nr_mce_banks; i++) { |
| 209 | u32 msr; |
| 210 | msr = MSR_IA32_MC0_STATUS+i*4; |
| 211 | rdmsr (msr, low, high); |
| 212 | if (high&(1<<31)) { |
| 213 | /* Clear it */ |
| 214 | wrmsr(msr, 0UL, 0UL); |
| 215 | /* Serialize */ |
| 216 | wmb(); |
| 217 | add_taint(TAINT_MACHINE_CHECK); |
| 218 | } |
| 219 | } |
| 220 | mcgstl &= ~(1<<2); |
| 221 | wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth); |
| 222 | } |
| 223 | |
| 224 | |
| 225 | void intel_p4_mcheck_init(struct cpuinfo_x86 *c) |
| 226 | { |
| 227 | u32 l, h; |
| 228 | int i; |
| 229 | |
| 230 | machine_check_vector = intel_machine_check; |
| 231 | wmb(); |
| 232 | |
| 233 | printk (KERN_INFO "Intel machine check architecture supported.\n"); |
| 234 | rdmsr (MSR_IA32_MCG_CAP, l, h); |
| 235 | if (l & (1<<8)) /* Control register present ? */ |
| 236 | wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); |
| 237 | nr_mce_banks = l & 0xff; |
| 238 | |
| 239 | for (i=0; i<nr_mce_banks; i++) { |
| 240 | wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); |
| 241 | wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); |
| 242 | } |
| 243 | |
| 244 | set_in_cr4 (X86_CR4_MCE); |
| 245 | printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", |
| 246 | smp_processor_id()); |
| 247 | |
| 248 | /* Check for P4/Xeon extended MCE MSRs */ |
| 249 | rdmsr (MSR_IA32_MCG_CAP, l, h); |
| 250 | if (l & (1<<9)) {/* MCG_EXT_P */ |
| 251 | mce_num_extended_msrs = (l >> 16) & 0xff; |
| 252 | printk (KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)" |
| 253 | " available\n", |
| 254 | smp_processor_id(), mce_num_extended_msrs); |
| 255 | |
| 256 | #ifdef CONFIG_X86_MCE_P4THERMAL |
| 257 | /* Check for P4/Xeon Thermal monitor */ |
| 258 | intel_init_thermal(c); |
| 259 | #endif |
| 260 | } |
| 261 | } |