Commit | Line | Data |
---|---|---|
1da177e4 LT | 1 | /* |
1da177e4 LT | 2 | * Intel specific MCE features. |
1da177e4 LT | 3 | * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca> |
88ccbedd AK | 4 | * Copyright (C) 2008, 2009 Intel Corporation |
88ccbedd AK | 5 | * Author: Andi Kleen |
1da177e4 LT | 6 | */ |
1da177e4 LT | 7 | |
1da177e4 LT | 8 | #include <linux/init.h> |
1da177e4 LT | 9 | #include <linux/interrupt.h> |
1da177e4 LT | 10 | #include <linux/percpu.h> |
1da177e4 LT | 11 | #include <asm/processor.h> |
7b6aa335 | 12 | #include <asm/apic.h> |
1da177e4 LT | 13 | #include <asm/msr.h> |
1da177e4 LT | 14 | #include <asm/mce.h> |
1da177e4 LT | 15 | #include <asm/hw_irq.h> |
95833c83 | 16 | #include <asm/idle.h> |
15d5f839 | 17 | #include <asm/therm_throt.h> |
1da177e4 LT | 18 | |
1da177e4 LT | 19 | asmlinkage void smp_thermal_interrupt(void) |
1da177e4 LT | 20 | { |
15d5f839 | 21 | __u64 msr_val; |
1da177e4 LT | 22 | |
1da177e4 LT | 23 | ack_APIC_irq(); |
1da177e4 LT | 24 | |
95833c83 | 25 | exit_idle(); |
1da177e4 | 26 | irq_enter(); |
15d5f839 DZ | 27 | |
15d5f839 DZ | 28 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); |
15d5f839 DZ | 29 | if (therm_throt_process(msr_val & 1)) |
b5f2fa4e | 30 | mce_log_therm_throt_event(msr_val); |
15d5f839 | 31 | |
8ae93669 | 32 | inc_irq_stat(irq_thermal_count); |
1da177e4 LT | 33 | irq_exit(); |
1da177e4 LT | 34 | } |
1da177e4 LT | 35 | |
cc3ca220 | 36 | static void intel_init_thermal(struct cpuinfo_x86 *c) |
1da177e4 LT | 37 | { |
1da177e4 LT | 38 | u32 l, h; |
1da177e4 LT | 39 | int tm2 = 0; |
1da177e4 LT | 40 | unsigned int cpu = smp_processor_id(); |
1da177e4 LT | 41 | |
1da177e4 LT | 42 | if (!cpu_has(c, X86_FEATURE_ACPI)) |
1da177e4 LT | 43 | return; |
1da177e4 LT | 44 | |
1da177e4 LT | 45 | if (!cpu_has(c, X86_FEATURE_ACC)) |
1da177e4 LT | 46 | return; |
1da177e4 LT | 47 | |
1da177e4 LT | 48 | /* first check if TM1 is already enabled by the BIOS, in which |
1da177e4 LT | 49 | * case there might be some SMM goo which handles it, so we can't even |
1da177e4 LT | 50 | * put a handler since it might be delivered via SMI already. |
1da177e4 LT | 51 | */ |
1da177e4 LT | 52 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); |
1da177e4 LT | 53 | h = apic_read(APIC_LVTTHMR); |
ecab22aa | 54 | if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { |
1da177e4 LT | 55 | printk(KERN_DEBUG |
1da177e4 LT | 56 | "CPU%d: Thermal monitoring handled by SMI\n", cpu); |
1da177e4 LT | 57 | return; |
1da177e4 LT | 58 | } |
1da177e4 LT | 59 | |
ecab22aa | 60 | if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2)) |
1da177e4 LT | 61 | tm2 = 1; |
1da177e4 LT | 62 | |
1da177e4 LT | 63 | if (h & APIC_VECTOR_MASK) { |
1da177e4 LT | 64 | printk(KERN_DEBUG |
1da177e4 LT | 65 | "CPU%d: Thermal LVT vector (%#x) already " |
1da177e4 LT | 66 | "installed\n", cpu, (h & APIC_VECTOR_MASK)); |
1da177e4 LT | 67 | return; |
1da177e4 LT | 68 | } |
1da177e4 LT | 69 | |
1da177e4 LT | 70 | h = THERMAL_APIC_VECTOR; |
1da177e4 LT | 71 | h |= (APIC_DM_FIXED | APIC_LVT_MASKED); |
11a8e778 | 72 | apic_write(APIC_LVTTHMR, h); |
1da177e4 LT | 73 | |
1da177e4 LT | 74 | rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); |
1da177e4 LT | 75 | wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03, h); |
1da177e4 LT | 76 | |
1da177e4 LT | 77 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); |
ecab22aa | 78 | wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); |
1da177e4 LT | 79 | |
1da177e4 LT | 80 | l = apic_read(APIC_LVTTHMR); |
11a8e778 | 81 | apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); |
1da177e4 LT | 82 | printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", |
1da177e4 LT | 83 | cpu, tm2 ? "TM2" : "TM1"); |
3222b36f DZ | 84 | |
3222b36f DZ | 85 | /* enable thermal throttle processing */ |
3222b36f DZ | 86 | atomic_set(&therm_throt_en, 1); |
1da177e4 LT | 87 | return; |
1da177e4 LT | 88 | } |
1da177e4 LT | 89 | |
88ccbedd AK | 90 | /* |
88ccbedd AK | 91 | * Support for Intel Corrected Machine Check Interrupts. This allows |
88ccbedd AK | 92 | * the CPU to raise an interrupt when a corrected machine check happened. |
88ccbedd AK | 93 | * Normally we pick those up using a regular polling timer. |
88ccbedd AK | 94 | * Also supports reliable discovery of shared banks. |
88ccbedd AK | 95 | */ |
88ccbedd AK | 96 | |
88ccbedd AK | 97 | static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned); |
88ccbedd AK | 98 | |
88ccbedd AK | 99 | /* |
88ccbedd AK | 100 | * cmci_discover_lock protects against parallel discovery attempts |
88ccbedd AK | 101 | * which could race against each other. |
88ccbedd AK | 102 | */ |
88ccbedd AK | 103 | static DEFINE_SPINLOCK(cmci_discover_lock); |
88ccbedd AK | 104 | |
88ccbedd AK | 105 | #define CMCI_THRESHOLD 1 |
88ccbedd AK | 106 | |
df20e2eb | 107 | static int cmci_supported(int *banks) |
88ccbedd AK | 108 | { |
88ccbedd AK | 109 | u64 cap; |
88ccbedd AK | 110 | |
88ccbedd AK | 111 | /* |
88ccbedd AK | 112 | * Vendor check is not strictly needed, but the initial |
88ccbedd AK | 113 | * initialization is vendor keyed and this |
88ccbedd AK | 114 | * makes sure none of the backdoors are entered otherwise. |
88ccbedd AK | 115 | */ |
88ccbedd AK | 116 | if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) |
88ccbedd AK | 117 | return 0; |
88ccbedd AK | 118 | if (!cpu_has_apic || lapic_get_maxlvt() < 6) |
88ccbedd AK | 119 | return 0; |
88ccbedd AK | 120 | rdmsrl(MSR_IA32_MCG_CAP, cap); |
88ccbedd AK | 121 | *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff); |
88ccbedd AK | 122 | return !!(cap & MCG_CMCI_P); |
88ccbedd AK | 123 | } |
88ccbedd AK | 124 | |
88ccbedd AK | 125 | /* |
88ccbedd AK | 126 | * The interrupt handler. This is called on every event. |
88ccbedd AK | 127 | * Just call the poller directly to log any events. |
88ccbedd AK | 128 | * This could in theory increase the threshold under high load, |
88ccbedd AK | 129 | * but doesn't for now. |
88ccbedd AK | 130 | */ |
88ccbedd AK | 131 | static void intel_threshold_interrupt(void) |
88ccbedd AK | 132 | { |
88ccbedd AK | 133 | machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); |
88ccbedd AK | 134 | mce_notify_user(); |
88ccbedd AK | 135 | } |
88ccbedd AK | 136 | |
88ccbedd AK | 137 | static void print_update(char *type, int *hdr, int num) |
88ccbedd AK | 138 | { |
88ccbedd AK | 139 | if (*hdr == 0) |
88ccbedd AK | 140 | printk(KERN_INFO "CPU %d MCA banks", smp_processor_id()); |
88ccbedd AK | 141 | *hdr = 1; |
88ccbedd AK | 142 | printk(KERN_CONT " %s:%d", type, num); |
88ccbedd AK | 143 | } |
88ccbedd AK | 144 | |
88ccbedd AK | 145 | /* |
88ccbedd AK | 146 | * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks |
88ccbedd AK | 147 | * on this CPU. Use the algorithm recommended in the SDM to discover shared |
88ccbedd AK | 148 | * banks. |
88ccbedd AK | 149 | */ |
df20e2eb | 150 | static void cmci_discover(int banks, int boot) |
88ccbedd AK | 151 | { |
88ccbedd AK | 152 | unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned); |
e5299926 | 153 | unsigned long flags; |
88ccbedd AK | 154 | int hdr = 0; |
88ccbedd AK | 155 | int i; |
88ccbedd AK | 156 | |
e5299926 | 157 | spin_lock_irqsave(&cmci_discover_lock, flags); |
88ccbedd AK | 158 | for (i = 0; i < banks; i++) { |
88ccbedd AK | 159 | u64 val; |
88ccbedd AK | 160 | |
88ccbedd AK | 161 | if (test_bit(i, owned)) |
88ccbedd AK | 162 | continue; |
88ccbedd AK | 163 | |
88ccbedd AK | 164 | rdmsrl(MSR_IA32_MC0_CTL2 + i, val); |
88ccbedd AK | 165 | |
88ccbedd AK | 166 | /* Already owned by someone else? */ |
88ccbedd AK | 167 | if (val & CMCI_EN) { |
88ccbedd AK | 168 | if (test_and_clear_bit(i, owned) || boot) |
88ccbedd AK | 169 | print_update("SHD", &hdr, i); |
88ccbedd AK | 170 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); |
88ccbedd AK | 171 | continue; |
88ccbedd AK | 172 | } |
88ccbedd AK | 173 | |
88ccbedd AK | 174 | val |= CMCI_EN | CMCI_THRESHOLD; |
88ccbedd AK | 175 | wrmsrl(MSR_IA32_MC0_CTL2 + i, val); |
88ccbedd AK | 176 | rdmsrl(MSR_IA32_MC0_CTL2 + i, val); |
88ccbedd AK | 177 | |
88ccbedd AK | 178 | /* Did the enable bit stick? -- the bank supports CMCI */ |
88ccbedd AK | 179 | if (val & CMCI_EN) { |
88ccbedd AK | 180 | if (!test_and_set_bit(i, owned) || boot) |
88ccbedd AK | 181 | print_update("CMCI", &hdr, i); |
88ccbedd AK | 182 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); |
88ccbedd AK | 183 | } else { |
88ccbedd AK | 184 | WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks))); |
88ccbedd AK | 185 | } |
88ccbedd AK | 186 | } |
e5299926 | 187 | spin_unlock_irqrestore(&cmci_discover_lock, flags); |
88ccbedd AK | 188 | if (hdr) |
88ccbedd AK | 189 | printk(KERN_CONT "\n"); |
88ccbedd AK | 190 | } |
88ccbedd AK | 191 | |
88ccbedd AK | 192 | /* |
88ccbedd AK | 193 | * Just in case we missed an event during initialization check |
88ccbedd AK | 194 | * all the CMCI owned banks. |
88ccbedd AK | 195 | */ |
df20e2eb | 196 | void cmci_recheck(void) |
88ccbedd AK | 197 | { |
88ccbedd AK | 198 | unsigned long flags; |
88ccbedd AK | 199 | int banks; |
88ccbedd AK | 200 | |
88ccbedd AK | 201 | if (!mce_available(&current_cpu_data) || !cmci_supported(&banks)) |
88ccbedd AK | 202 | return; |
88ccbedd AK | 203 | local_irq_save(flags); |
88ccbedd AK | 204 | machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); |
88ccbedd AK | 205 | local_irq_restore(flags); |
88ccbedd AK | 206 | } |
88ccbedd AK | 207 | |
88ccbedd AK | 208 | /* |
88ccbedd AK | 209 | * Disable CMCI on this CPU for all banks it owns when it goes down. |
88ccbedd AK | 210 | * This allows other CPUs to claim the banks on rediscovery. |
88ccbedd AK | 211 | */ |
df20e2eb | 212 | void cmci_clear(void) |
88ccbedd | 213 | { |
e5299926 | 214 | unsigned long flags; |
88ccbedd AK | 215 | int i; |
88ccbedd AK | 216 | int banks; |
88ccbedd AK | 217 | u64 val; |
88ccbedd AK | 218 | |
88ccbedd AK | 219 | if (!cmci_supported(&banks)) |
88ccbedd AK | 220 | return; |
e5299926 | 221 | spin_lock_irqsave(&cmci_discover_lock, flags); |
88ccbedd AK | 222 | for (i = 0; i < banks; i++) { |
88ccbedd AK | 223 | if (!test_bit(i, __get_cpu_var(mce_banks_owned))) |
88ccbedd AK | 224 | continue; |
88ccbedd AK | 225 | /* Disable CMCI */ |
88ccbedd AK | 226 | rdmsrl(MSR_IA32_MC0_CTL2 + i, val); |
88ccbedd AK | 227 | val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK); |
88ccbedd AK | 228 | wrmsrl(MSR_IA32_MC0_CTL2 + i, val); |
88ccbedd AK | 229 | __clear_bit(i, __get_cpu_var(mce_banks_owned)); |
88ccbedd AK | 230 | } |
e5299926 | 231 | spin_unlock_irqrestore(&cmci_discover_lock, flags); |
88ccbedd AK | 232 | } |
88ccbedd AK | 233 | |
88ccbedd AK | 234 | /* |
88ccbedd AK | 235 | * After a CPU went down cycle through all the others and rediscover |
88ccbedd AK | 236 | * Must run in process context. |
88ccbedd AK | 237 | */ |
df20e2eb | 238 | void cmci_rediscover(int dying) |
88ccbedd AK | 239 | { |
88ccbedd AK | 240 | int banks; |
88ccbedd AK | 241 | int cpu; |
88ccbedd AK | 242 | cpumask_var_t old; |
88ccbedd AK | 243 | |
88ccbedd AK | 244 | if (!cmci_supported(&banks)) |
88ccbedd AK | 245 | return; |
88ccbedd AK | 246 | if (!alloc_cpumask_var(&old, GFP_KERNEL)) |
88ccbedd AK | 247 | return; |
88ccbedd AK | 248 | cpumask_copy(old, &current->cpus_allowed); |
88ccbedd AK | 249 | |
88ccbedd AK | 250 | for_each_online_cpu (cpu) { |
88ccbedd AK | 251 | if (cpu == dying) |
88ccbedd AK | 252 | continue; |
4f062896 | 253 | if (set_cpus_allowed_ptr(current, cpumask_of(cpu))) |
88ccbedd AK | 254 | continue; |
88ccbedd AK | 255 | /* Recheck banks in case CPUs don't all have the same */ |
88ccbedd AK | 256 | if (cmci_supported(&banks)) |
88ccbedd AK | 257 | cmci_discover(banks, 0); |
88ccbedd AK | 258 | } |
88ccbedd AK | 259 | |
88ccbedd AK | 260 | set_cpus_allowed_ptr(current, old); |
88ccbedd AK | 261 | free_cpumask_var(old); |
88ccbedd AK | 262 | } |
88ccbedd AK | 263 | |
88ccbedd AK | 264 | /* |
88ccbedd AK | 265 | * Reenable CMCI on this CPU in case a CPU down failed. |
88ccbedd AK | 266 | */ |
88ccbedd AK | 267 | void cmci_reenable(void) |
88ccbedd AK | 268 | { |
88ccbedd AK | 269 | int banks; |
88ccbedd AK | 270 | if (cmci_supported(&banks)) |
88ccbedd AK | 271 | cmci_discover(banks, 0); |
88ccbedd AK | 272 | } |
88ccbedd AK | 273 | |
514ec49a | 274 | static void intel_init_cmci(void) |
88ccbedd AK | 275 | { |
88ccbedd AK | 276 | int banks; |
88ccbedd AK | 277 | |
88ccbedd AK | 278 | if (!cmci_supported(&banks)) |
88ccbedd AK | 279 | return; |
88ccbedd AK | 280 | |
88ccbedd AK | 281 | mce_threshold_vector = intel_threshold_interrupt; |
88ccbedd AK | 282 | cmci_discover(banks, 1); |
88ccbedd AK | 283 | /* |
88ccbedd AK | 284 | * For CPU #0 this runs with still disabled APIC, but that's |
88ccbedd AK | 285 | * ok because only the vector is set up. We still do another |
88ccbedd AK | 286 | * check for the banks later for CPU #0 just to make sure |
88ccbedd AK | 287 | * to not miss any events. |
88ccbedd AK | 288 | */ |
88ccbedd AK | 289 | apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED); |
88ccbedd AK | 290 | cmci_recheck(); |
88ccbedd AK | 291 | } |
88ccbedd AK | 292 | |
cc3ca220 | 293 | void mce_intel_feature_init(struct cpuinfo_x86 *c) |
1da177e4 LT | 294 | { |
1da177e4 LT | 295 | intel_init_thermal(c); |
88ccbedd | 296 | intel_init_cmci(); |
1da177e4 | 297 | } |