Commit | Line | Data |
---|---|---|
36df96f8 MS |
1 | /* |
2 | * Machine check exception handling. | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or modify | |
5 | * it under the terms of the GNU General Public License as published by | |
6 | * the Free Software Foundation; either version 2 of the License, or | |
7 | * (at your option) any later version. | |
8 | * | |
9 | * This program is distributed in the hope that it will be useful, | |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | * GNU General Public License for more details. | |
13 | * | |
14 | * You should have received a copy of the GNU General Public License | |
15 | * along with this program; if not, write to the Free Software | |
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
17 | * | |
18 | * Copyright 2013 IBM Corporation | |
19 | * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> | |
20 | */ | |
21 | ||
22 | #undef DEBUG | |
23 | #define pr_fmt(fmt) "mce: " fmt | |
24 | ||
25 | #include <linux/types.h> | |
26 | #include <linux/ptrace.h> | |
27 | #include <linux/percpu.h> | |
28 | #include <linux/export.h> | |
30c82635 | 29 | #include <linux/irq_work.h> |
36df96f8 MS |
30 | #include <asm/mce.h> |
31 | ||
32 | static DEFINE_PER_CPU(int, mce_nest_count); | |
33 | static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event); | |
34 | ||
b5ff4211 MS |
35 | /* Queue for delayed MCE events. */ |
36 | static DEFINE_PER_CPU(int, mce_queue_count); | |
37 | static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue); | |
38 | ||
30c82635 MS |
39 | static void machine_check_process_queued_event(struct irq_work *work); |
40 | struct irq_work mce_event_process_work = { | |
41 | .func = machine_check_process_queued_event, | |
42 | }; | |
43 | ||
36df96f8 MS |
44 | static void mce_set_error_info(struct machine_check_event *mce, |
45 | struct mce_error_info *mce_err) | |
46 | { | |
47 | mce->error_type = mce_err->error_type; | |
48 | switch (mce_err->error_type) { | |
49 | case MCE_ERROR_TYPE_UE: | |
50 | mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type; | |
51 | break; | |
52 | case MCE_ERROR_TYPE_SLB: | |
53 | mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type; | |
54 | break; | |
55 | case MCE_ERROR_TYPE_ERAT: | |
56 | mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type; | |
57 | break; | |
58 | case MCE_ERROR_TYPE_TLB: | |
59 | mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type; | |
60 | break; | |
61 | case MCE_ERROR_TYPE_UNKNOWN: | |
62 | default: | |
63 | break; | |
64 | } | |
65 | } | |
66 | ||
67 | /* | |
68 | * Decode and save high level MCE information into per cpu buffer which | |
69 | * is an array of machine_check_event structure. | |
70 | */ | |
71 | void save_mce_event(struct pt_regs *regs, long handled, | |
72 | struct mce_error_info *mce_err, | |
55672ecf | 73 | uint64_t nip, uint64_t addr) |
36df96f8 MS |
74 | { |
75 | uint64_t srr1; | |
ffb2d78e | 76 | int index = __this_cpu_inc_return(mce_nest_count) - 1; |
69111bac | 77 | struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]); |
36df96f8 MS |
78 | |
79 | /* | |
80 | * Return if we don't have enough space to log mce event. | |
81 | * mce_nest_count may go beyond MAX_MC_EVT but that's ok, | |
82 | * the check below will stop buffer overrun. | |
83 | */ | |
84 | if (index >= MAX_MC_EVT) | |
85 | return; | |
86 | ||
87 | /* Populate generic machine check info */ | |
88 | mce->version = MCE_V1; | |
55672ecf | 89 | mce->srr0 = nip; |
36df96f8 MS |
90 | mce->srr1 = regs->msr; |
91 | mce->gpr3 = regs->gpr[3]; | |
92 | mce->in_use = 1; | |
93 | ||
94 | mce->initiator = MCE_INITIATOR_CPU; | |
95 | if (handled) | |
96 | mce->disposition = MCE_DISPOSITION_RECOVERED; | |
97 | else | |
98 | mce->disposition = MCE_DISPOSITION_NOT_RECOVERED; | |
99 | mce->severity = MCE_SEV_ERROR_SYNC; | |
100 | ||
101 | srr1 = regs->msr; | |
102 | ||
103 | /* | |
104 | * Populate the mce error_type and type-specific error_type. | |
105 | */ | |
106 | mce_set_error_info(mce, mce_err); | |
107 | ||
108 | if (!addr) | |
109 | return; | |
110 | ||
111 | if (mce->error_type == MCE_ERROR_TYPE_TLB) { | |
112 | mce->u.tlb_error.effective_address_provided = true; | |
113 | mce->u.tlb_error.effective_address = addr; | |
114 | } else if (mce->error_type == MCE_ERROR_TYPE_SLB) { | |
115 | mce->u.slb_error.effective_address_provided = true; | |
116 | mce->u.slb_error.effective_address = addr; | |
117 | } else if (mce->error_type == MCE_ERROR_TYPE_ERAT) { | |
118 | mce->u.erat_error.effective_address_provided = true; | |
119 | mce->u.erat_error.effective_address = addr; | |
120 | } else if (mce->error_type == MCE_ERROR_TYPE_UE) { | |
121 | mce->u.ue_error.effective_address_provided = true; | |
122 | mce->u.ue_error.effective_address = addr; | |
123 | } | |
124 | return; | |
125 | } | |
126 | ||
127 | /* | |
128 | * get_mce_event: | |
129 | * mce Pointer to machine_check_event structure to be filled. | |
130 | * release Flag to indicate whether to free the event slot or not. | |
131 | * 0 <= do not release the mce event. Caller will invoke | |
132 | * release_mce_event() once event has been consumed. | |
133 | * 1 <= release the slot. | |
134 | * | |
135 | * return 1 = success | |
136 | * 0 = failure | |
137 | * | |
138 | * get_mce_event() will be called by platform specific machine check | |
139 | * handle routine and in KVM. | |
140 | * When we call get_mce_event(), we are still in interrupt context and | |
141 | * preemption will not be scheduled until ret_from_except() routine | |
142 | * is called. | |
143 | */ | |
144 | int get_mce_event(struct machine_check_event *mce, bool release) | |
145 | { | |
69111bac | 146 | int index = __this_cpu_read(mce_nest_count) - 1; |
36df96f8 MS |
147 | struct machine_check_event *mc_evt; |
148 | int ret = 0; | |
149 | ||
150 | /* Sanity check */ | |
151 | if (index < 0) | |
152 | return ret; | |
153 | ||
154 | /* Check if we have MCE info to process. */ | |
155 | if (index < MAX_MC_EVT) { | |
69111bac | 156 | mc_evt = this_cpu_ptr(&mce_event[index]); |
36df96f8 MS |
157 | /* Copy the event structure and release the original */ |
158 | if (mce) | |
159 | *mce = *mc_evt; | |
160 | if (release) | |
161 | mc_evt->in_use = 0; | |
162 | ret = 1; | |
163 | } | |
164 | /* Decrement the count to free the slot. */ | |
165 | if (release) | |
69111bac | 166 | __this_cpu_dec(mce_nest_count); |
36df96f8 MS |
167 | |
168 | return ret; | |
169 | } | |
170 | ||
/*
 * Release the most recently saved MCE event on this CPU without copying
 * it out.
 */
void release_mce_event(void)
{
	(void)get_mce_event(NULL, true);
}
b5ff4211 MS |
175 | |
176 | /* | |
177 | * Queue up the MCE event which then can be handled later. | |
178 | */ | |
179 | void machine_check_queue_event(void) | |
180 | { | |
181 | int index; | |
182 | struct machine_check_event evt; | |
183 | ||
184 | if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) | |
185 | return; | |
186 | ||
ffb2d78e | 187 | index = __this_cpu_inc_return(mce_queue_count) - 1; |
b5ff4211 MS |
188 | /* If queue is full, just return for now. */ |
189 | if (index >= MAX_MC_EVT) { | |
69111bac | 190 | __this_cpu_dec(mce_queue_count); |
b5ff4211 MS |
191 | return; |
192 | } | |
69111bac | 193 | memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt)); |
30c82635 MS |
194 | |
195 | /* Queue irq work to process this event later. */ | |
196 | irq_work_queue(&mce_event_process_work); | |
b5ff4211 MS |
197 | } |
198 | ||
199 | /* | |
200 | * process pending MCE event from the mce event queue. This function will be | |
201 | * called during syscall exit. | |
202 | */ | |
30c82635 | 203 | static void machine_check_process_queued_event(struct irq_work *work) |
b5ff4211 MS |
204 | { |
205 | int index; | |
206 | ||
b5ff4211 MS |
207 | /* |
208 | * For now just print it to console. | |
209 | * TODO: log this error event to FSP or nvram. | |
210 | */ | |
69111bac CL |
211 | while (__this_cpu_read(mce_queue_count) > 0) { |
212 | index = __this_cpu_read(mce_queue_count) - 1; | |
b5ff4211 | 213 | machine_check_print_event_info( |
69111bac CL |
214 | this_cpu_ptr(&mce_event_queue[index])); |
215 | __this_cpu_dec(mce_queue_count); | |
b5ff4211 | 216 | } |
b5ff4211 MS |
217 | } |
218 | ||
219 | void machine_check_print_event_info(struct machine_check_event *evt) | |
220 | { | |
221 | const char *level, *sevstr, *subtype; | |
222 | static const char *mc_ue_types[] = { | |
223 | "Indeterminate", | |
224 | "Instruction fetch", | |
225 | "Page table walk ifetch", | |
226 | "Load/Store", | |
227 | "Page table walk Load/Store", | |
228 | }; | |
229 | static const char *mc_slb_types[] = { | |
230 | "Indeterminate", | |
231 | "Parity", | |
232 | "Multihit", | |
233 | }; | |
234 | static const char *mc_erat_types[] = { | |
235 | "Indeterminate", | |
236 | "Parity", | |
237 | "Multihit", | |
238 | }; | |
239 | static const char *mc_tlb_types[] = { | |
240 | "Indeterminate", | |
241 | "Parity", | |
242 | "Multihit", | |
243 | }; | |
244 | ||
245 | /* Print things out */ | |
246 | if (evt->version != MCE_V1) { | |
247 | pr_err("Machine Check Exception, Unknown event version %d !\n", | |
248 | evt->version); | |
249 | return; | |
250 | } | |
251 | switch (evt->severity) { | |
252 | case MCE_SEV_NO_ERROR: | |
253 | level = KERN_INFO; | |
254 | sevstr = "Harmless"; | |
255 | break; | |
256 | case MCE_SEV_WARNING: | |
257 | level = KERN_WARNING; | |
258 | sevstr = ""; | |
259 | break; | |
260 | case MCE_SEV_ERROR_SYNC: | |
261 | level = KERN_ERR; | |
262 | sevstr = "Severe"; | |
263 | break; | |
264 | case MCE_SEV_FATAL: | |
265 | default: | |
266 | level = KERN_ERR; | |
267 | sevstr = "Fatal"; | |
268 | break; | |
269 | } | |
270 | ||
271 | printk("%s%s Machine check interrupt [%s]\n", level, sevstr, | |
272 | evt->disposition == MCE_DISPOSITION_RECOVERED ? | |
273 | "Recovered" : "[Not recovered"); | |
274 | printk("%s Initiator: %s\n", level, | |
275 | evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown"); | |
276 | switch (evt->error_type) { | |
277 | case MCE_ERROR_TYPE_UE: | |
278 | subtype = evt->u.ue_error.ue_error_type < | |
279 | ARRAY_SIZE(mc_ue_types) ? | |
280 | mc_ue_types[evt->u.ue_error.ue_error_type] | |
281 | : "Unknown"; | |
282 | printk("%s Error type: UE [%s]\n", level, subtype); | |
283 | if (evt->u.ue_error.effective_address_provided) | |
284 | printk("%s Effective address: %016llx\n", | |
285 | level, evt->u.ue_error.effective_address); | |
286 | if (evt->u.ue_error.physical_address_provided) | |
287 | printk("%s Physial address: %016llx\n", | |
288 | level, evt->u.ue_error.physical_address); | |
289 | break; | |
290 | case MCE_ERROR_TYPE_SLB: | |
291 | subtype = evt->u.slb_error.slb_error_type < | |
292 | ARRAY_SIZE(mc_slb_types) ? | |
293 | mc_slb_types[evt->u.slb_error.slb_error_type] | |
294 | : "Unknown"; | |
295 | printk("%s Error type: SLB [%s]\n", level, subtype); | |
296 | if (evt->u.slb_error.effective_address_provided) | |
297 | printk("%s Effective address: %016llx\n", | |
298 | level, evt->u.slb_error.effective_address); | |
299 | break; | |
300 | case MCE_ERROR_TYPE_ERAT: | |
301 | subtype = evt->u.erat_error.erat_error_type < | |
302 | ARRAY_SIZE(mc_erat_types) ? | |
303 | mc_erat_types[evt->u.erat_error.erat_error_type] | |
304 | : "Unknown"; | |
305 | printk("%s Error type: ERAT [%s]\n", level, subtype); | |
306 | if (evt->u.erat_error.effective_address_provided) | |
307 | printk("%s Effective address: %016llx\n", | |
308 | level, evt->u.erat_error.effective_address); | |
309 | break; | |
310 | case MCE_ERROR_TYPE_TLB: | |
311 | subtype = evt->u.tlb_error.tlb_error_type < | |
312 | ARRAY_SIZE(mc_tlb_types) ? | |
313 | mc_tlb_types[evt->u.tlb_error.tlb_error_type] | |
314 | : "Unknown"; | |
315 | printk("%s Error type: TLB [%s]\n", level, subtype); | |
316 | if (evt->u.tlb_error.effective_address_provided) | |
317 | printk("%s Effective address: %016llx\n", | |
318 | level, evt->u.tlb_error.effective_address); | |
319 | break; | |
320 | default: | |
321 | case MCE_ERROR_TYPE_UNKNOWN: | |
322 | printk("%s Error type: Unknown\n", level); | |
323 | break; | |
324 | } | |
325 | } | |
b63a0ffe MS |
326 | |
327 | uint64_t get_mce_fault_addr(struct machine_check_event *evt) | |
328 | { | |
329 | switch (evt->error_type) { | |
330 | case MCE_ERROR_TYPE_UE: | |
331 | if (evt->u.ue_error.effective_address_provided) | |
332 | return evt->u.ue_error.effective_address; | |
333 | break; | |
334 | case MCE_ERROR_TYPE_SLB: | |
335 | if (evt->u.slb_error.effective_address_provided) | |
336 | return evt->u.slb_error.effective_address; | |
337 | break; | |
338 | case MCE_ERROR_TYPE_ERAT: | |
339 | if (evt->u.erat_error.effective_address_provided) | |
340 | return evt->u.erat_error.effective_address; | |
341 | break; | |
342 | case MCE_ERROR_TYPE_TLB: | |
343 | if (evt->u.tlb_error.effective_address_provided) | |
344 | return evt->u.tlb_error.effective_address; | |
345 | break; | |
346 | default: | |
347 | case MCE_ERROR_TYPE_UNKNOWN: | |
348 | break; | |
349 | } | |
350 | return 0; | |
351 | } | |
352 | EXPORT_SYMBOL(get_mce_fault_addr); |