Commit | Line | Data |
---|---|---|
d334a491 HY |
1 | /* |
2 | * APEI Generic Hardware Error Source support | |
3 | * | |
4 | * Generic Hardware Error Source provides a way to report platform | |
5 | * hardware errors (such as that from chipset). It works in so called | |
6 | * "Firmware First" mode, that is, hardware errors are reported to | |
7 | * firmware firstly, then reported to Linux by firmware. This way, | |
8 | * some non-standard hardware error registers or non-standard hardware | |
9 | * link can be checked by firmware to produce more hardware error | |
10 | * information for Linux. | |
11 | * | |
12 | * For more information about Generic Hardware Error Source, please | |
13 | * refer to ACPI Specification version 4.0, section 17.3.2.6 | |
14 | * | |
15 | * Currently, only the SCI notification type and memory errors are | |
16 | * supported. More notification types and hardware error types will | |
17 | * be added later. | |
18 | * | |
19 | * Copyright 2010 Intel Corp. | |
20 | * Author: Huang Ying <ying.huang@intel.com> | |
21 | * | |
22 | * This program is free software; you can redistribute it and/or | |
23 | * modify it under the terms of the GNU General Public License version | |
24 | * 2 as published by the Free Software Foundation; | |
25 | * | |
26 | * This program is distributed in the hope that it will be useful, | |
27 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
28 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
29 | * GNU General Public License for more details. | |
30 | * | |
31 | * You should have received a copy of the GNU General Public License | |
32 | * along with this program; if not, write to the Free Software | |
33 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
34 | */ | |
35 | ||
36 | #include <linux/kernel.h> | |
37 | #include <linux/module.h> | |
38 | #include <linux/init.h> | |
39 | #include <linux/acpi.h> | |
40 | #include <linux/io.h> | |
41 | #include <linux/interrupt.h> | |
42 | #include <linux/cper.h> | |
43 | #include <linux/kdebug.h> | |
44 | #include <acpi/apei.h> | |
45 | #include <acpi/atomicio.h> | |
46 | #include <acpi/hed.h> | |
47 | #include <asm/mce.h> | |
48 | ||
49 | #include "apei-internal.h" | |
50 | ||
51 | #define GHES_PFX "GHES: " | |
52 | ||
53 | #define GHES_ESTATUS_MAX_SIZE 65536 | |
54 | ||
55 | /* | |
56 | * One struct ghes is created for each generic hardware error | |
57 | * source. | |
58 | * | |
59 | * It provides the context for APEI hardware error timer/IRQ/SCI/NMI | |
60 | * handler. Handler for one generic hardware error source is only | |
61 | * triggered after the previous one is done. So the handler can use | |
62 | * struct ghes without locking. | |
63 | * | |
64 | * estatus: memory buffer for error status block, allocated during | |
65 | * HEST parsing. | |
66 | */ | |
67 | #define GHES_TO_CLEAR 0x0001 | |
68 | ||
69 | struct ghes { | |
70 | struct acpi_hest_generic *generic; | |
71 | struct acpi_hest_generic_status *estatus; | |
72 | struct list_head list; | |
73 | u64 buffer_paddr; | |
74 | unsigned long flags; | |
75 | }; | |
76 | ||
77 | /* | |
78 | * Error source lists, one list for each notification method. The | |
79 | * members in lists are struct ghes. | |
80 | * | |
81 | * The list members are only added in HEST parsing and deleted during | |
82 | * module_exit, that is, single-threaded. So no lock is needed for | |
83 | * that. | |
84 | * | |
85 | * But the mutual exclusion is needed between members adding/deleting | |
86 | * and timer/IRQ/SCI/NMI handler, which may traverse the list. RCU is | |
87 | * used for that. | |
88 | */ | |
89 | static LIST_HEAD(ghes_sci); | |
90 | ||
91 | static struct ghes *ghes_new(struct acpi_hest_generic *generic) | |
92 | { | |
93 | struct ghes *ghes; | |
94 | unsigned int error_block_length; | |
95 | int rc; | |
96 | ||
97 | ghes = kzalloc(sizeof(*ghes), GFP_KERNEL); | |
98 | if (!ghes) | |
99 | return ERR_PTR(-ENOMEM); | |
100 | ghes->generic = generic; | |
101 | INIT_LIST_HEAD(&ghes->list); | |
102 | rc = acpi_pre_map_gar(&generic->error_status_address); | |
103 | if (rc) | |
104 | goto err_free; | |
105 | error_block_length = generic->error_block_length; | |
106 | if (error_block_length > GHES_ESTATUS_MAX_SIZE) { | |
107 | pr_warning(FW_WARN GHES_PFX | |
108 | "Error status block length is too long: %u for " | |
109 | "generic hardware error source: %d.\n", | |
110 | error_block_length, generic->header.source_id); | |
111 | error_block_length = GHES_ESTATUS_MAX_SIZE; | |
112 | } | |
113 | ghes->estatus = kmalloc(error_block_length, GFP_KERNEL); | |
114 | if (!ghes->estatus) { | |
115 | rc = -ENOMEM; | |
116 | goto err_unmap; | |
117 | } | |
118 | ||
119 | return ghes; | |
120 | ||
121 | err_unmap: | |
122 | acpi_post_unmap_gar(&generic->error_status_address); | |
123 | err_free: | |
124 | kfree(ghes); | |
125 | return ERR_PTR(rc); | |
126 | } | |
127 | ||
128 | static void ghes_fini(struct ghes *ghes) | |
129 | { | |
130 | kfree(ghes->estatus); | |
131 | acpi_post_unmap_gar(&ghes->generic->error_status_address); | |
132 | } | |
133 | ||
/* GHES-internal severity levels, in increasing order of severity. */
enum {
	GHES_SEV_NO,		/* 0 */
	GHES_SEV_CORRECTED,	/* 1 */
	GHES_SEV_RECOVERABLE,	/* 2 */
	GHES_SEV_PANIC,		/* 3 */
};
140 | ||
141 | static inline int ghes_severity(int severity) | |
142 | { | |
143 | switch (severity) { | |
ad4ecef2 HY |
144 | case CPER_SEV_INFORMATIONAL: |
145 | return GHES_SEV_NO; | |
146 | case CPER_SEV_CORRECTED: | |
147 | return GHES_SEV_CORRECTED; | |
148 | case CPER_SEV_RECOVERABLE: | |
149 | return GHES_SEV_RECOVERABLE; | |
150 | case CPER_SEV_FATAL: | |
151 | return GHES_SEV_PANIC; | |
d334a491 HY |
152 | default: |
153 | /* Unkown, go panic */ | |
ad4ecef2 | 154 | return GHES_SEV_PANIC; |
d334a491 HY |
155 | } |
156 | } | |
157 | ||
158 | /* SCI handler run in work queue, so ioremap can be used here */ | |
159 | static int ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len, | |
160 | int from_phys) | |
161 | { | |
162 | void *vaddr; | |
163 | ||
164 | vaddr = ioremap_cache(paddr, len); | |
165 | if (!vaddr) | |
166 | return -ENOMEM; | |
167 | if (from_phys) | |
168 | memcpy(buffer, vaddr, len); | |
169 | else | |
170 | memcpy(vaddr, buffer, len); | |
171 | iounmap(vaddr); | |
172 | ||
173 | return 0; | |
174 | } | |
175 | ||
176 | static int ghes_read_estatus(struct ghes *ghes, int silent) | |
177 | { | |
178 | struct acpi_hest_generic *g = ghes->generic; | |
179 | u64 buf_paddr; | |
180 | u32 len; | |
181 | int rc; | |
182 | ||
183 | rc = acpi_atomic_read(&buf_paddr, &g->error_status_address); | |
184 | if (rc) { | |
185 | if (!silent && printk_ratelimit()) | |
186 | pr_warning(FW_WARN GHES_PFX | |
187 | "Failed to read error status block address for hardware error source: %d.\n", | |
188 | g->header.source_id); | |
189 | return -EIO; | |
190 | } | |
191 | if (!buf_paddr) | |
192 | return -ENOENT; | |
193 | ||
194 | rc = ghes_copy_tofrom_phys(ghes->estatus, buf_paddr, | |
195 | sizeof(*ghes->estatus), 1); | |
196 | if (rc) | |
197 | return rc; | |
198 | if (!ghes->estatus->block_status) | |
199 | return -ENOENT; | |
200 | ||
201 | ghes->buffer_paddr = buf_paddr; | |
202 | ghes->flags |= GHES_TO_CLEAR; | |
203 | ||
204 | rc = -EIO; | |
205 | len = apei_estatus_len(ghes->estatus); | |
206 | if (len < sizeof(*ghes->estatus)) | |
207 | goto err_read_block; | |
208 | if (len > ghes->generic->error_block_length) | |
209 | goto err_read_block; | |
210 | if (apei_estatus_check_header(ghes->estatus)) | |
211 | goto err_read_block; | |
212 | rc = ghes_copy_tofrom_phys(ghes->estatus + 1, | |
213 | buf_paddr + sizeof(*ghes->estatus), | |
214 | len - sizeof(*ghes->estatus), 1); | |
215 | if (rc) | |
216 | return rc; | |
217 | if (apei_estatus_check(ghes->estatus)) | |
218 | goto err_read_block; | |
219 | rc = 0; | |
220 | ||
221 | err_read_block: | |
222 | if (rc && !silent) | |
223 | pr_warning(FW_WARN GHES_PFX | |
224 | "Failed to read error status block!\n"); | |
225 | return rc; | |
226 | } | |
227 | ||
228 | static void ghes_clear_estatus(struct ghes *ghes) | |
229 | { | |
230 | ghes->estatus->block_status = 0; | |
231 | if (!(ghes->flags & GHES_TO_CLEAR)) | |
232 | return; | |
233 | ghes_copy_tofrom_phys(ghes->estatus, ghes->buffer_paddr, | |
234 | sizeof(ghes->estatus->block_status), 0); | |
235 | ghes->flags &= ~GHES_TO_CLEAR; | |
236 | } | |
237 | ||
238 | static void ghes_do_proc(struct ghes *ghes) | |
239 | { | |
ad4ecef2 | 240 | int sev, processed = 0; |
d334a491 HY |
241 | struct acpi_hest_generic_data *gdata; |
242 | ||
ad4ecef2 | 243 | sev = ghes_severity(ghes->estatus->error_severity); |
d334a491 HY |
244 | apei_estatus_for_each_section(ghes->estatus, gdata) { |
245 | #ifdef CONFIG_X86_MCE | |
246 | if (!uuid_le_cmp(*(uuid_le *)gdata->section_type, | |
247 | CPER_SEC_PLATFORM_MEM)) { | |
248 | apei_mce_report_mem_error( | |
ad4ecef2 | 249 | sev == GHES_SEV_CORRECTED, |
d334a491 HY |
250 | (struct cper_sec_mem_err *)(gdata+1)); |
251 | processed = 1; | |
252 | } | |
253 | #endif | |
254 | } | |
255 | ||
256 | if (!processed && printk_ratelimit()) | |
257 | pr_warning(GHES_PFX | |
258 | "Unknown error record from generic hardware error source: %d\n", | |
259 | ghes->generic->header.source_id); | |
260 | } | |
261 | ||
/*
 * Read and process the pending error status block of one error source.
 *
 * The status block is cleared in every case, including when reading it
 * failed part-way, so that stale status is not left behind.
 *
 * Returns 0 if an error status block was read and processed, otherwise
 * the negative error code from ghes_read_estatus() (for example -ENOENT
 * when nothing is pending).  Returning the real result lets callers tell
 * sources that had an error apart from idle ones.
 */
static int ghes_proc(struct ghes *ghes)
{
	int rc;

	rc = ghes_read_estatus(ghes, 0);
	if (!rc)
		ghes_do_proc(ghes);

	ghes_clear_estatus(ghes);
	return rc;
}
275 | ||
276 | static int ghes_notify_sci(struct notifier_block *this, | |
277 | unsigned long event, void *data) | |
278 | { | |
279 | struct ghes *ghes; | |
280 | int ret = NOTIFY_DONE; | |
281 | ||
282 | rcu_read_lock(); | |
283 | list_for_each_entry_rcu(ghes, &ghes_sci, list) { | |
284 | if (!ghes_proc(ghes)) | |
285 | ret = NOTIFY_OK; | |
286 | } | |
287 | rcu_read_unlock(); | |
288 | ||
289 | return ret; | |
290 | } | |
291 | ||
292 | static struct notifier_block ghes_notifier_sci = { | |
293 | .notifier_call = ghes_notify_sci, | |
294 | }; | |
295 | ||
296 | static int hest_ghes_parse(struct acpi_hest_header *hest_hdr, void *data) | |
297 | { | |
298 | struct acpi_hest_generic *generic; | |
299 | struct ghes *ghes = NULL; | |
300 | int rc = 0; | |
301 | ||
302 | if (hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR) | |
303 | return 0; | |
304 | ||
305 | generic = (struct acpi_hest_generic *)hest_hdr; | |
306 | if (!generic->enabled) | |
307 | return 0; | |
308 | ||
309 | if (generic->error_block_length < | |
310 | sizeof(struct acpi_hest_generic_status)) { | |
311 | pr_warning(FW_BUG GHES_PFX | |
312 | "Invalid error block length: %u for generic hardware error source: %d\n", | |
313 | generic->error_block_length, | |
314 | generic->header.source_id); | |
315 | goto err; | |
316 | } | |
317 | if (generic->records_to_preallocate == 0) { | |
318 | pr_warning(FW_BUG GHES_PFX | |
319 | "Invalid records to preallocate: %u for generic hardware error source: %d\n", | |
320 | generic->records_to_preallocate, | |
321 | generic->header.source_id); | |
322 | goto err; | |
323 | } | |
324 | ghes = ghes_new(generic); | |
325 | if (IS_ERR(ghes)) { | |
326 | rc = PTR_ERR(ghes); | |
327 | ghes = NULL; | |
328 | goto err; | |
329 | } | |
330 | switch (generic->notify.type) { | |
331 | case ACPI_HEST_NOTIFY_POLLED: | |
332 | pr_warning(GHES_PFX | |
333 | "Generic hardware error source: %d notified via POLL is not supported!\n", | |
334 | generic->header.source_id); | |
335 | break; | |
336 | case ACPI_HEST_NOTIFY_EXTERNAL: | |
337 | case ACPI_HEST_NOTIFY_LOCAL: | |
338 | pr_warning(GHES_PFX | |
339 | "Generic hardware error source: %d notified via IRQ is not supported!\n", | |
340 | generic->header.source_id); | |
341 | break; | |
342 | case ACPI_HEST_NOTIFY_SCI: | |
343 | if (list_empty(&ghes_sci)) | |
344 | register_acpi_hed_notifier(&ghes_notifier_sci); | |
345 | list_add_rcu(&ghes->list, &ghes_sci); | |
346 | break; | |
347 | case ACPI_HEST_NOTIFY_NMI: | |
348 | pr_warning(GHES_PFX | |
349 | "Generic hardware error source: %d notified via NMI is not supported!\n", | |
350 | generic->header.source_id); | |
351 | break; | |
352 | default: | |
353 | pr_warning(FW_WARN GHES_PFX | |
354 | "Unknown notification type: %u for generic hardware error source: %d\n", | |
355 | generic->notify.type, generic->header.source_id); | |
356 | break; | |
357 | } | |
358 | ||
359 | return 0; | |
360 | err: | |
361 | if (ghes) | |
362 | ghes_fini(ghes); | |
363 | return rc; | |
364 | } | |
365 | ||
366 | static void ghes_cleanup(void) | |
367 | { | |
368 | struct ghes *ghes, *nghes; | |
369 | ||
370 | if (!list_empty(&ghes_sci)) | |
371 | unregister_acpi_hed_notifier(&ghes_notifier_sci); | |
372 | ||
373 | synchronize_rcu(); | |
374 | ||
375 | list_for_each_entry_safe(ghes, nghes, &ghes_sci, list) { | |
376 | list_del(&ghes->list); | |
377 | ghes_fini(ghes); | |
378 | kfree(ghes); | |
379 | } | |
380 | } | |
381 | ||
382 | static int __init ghes_init(void) | |
383 | { | |
384 | int rc; | |
385 | ||
386 | if (acpi_disabled) | |
387 | return -ENODEV; | |
388 | ||
389 | if (hest_disable) { | |
390 | pr_info(GHES_PFX "HEST is not enabled!\n"); | |
391 | return -EINVAL; | |
392 | } | |
393 | ||
394 | rc = apei_hest_parse(hest_ghes_parse, NULL); | |
395 | if (rc) { | |
396 | pr_err(GHES_PFX | |
397 | "Error during parsing HEST generic hardware error sources.\n"); | |
398 | goto err_cleanup; | |
399 | } | |
400 | ||
401 | if (list_empty(&ghes_sci)) { | |
402 | pr_info(GHES_PFX | |
403 | "No functional generic hardware error sources.\n"); | |
404 | rc = -ENODEV; | |
405 | goto err_cleanup; | |
406 | } | |
407 | ||
408 | pr_info(GHES_PFX | |
409 | "Generic Hardware Error Source support is initialized.\n"); | |
410 | ||
411 | return 0; | |
412 | err_cleanup: | |
413 | ghes_cleanup(); | |
414 | return rc; | |
415 | } | |
416 | ||
417 | static void __exit ghes_exit(void) | |
418 | { | |
419 | ghes_cleanup(); | |
420 | } | |
421 | ||
422 | module_init(ghes_init); | |
423 | module_exit(ghes_exit); | |
424 | ||
425 | MODULE_AUTHOR("Huang Ying"); | |
426 | MODULE_DESCRIPTION("APEI Generic Hardware Error Source support"); | |
427 | MODULE_LICENSE("GPL"); |