]>
Commit | Line | Data |
---|---|---|
d334a491 HY |
1 | /* |
2 | * APEI Generic Hardware Error Source support | |
3 | * | |
4 | * Generic Hardware Error Source provides a way to report platform | |
5 | * hardware errors (such as that from chipset). It works in so called | |
6 | * "Firmware First" mode, that is, hardware errors are reported to | |
7 | * firmware firstly, then reported to Linux by firmware. This way, | |
8 | * some non-standard hardware error registers or non-standard hardware | |
9 | * link can be checked by firmware to produce more hardware error | |
10 | * information for Linux. | |
11 | * | |
12 | * For more information about Generic Hardware Error Source, please | |
13 | * refer to ACPI Specification version 4.0, section 17.3.2.6 | |
14 | * | |
15 | * Currently, only the SCI notification type and memory errors are | |
16 | * supported. More notification types and hardware error types will be | |
17 | * added later. | |
18 | * | |
19 | * Copyright 2010 Intel Corp. | |
20 | * Author: Huang Ying <ying.huang@intel.com> | |
21 | * | |
22 | * This program is free software; you can redistribute it and/or | |
23 | * modify it under the terms of the GNU General Public License version | |
24 | * 2 as published by the Free Software Foundation; | |
25 | * | |
26 | * This program is distributed in the hope that it will be useful, | |
27 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
28 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
29 | * GNU General Public License for more details. | |
30 | * | |
31 | * You should have received a copy of the GNU General Public License | |
32 | * along with this program; if not, write to the Free Software | |
33 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
34 | */ | |
35 | ||
36 | #include <linux/kernel.h> | |
37 | #include <linux/module.h> | |
38 | #include <linux/init.h> | |
39 | #include <linux/acpi.h> | |
40 | #include <linux/io.h> | |
41 | #include <linux/interrupt.h> | |
42 | #include <linux/cper.h> | |
43 | #include <linux/kdebug.h> | |
7ad6e943 HY |
44 | #include <linux/platform_device.h> |
45 | #include <linux/mutex.h> | |
d334a491 HY |
46 | #include <acpi/apei.h> |
47 | #include <acpi/atomicio.h> | |
48 | #include <acpi/hed.h> | |
49 | #include <asm/mce.h> | |
50 | ||
51 | #include "apei-internal.h" | |
52 | ||
53 | #define GHES_PFX "GHES: " | |
54 | ||
55 | #define GHES_ESTATUS_MAX_SIZE 65536 | |
56 | ||
57 | /* | |
58 | * One struct ghes is created for each generic hardware error | |
59 | * source. | |
60 | * | |
61 | * It provides the context for APEI hardware error timer/IRQ/SCI/NMI | |
62 | * handler. Handler for one generic hardware error source is only | |
63 | * triggered after the previous one is done. So the handler can use | |
64 | * struct ghes without locking. | |
65 | * | |
66 | * estatus: memory buffer for error status block, allocated during | |
67 | * HEST parsing. | |
68 | */ | |
69 | #define GHES_TO_CLEAR 0x0001 | |
70 | ||
71 | struct ghes { | |
72 | struct acpi_hest_generic *generic; | |
73 | struct acpi_hest_generic_status *estatus; | |
74 | struct list_head list; | |
75 | u64 buffer_paddr; | |
76 | unsigned long flags; | |
77 | }; | |
78 | ||
79 | /* | |
80 | * Error source lists, one list for each notification method. The | |
81 | * members in lists are struct ghes. | |
82 | * | |
83 | * The list members are added in ghes_probe() and deleted in | |
84 | * ghes_remove(); ghes_list_mutex serializes those updates against | |
85 | * each other. | |
86 | * | |
87 | * But the mutual exclusion is needed between members adding/deleting | |
88 | * and timer/IRQ/SCI/NMI handler, which may traverse the list. RCU is | |
89 | * used for that. | |
90 | */ | |
91 | static LIST_HEAD(ghes_sci); | |
7ad6e943 | 92 | static DEFINE_MUTEX(ghes_list_mutex); |
d334a491 HY |
93 | |
94 | static struct ghes *ghes_new(struct acpi_hest_generic *generic) | |
95 | { | |
96 | struct ghes *ghes; | |
97 | unsigned int error_block_length; | |
98 | int rc; | |
99 | ||
100 | ghes = kzalloc(sizeof(*ghes), GFP_KERNEL); | |
101 | if (!ghes) | |
102 | return ERR_PTR(-ENOMEM); | |
103 | ghes->generic = generic; | |
104 | INIT_LIST_HEAD(&ghes->list); | |
105 | rc = acpi_pre_map_gar(&generic->error_status_address); | |
106 | if (rc) | |
107 | goto err_free; | |
108 | error_block_length = generic->error_block_length; | |
109 | if (error_block_length > GHES_ESTATUS_MAX_SIZE) { | |
110 | pr_warning(FW_WARN GHES_PFX | |
111 | "Error status block length is too long: %u for " | |
112 | "generic hardware error source: %d.\n", | |
113 | error_block_length, generic->header.source_id); | |
114 | error_block_length = GHES_ESTATUS_MAX_SIZE; | |
115 | } | |
116 | ghes->estatus = kmalloc(error_block_length, GFP_KERNEL); | |
117 | if (!ghes->estatus) { | |
118 | rc = -ENOMEM; | |
119 | goto err_unmap; | |
120 | } | |
121 | ||
122 | return ghes; | |
123 | ||
124 | err_unmap: | |
125 | acpi_post_unmap_gar(&generic->error_status_address); | |
126 | err_free: | |
127 | kfree(ghes); | |
128 | return ERR_PTR(rc); | |
129 | } | |
130 | ||
131 | static void ghes_fini(struct ghes *ghes) | |
132 | { | |
133 | kfree(ghes->estatus); | |
134 | acpi_post_unmap_gar(&ghes->generic->error_status_address); | |
135 | } | |
136 | ||
/*
 * Driver-internal severity levels, in increasing order of seriousness.
 * ghes_severity() maps CPER severities onto these.
 */
enum {
	GHES_SEV_NO		= 0,	/* informational, nothing to do */
	GHES_SEV_CORRECTED	= 1,
	GHES_SEV_RECOVERABLE	= 2,
	GHES_SEV_PANIC		= 3,
};
143 | ||
144 | static inline int ghes_severity(int severity) | |
145 | { | |
146 | switch (severity) { | |
ad4ecef2 HY |
147 | case CPER_SEV_INFORMATIONAL: |
148 | return GHES_SEV_NO; | |
149 | case CPER_SEV_CORRECTED: | |
150 | return GHES_SEV_CORRECTED; | |
151 | case CPER_SEV_RECOVERABLE: | |
152 | return GHES_SEV_RECOVERABLE; | |
153 | case CPER_SEV_FATAL: | |
154 | return GHES_SEV_PANIC; | |
d334a491 HY |
155 | default: |
156 | /* Unkown, go panic */ | |
ad4ecef2 | 157 | return GHES_SEV_PANIC; |
d334a491 HY |
158 | } |
159 | } | |
160 | ||
161 | /* SCI handler run in work queue, so ioremap can be used here */ | |
162 | static int ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len, | |
163 | int from_phys) | |
164 | { | |
165 | void *vaddr; | |
166 | ||
167 | vaddr = ioremap_cache(paddr, len); | |
168 | if (!vaddr) | |
169 | return -ENOMEM; | |
170 | if (from_phys) | |
171 | memcpy(buffer, vaddr, len); | |
172 | else | |
173 | memcpy(vaddr, buffer, len); | |
174 | iounmap(vaddr); | |
175 | ||
176 | return 0; | |
177 | } | |
178 | ||
179 | static int ghes_read_estatus(struct ghes *ghes, int silent) | |
180 | { | |
181 | struct acpi_hest_generic *g = ghes->generic; | |
182 | u64 buf_paddr; | |
183 | u32 len; | |
184 | int rc; | |
185 | ||
186 | rc = acpi_atomic_read(&buf_paddr, &g->error_status_address); | |
187 | if (rc) { | |
188 | if (!silent && printk_ratelimit()) | |
189 | pr_warning(FW_WARN GHES_PFX | |
190 | "Failed to read error status block address for hardware error source: %d.\n", | |
191 | g->header.source_id); | |
192 | return -EIO; | |
193 | } | |
194 | if (!buf_paddr) | |
195 | return -ENOENT; | |
196 | ||
197 | rc = ghes_copy_tofrom_phys(ghes->estatus, buf_paddr, | |
198 | sizeof(*ghes->estatus), 1); | |
199 | if (rc) | |
200 | return rc; | |
201 | if (!ghes->estatus->block_status) | |
202 | return -ENOENT; | |
203 | ||
204 | ghes->buffer_paddr = buf_paddr; | |
205 | ghes->flags |= GHES_TO_CLEAR; | |
206 | ||
207 | rc = -EIO; | |
208 | len = apei_estatus_len(ghes->estatus); | |
209 | if (len < sizeof(*ghes->estatus)) | |
210 | goto err_read_block; | |
211 | if (len > ghes->generic->error_block_length) | |
212 | goto err_read_block; | |
213 | if (apei_estatus_check_header(ghes->estatus)) | |
214 | goto err_read_block; | |
215 | rc = ghes_copy_tofrom_phys(ghes->estatus + 1, | |
216 | buf_paddr + sizeof(*ghes->estatus), | |
217 | len - sizeof(*ghes->estatus), 1); | |
218 | if (rc) | |
219 | return rc; | |
220 | if (apei_estatus_check(ghes->estatus)) | |
221 | goto err_read_block; | |
222 | rc = 0; | |
223 | ||
224 | err_read_block: | |
225 | if (rc && !silent) | |
226 | pr_warning(FW_WARN GHES_PFX | |
227 | "Failed to read error status block!\n"); | |
228 | return rc; | |
229 | } | |
230 | ||
231 | static void ghes_clear_estatus(struct ghes *ghes) | |
232 | { | |
233 | ghes->estatus->block_status = 0; | |
234 | if (!(ghes->flags & GHES_TO_CLEAR)) | |
235 | return; | |
236 | ghes_copy_tofrom_phys(ghes->estatus, ghes->buffer_paddr, | |
237 | sizeof(ghes->estatus->block_status), 0); | |
238 | ghes->flags &= ~GHES_TO_CLEAR; | |
239 | } | |
240 | ||
241 | static void ghes_do_proc(struct ghes *ghes) | |
242 | { | |
ad4ecef2 | 243 | int sev, processed = 0; |
d334a491 HY |
244 | struct acpi_hest_generic_data *gdata; |
245 | ||
ad4ecef2 | 246 | sev = ghes_severity(ghes->estatus->error_severity); |
d334a491 HY |
247 | apei_estatus_for_each_section(ghes->estatus, gdata) { |
248 | #ifdef CONFIG_X86_MCE | |
249 | if (!uuid_le_cmp(*(uuid_le *)gdata->section_type, | |
250 | CPER_SEC_PLATFORM_MEM)) { | |
251 | apei_mce_report_mem_error( | |
ad4ecef2 | 252 | sev == GHES_SEV_CORRECTED, |
d334a491 HY |
253 | (struct cper_sec_mem_err *)(gdata+1)); |
254 | processed = 1; | |
255 | } | |
256 | #endif | |
257 | } | |
258 | ||
259 | if (!processed && printk_ratelimit()) | |
260 | pr_warning(GHES_PFX | |
261 | "Unknown error record from generic hardware error source: %d\n", | |
262 | ghes->generic->header.source_id); | |
263 | } | |
264 | ||
/*
 * Read, process and acknowledge the pending error of one error source.
 *
 * The error status block is cleared via ghes_clear_estatus() whether or
 * not the read succeeded.
 *
 * Returns 0 if an error status block was read and processed, otherwise
 * the error from ghes_read_estatus().  Callers use the return value to
 * tell whether this source actually had something to report.
 */
static int ghes_proc(struct ghes *ghes)
{
	int rc;

	rc = ghes_read_estatus(ghes, 0);
	if (!rc)
		ghes_do_proc(ghes);

	ghes_clear_estatus(ghes);
	return rc;
}
278 | ||
279 | static int ghes_notify_sci(struct notifier_block *this, | |
280 | unsigned long event, void *data) | |
281 | { | |
282 | struct ghes *ghes; | |
283 | int ret = NOTIFY_DONE; | |
284 | ||
285 | rcu_read_lock(); | |
286 | list_for_each_entry_rcu(ghes, &ghes_sci, list) { | |
287 | if (!ghes_proc(ghes)) | |
288 | ret = NOTIFY_OK; | |
289 | } | |
290 | rcu_read_unlock(); | |
291 | ||
292 | return ret; | |
293 | } | |
294 | ||
295 | static struct notifier_block ghes_notifier_sci = { | |
296 | .notifier_call = ghes_notify_sci, | |
297 | }; | |
298 | ||
7ad6e943 | 299 | static int __devinit ghes_probe(struct platform_device *ghes_dev) |
d334a491 HY |
300 | { |
301 | struct acpi_hest_generic *generic; | |
302 | struct ghes *ghes = NULL; | |
7ad6e943 | 303 | int rc = -EINVAL; |
d334a491 | 304 | |
1dd6b20e | 305 | generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data; |
d334a491 | 306 | if (!generic->enabled) |
7ad6e943 | 307 | return -ENODEV; |
d334a491 HY |
308 | |
309 | if (generic->error_block_length < | |
310 | sizeof(struct acpi_hest_generic_status)) { | |
311 | pr_warning(FW_BUG GHES_PFX | |
312 | "Invalid error block length: %u for generic hardware error source: %d\n", | |
313 | generic->error_block_length, | |
314 | generic->header.source_id); | |
315 | goto err; | |
316 | } | |
317 | if (generic->records_to_preallocate == 0) { | |
318 | pr_warning(FW_BUG GHES_PFX | |
319 | "Invalid records to preallocate: %u for generic hardware error source: %d\n", | |
320 | generic->records_to_preallocate, | |
321 | generic->header.source_id); | |
322 | goto err; | |
323 | } | |
324 | ghes = ghes_new(generic); | |
325 | if (IS_ERR(ghes)) { | |
326 | rc = PTR_ERR(ghes); | |
327 | ghes = NULL; | |
328 | goto err; | |
329 | } | |
7ad6e943 HY |
330 | if (generic->notify.type == ACPI_HEST_NOTIFY_SCI) { |
331 | mutex_lock(&ghes_list_mutex); | |
d334a491 HY |
332 | if (list_empty(&ghes_sci)) |
333 | register_acpi_hed_notifier(&ghes_notifier_sci); | |
334 | list_add_rcu(&ghes->list, &ghes_sci); | |
7ad6e943 HY |
335 | mutex_unlock(&ghes_list_mutex); |
336 | } else { | |
337 | unsigned char *notify = NULL; | |
338 | ||
339 | switch (generic->notify.type) { | |
340 | case ACPI_HEST_NOTIFY_POLLED: | |
341 | notify = "POLL"; | |
342 | break; | |
343 | case ACPI_HEST_NOTIFY_EXTERNAL: | |
344 | case ACPI_HEST_NOTIFY_LOCAL: | |
345 | notify = "IRQ"; | |
346 | break; | |
347 | case ACPI_HEST_NOTIFY_NMI: | |
348 | notify = "NMI"; | |
349 | break; | |
350 | } | |
351 | if (notify) { | |
352 | pr_warning(GHES_PFX | |
353 | "Generic hardware error source: %d notified via %s is not supported!\n", | |
354 | generic->header.source_id, notify); | |
355 | } else { | |
356 | pr_warning(FW_WARN GHES_PFX | |
357 | "Unknown notification type: %u for generic hardware error source: %d\n", | |
358 | generic->notify.type, generic->header.source_id); | |
359 | } | |
360 | rc = -ENODEV; | |
361 | goto err; | |
d334a491 | 362 | } |
7ad6e943 | 363 | platform_set_drvdata(ghes_dev, ghes); |
d334a491 HY |
364 | |
365 | return 0; | |
366 | err: | |
7ad6e943 | 367 | if (ghes) { |
d334a491 | 368 | ghes_fini(ghes); |
7ad6e943 HY |
369 | kfree(ghes); |
370 | } | |
d334a491 HY |
371 | return rc; |
372 | } | |
373 | ||
7ad6e943 | 374 | static int __devexit ghes_remove(struct platform_device *ghes_dev) |
d334a491 | 375 | { |
7ad6e943 HY |
376 | struct ghes *ghes; |
377 | struct acpi_hest_generic *generic; | |
d334a491 | 378 | |
7ad6e943 HY |
379 | ghes = platform_get_drvdata(ghes_dev); |
380 | generic = ghes->generic; | |
381 | ||
382 | switch (generic->notify.type) { | |
383 | case ACPI_HEST_NOTIFY_SCI: | |
384 | mutex_lock(&ghes_list_mutex); | |
385 | list_del_rcu(&ghes->list); | |
386 | if (list_empty(&ghes_sci)) | |
387 | unregister_acpi_hed_notifier(&ghes_notifier_sci); | |
388 | mutex_unlock(&ghes_list_mutex); | |
389 | break; | |
390 | default: | |
391 | BUG(); | |
392 | break; | |
393 | } | |
d334a491 HY |
394 | |
395 | synchronize_rcu(); | |
7ad6e943 HY |
396 | ghes_fini(ghes); |
397 | kfree(ghes); | |
d334a491 | 398 | |
7ad6e943 HY |
399 | platform_set_drvdata(ghes_dev, NULL); |
400 | ||
401 | return 0; | |
d334a491 HY |
402 | } |
403 | ||
7ad6e943 HY |
404 | static struct platform_driver ghes_platform_driver = { |
405 | .driver = { | |
406 | .name = "GHES", | |
407 | .owner = THIS_MODULE, | |
408 | }, | |
409 | .probe = ghes_probe, | |
410 | .remove = ghes_remove, | |
411 | }; | |
412 | ||
d334a491 HY |
413 | static int __init ghes_init(void) |
414 | { | |
d334a491 HY |
415 | if (acpi_disabled) |
416 | return -ENODEV; | |
417 | ||
418 | if (hest_disable) { | |
419 | pr_info(GHES_PFX "HEST is not enabled!\n"); | |
420 | return -EINVAL; | |
421 | } | |
422 | ||
7ad6e943 | 423 | return platform_driver_register(&ghes_platform_driver); |
d334a491 HY |
424 | } |
425 | ||
426 | static void __exit ghes_exit(void) | |
427 | { | |
7ad6e943 | 428 | platform_driver_unregister(&ghes_platform_driver); |
d334a491 HY |
429 | } |
430 | ||
431 | module_init(ghes_init); | |
432 | module_exit(ghes_exit); | |
433 | ||
434 | MODULE_AUTHOR("Huang Ying"); | |
435 | MODULE_DESCRIPTION("APEI Generic Hardware Error Source support"); | |
436 | MODULE_LICENSE("GPL"); | |
7ad6e943 | 437 | MODULE_ALIAS("platform:GHES"); |