edac_core: Don't let free(mci) happen while using it
drivers/edac/edac_mc.c (deliverable/linux.git)

/*
 * edac_mc kernel module
 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * Written by Thayne Harbaugh
 * Based on work by Dan Hollis <goemon at anime dot net> and others.
 *	http://www.anime.net/~goemon/linux-ecc/
 *
 * Modified by Dave Peterson and Doug Thompson
 *
 */

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/sysdev.h>
#include <linux/ctype.h>
#include <linux/edac.h>
#include <asm/uaccess.h>
#include <asm/page.h>
#include <asm/edac.h>
#include "edac_core.h"
#include "edac_module.h"

/* lock to memory controller's control array */
static DEFINE_MUTEX(mem_ctls_mutex);
static LIST_HEAD(mc_devices);

#ifdef CONFIG_EDAC_DEBUG

static void edac_mc_dump_channel(struct channel_info *chan)
{
	debugf4("\tchannel = %p\n", chan);
	debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
	debugf4("\tchannel->ce_count = %d\n", chan->ce_count);
	debugf4("\tchannel->label = '%s'\n", chan->label);
	debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
}

static void edac_mc_dump_csrow(struct csrow_info *csrow)
{
	debugf4("\tcsrow = %p\n", csrow);
	debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
	debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page);
	debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
	debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
	debugf4("\tcsrow->nr_pages = 0x%x\n", csrow->nr_pages);
	debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels);
	debugf4("\tcsrow->channels = %p\n", csrow->channels);
	debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
}

static void edac_mc_dump_mci(struct mem_ctl_info *mci)
{
	debugf3("\tmci = %p\n", mci);
	debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
	debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
	debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
	debugf4("\tmci->edac_check = %p\n", mci->edac_check);
	debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
		mci->nr_csrows, mci->csrows);
	debugf3("\tdev = %p\n", mci->dev);
	debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name);
	debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
}

/*
 * keep those in sync with the enum mem_type
 */
const char *edac_mem_types[] = {
	"Empty csrow",
	"Reserved csrow type",
	"Unknown csrow type",
	"Fast page mode RAM",
	"Extended data out RAM",
	"Burst Extended data out RAM",
	"Single data rate SDRAM",
	"Registered single data rate SDRAM",
	"Double data rate SDRAM",
	"Registered Double data rate SDRAM",
	"Rambus DRAM",
	"Unbuffered DDR2 RAM",
	"Fully buffered DDR2",
	"Registered DDR2 RAM",
	"Rambus XDR",
	"Unbuffered DDR3 RAM",
	"Registered DDR3 RAM",
};
EXPORT_SYMBOL_GPL(edac_mem_types);

#endif				/* CONFIG_EDAC_DEBUG */

/* 'ptr' points to a possibly unaligned item X such that sizeof(X) is 'size'.
 * Adjust 'ptr' so that its alignment is at least as stringent as what the
 * compiler would provide for X and return the aligned result.
 *
 * If 'size' is a constant, the compiler will optimize this whole function
 * down to either a no-op or the addition of a constant to the value of 'ptr'.
 */
void *edac_align_ptr(void *ptr, unsigned size)
{
	unsigned align, r;

	/* Here we assume that the alignment of a "long long" is the most
	 * stringent alignment that the compiler will ever provide by default.
	 * As far as I know, this is a reasonable assumption.
	 */
	if (size > sizeof(long))
		align = sizeof(long long);
	else if (size > sizeof(int))
		align = sizeof(long);
	else if (size > sizeof(short))
		align = sizeof(int);
	else if (size > sizeof(char))
		align = sizeof(short);
	else
		return (char *)ptr;

	r = size % align;

	if (r == 0)
		return (char *)ptr;

	return (void *)(((unsigned long)ptr) + align - r);
}
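
/*
 * Illustrative sketch (not compiled; all names below are hypothetical):
 * the helper above is meant for computing member offsets against a NULL
 * base so that several arrays can share one allocation.  edac_mc_alloc()
 * below is the real user; this only isolates the pattern.
 */
#if 0
struct example_hdr { int nelems; };
struct example_elem { long long val; };

static size_t example_layout_size(unsigned nelems, void **elem_off)
{
	struct example_hdr *hdr = NULL;

	/* place the element array right after the header, suitably aligned */
	*elem_off = edac_align_ptr(&hdr[1], sizeof(struct example_elem));

	/* total bytes needed for the header plus the aligned element array */
	return (unsigned long)*elem_off +
		nelems * sizeof(struct example_elem);
}
#endif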

/**
 * edac_mc_alloc: Allocate a struct mem_ctl_info structure
 * @sz_pvt:	size of private storage needed
 * @nr_csrows:	Number of csrows needed for this MC
 * @nr_chans:	Number of channels for the MC
 * @edac_index:	unique index to be assigned to this mci (becomes mci->mc_idx)
 *
 * Everything is kmalloc'ed as one big chunk - more efficient.
 * Can only be used if all structures have the same lifetime - otherwise
 * you have to allocate and initialize your own structures.
 *
 * Use edac_mc_free() to free mc structures allocated by this function.
 *
 * Returns:
 *	NULL	allocation failed
 *	struct mem_ctl_info pointer
 */
struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
				unsigned nr_chans, int edac_index)
{
	struct mem_ctl_info *mci;
	struct csrow_info *csi, *csrow;
	struct channel_info *chi, *chp, *chan;
	void *pvt;
	unsigned size;
	int row, chn;
	int err;

	/* Figure out the offsets of the various items from the start of an mc
	 * structure.  We want the alignment of each item to be at least as
	 * stringent as what the compiler would provide if we could simply
	 * hardcode everything into a single struct.
	 */
	mci = (struct mem_ctl_info *)0;
	csi = edac_align_ptr(&mci[1], sizeof(*csi));
	chi = edac_align_ptr(&csi[nr_csrows], sizeof(*chi));
	pvt = edac_align_ptr(&chi[nr_chans * nr_csrows], sz_pvt);
	size = ((unsigned long)pvt) + sz_pvt;

	mci = kzalloc(size, GFP_KERNEL);
	if (mci == NULL)
		return NULL;

	/* Adjust pointers so they point within the memory we just allocated
	 * rather than an imaginary chunk of memory located at address 0.
	 */
	csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
	chi = (struct channel_info *)(((char *)mci) + ((unsigned long)chi));
	pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;

	/* setup index and various internal pointers */
	mci->mc_idx = edac_index;
	mci->csrows = csi;
	mci->pvt_info = pvt;
	mci->nr_csrows = nr_csrows;

	for (row = 0; row < nr_csrows; row++) {
		csrow = &csi[row];
		csrow->csrow_idx = row;
		csrow->mci = mci;
		csrow->nr_channels = nr_chans;
		chp = &chi[row * nr_chans];
		csrow->channels = chp;

		for (chn = 0; chn < nr_chans; chn++) {
			chan = &chp[chn];
			chan->chan_idx = chn;
			chan->csrow = csrow;
		}
	}

	mci->op_state = OP_ALLOC;
	INIT_LIST_HEAD(&mci->grp_kobj_list);

	/*
	 * Initialize the 'root' kobj for the edac_mc controller
	 */
	err = edac_mc_register_sysfs_main_kobj(mci);
	if (err) {
		kfree(mci);
		return NULL;
	}

	/* at this point, the root kobj is valid; in order to 'free' the
	 * object, edac_mc_unregister_sysfs_main_kobj() must be called,
	 * which performs the kobj unregistration.  The actual free then
	 * occurs during the kobject release callback.
	 */
	return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_alloc);

/**
 * edac_mc_free
 *	'Free' a previously allocated 'mci' structure
 * @mci: pointer to a struct mem_ctl_info structure
 */
void edac_mc_free(struct mem_ctl_info *mci)
{
	edac_mc_unregister_sysfs_main_kobj(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);
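
/*
 * Illustrative sketch (hypothetical driver code, not compiled here): a
 * caller sizes the allocation by its csrow/channel count plus its own
 * private struct, reaches that private area through mci->pvt_info, and
 * balances edac_mc_alloc() with edac_mc_free().
 */
#if 0
struct example_pvt {			/* hypothetical per-controller state */
	void __iomem *regs;
};

static struct mem_ctl_info *example_alloc(int edac_idx)
{
	struct mem_ctl_info *mci;
	struct example_pvt *pvt;

	/* 4 csrows, 2 channels each, plus room for struct example_pvt */
	mci = edac_mc_alloc(sizeof(*pvt), 4, 2, edac_idx);
	if (mci == NULL)
		return NULL;

	pvt = mci->pvt_info;	/* carved out of the same allocation */
	pvt->regs = NULL;	/* a real driver would ioremap() here */

	/* any later failure should be unwound with edac_mc_free(mci) */
	return mci;
}
#endif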

/**
 * find_mci_by_dev
 *
 *	scan list of controllers looking for the one that manages
 *	the 'dev' device
 * @dev: pointer to a struct device related with the MCI
 */
struct mem_ctl_info *find_mci_by_dev(struct device *dev)
{
	struct mem_ctl_info *mci;
	struct list_head *item;

	debugf3("%s()\n", __func__);

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->dev == dev)
			return mci;
	}

	return NULL;
}
EXPORT_SYMBOL_GPL(find_mci_by_dev);

/*
 * handler for EDAC to check if NMI type handler has asserted interrupt
 */
static int edac_mc_assert_error_check_and_clear(void)
{
	int old_state;

	if (edac_op_state == EDAC_OPSTATE_POLL)
		return 1;

	old_state = edac_err_assert;
	edac_err_assert = 0;

	return old_state;
}

/*
 * edac_mc_workq_function
 *	performs the operation scheduled by a workq request
 */
static void edac_mc_workq_function(struct work_struct *work_req)
{
	struct delayed_work *d_work = to_delayed_work(work_req);
	struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);

	mutex_lock(&mem_ctls_mutex);

	/* if this control struct has moved to offline state, we are done */
	if (mci->op_state == OP_OFFLINE) {
		mutex_unlock(&mem_ctls_mutex);
		return;
	}

	/* Only poll controllers that are running polled and have a check */
	if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
		mci->edac_check(mci);

	mutex_unlock(&mem_ctls_mutex);

	/* Reschedule */
	queue_delayed_work(edac_workqueue, &mci->work,
			msecs_to_jiffies(edac_mc_get_poll_msec()));
}

/*
 * edac_mc_workq_setup
 *	initialize a workq item for this mci
 *	passing in the new delay period in msec
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex held
 */
static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
{
	debugf0("%s()\n", __func__);

	/* if this instance is not in the POLL state, then simply return */
	if (mci->op_state != OP_RUNNING_POLL)
		return;

	INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
	queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
}

/*
 * edac_mc_workq_teardown
 *	stop the workq processing on this mci
 *
 *	locking model:
 *
 *		called WITHOUT lock held
 */
static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
{
	int status;

	if (mci->op_state != OP_RUNNING_POLL)
		return;

	status = cancel_delayed_work(&mci->work);
	if (status == 0) {
		debugf0("%s() not canceled, flush the queue\n",
			__func__);

		/* workq instance might be running, wait for it */
		flush_workqueue(edac_workqueue);
	}
}

/*
 * edac_mc_reset_delay_period(unsigned long value)
 *
 *	user space has updated our poll period value, need to
 *	reset our workq delays
 */
void edac_mc_reset_delay_period(int value)
{
	struct mem_ctl_info *mci;
	struct list_head *item;

	mutex_lock(&mem_ctls_mutex);

	/* scan the list and turn off all workq timers, doing so under lock
	 */
	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->op_state == OP_RUNNING_POLL)
			cancel_delayed_work(&mci->work);
	}

	mutex_unlock(&mem_ctls_mutex);

	/* re-walk the list, and reset the poll delay */
	mutex_lock(&mem_ctls_mutex);

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		edac_mc_workq_setup(mci, (unsigned long) value);
	}

	mutex_unlock(&mem_ctls_mutex);
}

/* Return 0 on success, 1 on failure.
 * Before calling this function, caller must
 * assign a unique value to mci->mc_idx.
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex lock held
 */
static int add_mc_to_global_list(struct mem_ctl_info *mci)
{
	struct list_head *item, *insert_before;
	struct mem_ctl_info *p;

	insert_before = &mc_devices;

	p = find_mci_by_dev(mci->dev);
	if (unlikely(p != NULL))
		goto fail0;

	list_for_each(item, &mc_devices) {
		p = list_entry(item, struct mem_ctl_info, link);

		if (p->mc_idx >= mci->mc_idx) {
			if (unlikely(p->mc_idx == mci->mc_idx))
				goto fail1;

			insert_before = item;
			break;
		}
	}

	list_add_tail_rcu(&mci->link, insert_before);
	atomic_inc(&edac_handlers);
	return 0;

fail0:
	edac_printk(KERN_WARNING, EDAC_MC,
		"%s (%s) %s %s already assigned %d\n", dev_name(p->dev),
		edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
	return 1;

fail1:
	edac_printk(KERN_WARNING, EDAC_MC,
		"bug in low-level driver: attempt to assign\n"
		"    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
	return 1;
}

static void complete_mc_list_del(struct rcu_head *head)
{
	struct mem_ctl_info *mci;

	mci = container_of(head, struct mem_ctl_info, rcu);
	INIT_LIST_HEAD(&mci->link);
}

static void del_mc_from_global_list(struct mem_ctl_info *mci)
{
	atomic_dec(&edac_handlers);
	list_del_rcu(&mci->link);
	call_rcu(&mci->rcu, complete_mc_list_del);
	rcu_barrier();
}

/**
 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
 *
 * If found, return a pointer to the structure.
 * Else return NULL.
 *
 * Caller must hold mem_ctls_mutex.
 */
struct mem_ctl_info *edac_mc_find(int idx)
{
	struct list_head *item;
	struct mem_ctl_info *mci;

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->mc_idx >= idx) {
			if (mci->mc_idx == idx)
				return mci;

			break;
		}
	}

	return NULL;
}
EXPORT_SYMBOL(edac_mc_find);

/**
 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
 *	create sysfs entries associated with mci structure
 * @mci: pointer to the mci structure to be added to the list
 *
 * Return:
 *	0	Success
 *	!0	Failure
 */

/* FIXME - should a warning be printed if no error detection? correction? */
int edac_mc_add_mc(struct mem_ctl_info *mci)
{
	debugf0("%s()\n", __func__);

#ifdef CONFIG_EDAC_DEBUG
	if (edac_debug_level >= 3)
		edac_mc_dump_mci(mci);

	if (edac_debug_level >= 4) {
		int i;

		for (i = 0; i < mci->nr_csrows; i++) {
			int j;

			edac_mc_dump_csrow(&mci->csrows[i]);
			for (j = 0; j < mci->csrows[i].nr_channels; j++)
				edac_mc_dump_channel(&mci->csrows[i].
						channels[j]);
		}
	}
#endif
	mutex_lock(&mem_ctls_mutex);

	if (add_mc_to_global_list(mci))
		goto fail0;

	/* set load time so that error rate can be tracked */
	mci->start_time = jiffies;

	if (edac_create_sysfs_mci_device(mci)) {
		edac_mc_printk(mci, KERN_WARNING,
			"failed to create sysfs device\n");
		goto fail1;
	}

	/* If there IS a check routine, then we are running POLLED */
	if (mci->edac_check != NULL) {
		/* This instance is NOW RUNNING */
		mci->op_state = OP_RUNNING_POLL;

		edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
	} else {
		mci->op_state = OP_RUNNING_INTERRUPT;
	}

	/* Report action taken */
	edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
		" DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	mutex_unlock(&mem_ctls_mutex);
	return 0;

fail1:
	del_mc_from_global_list(mci);

fail0:
	mutex_unlock(&mem_ctls_mutex);
	return 1;
}
EXPORT_SYMBOL_GPL(edac_mc_add_mc);
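
/*
 * Illustrative sketch (hypothetical driver code, not compiled here): after
 * filling in the mci, a driver registers it with edac_mc_add_mc().
 * Supplying ->edac_check puts the instance into polled mode; leaving it
 * NULL means the driver reports errors from its own interrupt path.
 */
#if 0
static void example_check(struct mem_ctl_info *mci)
{
	/* a real driver would poll its error registers here */
}

static int example_register(struct mem_ctl_info *mci, struct device *dev)
{
	mci->dev = dev;			/* key used by find_mci_by_dev() */
	mci->mod_name = "example_edac";
	mci->ctl_name = "example_mc";
	mci->edac_check = example_check; /* NULL would mean interrupt mode */

	if (edac_mc_add_mc(mci)) {
		edac_mc_free(mci);	/* undo edac_mc_alloc() on failure */
		return -ENODEV;
	}

	return 0;
}
#endif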

/**
 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
 *	remove mci structure from global list
 * @dev: Pointer to 'struct device' representing mci structure to remove.
 *
 * Return pointer to removed mci structure, or NULL if device not found.
 */
struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
{
	struct mem_ctl_info *mci;

	debugf0("%s()\n", __func__);

	mutex_lock(&mem_ctls_mutex);

	/* find the requested mci struct in the global list */
	mci = find_mci_by_dev(dev);
	if (mci == NULL) {
		mutex_unlock(&mem_ctls_mutex);
		return NULL;
	}

	/* marking MCI offline */
	mci->op_state = OP_OFFLINE;

	del_mc_from_global_list(mci);
	mutex_unlock(&mem_ctls_mutex);

	/* flush workq processes and remove sysfs */
	edac_mc_workq_teardown(mci);
	edac_remove_sysfs_mci_device(mci);

	edac_printk(KERN_INFO, EDAC_MC,
		"Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
		mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_del_mc);
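
/*
 * Illustrative sketch (hypothetical driver code, not compiled here): on
 * device removal the mci is looked up and unlinked by its struct device,
 * then handed to edac_mc_free() to drop the sysfs kobject reference.
 */
#if 0
static void example_remove(struct device *dev)
{
	struct mem_ctl_info *mci = edac_mc_del_mc(dev);

	if (mci)
		edac_mc_free(mci);
}
#endif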

static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
				u32 size)
{
	struct page *pg;
	void *virt_addr;
	unsigned long flags = 0;

	debugf3("%s()\n", __func__);

	/* ECC error page was not in our memory. Ignore it. */
	if (!pfn_valid(page))
		return;

	/* Find the actual page structure then map it and fix */
	pg = pfn_to_page(page);

	if (PageHighMem(pg))
		local_irq_save(flags);

	virt_addr = kmap_atomic(pg, KM_BOUNCE_READ);

	/* Perform architecture specific atomic scrub operation */
	atomic_scrub(virt_addr + offset, size);

	/* Unmap and complete */
	kunmap_atomic(virt_addr, KM_BOUNCE_READ);

	if (PageHighMem(pg))
		local_irq_restore(flags);
}

/* FIXME - should return -1 */
int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
{
	struct csrow_info *csrows = mci->csrows;
	int row, i;

	debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
	row = -1;

	for (i = 0; i < mci->nr_csrows; i++) {
		struct csrow_info *csrow = &csrows[i];

		if (csrow->nr_pages == 0)
			continue;

		debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
			"mask(0x%lx)\n", mci->mc_idx, __func__,
			csrow->first_page, page, csrow->last_page,
			csrow->page_mask);

		if ((page >= csrow->first_page) &&
		    (page <= csrow->last_page) &&
		    ((page & csrow->page_mask) ==
		     (csrow->first_page & csrow->page_mask))) {
			row = i;
			break;
		}
	}

	if (row == -1)
		edac_mc_printk(mci, KERN_ERR,
			"could not look up page error address %lx\n",
			(unsigned long)page);

	return row;
}
EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);

/* FIXME - settable log (warning/emerg) levels */
/* FIXME - integrate with evlog: http://evlog.sourceforge.net/ */
void edac_mc_handle_ce(struct mem_ctl_info *mci,
		unsigned long page_frame_number,
		unsigned long offset_in_page, unsigned long syndrome,
		int row, int channel, const char *msg)
{
	unsigned long remapped_page;

	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);

	/* FIXME - maybe make panic on INTERNAL ERROR an option */
	if (row >= mci->nr_csrows || row < 0) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: row out of range "
			"(%d >= %d)\n", row, mci->nr_csrows);
		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
		return;
	}

	if (channel >= mci->csrows[row].nr_channels || channel < 0) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: channel out of range "
			"(%d >= %d)\n", channel,
			mci->csrows[row].nr_channels);
		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
		return;
	}

	if (edac_mc_get_log_ce())
		/* FIXME - put in DIMM location */
		edac_mc_printk(mci, KERN_WARNING,
			"CE page 0x%lx, offset 0x%lx, grain %d, syndrome "
			"0x%lx, row %d, channel %d, label \"%s\": %s\n",
			page_frame_number, offset_in_page,
			mci->csrows[row].grain, syndrome, row, channel,
			mci->csrows[row].channels[channel].label, msg);

	mci->ce_count++;
	mci->csrows[row].ce_count++;
	mci->csrows[row].channels[channel].ce_count++;

	if (mci->scrub_mode & SCRUB_SW_SRC) {
		/*
		 * Some MC's can remap memory so that it is still available
		 * at a different address when PCI devices map into memory.
		 * MC's that can't do this lose the memory where PCI devices
		 * are mapped.  This mapping is MC dependent and so we call
		 * back into the MC driver for it to map the MC page to
		 * a physical (CPU) page which can then be mapped to a virtual
		 * page - which can then be scrubbed.
		 */
		remapped_page = mci->ctl_page_to_phys ?
			mci->ctl_page_to_phys(mci, page_frame_number) :
			page_frame_number;

		edac_mc_scrub_block(remapped_page, offset_in_page,
				mci->csrows[row].grain);
	}
}
EXPORT_SYMBOL_GPL(edac_mc_handle_ce);
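
/*
 * Illustrative sketch (hypothetical driver code, not compiled here): a
 * polled ->edac_check routine decodes its hardware error registers and
 * forwards correctable errors through edac_mc_handle_ce().  The page,
 * offset and syndrome values below stand in for decoded register contents.
 */
#if 0
static void example_check_ce(struct mem_ctl_info *mci)
{
	unsigned long page = 0x12345;	/* decoded error page frame */
	unsigned long offset = 0x40;	/* offset within that page */
	unsigned long syndrome = 0x5a;	/* ECC syndrome, if available */
	int row = 0, channel = 0;	/* row often via edac_mc_find_csrow_by_page() */

	edac_mc_handle_ce(mci, page, offset, syndrome,
			row, channel, "example corrected error");
}
#endif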

void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci, const char *msg)
{
	if (edac_mc_get_log_ce())
		edac_mc_printk(mci, KERN_WARNING,
			"CE - no information available: %s\n", msg);

	mci->ce_noinfo_count++;
	mci->ce_count++;
}
EXPORT_SYMBOL_GPL(edac_mc_handle_ce_no_info);

void edac_mc_handle_ue(struct mem_ctl_info *mci,
		unsigned long page_frame_number,
		unsigned long offset_in_page, int row, const char *msg)
{
	int len = EDAC_MC_LABEL_LEN * 4;
	char labels[len + 1];
	char *pos = labels;
	int chan;
	int chars;

	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);

	/* FIXME - maybe make panic on INTERNAL ERROR an option */
	if (row >= mci->nr_csrows || row < 0) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: row out of range "
			"(%d >= %d)\n", row, mci->nr_csrows);
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	chars = snprintf(pos, len + 1, "%s",
			 mci->csrows[row].channels[0].label);
	len -= chars;
	pos += chars;

	for (chan = 1; (chan < mci->csrows[row].nr_channels) && (len > 0);
	     chan++) {
		chars = snprintf(pos, len + 1, ":%s",
				 mci->csrows[row].channels[chan].label);
		len -= chars;
		pos += chars;
	}

	if (edac_mc_get_log_ue())
		edac_mc_printk(mci, KERN_EMERG,
			"UE page 0x%lx, offset 0x%lx, grain %d, row %d, "
			"labels \"%s\": %s\n", page_frame_number,
			offset_in_page, mci->csrows[row].grain, row,
			labels, msg);

	if (edac_mc_get_panic_on_ue())
		panic("EDAC MC%d: UE page 0x%lx, offset 0x%lx, grain %d, "
			"row %d, labels \"%s\": %s\n", mci->mc_idx,
			page_frame_number, offset_in_page,
			mci->csrows[row].grain, row, labels, msg);

	mci->ue_count++;
	mci->csrows[row].ue_count++;
}
EXPORT_SYMBOL_GPL(edac_mc_handle_ue);

void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci, const char *msg)
{
	if (edac_mc_get_panic_on_ue())
		panic("EDAC MC%d: Uncorrected Error", mci->mc_idx);

	if (edac_mc_get_log_ue())
		edac_mc_printk(mci, KERN_WARNING,
			"UE - no information available: %s\n", msg);
	mci->ue_noinfo_count++;
	mci->ue_count++;
}
EXPORT_SYMBOL_GPL(edac_mc_handle_ue_no_info);

/*************************************************************
 * On Fully Buffered DIMM modules, this helper function is
 * called to process UE events
 */
void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci,
			unsigned int csrow,
			unsigned int channela,
			unsigned int channelb, char *msg)
{
	int len = EDAC_MC_LABEL_LEN * 4;
	char labels[len + 1];
	char *pos = labels;
	int chars;

	if (csrow >= mci->nr_csrows) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: row out of range (%d >= %d)\n",
			csrow, mci->nr_csrows);
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	if (channela >= mci->csrows[csrow].nr_channels) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: channel-a out of range "
			"(%d >= %d)\n",
			channela, mci->csrows[csrow].nr_channels);
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	if (channelb >= mci->csrows[csrow].nr_channels) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: channel-b out of range "
			"(%d >= %d)\n",
			channelb, mci->csrows[csrow].nr_channels);
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	mci->ue_count++;
	mci->csrows[csrow].ue_count++;

	/* Generate the DIMM labels from the specified channels */
	chars = snprintf(pos, len + 1, "%s",
			 mci->csrows[csrow].channels[channela].label);
	len -= chars;
	pos += chars;
	chars = snprintf(pos, len + 1, "-%s",
			 mci->csrows[csrow].channels[channelb].label);

	if (edac_mc_get_log_ue())
		edac_mc_printk(mci, KERN_EMERG,
			"UE row %d, channel-a= %d channel-b= %d "
			"labels \"%s\": %s\n", csrow, channela, channelb,
			labels, msg);

	if (edac_mc_get_panic_on_ue())
		panic("UE row %d, channel-a= %d channel-b= %d "
			"labels \"%s\": %s\n", csrow, channela,
			channelb, labels, msg);
}
EXPORT_SYMBOL(edac_mc_handle_fbd_ue);

/*************************************************************
 * On Fully Buffered DIMM modules, this helper function is
 * called to process CE events
 */
void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci,
			unsigned int csrow, unsigned int channel, char *msg)
{

	/* Ensure boundary values */
	if (csrow >= mci->nr_csrows) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: row out of range (%d >= %d)\n",
			csrow, mci->nr_csrows);
		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
		return;
	}
	if (channel >= mci->csrows[csrow].nr_channels) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: channel out of range (%d >= %d)\n",
			channel, mci->csrows[csrow].nr_channels);
		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
		return;
	}

	if (edac_mc_get_log_ce())
		/* FIXME - put in DIMM location */
		edac_mc_printk(mci, KERN_WARNING,
			"CE row %d, channel %d, label \"%s\": %s\n",
			csrow, channel,
			mci->csrows[csrow].channels[channel].label, msg);

	mci->ce_count++;
	mci->csrows[csrow].ce_count++;
	mci->csrows[csrow].channels[channel].ce_count++;
}
EXPORT_SYMBOL(edac_mc_handle_fbd_ce);