edac: move nr_pages to dimm struct
drivers/edac/edac_mc.c
/*
 * edac_mc kernel module
 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * Written by Thayne Harbaugh
 * Based on work by Dan Hollis <goemon at anime dot net> and others.
 *	http://www.anime.net/~goemon/linux-ecc/
 *
 * Modified by Dave Peterson and Doug Thompson
 *
 */

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/ctype.h>
#include <linux/edac.h>
#include <asm/uaccess.h>
#include <asm/page.h>
#include <asm/edac.h>
#include "edac_core.h"
#include "edac_module.h"

/* lock to memory controller's control array */
static DEFINE_MUTEX(mem_ctls_mutex);
static LIST_HEAD(mc_devices);

#ifdef CONFIG_EDAC_DEBUG

static void edac_mc_dump_channel(struct rank_info *chan)
{
        debugf4("\tchannel = %p\n", chan);
        debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
        debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
        debugf4("\tdimm->ce_count = %d\n", chan->dimm->ce_count);
        debugf4("\tdimm->label = '%s'\n", chan->dimm->label);
        debugf4("\tdimm->nr_pages = 0x%x\n", chan->dimm->nr_pages);
}

static void edac_mc_dump_csrow(struct csrow_info *csrow)
{
        debugf4("\tcsrow = %p\n", csrow);
        debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
        debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page);
        debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
        debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
        debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels);
        debugf4("\tcsrow->channels = %p\n", csrow->channels);
        debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
}

static void edac_mc_dump_mci(struct mem_ctl_info *mci)
{
        debugf3("\tmci = %p\n", mci);
        debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
        debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
        debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
        debugf4("\tmci->edac_check = %p\n", mci->edac_check);
        debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
                mci->nr_csrows, mci->csrows);
        debugf3("\tdev = %p\n", mci->dev);
        debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name);
        debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
}

#endif                          /* CONFIG_EDAC_DEBUG */

/*
 * keep those in sync with the enum mem_type
 */
const char *edac_mem_types[] = {
        "Empty csrow",
        "Reserved csrow type",
        "Unknown csrow type",
        "Fast page mode RAM",
        "Extended data out RAM",
        "Burst Extended data out RAM",
        "Single data rate SDRAM",
        "Registered single data rate SDRAM",
        "Double data rate SDRAM",
        "Registered Double data rate SDRAM",
        "Rambus DRAM",
        "Unbuffered DDR2 RAM",
        "Fully buffered DDR2",
        "Registered DDR2 RAM",
        "Rambus XDR",
        "Unbuffered DDR3 RAM",
        "Registered DDR3 RAM",
};
EXPORT_SYMBOL_GPL(edac_mem_types);

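/*
 * Illustrative only: edac_mem_types[] is indexed by enum mem_type from
 * <linux/edac.h>, which is why the comment above asks to keep the two in
 * sync.  A driver reporting the detected memory kind might, roughly, do
 * something like the sketch below (the chosen MEM_DDR3 value is just an
 * example):
 *
 *      enum mem_type mtype = MEM_DDR3;
 *
 *      edac_printk(KERN_INFO, EDAC_MC, "detected %s\n",
 *                  edac_mem_types[mtype]);
 */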
/* 'ptr' points to a possibly unaligned item X such that sizeof(X) is 'size'.
 * Adjust 'ptr' so that its alignment is at least as stringent as what the
 * compiler would provide for X and return the aligned result.
 *
 * If 'size' is a constant, the compiler will optimize this whole function
 * down to either a no-op or the addition of a constant to the value of 'ptr'.
 */
void *edac_align_ptr(void *ptr, unsigned size)
{
        unsigned align, r;

        /* Here we assume that the alignment of a "long long" is the most
         * stringent alignment that the compiler will ever provide by default.
         * As far as I know, this is a reasonable assumption.
         */
        if (size > sizeof(long))
                align = sizeof(long long);
        else if (size > sizeof(int))
                align = sizeof(long);
        else if (size > sizeof(short))
                align = sizeof(int);
        else if (size > sizeof(char))
                align = sizeof(short);
        else
                return (char *)ptr;

        r = size % align;

        if (r == 0)
                return (char *)ptr;

        return (void *)(((unsigned long)ptr) + align - r);
}

/**
 * edac_mc_alloc: Allocate a struct mem_ctl_info structure
 * @sz_pvt:	size of private storage needed
 * @nr_csrows:	Number of CSROWS needed for this MC
 * @nr_chans:	Number of channels for the MC
 * @edac_index:	unique index for this MC, stored in mci->mc_idx
 *
 * Everything is kmalloc'ed as one big chunk - more efficient.
 * Can only be used if all structures have the same lifetime - otherwise
 * you have to allocate and initialize your own structures.
 *
 * Use edac_mc_free() to free mc structures allocated by this function.
 *
 * Returns:
 *	NULL	allocation failed
 *	struct mem_ctl_info pointer
 */
struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
                                unsigned nr_chans, int edac_index)
{
        struct mem_ctl_info *mci;
        struct csrow_info *csi, *csrow;
        struct rank_info *chi, *chp, *chan;
        struct dimm_info *dimm;
        void *pvt;
        unsigned size;
        int row, chn;
        int err;

        /* Figure out the offsets of the various items from the start of an mc
         * structure.  We want the alignment of each item to be at least as
         * stringent as what the compiler would provide if we could simply
         * hardcode everything into a single struct.
         */
        mci = (struct mem_ctl_info *)0;
        csi = edac_align_ptr(&mci[1], sizeof(*csi));
        chi = edac_align_ptr(&csi[nr_csrows], sizeof(*chi));
        dimm = edac_align_ptr(&chi[nr_chans * nr_csrows], sizeof(*dimm));
        pvt = edac_align_ptr(&dimm[nr_chans * nr_csrows], sz_pvt);
        size = ((unsigned long)pvt) + sz_pvt;

        mci = kzalloc(size, GFP_KERNEL);
        if (mci == NULL)
                return NULL;

        /* Adjust pointers so they point within the memory we just allocated
         * rather than an imaginary chunk of memory located at address 0.
         */
        csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
        chi = (struct rank_info *)(((char *)mci) + ((unsigned long)chi));
        dimm = (struct dimm_info *)(((char *)mci) + ((unsigned long)dimm));
        pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;

        /* setup index and various internal pointers */
        mci->mc_idx = edac_index;
        mci->csrows = csi;
        mci->dimms = dimm;
        mci->pvt_info = pvt;
        mci->nr_csrows = nr_csrows;

        /*
         * For now, assume a per-csrow arrangement of dimms.
         * This will be changed later.
         */
        dimm = mci->dimms;

        for (row = 0; row < nr_csrows; row++) {
                csrow = &csi[row];
                csrow->csrow_idx = row;
                csrow->mci = mci;
                csrow->nr_channels = nr_chans;
                chp = &chi[row * nr_chans];
                csrow->channels = chp;

                for (chn = 0; chn < nr_chans; chn++) {
                        chan = &chp[chn];
                        chan->chan_idx = chn;
                        chan->csrow = csrow;

                        mci->csrows[row].channels[chn].dimm = dimm;
                        dimm->csrow = row;
                        dimm->csrow_channel = chn;
                        dimm++;
                        mci->nr_dimms++;
                }
        }

        mci->op_state = OP_ALLOC;
        INIT_LIST_HEAD(&mci->grp_kobj_list);

        /*
         * Initialize the 'root' kobj for the edac_mc controller
         */
        err = edac_mc_register_sysfs_main_kobj(mci);
        if (err) {
                kfree(mci);
                return NULL;
        }

        /* at this point, the root kobj is valid, and in order to
         * 'free' the object, then the function:
         *      edac_mc_unregister_sysfs_main_kobj() must be called
         * which will perform kobj unregistration and the actual free
         * will occur during the kobject callback operation
         */
        return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_alloc);
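
/*
 * Typical use (illustrative sketch only): a driver allocates one mci per
 * memory controller, sizing the private area for its own bookkeeping.
 * The names below (struct my_mc_priv, MY_NR_CSROWS, MY_NR_CHANS) are
 * hypothetical placeholders, not symbols defined by this file:
 *
 *      struct my_mc_priv { void __iomem *regs; };
 *      struct mem_ctl_info *mci;
 *
 *      mci = edac_mc_alloc(sizeof(struct my_mc_priv),
 *                          MY_NR_CSROWS, MY_NR_CHANS, 0);
 *      if (!mci)
 *              return -ENOMEM;
 */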

/**
 * edac_mc_free
 *	'Free' a previously allocated 'mci' structure
 * @mci: pointer to a struct mem_ctl_info structure
 */
void edac_mc_free(struct mem_ctl_info *mci)
{
        debugf1("%s()\n", __func__);

        edac_mc_unregister_sysfs_main_kobj(mci);

        /* free the mci instance memory here */
        kfree(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);

/**
 * find_mci_by_dev
 *
 *	scan list of controllers looking for the one that manages
 *	the 'dev' device
 * @dev: pointer to a struct device related with the MCI
 */
struct mem_ctl_info *find_mci_by_dev(struct device *dev)
{
        struct mem_ctl_info *mci;
        struct list_head *item;

        debugf3("%s()\n", __func__);

        list_for_each(item, &mc_devices) {
                mci = list_entry(item, struct mem_ctl_info, link);

                if (mci->dev == dev)
                        return mci;
        }

        return NULL;
}
EXPORT_SYMBOL_GPL(find_mci_by_dev);

/*
 * handler for EDAC to check if NMI type handler has asserted interrupt
 */
static int edac_mc_assert_error_check_and_clear(void)
{
        int old_state;

        if (edac_op_state == EDAC_OPSTATE_POLL)
                return 1;

        old_state = edac_err_assert;
        edac_err_assert = 0;

        return old_state;
}

/*
 * edac_mc_workq_function
 *	performs the operation scheduled by a workq request
 */
static void edac_mc_workq_function(struct work_struct *work_req)
{
        struct delayed_work *d_work = to_delayed_work(work_req);
        struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);

        mutex_lock(&mem_ctls_mutex);

        /* if this control struct has moved to offline state, we are done */
        if (mci->op_state == OP_OFFLINE) {
                mutex_unlock(&mem_ctls_mutex);
                return;
        }

        /* Only poll controllers that are running polled and have a check */
        if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
                mci->edac_check(mci);

        mutex_unlock(&mem_ctls_mutex);

        /* Reschedule */
        queue_delayed_work(edac_workqueue, &mci->work,
                        msecs_to_jiffies(edac_mc_get_poll_msec()));
}

/*
 * edac_mc_workq_setup
 *	initialize a workq item for this mci
 *	passing in the new delay period in msec
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex held
 */
static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
{
        debugf0("%s()\n", __func__);

        /* if this instance is not in the POLL state, then simply return */
        if (mci->op_state != OP_RUNNING_POLL)
                return;

        INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
        queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
}

/*
 * edac_mc_workq_teardown
 *	stop the workq processing on this mci
 *
 *	locking model:
 *
 *		called WITHOUT lock held
 */
static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
{
        int status;

        if (mci->op_state != OP_RUNNING_POLL)
                return;

        status = cancel_delayed_work(&mci->work);
        if (status == 0) {
                debugf0("%s() not canceled, flush the queue\n",
                        __func__);

                /* workq instance might be running, wait for it */
                flush_workqueue(edac_workqueue);
        }
}

/*
 * edac_mc_reset_delay_period(int value)
 *
 *	user space has updated our poll period value, need to
 *	reset our workq delays
 */
void edac_mc_reset_delay_period(int value)
{
        struct mem_ctl_info *mci;
        struct list_head *item;

        mutex_lock(&mem_ctls_mutex);

        /* scan the list and turn off all workq timers, doing so under lock
         */
        list_for_each(item, &mc_devices) {
                mci = list_entry(item, struct mem_ctl_info, link);

                if (mci->op_state == OP_RUNNING_POLL)
                        cancel_delayed_work(&mci->work);
        }

        mutex_unlock(&mem_ctls_mutex);

        /* re-walk the list, and reset the poll delay */
        mutex_lock(&mem_ctls_mutex);

        list_for_each(item, &mc_devices) {
                mci = list_entry(item, struct mem_ctl_info, link);

                edac_mc_workq_setup(mci, (unsigned long) value);
        }

        mutex_unlock(&mem_ctls_mutex);
}

/* Return 0 on success, 1 on failure.
 * Before calling this function, caller must
 * assign a unique value to mci->mc_idx.
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex lock held
 */
static int add_mc_to_global_list(struct mem_ctl_info *mci)
{
        struct list_head *item, *insert_before;
        struct mem_ctl_info *p;

        insert_before = &mc_devices;

        p = find_mci_by_dev(mci->dev);
        if (unlikely(p != NULL))
                goto fail0;

        list_for_each(item, &mc_devices) {
                p = list_entry(item, struct mem_ctl_info, link);

                if (p->mc_idx >= mci->mc_idx) {
                        if (unlikely(p->mc_idx == mci->mc_idx))
                                goto fail1;

                        insert_before = item;
                        break;
                }
        }

        list_add_tail_rcu(&mci->link, insert_before);
        atomic_inc(&edac_handlers);
        return 0;

fail0:
        edac_printk(KERN_WARNING, EDAC_MC,
                "%s (%s) %s %s already assigned %d\n", dev_name(p->dev),
                edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
        return 1;

fail1:
        edac_printk(KERN_WARNING, EDAC_MC,
                "bug in low-level driver: attempt to assign\n"
                "    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
        return 1;
}

static void del_mc_from_global_list(struct mem_ctl_info *mci)
{
        atomic_dec(&edac_handlers);
        list_del_rcu(&mci->link);

        /* these are for safe removal of devices from global list while
         * NMI handlers may be traversing list
         */
        synchronize_rcu();
        INIT_LIST_HEAD(&mci->link);
}

/**
 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
 *
 * If found, return a pointer to the structure.
 * Else return NULL.
 *
 * Caller must hold mem_ctls_mutex.
 */
struct mem_ctl_info *edac_mc_find(int idx)
{
        struct list_head *item;
        struct mem_ctl_info *mci;

        list_for_each(item, &mc_devices) {
                mci = list_entry(item, struct mem_ctl_info, link);

                if (mci->mc_idx >= idx) {
                        if (mci->mc_idx == idx)
                                return mci;

                        break;
                }
        }

        return NULL;
}
EXPORT_SYMBOL(edac_mc_find);
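
/*
 * Illustrative sketch only: a caller that already holds mem_ctls_mutex can
 * look up a controller by index, for example:
 *
 *      struct mem_ctl_info *mci = edac_mc_find(0);
 *
 *      if (mci)
 *              edac_mc_printk(mci, KERN_INFO, "found MC0\n");
 */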

/**
 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
 *	create sysfs entries associated with mci structure
 * @mci: pointer to the mci structure to be added to the list
 *
 * Return:
 *	0	Success
 *	!0	Failure
 */

/* FIXME - should a warning be printed if no error detection? correction? */
int edac_mc_add_mc(struct mem_ctl_info *mci)
{
        debugf0("%s()\n", __func__);

#ifdef CONFIG_EDAC_DEBUG
        if (edac_debug_level >= 3)
                edac_mc_dump_mci(mci);

        if (edac_debug_level >= 4) {
                int i;

                for (i = 0; i < mci->nr_csrows; i++) {
                        int j;

                        edac_mc_dump_csrow(&mci->csrows[i]);
                        for (j = 0; j < mci->csrows[i].nr_channels; j++)
                                edac_mc_dump_channel(&mci->csrows[i].
                                                channels[j]);
                }
        }
#endif
        mutex_lock(&mem_ctls_mutex);

        if (add_mc_to_global_list(mci))
                goto fail0;

        /* set load time so that error rate can be tracked */
        mci->start_time = jiffies;

        if (edac_create_sysfs_mci_device(mci)) {
                edac_mc_printk(mci, KERN_WARNING,
                        "failed to create sysfs device\n");
                goto fail1;
        }

        /* If there IS a check routine, then we are running POLLED */
        if (mci->edac_check != NULL) {
                /* This instance is NOW RUNNING */
                mci->op_state = OP_RUNNING_POLL;

                edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
        } else {
                mci->op_state = OP_RUNNING_INTERRUPT;
        }

        /* Report action taken */
        edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
                " DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));

        mutex_unlock(&mem_ctls_mutex);
        return 0;

fail1:
        del_mc_from_global_list(mci);

fail0:
        mutex_unlock(&mem_ctls_mutex);
        return 1;
}
EXPORT_SYMBOL_GPL(edac_mc_add_mc);
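
/*
 * Rough probe-time sequence for a hypothetical driver (sketch only; the
 * struct name, callback, and strings below are placeholders, and only the
 * usual minimum of mci fields is shown):
 *
 *      mci = edac_mc_alloc(sizeof(struct my_mc_priv),
 *                          nr_csrows, nr_chans, 0);
 *      if (!mci)
 *              return -ENOMEM;
 *
 *      mci->dev = &pdev->dev;
 *      mci->mtype_cap = MEM_FLAG_DDR3;
 *      mci->edac_ctl_cap = EDAC_FLAG_SECDED;
 *      mci->mod_name = "my_edac";
 *      mci->ctl_name = "my_controller";
 *      mci->edac_check = my_edac_check;   (optional polling callback)
 *
 *      if (edac_mc_add_mc(mci)) {
 *              edac_mc_free(mci);
 *              return -ENODEV;
 *      }
 */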

/**
 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
 *	remove mci structure from global list
 * @dev: Pointer to 'struct device' representing mci structure to remove.
 *
 * Return pointer to removed mci structure, or NULL if device not found.
 */
struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
{
        struct mem_ctl_info *mci;

        debugf0("%s()\n", __func__);

        mutex_lock(&mem_ctls_mutex);

        /* find the requested mci struct in the global list */
        mci = find_mci_by_dev(dev);
        if (mci == NULL) {
                mutex_unlock(&mem_ctls_mutex);
                return NULL;
        }

        del_mc_from_global_list(mci);
        mutex_unlock(&mem_ctls_mutex);

        /* flush workq processes */
        edac_mc_workq_teardown(mci);

        /* marking MCI offline */
        mci->op_state = OP_OFFLINE;

        /* remove from sysfs */
        edac_remove_sysfs_mci_device(mci);

        edac_printk(KERN_INFO, EDAC_MC,
                "Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
                mci->mod_name, mci->ctl_name, edac_dev_name(mci));

        return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_del_mc);
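
/*
 * Matching remove-time sequence for the probe sketch above (illustrative
 * only; pdev is the driver's platform/PCI device):
 *
 *      mci = edac_mc_del_mc(&pdev->dev);
 *      if (mci)
 *              edac_mc_free(mci);
 */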

static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
                                u32 size)
{
        struct page *pg;
        void *virt_addr;
        unsigned long flags = 0;

        debugf3("%s()\n", __func__);

        /* ECC error page was not in our memory. Ignore it. */
        if (!pfn_valid(page))
                return;

        /* Find the actual page structure then map it and fix */
        pg = pfn_to_page(page);

        if (PageHighMem(pg))
                local_irq_save(flags);

        virt_addr = kmap_atomic(pg);

        /* Perform architecture specific atomic scrub operation */
        atomic_scrub(virt_addr + offset, size);

        /* Unmap and complete */
        kunmap_atomic(virt_addr);

        if (PageHighMem(pg))
                local_irq_restore(flags);
}

/* FIXME - should return -1 */
int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
{
        struct csrow_info *csrows = mci->csrows;
        int row, i, j, n;

        debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
        row = -1;

        for (i = 0; i < mci->nr_csrows; i++) {
                struct csrow_info *csrow = &csrows[i];
                n = 0;
                for (j = 0; j < csrow->nr_channels; j++) {
                        struct dimm_info *dimm = csrow->channels[j].dimm;
                        n += dimm->nr_pages;
                }
                if (n == 0)
                        continue;

                debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
                        "mask(0x%lx)\n", mci->mc_idx, __func__,
                        csrow->first_page, page, csrow->last_page,
                        csrow->page_mask);

                if ((page >= csrow->first_page) &&
                    (page <= csrow->last_page) &&
                    ((page & csrow->page_mask) ==
                     (csrow->first_page & csrow->page_mask))) {
                        row = i;
                        break;
                }
        }

        if (row == -1)
                edac_mc_printk(mci, KERN_ERR,
                        "could not look up page error address %lx\n",
                        (unsigned long)page);

        return row;
}
EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);

/* FIXME - settable log (warning/emerg) levels */
/* FIXME - integrate with evlog: http://evlog.sourceforge.net/ */
void edac_mc_handle_ce(struct mem_ctl_info *mci,
                unsigned long page_frame_number,
                unsigned long offset_in_page, unsigned long syndrome,
                int row, int channel, const char *msg)
{
        unsigned long remapped_page;
        char *label = NULL;
        u32 grain;

        debugf3("MC%d: %s()\n", mci->mc_idx, __func__);

        /* FIXME - maybe make panic on INTERNAL ERROR an option */
        if (row >= mci->nr_csrows || row < 0) {
                /* something is wrong */
                edac_mc_printk(mci, KERN_ERR,
                        "INTERNAL ERROR: row out of range "
                        "(%d >= %d)\n", row, mci->nr_csrows);
                edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
                return;
        }

        if (channel >= mci->csrows[row].nr_channels || channel < 0) {
                /* something is wrong */
                edac_mc_printk(mci, KERN_ERR,
                        "INTERNAL ERROR: channel out of range "
                        "(%d >= %d)\n", channel,
                        mci->csrows[row].nr_channels);
                edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
                return;
        }

        label = mci->csrows[row].channels[channel].dimm->label;
        grain = mci->csrows[row].channels[channel].dimm->grain;

        if (edac_mc_get_log_ce())
                /* FIXME - put in DIMM location */
                edac_mc_printk(mci, KERN_WARNING,
                        "CE page 0x%lx, offset 0x%lx, grain %d, syndrome "
                        "0x%lx, row %d, channel %d, label \"%s\": %s\n",
                        page_frame_number, offset_in_page,
                        grain, syndrome, row, channel,
                        label, msg);

        mci->ce_count++;
        mci->csrows[row].ce_count++;
        mci->csrows[row].channels[channel].dimm->ce_count++;
        mci->csrows[row].channels[channel].ce_count++;

        if (mci->scrub_mode & SCRUB_SW_SRC) {
                /*
                 * Some MC's can remap memory so that it is still available
                 * at a different address when PCI devices map into memory.
                 * MC's that can't do this lose the memory where PCI devices
                 * are mapped. This mapping is MC dependent and so we call
                 * back into the MC driver for it to map the MC page to
                 * a physical (CPU) page which can then be mapped to a virtual
                 * page - which can then be scrubbed.
                 */
                remapped_page = mci->ctl_page_to_phys ?
                        mci->ctl_page_to_phys(mci, page_frame_number) :
                        page_frame_number;

                edac_mc_scrub_block(remapped_page, offset_in_page, grain);
        }
}
EXPORT_SYMBOL_GPL(edac_mc_handle_ce);
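
/*
 * Illustrative only: a driver's edac_check callback (or interrupt handler)
 * that has decoded a corrected error into a csrow/channel pair might report
 * it roughly like this; the decoded values are hypothetical local variables:
 *
 *      edac_mc_handle_ce(mci, page_frame_number, offset_in_page,
 *                        syndrome, row, channel, "corrected error");
 */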

void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci, const char *msg)
{
        if (edac_mc_get_log_ce())
                edac_mc_printk(mci, KERN_WARNING,
                        "CE - no information available: %s\n", msg);

        mci->ce_noinfo_count++;
        mci->ce_count++;
}
EXPORT_SYMBOL_GPL(edac_mc_handle_ce_no_info);

void edac_mc_handle_ue(struct mem_ctl_info *mci,
                unsigned long page_frame_number,
                unsigned long offset_in_page, int row, const char *msg)
{
        int len = EDAC_MC_LABEL_LEN * 4;
        char labels[len + 1];
        char *pos = labels;
        int chan;
        int chars;
        char *label = NULL;
        u32 grain;

        debugf3("MC%d: %s()\n", mci->mc_idx, __func__);

        /* FIXME - maybe make panic on INTERNAL ERROR an option */
        if (row >= mci->nr_csrows || row < 0) {
                /* something is wrong */
                edac_mc_printk(mci, KERN_ERR,
                        "INTERNAL ERROR: row out of range "
                        "(%d >= %d)\n", row, mci->nr_csrows);
                edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
                return;
        }

        grain = mci->csrows[row].channels[0].dimm->grain;
        label = mci->csrows[row].channels[0].dimm->label;
        chars = snprintf(pos, len + 1, "%s", label);
        len -= chars;
        pos += chars;

        for (chan = 1; (chan < mci->csrows[row].nr_channels) && (len > 0);
             chan++) {
                label = mci->csrows[row].channels[chan].dimm->label;
                chars = snprintf(pos, len + 1, ":%s", label);
                len -= chars;
                pos += chars;
        }

        if (edac_mc_get_log_ue())
                edac_mc_printk(mci, KERN_EMERG,
                        "UE page 0x%lx, offset 0x%lx, grain %d, row %d, "
                        "labels \"%s\": %s\n", page_frame_number,
                        offset_in_page, grain, row, labels, msg);

        if (edac_mc_get_panic_on_ue())
                panic("EDAC MC%d: UE page 0x%lx, offset 0x%lx, grain %d, "
                        "row %d, labels \"%s\": %s\n", mci->mc_idx,
                        page_frame_number, offset_in_page,
                        grain, row, labels, msg);

        mci->ue_count++;
        mci->csrows[row].ue_count++;
}
EXPORT_SYMBOL_GPL(edac_mc_handle_ue);

void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci, const char *msg)
{
        if (edac_mc_get_panic_on_ue())
                panic("EDAC MC%d: Uncorrected Error", mci->mc_idx);

        if (edac_mc_get_log_ue())
                edac_mc_printk(mci, KERN_WARNING,
                        "UE - no information available: %s\n", msg);
        mci->ue_noinfo_count++;
        mci->ue_count++;
}
EXPORT_SYMBOL_GPL(edac_mc_handle_ue_no_info);

/*************************************************************
 * On Fully Buffered DIMM modules, this helper function is
 * called to process UE events
 */
void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci,
                        unsigned int csrow,
                        unsigned int channela,
                        unsigned int channelb, char *msg)
{
        int len = EDAC_MC_LABEL_LEN * 4;
        char labels[len + 1];
        char *pos = labels;
        int chars;
        char *label;

        if (csrow >= mci->nr_csrows) {
                /* something is wrong */
                edac_mc_printk(mci, KERN_ERR,
                        "INTERNAL ERROR: row out of range (%d >= %d)\n",
                        csrow, mci->nr_csrows);
                edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
                return;
        }

        if (channela >= mci->csrows[csrow].nr_channels) {
                /* something is wrong */
                edac_mc_printk(mci, KERN_ERR,
                        "INTERNAL ERROR: channel-a out of range "
                        "(%d >= %d)\n",
                        channela, mci->csrows[csrow].nr_channels);
                edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
                return;
        }

        if (channelb >= mci->csrows[csrow].nr_channels) {
                /* something is wrong */
                edac_mc_printk(mci, KERN_ERR,
                        "INTERNAL ERROR: channel-b out of range "
                        "(%d >= %d)\n",
                        channelb, mci->csrows[csrow].nr_channels);
                edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
                return;
        }

        mci->ue_count++;
        mci->csrows[csrow].ue_count++;

        /* Generate the DIMM labels from the specified channels */
        label = mci->csrows[csrow].channels[channela].dimm->label;
        chars = snprintf(pos, len + 1, "%s", label);
        len -= chars;
        pos += chars;

        chars = snprintf(pos, len + 1, "-%s",
                mci->csrows[csrow].channels[channelb].dimm->label);

        if (edac_mc_get_log_ue())
                edac_mc_printk(mci, KERN_EMERG,
                        "UE row %d, channel-a= %d channel-b= %d "
                        "labels \"%s\": %s\n", csrow, channela, channelb,
                        labels, msg);

        if (edac_mc_get_panic_on_ue())
                panic("UE row %d, channel-a= %d channel-b= %d "
                        "labels \"%s\": %s\n", csrow, channela,
                        channelb, labels, msg);
}
EXPORT_SYMBOL(edac_mc_handle_fbd_ue);

/*************************************************************
 * On Fully Buffered DIMM modules, this helper function is
 * called to process CE events
 */
void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci,
                        unsigned int csrow, unsigned int channel, char *msg)
{
        char *label = NULL;

        /* Ensure boundary values */
        if (csrow >= mci->nr_csrows) {
                /* something is wrong */
                edac_mc_printk(mci, KERN_ERR,
                        "INTERNAL ERROR: row out of range (%d >= %d)\n",
                        csrow, mci->nr_csrows);
                edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
                return;
        }
        if (channel >= mci->csrows[csrow].nr_channels) {
                /* something is wrong */
                edac_mc_printk(mci, KERN_ERR,
                        "INTERNAL ERROR: channel out of range (%d >= %d)\n",
                        channel, mci->csrows[csrow].nr_channels);
                edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
                return;
        }

        label = mci->csrows[csrow].channels[channel].dimm->label;

        if (edac_mc_get_log_ce())
                /* FIXME - put in DIMM location */
                edac_mc_printk(mci, KERN_WARNING,
                        "CE row %d, channel %d, label \"%s\": %s\n",
                        csrow, channel, label, msg);

        mci->ce_count++;
        mci->csrows[csrow].ce_count++;
        mci->csrows[csrow].channels[channel].dimm->ce_count++;
        mci->csrows[csrow].channels[channel].ce_count++;
}
EXPORT_SYMBOL(edac_mc_handle_fbd_ce);