edac: i7core_edac produces undefined behaviour on 32bit
[deliverable/linux.git] / drivers / edac / i7core_edac.c
CommitLineData
a0c36a1f
MCC
1/* Intel 7 core Memory Controller kernel module (Nehalem)
2 *
3 * This file may be distributed under the terms of the
4 * GNU General Public License version 2 only.
5 *
6 * Copyright (c) 2009 by:
7 * Mauro Carvalho Chehab <mchehab@redhat.com>
8 *
9 * Red Hat Inc. http://www.redhat.com
10 *
11 * Forked and adapted from the i5400_edac driver
12 *
13 * Based on the following public Intel datasheets:
14 * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
15 * Datasheet, Volume 2:
16 * http://download.intel.com/design/processor/datashts/320835.pdf
17 * Intel Xeon Processor 5500 Series Datasheet Volume 2
18 * http://www.intel.com/Assets/PDF/datasheet/321322.pdf
19 * also available at:
20 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
21 */
22
a0c36a1f
MCC
23#include <linux/module.h>
24#include <linux/init.h>
25#include <linux/pci.h>
26#include <linux/pci_ids.h>
27#include <linux/slab.h>
28#include <linux/edac.h>
29#include <linux/mmzone.h>
d5381642 30#include <linux/edac_mce.h>
f4742949 31#include <linux/smp.h>
14d2c083 32#include <asm/processor.h>
a0c36a1f
MCC
33
34#include "edac_core.h"
35
f4742949
MCC
36/*
37 * This is used for Nehalem-EP and Nehalem-EX devices, where the non-core
38 * registers start at bus 255, and are not reported by BIOS.
39 * We currently find devices with only 2 sockets. In order to support more QPI
40 * Quick Path Interconnect, just increment this number.
41 */
42#define MAX_SOCKET_BUSES 2
43
44
a0c36a1f
MCC
45/*
46 * Alter this version for the module when modifications are made
47 */
48#define I7CORE_REVISION " Ver: 1.0.0 " __DATE__
49#define EDAC_MOD_STR "i7core_edac"
50
a0c36a1f
MCC
51/*
52 * Debug macros
53 */
54#define i7core_printk(level, fmt, arg...) \
55 edac_printk(level, "i7core", fmt, ##arg)
56
57#define i7core_mc_printk(mci, level, fmt, arg...) \
58 edac_mc_chipset_printk(mci, level, "i7core", fmt, ##arg)
59
60/*
61 * i7core Memory Controller Registers
62 */
63
e9bd2e73
MCC
64 /* OFFSETS for Device 0 Function 0 */
65
66#define MC_CFG_CONTROL 0x90
67
a0c36a1f
MCC
68 /* OFFSETS for Device 3 Function 0 */
69
70#define MC_CONTROL 0x48
71#define MC_STATUS 0x4c
72#define MC_MAX_DOD 0x64
73
442305b1
MCC
74/*
75 * OFFSETS for Device 3 Function 4, as inicated on Xeon 5500 datasheet:
76 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
77 */
78
79#define MC_TEST_ERR_RCV1 0x60
80 #define DIMM2_COR_ERR(r) ((r) & 0x7fff)
81
82#define MC_TEST_ERR_RCV0 0x64
83 #define DIMM1_COR_ERR(r) (((r) >> 16) & 0x7fff)
84 #define DIMM0_COR_ERR(r) ((r) & 0x7fff)
85
b4e8f0b6
MCC
86/* OFFSETS for Device 3 Function 2, as inicated on Xeon 5500 datasheet */
87#define MC_COR_ECC_CNT_0 0x80
88#define MC_COR_ECC_CNT_1 0x84
89#define MC_COR_ECC_CNT_2 0x88
90#define MC_COR_ECC_CNT_3 0x8c
91#define MC_COR_ECC_CNT_4 0x90
92#define MC_COR_ECC_CNT_5 0x94
93
94#define DIMM_TOP_COR_ERR(r) (((r) >> 16) & 0x7fff)
95#define DIMM_BOT_COR_ERR(r) ((r) & 0x7fff)
96
97
a0c36a1f
MCC
98 /* OFFSETS for Devices 4,5 and 6 Function 0 */
99
0b2b7b7e
MCC
100#define MC_CHANNEL_DIMM_INIT_PARAMS 0x58
101 #define THREE_DIMMS_PRESENT (1 << 24)
102 #define SINGLE_QUAD_RANK_PRESENT (1 << 23)
103 #define QUAD_RANK_PRESENT (1 << 22)
104 #define REGISTERED_DIMM (1 << 15)
105
f122a892
MCC
106#define MC_CHANNEL_MAPPER 0x60
107 #define RDLCH(r, ch) ((((r) >> (3 + (ch * 6))) & 0x07) - 1)
108 #define WRLCH(r, ch) ((((r) >> (ch * 6)) & 0x07) - 1)
109
0b2b7b7e
MCC
110#define MC_CHANNEL_RANK_PRESENT 0x7c
111 #define RANK_PRESENT_MASK 0xffff
112
a0c36a1f 113#define MC_CHANNEL_ADDR_MATCH 0xf0
194a40fe
MCC
114#define MC_CHANNEL_ERROR_MASK 0xf8
115#define MC_CHANNEL_ERROR_INJECT 0xfc
116 #define INJECT_ADDR_PARITY 0x10
117 #define INJECT_ECC 0x08
118 #define MASK_CACHELINE 0x06
119 #define MASK_FULL_CACHELINE 0x06
120 #define MASK_MSB32_CACHELINE 0x04
121 #define MASK_LSB32_CACHELINE 0x02
122 #define NO_MASK_CACHELINE 0x00
123 #define REPEAT_EN 0x01
a0c36a1f 124
0b2b7b7e 125 /* OFFSETS for Devices 4,5 and 6 Function 1 */
b990538a 126
0b2b7b7e
MCC
127#define MC_DOD_CH_DIMM0 0x48
128#define MC_DOD_CH_DIMM1 0x4c
129#define MC_DOD_CH_DIMM2 0x50
130 #define RANKOFFSET_MASK ((1 << 12) | (1 << 11) | (1 << 10))
131 #define RANKOFFSET(x) ((x & RANKOFFSET_MASK) >> 10)
132 #define DIMM_PRESENT_MASK (1 << 9)
133 #define DIMM_PRESENT(x) (((x) & DIMM_PRESENT_MASK) >> 9)
854d3349
MCC
134 #define MC_DOD_NUMBANK_MASK ((1 << 8) | (1 << 7))
135 #define MC_DOD_NUMBANK(x) (((x) & MC_DOD_NUMBANK_MASK) >> 7)
136 #define MC_DOD_NUMRANK_MASK ((1 << 6) | (1 << 5))
137 #define MC_DOD_NUMRANK(x) (((x) & MC_DOD_NUMRANK_MASK) >> 5)
41fcb7fe 138 #define MC_DOD_NUMROW_MASK ((1 << 4) | (1 << 3) | (1 << 2))
5566cb7c 139 #define MC_DOD_NUMROW(x) (((x) & MC_DOD_NUMROW_MASK) >> 2)
854d3349
MCC
140 #define MC_DOD_NUMCOL_MASK 3
141 #define MC_DOD_NUMCOL(x) ((x) & MC_DOD_NUMCOL_MASK)
0b2b7b7e 142
f122a892
MCC
143#define MC_RANK_PRESENT 0x7c
144
0b2b7b7e
MCC
145#define MC_SAG_CH_0 0x80
146#define MC_SAG_CH_1 0x84
147#define MC_SAG_CH_2 0x88
148#define MC_SAG_CH_3 0x8c
149#define MC_SAG_CH_4 0x90
150#define MC_SAG_CH_5 0x94
151#define MC_SAG_CH_6 0x98
152#define MC_SAG_CH_7 0x9c
153
154#define MC_RIR_LIMIT_CH_0 0x40
155#define MC_RIR_LIMIT_CH_1 0x44
156#define MC_RIR_LIMIT_CH_2 0x48
157#define MC_RIR_LIMIT_CH_3 0x4C
158#define MC_RIR_LIMIT_CH_4 0x50
159#define MC_RIR_LIMIT_CH_5 0x54
160#define MC_RIR_LIMIT_CH_6 0x58
161#define MC_RIR_LIMIT_CH_7 0x5C
162#define MC_RIR_LIMIT_MASK ((1 << 10) - 1)
163
164#define MC_RIR_WAY_CH 0x80
165 #define MC_RIR_WAY_OFFSET_MASK (((1 << 14) - 1) & ~0x7)
166 #define MC_RIR_WAY_RANK_MASK 0x7
167
a0c36a1f
MCC
168/*
169 * i7core structs
170 */
171
172#define NUM_CHANS 3
442305b1
MCC
173#define MAX_DIMMS 3 /* Max DIMMS per channel */
174#define MAX_MCR_FUNC 4
175#define MAX_CHAN_FUNC 3
a0c36a1f
MCC
176
177struct i7core_info {
178 u32 mc_control;
179 u32 mc_status;
180 u32 max_dod;
f122a892 181 u32 ch_map;
a0c36a1f
MCC
182};
183
194a40fe
MCC
184
185struct i7core_inject {
186 int enable;
187
188 u32 section;
189 u32 type;
190 u32 eccmask;
191
192 /* Error address mask */
193 int channel, dimm, rank, bank, page, col;
194};
195
0b2b7b7e 196struct i7core_channel {
442305b1
MCC
197 u32 ranks;
198 u32 dimms;
0b2b7b7e
MCC
199};
200
8f331907 201struct pci_id_descr {
66607706
MCC
202 int dev;
203 int func;
204 int dev_id;
de06eeef 205 int optional;
8f331907
MCC
206};
207
f4742949
MCC
208struct i7core_dev {
209 struct list_head list;
210 u8 socket;
211 struct pci_dev **pdev;
de06eeef 212 int n_devs;
f4742949
MCC
213 struct mem_ctl_info *mci;
214};
215
a0c36a1f 216struct i7core_pvt {
f4742949
MCC
217 struct pci_dev *pci_noncore;
218 struct pci_dev *pci_mcr[MAX_MCR_FUNC + 1];
219 struct pci_dev *pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1];
220
221 struct i7core_dev *i7core_dev;
67166af4 222
a0c36a1f 223 struct i7core_info info;
194a40fe 224 struct i7core_inject inject;
f4742949 225 struct i7core_channel channel[NUM_CHANS];
67166af4 226
f4742949 227 int channels; /* Number of active channels */
442305b1 228
f4742949
MCC
229 int ce_count_available;
230 int csrow_map[NUM_CHANS][MAX_DIMMS];
b4e8f0b6
MCC
231
232 /* ECC corrected errors counts per udimm */
f4742949
MCC
233 unsigned long udimm_ce_count[MAX_DIMMS];
234 int udimm_last_ce_count[MAX_DIMMS];
b4e8f0b6 235 /* ECC corrected errors counts per rdimm */
f4742949
MCC
236 unsigned long rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
237 int rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];
442305b1 238
f4742949 239 unsigned int is_registered;
14d2c083 240
d5381642
MCC
241 /* mcelog glue */
242 struct edac_mce edac_mce;
ca9c90ba
MCC
243
244 /* Fifo double buffers */
d5381642 245 struct mce mce_entry[MCE_LOG_LEN];
ca9c90ba
MCC
246 struct mce mce_outentry[MCE_LOG_LEN];
247
248 /* Fifo in/out counters */
249 unsigned mce_in, mce_out;
250
251 /* Count indicator to show errors not got */
252 unsigned mce_overrun;
a0c36a1f
MCC
253};
254
66607706
MCC
255/* Static vars */
256static LIST_HEAD(i7core_edac_list);
257static DEFINE_MUTEX(i7core_edac_lock);
a0c36a1f 258
8f331907
MCC
259#define PCI_DESCR(device, function, device_id) \
260 .dev = (device), \
261 .func = (function), \
262 .dev_id = (device_id)
263
de06eeef 264struct pci_id_descr pci_dev_descr_i7core[] = {
8f331907
MCC
265 /* Memory controller */
266 { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR) },
267 { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD) },
de06eeef
MCC
268 /* Exists only for RDIMM */
269 { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1 },
8f331907
MCC
270 { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },
271
272 /* Channel 0 */
273 { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) },
274 { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) },
275 { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) },
276 { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC) },
277
278 /* Channel 1 */
279 { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) },
280 { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) },
281 { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) },
282 { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC) },
283
284 /* Channel 2 */
285 { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) },
286 { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
287 { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
288 { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC) },
310cbb72
MCC
289
290 /* Generic Non-core registers */
291 /*
292 * This is the PCI device on i7core and on Xeon 35xx (8086:2c41)
293 * On Xeon 55xx, however, it has a different id (8086:2c40). So,
294 * the probing code needs to test for the other address in case of
295 * failure of this one
296 */
fd382654 297 { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NONCORE) },
310cbb72 298
a0c36a1f 299};
8f331907
MCC
300
301/*
302 * pci_device_id table for which devices we are looking for
8f331907
MCC
303 */
304static const struct pci_device_id i7core_pci_tbl[] __devinitdata = {
d1fd4fb6 305 {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
8f331907
MCC
306 {0,} /* 0 terminated list. */
307};
308
a0c36a1f
MCC
309static struct edac_pci_ctl_info *i7core_pci;
310
311/****************************************************************************
312 Anciliary status routines
313 ****************************************************************************/
314
315 /* MC_CONTROL bits */
ef708b53
MCC
316#define CH_ACTIVE(pvt, ch) ((pvt)->info.mc_control & (1 << (8 + ch)))
317#define ECCx8(pvt) ((pvt)->info.mc_control & (1 << 1))
a0c36a1f
MCC
318
319 /* MC_STATUS bits */
61053fde 320#define ECC_ENABLED(pvt) ((pvt)->info.mc_status & (1 << 4))
ef708b53 321#define CH_DISABLED(pvt, ch) ((pvt)->info.mc_status & (1 << ch))
a0c36a1f
MCC
322
323 /* MC_MAX_DOD read functions */
/* Decode the 2-bit MC_MAX_DOD "number of DIMMs" field (0-based count). */
static inline int numdimms(u32 dimms)
{
	return 1 + (dimms & 0x3);
}
328
/* Decode the 2-bit "number of ranks" field; encoding 3 is reserved. */
static inline int numrank(u32 rank)
{
	switch (rank & 0x3) {
	case 0:
		return 1;
	case 1:
		return 2;
	case 2:
		return 4;
	default:
		return -EINVAL;
	}
}
335
/* Decode the 2-bit "number of banks" field; encoding 3 is reserved. */
static inline int numbank(u32 bank)
{
	switch (bank & 0x3) {
	case 0:
		return 4;
	case 1:
		return 8;
	case 2:
		return 16;
	default:
		return -EINVAL;
	}
}
342
/*
 * Decode the 3-bit "number of rows" field into the row count
 * (4K..64K); encodings 5-7 are reserved.
 */
static inline int numrow(u32 row)
{
	static const int row_count[8] = {
		1 << 12, 1 << 13, 1 << 14, 1 << 15,
		1 << 16, -EINVAL, -EINVAL, -EINVAL,
	};

	return row_count[row & 0x7];
}
352
/*
 * Decode the 2-bit "number of columns" field into the column count
 * (1K..4K); encoding 3 is reserved.
 *
 * The table was declared with 8 slots (copy/paste from numrow()) even
 * though the index is masked with 0x3 and only 4 encodings exist;
 * shrink it to match the mask.
 */
static inline int numcol(u32 col)
{
	static const int cols[4] = {
		1 << 10, 1 << 11, 1 << 12, -EINVAL,
	};

	return cols[col & 0x3];
}
360
f4742949 361static struct i7core_dev *get_i7core_dev(u8 socket)
66607706
MCC
362{
363 struct i7core_dev *i7core_dev;
364
365 list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
366 if (i7core_dev->socket == socket)
367 return i7core_dev;
368 }
369
370 return NULL;
371}
372
a0c36a1f
MCC
373/****************************************************************************
374 Memory check routines
375 ****************************************************************************/
67166af4
MCC
376static struct pci_dev *get_pdev_slot_func(u8 socket, unsigned slot,
377 unsigned func)
ef708b53 378{
66607706 379 struct i7core_dev *i7core_dev = get_i7core_dev(socket);
ef708b53 380 int i;
ef708b53 381
66607706
MCC
382 if (!i7core_dev)
383 return NULL;
384
de06eeef 385 for (i = 0; i < i7core_dev->n_devs; i++) {
66607706 386 if (!i7core_dev->pdev[i])
ef708b53
MCC
387 continue;
388
66607706
MCC
389 if (PCI_SLOT(i7core_dev->pdev[i]->devfn) == slot &&
390 PCI_FUNC(i7core_dev->pdev[i]->devfn) == func) {
391 return i7core_dev->pdev[i];
ef708b53
MCC
392 }
393 }
394
eb94fc40
MCC
395 return NULL;
396}
397
ec6df24c
MCC
398/**
399 * i7core_get_active_channels() - gets the number of channels and csrows
400 * @socket: Quick Path Interconnect socket
401 * @channels: Number of channels that will be returned
402 * @csrows: Number of csrows found
403 *
404 * Since EDAC core needs to know in advance the number of available channels
405 * and csrows, in order to allocate memory for csrows/channels, it is needed
406 * to run two similar steps. At the first step, implemented on this function,
407 * it checks the number of csrows/channels present at one socket.
408 * this is used in order to properly allocate the size of mci components.
409 *
410 * It should be noticed that none of the current available datasheets explain
411 * or even mention how csrows are seen by the memory controller. So, we need
412 * to add a fake description for csrows.
413 * So, this driver is attributing one DIMM memory for one csrow.
414 */
67166af4
MCC
415static int i7core_get_active_channels(u8 socket, unsigned *channels,
416 unsigned *csrows)
eb94fc40
MCC
417{
418 struct pci_dev *pdev = NULL;
419 int i, j;
420 u32 status, control;
421
422 *channels = 0;
423 *csrows = 0;
424
67166af4 425 pdev = get_pdev_slot_func(socket, 3, 0);
b7c76151 426 if (!pdev) {
67166af4
MCC
427 i7core_printk(KERN_ERR, "Couldn't find socket %d fn 3.0!!!\n",
428 socket);
ef708b53 429 return -ENODEV;
b7c76151 430 }
ef708b53
MCC
431
432 /* Device 3 function 0 reads */
433 pci_read_config_dword(pdev, MC_STATUS, &status);
434 pci_read_config_dword(pdev, MC_CONTROL, &control);
435
436 for (i = 0; i < NUM_CHANS; i++) {
eb94fc40 437 u32 dimm_dod[3];
ef708b53
MCC
438 /* Check if the channel is active */
439 if (!(control & (1 << (8 + i))))
440 continue;
441
442 /* Check if the channel is disabled */
41fcb7fe 443 if (status & (1 << i))
ef708b53 444 continue;
ef708b53 445
67166af4 446 pdev = get_pdev_slot_func(socket, i + 4, 1);
eb94fc40 447 if (!pdev) {
67166af4
MCC
448 i7core_printk(KERN_ERR, "Couldn't find socket %d "
449 "fn %d.%d!!!\n",
450 socket, i + 4, 1);
eb94fc40
MCC
451 return -ENODEV;
452 }
453 /* Devices 4-6 function 1 */
454 pci_read_config_dword(pdev,
455 MC_DOD_CH_DIMM0, &dimm_dod[0]);
456 pci_read_config_dword(pdev,
457 MC_DOD_CH_DIMM1, &dimm_dod[1]);
458 pci_read_config_dword(pdev,
459 MC_DOD_CH_DIMM2, &dimm_dod[2]);
460
ef708b53 461 (*channels)++;
eb94fc40
MCC
462
463 for (j = 0; j < 3; j++) {
464 if (!DIMM_PRESENT(dimm_dod[j]))
465 continue;
466 (*csrows)++;
467 }
ef708b53
MCC
468 }
469
c77720b9 470 debugf0("Number of active channels on socket %d: %d\n",
67166af4 471 socket, *channels);
1c6fed80 472
ef708b53
MCC
473 return 0;
474}
475
f4742949 476static int get_dimm_config(struct mem_ctl_info *mci, int *csrow)
a0c36a1f
MCC
477{
478 struct i7core_pvt *pvt = mci->pvt_info;
1c6fed80 479 struct csrow_info *csr;
854d3349 480 struct pci_dev *pdev;
ba6c5c62 481 int i, j;
5566cb7c 482 unsigned long last_page = 0;
1c6fed80 483 enum edac_type mode;
854d3349 484 enum mem_type mtype;
a0c36a1f 485
854d3349 486 /* Get data from the MC register, function 0 */
f4742949 487 pdev = pvt->pci_mcr[0];
7dd6953c 488 if (!pdev)
8f331907
MCC
489 return -ENODEV;
490
f122a892 491 /* Device 3 function 0 reads */
7dd6953c
MCC
492 pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
493 pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
494 pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
495 pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);
f122a892 496
17cb7b0c 497 debugf0("QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
4af91889 498 pvt->i7core_dev->socket, pvt->info.mc_control, pvt->info.mc_status,
f122a892 499 pvt->info.max_dod, pvt->info.ch_map);
a0c36a1f 500
1c6fed80 501 if (ECC_ENABLED(pvt)) {
41fcb7fe 502 debugf0("ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
1c6fed80
MCC
503 if (ECCx8(pvt))
504 mode = EDAC_S8ECD8ED;
505 else
506 mode = EDAC_S4ECD4ED;
507 } else {
a0c36a1f 508 debugf0("ECC disabled\n");
1c6fed80
MCC
509 mode = EDAC_NONE;
510 }
a0c36a1f
MCC
511
512 /* FIXME: need to handle the error codes */
17cb7b0c
MCC
513 debugf0("DOD Max limits: DIMMS: %d, %d-ranked, %d-banked "
514 "x%x x 0x%x\n",
854d3349
MCC
515 numdimms(pvt->info.max_dod),
516 numrank(pvt->info.max_dod >> 2),
276b824c 517 numbank(pvt->info.max_dod >> 4),
854d3349
MCC
518 numrow(pvt->info.max_dod >> 6),
519 numcol(pvt->info.max_dod >> 9));
a0c36a1f 520
0b2b7b7e 521 for (i = 0; i < NUM_CHANS; i++) {
854d3349 522 u32 data, dimm_dod[3], value[8];
0b2b7b7e
MCC
523
524 if (!CH_ACTIVE(pvt, i)) {
525 debugf0("Channel %i is not active\n", i);
526 continue;
527 }
528 if (CH_DISABLED(pvt, i)) {
529 debugf0("Channel %i is disabled\n", i);
530 continue;
531 }
532
f122a892 533 /* Devices 4-6 function 0 */
f4742949 534 pci_read_config_dword(pvt->pci_ch[i][0],
0b2b7b7e
MCC
535 MC_CHANNEL_DIMM_INIT_PARAMS, &data);
536
f4742949 537 pvt->channel[i].ranks = (data & QUAD_RANK_PRESENT) ?
67166af4 538 4 : 2;
0b2b7b7e 539
854d3349
MCC
540 if (data & REGISTERED_DIMM)
541 mtype = MEM_RDDR3;
14d2c083 542 else
854d3349
MCC
543 mtype = MEM_DDR3;
544#if 0
0b2b7b7e
MCC
545 if (data & THREE_DIMMS_PRESENT)
546 pvt->channel[i].dimms = 3;
547 else if (data & SINGLE_QUAD_RANK_PRESENT)
548 pvt->channel[i].dimms = 1;
549 else
550 pvt->channel[i].dimms = 2;
854d3349
MCC
551#endif
552
553 /* Devices 4-6 function 1 */
f4742949 554 pci_read_config_dword(pvt->pci_ch[i][1],
854d3349 555 MC_DOD_CH_DIMM0, &dimm_dod[0]);
f4742949 556 pci_read_config_dword(pvt->pci_ch[i][1],
854d3349 557 MC_DOD_CH_DIMM1, &dimm_dod[1]);
f4742949 558 pci_read_config_dword(pvt->pci_ch[i][1],
854d3349 559 MC_DOD_CH_DIMM2, &dimm_dod[2]);
0b2b7b7e 560
1c6fed80 561 debugf0("Ch%d phy rd%d, wr%d (0x%08x): "
854d3349 562 "%d ranks, %cDIMMs\n",
1c6fed80
MCC
563 i,
564 RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
565 data,
f4742949 566 pvt->channel[i].ranks,
41fcb7fe 567 (data & REGISTERED_DIMM) ? 'R' : 'U');
854d3349
MCC
568
569 for (j = 0; j < 3; j++) {
570 u32 banks, ranks, rows, cols;
5566cb7c 571 u32 size, npages;
854d3349
MCC
572
573 if (!DIMM_PRESENT(dimm_dod[j]))
574 continue;
575
576 banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
577 ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
578 rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
579 cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));
580
5566cb7c
MCC
581 /* DDR3 has 8 I/O banks */
582 size = (rows * cols * banks * ranks) >> (20 - 3);
583
f4742949 584 pvt->channel[i].dimms++;
854d3349 585
17cb7b0c
MCC
586 debugf0("\tdimm %d %d Mb offset: %x, "
587 "bank: %d, rank: %d, row: %#x, col: %#x\n",
588 j, size,
854d3349
MCC
589 RANKOFFSET(dimm_dod[j]),
590 banks, ranks, rows, cols);
591
eb94fc40
MCC
592#if PAGE_SHIFT > 20
593 npages = size >> (PAGE_SHIFT - 20);
594#else
595 npages = size << (20 - PAGE_SHIFT);
596#endif
5566cb7c 597
ba6c5c62 598 csr = &mci->csrows[*csrow];
5566cb7c
MCC
599 csr->first_page = last_page + 1;
600 last_page += npages;
601 csr->last_page = last_page;
602 csr->nr_pages = npages;
603
854d3349 604 csr->page_mask = 0;
eb94fc40 605 csr->grain = 8;
ba6c5c62 606 csr->csrow_idx = *csrow;
eb94fc40
MCC
607 csr->nr_channels = 1;
608
609 csr->channels[0].chan_idx = i;
610 csr->channels[0].ce_count = 0;
854d3349 611
f4742949 612 pvt->csrow_map[i][j] = *csrow;
b4e8f0b6 613
854d3349
MCC
614 switch (banks) {
615 case 4:
616 csr->dtype = DEV_X4;
617 break;
618 case 8:
619 csr->dtype = DEV_X8;
620 break;
621 case 16:
622 csr->dtype = DEV_X16;
623 break;
624 default:
625 csr->dtype = DEV_UNKNOWN;
626 }
627
628 csr->edac_mode = mode;
629 csr->mtype = mtype;
630
ba6c5c62 631 (*csrow)++;
854d3349 632 }
1c6fed80 633
854d3349
MCC
634 pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
635 pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
636 pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
637 pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
638 pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
639 pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
640 pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
641 pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
17cb7b0c 642 debugf1("\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
854d3349 643 for (j = 0; j < 8; j++)
17cb7b0c 644 debugf1("\t\t%#x\t%#x\t%#x\n",
854d3349
MCC
645 (value[j] >> 27) & 0x1,
646 (value[j] >> 24) & 0x7,
647 (value[j] && ((1 << 24) - 1)));
0b2b7b7e
MCC
648 }
649
a0c36a1f
MCC
650 return 0;
651}
652
194a40fe
MCC
653/****************************************************************************
654 Error insertion routines
655 ****************************************************************************/
656
657/* The i7core has independent error injection features per channel.
658 However, to have a simpler code, we don't allow enabling error injection
659 on more than one channel.
660 Also, since a change at an inject parameter will be applied only at enable,
661 we're disabling error injection on all write calls to the sysfs nodes that
662 controls the error code injection.
663 */
8f331907 664static int disable_inject(struct mem_ctl_info *mci)
194a40fe
MCC
665{
666 struct i7core_pvt *pvt = mci->pvt_info;
667
668 pvt->inject.enable = 0;
669
f4742949 670 if (!pvt->pci_ch[pvt->inject.channel][0])
8f331907
MCC
671 return -ENODEV;
672
f4742949 673 pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
4157d9f5 674 MC_CHANNEL_ERROR_INJECT, 0);
8f331907
MCC
675
676 return 0;
194a40fe
MCC
677}
678
679/*
680 * i7core inject inject.section
681 *
682 * accept and store error injection inject.section value
683 * bit 0 - refers to the lower 32-byte half cacheline
684 * bit 1 - refers to the upper 32-byte half cacheline
685 */
686static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci,
687 const char *data, size_t count)
688{
689 struct i7core_pvt *pvt = mci->pvt_info;
690 unsigned long value;
691 int rc;
692
693 if (pvt->inject.enable)
41fcb7fe 694 disable_inject(mci);
194a40fe
MCC
695
696 rc = strict_strtoul(data, 10, &value);
697 if ((rc < 0) || (value > 3))
2068def5 698 return -EIO;
194a40fe
MCC
699
700 pvt->inject.section = (u32) value;
701 return count;
702}
703
704static ssize_t i7core_inject_section_show(struct mem_ctl_info *mci,
705 char *data)
706{
707 struct i7core_pvt *pvt = mci->pvt_info;
708 return sprintf(data, "0x%08x\n", pvt->inject.section);
709}
710
711/*
712 * i7core inject.type
713 *
714 * accept and store error injection inject.section value
715 * bit 0 - repeat enable - Enable error repetition
716 * bit 1 - inject ECC error
717 * bit 2 - inject parity error
718 */
719static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci,
720 const char *data, size_t count)
721{
722 struct i7core_pvt *pvt = mci->pvt_info;
723 unsigned long value;
724 int rc;
725
726 if (pvt->inject.enable)
41fcb7fe 727 disable_inject(mci);
194a40fe
MCC
728
729 rc = strict_strtoul(data, 10, &value);
730 if ((rc < 0) || (value > 7))
2068def5 731 return -EIO;
194a40fe
MCC
732
733 pvt->inject.type = (u32) value;
734 return count;
735}
736
737static ssize_t i7core_inject_type_show(struct mem_ctl_info *mci,
738 char *data)
739{
740 struct i7core_pvt *pvt = mci->pvt_info;
741 return sprintf(data, "0x%08x\n", pvt->inject.type);
742}
743
744/*
745 * i7core_inject_inject.eccmask_store
746 *
747 * The type of error (UE/CE) will depend on the inject.eccmask value:
748 * Any bits set to a 1 will flip the corresponding ECC bit
749 * Correctable errors can be injected by flipping 1 bit or the bits within
750 * a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
751 * 23:16 and 31:24). Flipping bits in two symbol pairs will cause an
752 * uncorrectable error to be injected.
753 */
754static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci,
755 const char *data, size_t count)
756{
757 struct i7core_pvt *pvt = mci->pvt_info;
758 unsigned long value;
759 int rc;
760
761 if (pvt->inject.enable)
41fcb7fe 762 disable_inject(mci);
194a40fe
MCC
763
764 rc = strict_strtoul(data, 10, &value);
765 if (rc < 0)
2068def5 766 return -EIO;
194a40fe
MCC
767
768 pvt->inject.eccmask = (u32) value;
769 return count;
770}
771
772static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci,
773 char *data)
774{
775 struct i7core_pvt *pvt = mci->pvt_info;
776 return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
777}
778
779/*
780 * i7core_addrmatch
781 *
782 * The type of error (UE/CE) will depend on the inject.eccmask value:
783 * Any bits set to a 1 will flip the corresponding ECC bit
784 * Correctable errors can be injected by flipping 1 bit or the bits within
785 * a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
786 * 23:16 and 31:24). Flipping bits in two symbol pairs will cause an
787 * uncorrectable error to be injected.
788 */
194a40fe 789
a5538e53
MCC
/*
 * DECLARE_ADDR_MATCH(param, limit) - generate the sysfs store/show pair
 * for one injection address-match parameter. Writing "any" selects the
 * wildcard (-1); any other input must be a decimal value below @limit.
 *
 * Fix: the parsed value was declared `long` but passed to
 * strict_strtoul(), which takes an `unsigned long *` (incompatible
 * pointer type). The wildcard case now assigns -1 to the signed
 * parameter directly, and the numeric path uses an unsigned long.
 */
#define DECLARE_ADDR_MATCH(param, limit) \
static ssize_t i7core_inject_store_##param( \
	struct mem_ctl_info *mci, \
	const char *data, size_t count) \
{ \
	struct i7core_pvt *pvt; \
	unsigned long value; \
	int rc; \
 \
	debugf1("%s()\n", __func__); \
	pvt = mci->pvt_info; \
 \
	if (pvt->inject.enable) \
		disable_inject(mci); \
 \
	if (!strcasecmp(data, "any") || !strcasecmp(data, "any\n")) { \
		pvt->inject.param = -1; \
		return count; \
	} \
 \
	rc = strict_strtoul(data, 10, &value); \
	if ((rc < 0) || (value >= limit)) \
		return -EIO; \
 \
	pvt->inject.param = value; \
 \
	return count; \
} \
 \
static ssize_t i7core_inject_show_##param( \
	struct mem_ctl_info *mci, \
	char *data) \
{ \
	struct i7core_pvt *pvt; \
 \
	pvt = mci->pvt_info; \
	debugf1("%s() pvt=%p\n", __func__, pvt); \
	if (pvt->inject.param < 0) \
		return sprintf(data, "any\n"); \
	else \
		return sprintf(data, "%d\n", pvt->inject.param); \
}
831
a5538e53
MCC
832#define ATTR_ADDR_MATCH(param) \
833 { \
834 .attr = { \
835 .name = #param, \
836 .mode = (S_IRUGO | S_IWUSR) \
837 }, \
838 .show = i7core_inject_show_##param, \
839 .store = i7core_inject_store_##param, \
840 }
194a40fe 841
a5538e53
MCC
842DECLARE_ADDR_MATCH(channel, 3);
843DECLARE_ADDR_MATCH(dimm, 3);
844DECLARE_ADDR_MATCH(rank, 4);
845DECLARE_ADDR_MATCH(bank, 32);
846DECLARE_ADDR_MATCH(page, 0x10000);
847DECLARE_ADDR_MATCH(col, 0x4000);
194a40fe 848
276b824c
MCC
849static int write_and_test(struct pci_dev *dev, int where, u32 val)
850{
851 u32 read;
852 int count;
853
4157d9f5
MCC
854 debugf0("setting pci %02x:%02x.%x reg=%02x value=%08x\n",
855 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
856 where, val);
857
276b824c
MCC
858 for (count = 0; count < 10; count++) {
859 if (count)
b990538a 860 msleep(100);
276b824c
MCC
861 pci_write_config_dword(dev, where, val);
862 pci_read_config_dword(dev, where, &read);
863
864 if (read == val)
865 return 0;
866 }
867
4157d9f5
MCC
868 i7core_printk(KERN_ERR, "Error during set pci %02x:%02x.%x reg=%02x "
869 "write=%08x. Read=%08x\n",
870 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
871 where, val, read);
276b824c
MCC
872
873 return -EINVAL;
874}
875
194a40fe
MCC
876/*
877 * This routine prepares the Memory Controller for error injection.
878 * The error will be injected when some process tries to write to the
879 * memory that matches the given criteria.
880 * The criteria can be set in terms of a mask where dimm, rank, bank, page
881 * and col can be specified.
882 * A -1 value for any of the mask items will make the MCU to ignore
883 * that matching criteria for error injection.
884 *
885 * It should be noticed that the error will only happen after a write operation
886 * on a memory that matches the condition. if REPEAT_EN is not enabled at
887 * inject mask, then it will produce just one error. Otherwise, it will repeat
888 * until the injectmask would be cleaned.
889 *
890 * FIXME: This routine assumes that MAXNUMDIMMS value of MC_MAX_DOD
891 * is reliable enough to check if the MC is using the
892 * three channels. However, this is not clear at the datasheet.
893 */
894static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci,
895 const char *data, size_t count)
896{
897 struct i7core_pvt *pvt = mci->pvt_info;
898 u32 injectmask;
899 u64 mask = 0;
900 int rc;
901 long enable;
902
f4742949 903 if (!pvt->pci_ch[pvt->inject.channel][0])
8f331907
MCC
904 return 0;
905
194a40fe
MCC
906 rc = strict_strtoul(data, 10, &enable);
907 if ((rc < 0))
908 return 0;
909
910 if (enable) {
911 pvt->inject.enable = 1;
912 } else {
913 disable_inject(mci);
914 return count;
915 }
916
917 /* Sets pvt->inject.dimm mask */
918 if (pvt->inject.dimm < 0)
486dd09f 919 mask |= 1LL << 41;
194a40fe 920 else {
f4742949 921 if (pvt->channel[pvt->inject.channel].dimms > 2)
486dd09f 922 mask |= (pvt->inject.dimm & 0x3LL) << 35;
194a40fe 923 else
486dd09f 924 mask |= (pvt->inject.dimm & 0x1LL) << 36;
194a40fe
MCC
925 }
926
927 /* Sets pvt->inject.rank mask */
928 if (pvt->inject.rank < 0)
486dd09f 929 mask |= 1LL << 40;
194a40fe 930 else {
f4742949 931 if (pvt->channel[pvt->inject.channel].dimms > 2)
486dd09f 932 mask |= (pvt->inject.rank & 0x1LL) << 34;
194a40fe 933 else
486dd09f 934 mask |= (pvt->inject.rank & 0x3LL) << 34;
194a40fe
MCC
935 }
936
937 /* Sets pvt->inject.bank mask */
938 if (pvt->inject.bank < 0)
486dd09f 939 mask |= 1LL << 39;
194a40fe 940 else
486dd09f 941 mask |= (pvt->inject.bank & 0x15LL) << 30;
194a40fe
MCC
942
943 /* Sets pvt->inject.page mask */
944 if (pvt->inject.page < 0)
486dd09f 945 mask |= 1LL << 38;
194a40fe 946 else
486dd09f 947 mask |= (pvt->inject.page & 0xffff) << 14;
194a40fe
MCC
948
949 /* Sets pvt->inject.column mask */
950 if (pvt->inject.col < 0)
486dd09f 951 mask |= 1LL << 37;
194a40fe 952 else
486dd09f 953 mask |= (pvt->inject.col & 0x3fff);
194a40fe 954
276b824c
MCC
955 /*
956 * bit 0: REPEAT_EN
957 * bits 1-2: MASK_HALF_CACHELINE
958 * bit 3: INJECT_ECC
959 * bit 4: INJECT_ADDR_PARITY
960 */
961
962 injectmask = (pvt->inject.type & 1) |
963 (pvt->inject.section & 0x3) << 1 |
964 (pvt->inject.type & 0x6) << (3 - 1);
965
966 /* Unlock writes to registers - this register is write only */
f4742949 967 pci_write_config_dword(pvt->pci_noncore,
67166af4 968 MC_CFG_CONTROL, 0x2);
e9bd2e73 969
f4742949 970 write_and_test(pvt->pci_ch[pvt->inject.channel][0],
194a40fe 971 MC_CHANNEL_ADDR_MATCH, mask);
f4742949 972 write_and_test(pvt->pci_ch[pvt->inject.channel][0],
7b029d03 973 MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);
7b029d03 974
f4742949 975 write_and_test(pvt->pci_ch[pvt->inject.channel][0],
194a40fe
MCC
976 MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);
977
f4742949 978 write_and_test(pvt->pci_ch[pvt->inject.channel][0],
4157d9f5 979 MC_CHANNEL_ERROR_INJECT, injectmask);
276b824c 980
194a40fe 981 /*
276b824c
MCC
982 * This is something undocumented, based on my tests
983 * Without writing 8 to this register, errors aren't injected. Not sure
984 * why.
194a40fe 985 */
f4742949 986 pci_write_config_dword(pvt->pci_noncore,
276b824c 987 MC_CFG_CONTROL, 8);
194a40fe 988
41fcb7fe
MCC
989 debugf0("Error inject addr match 0x%016llx, ecc 0x%08x,"
990 " inject 0x%08x\n",
194a40fe
MCC
991 mask, pvt->inject.eccmask, injectmask);
992
7b029d03 993
194a40fe
MCC
994 return count;
995}
996
997static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci,
998 char *data)
999{
1000 struct i7core_pvt *pvt = mci->pvt_info;
7b029d03
MCC
1001 u32 injectmask;
1002
f4742949 1003 pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
4157d9f5 1004 MC_CHANNEL_ERROR_INJECT, &injectmask);
7b029d03
MCC
1005
1006 debugf0("Inject error read: 0x%018x\n", injectmask);
1007
1008 if (injectmask & 0x0c)
1009 pvt->inject.enable = 1;
1010
194a40fe
MCC
1011 return sprintf(data, "%d\n", pvt->inject.enable);
1012}
1013
f338d736
MCC
/*
 * DECLARE_COUNTER(param) - generate a sysfs "show" handler printing the
 * corrected-error count for udimm counter @param.
 *
 * Reports "data unavailable" before the first counter snapshot is taken
 * (!ce_count_available) or when the memory is registered, since registered
 * DIMM counts are exposed through the standard per-csrow counters instead.
 */
#define DECLARE_COUNTER(param) \
static ssize_t i7core_show_counter_##param( \
		struct mem_ctl_info *mci, \
		char *data) \
{ \
	struct i7core_pvt *pvt = mci->pvt_info; \
 \
	debugf1("%s() \n", __func__); \
	if (!pvt->ce_count_available || (pvt->is_registered)) \
		return sprintf(data, "data unavailable\n"); \
	return sprintf(data, "%lu\n", \
			pvt->udimm_ce_count[param]); \
}
442305b1 1027
f338d736
MCC
/*
 * ATTR_COUNTER(param) - sysfs attribute entry named "udimm<param>", wired
 * to the matching i7core_show_counter_<param>() handler.  No .store is
 * provided, so the attribute is effectively read-only despite S_IWUSR.
 */
#define ATTR_COUNTER(param) \
	{ \
		.attr = { \
			.name = __stringify(udimm##param), \
			.mode = (S_IRUGO | S_IWUSR) \
		}, \
		.show = i7core_show_counter_##param \
	}
442305b1 1036
f338d736
MCC
/* Instantiate the show handlers for the three udimm corrected-error counters */
DECLARE_COUNTER(0);
DECLARE_COUNTER(1);
DECLARE_COUNTER(2);
442305b1 1040
194a40fe
MCC
1041/*
1042 * Sysfs struct
1043 */
a5538e53
MCC
1044
1045
/*
 * Attributes of the "inject_addrmatch" sysfs group; each selects one field
 * of the error-injection address-match mask.  The array is terminated by a
 * NULL-named sentinel entry.
 */
static struct mcidev_sysfs_attribute i7core_addrmatch_attrs[] = {
	ATTR_ADDR_MATCH(channel),
	ATTR_ADDR_MATCH(dimm),
	ATTR_ADDR_MATCH(rank),
	ATTR_ADDR_MATCH(bank),
	ATTR_ADDR_MATCH(page),
	ATTR_ADDR_MATCH(col),
	{ .attr = { .name = NULL } }
};
1055
a5538e53
MCC
/* sysfs group exposing the address-match attributes above */
static struct mcidev_sysfs_group i7core_inject_addrmatch = {
	.name  = "inject_addrmatch",
	.mcidev_attr = i7core_addrmatch_attrs,
};
1060
f338d736
MCC
1061static struct mcidev_sysfs_attribute i7core_udimm_counters_attrs[] = {
1062 ATTR_COUNTER(0),
1063 ATTR_COUNTER(1),
1064 ATTR_COUNTER(2),
1065};
1066
/*
 * sysfs group "all_channel_counts"; plugged into the reserved slot of
 * i7core_sysfs_attrs by mci_bind_devs() when the memory is unregistered.
 */
static struct mcidev_sysfs_group i7core_udimm_counters = {
	.name = "all_channel_counts",
	.mcidev_attr = i7core_udimm_counters_attrs,
};
1071
a5538e53 1072static struct mcidev_sysfs_attribute i7core_sysfs_attrs[] = {
194a40fe
MCC
1073 {
1074 .attr = {
1075 .name = "inject_section",
1076 .mode = (S_IRUGO | S_IWUSR)
1077 },
1078 .show = i7core_inject_section_show,
1079 .store = i7core_inject_section_store,
1080 }, {
1081 .attr = {
1082 .name = "inject_type",
1083 .mode = (S_IRUGO | S_IWUSR)
1084 },
1085 .show = i7core_inject_type_show,
1086 .store = i7core_inject_type_store,
1087 }, {
1088 .attr = {
1089 .name = "inject_eccmask",
1090 .mode = (S_IRUGO | S_IWUSR)
1091 },
1092 .show = i7core_inject_eccmask_show,
1093 .store = i7core_inject_eccmask_store,
1094 }, {
a5538e53 1095 .grp = &i7core_inject_addrmatch,
194a40fe
MCC
1096 }, {
1097 .attr = {
1098 .name = "inject_enable",
1099 .mode = (S_IRUGO | S_IWUSR)
1100 },
1101 .show = i7core_inject_enable_show,
1102 .store = i7core_inject_enable_store,
1103 },
f338d736 1104 { .attr = { .name = NULL } }, /* Reserved for udimm counters */
42538680 1105 { .attr = { .name = NULL } }
194a40fe
MCC
1106};
1107
a0c36a1f
MCC
1108/****************************************************************************
1109 Device initialization routines: put/get, init/exit
1110 ****************************************************************************/
1111
1112/*
1113 * i7core_put_devices 'put' all the devices that we have
1114 * reserved via 'get'
1115 */
13d6e9b6 1116static void i7core_put_devices(struct i7core_dev *i7core_dev)
a0c36a1f 1117{
13d6e9b6 1118 int i;
a0c36a1f 1119
22e6bcbd 1120 debugf0(__FILE__ ": %s()\n", __func__);
de06eeef 1121 for (i = 0; i < i7core_dev->n_devs; i++) {
22e6bcbd
MCC
1122 struct pci_dev *pdev = i7core_dev->pdev[i];
1123 if (!pdev)
1124 continue;
1125 debugf0("Removing dev %02x:%02x.%d\n",
1126 pdev->bus->number,
1127 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1128 pci_dev_put(pdev);
1129 }
13d6e9b6 1130 kfree(i7core_dev->pdev);
22e6bcbd 1131 list_del(&i7core_dev->list);
13d6e9b6
MCC
1132 kfree(i7core_dev);
1133}
66607706 1134
13d6e9b6
MCC
1135static void i7core_put_all_devices(void)
1136{
42538680 1137 struct i7core_dev *i7core_dev, *tmp;
13d6e9b6 1138
42538680 1139 list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list)
13d6e9b6 1140 i7core_put_devices(i7core_dev);
a0c36a1f
MCC
1141}
1142
de06eeef 1143static void i7core_xeon_pci_fixup(int dev_id)
bc2d7245
KM
1144{
1145 struct pci_dev *pdev = NULL;
1146 int i;
1147 /*
1148 * On Xeon 55xx, the Intel Quckpath Arch Generic Non-core pci buses
1149 * aren't announced by acpi. So, we need to use a legacy scan probing
1150 * to detect them
1151 */
de06eeef 1152 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, dev_id, NULL);
bc2d7245 1153 if (unlikely(!pdev)) {
f4742949 1154 for (i = 0; i < MAX_SOCKET_BUSES; i++)
bc2d7245
KM
1155 pcibios_scan_specific_bus(255-i);
1156 }
1157}
1158
a0c36a1f
MCC
1159/*
1160 * i7core_get_devices Find and perform 'get' operation on the MCH's
1161 * device/functions we want to reference for this driver
1162 *
1163 * Need to 'get' device 16 func 1 and func 2
1164 */
de06eeef
MCC
1165int i7core_get_onedevice(struct pci_dev **prev, int devno,
1166 struct pci_id_descr *dev_descr, unsigned n_devs)
a0c36a1f 1167{
66607706
MCC
1168 struct i7core_dev *i7core_dev;
1169
8f331907 1170 struct pci_dev *pdev = NULL;
67166af4
MCC
1171 u8 bus = 0;
1172 u8 socket = 0;
a0c36a1f 1173
c77720b9 1174 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
de06eeef 1175 dev_descr->dev_id, *prev);
c77720b9 1176
c77720b9
MCC
1177 /*
1178 * On Xeon 55xx, the Intel Quckpath Arch Generic Non-core regs
1179 * is at addr 8086:2c40, instead of 8086:2c41. So, we need
1180 * to probe for the alternate address in case of failure
1181 */
de06eeef 1182 if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev)
c77720b9 1183 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
fd382654 1184 PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev);
d1fd4fb6 1185
c77720b9
MCC
1186 if (!pdev) {
1187 if (*prev) {
1188 *prev = pdev;
1189 return 0;
d1fd4fb6
MCC
1190 }
1191
de06eeef 1192 if (dev_descr->optional)
c77720b9 1193 return 0;
310cbb72 1194
c77720b9
MCC
1195 i7core_printk(KERN_ERR,
1196 "Device not found: dev %02x.%d PCI ID %04x:%04x\n",
de06eeef
MCC
1197 dev_descr->dev, dev_descr->func,
1198 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
67166af4 1199
c77720b9
MCC
1200 /* End of list, leave */
1201 return -ENODEV;
1202 }
1203 bus = pdev->bus->number;
67166af4 1204
c77720b9
MCC
1205 if (bus == 0x3f)
1206 socket = 0;
1207 else
1208 socket = 255 - bus;
1209
66607706
MCC
1210 i7core_dev = get_i7core_dev(socket);
1211 if (!i7core_dev) {
1212 i7core_dev = kzalloc(sizeof(*i7core_dev), GFP_KERNEL);
1213 if (!i7core_dev)
1214 return -ENOMEM;
de06eeef 1215 i7core_dev->pdev = kzalloc(sizeof(*i7core_dev->pdev) * n_devs,
66607706
MCC
1216 GFP_KERNEL);
1217 if (!i7core_dev->pdev)
1218 return -ENOMEM;
1219 i7core_dev->socket = socket;
de06eeef 1220 i7core_dev->n_devs = n_devs;
66607706 1221 list_add_tail(&i7core_dev->list, &i7core_edac_list);
c77720b9 1222 }
67166af4 1223
66607706 1224 if (i7core_dev->pdev[devno]) {
c77720b9
MCC
1225 i7core_printk(KERN_ERR,
1226 "Duplicated device for "
1227 "dev %02x:%02x.%d PCI ID %04x:%04x\n",
de06eeef
MCC
1228 bus, dev_descr->dev, dev_descr->func,
1229 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
c77720b9
MCC
1230 pci_dev_put(pdev);
1231 return -ENODEV;
1232 }
67166af4 1233
66607706 1234 i7core_dev->pdev[devno] = pdev;
c77720b9
MCC
1235
1236 /* Sanity check */
de06eeef
MCC
1237 if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev ||
1238 PCI_FUNC(pdev->devfn) != dev_descr->func)) {
c77720b9
MCC
1239 i7core_printk(KERN_ERR,
1240 "Device PCI ID %04x:%04x "
1241 "has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
de06eeef 1242 PCI_VENDOR_ID_INTEL, dev_descr->dev_id,
c77720b9 1243 bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
de06eeef 1244 bus, dev_descr->dev, dev_descr->func);
c77720b9
MCC
1245 return -ENODEV;
1246 }
ef708b53 1247
c77720b9
MCC
1248 /* Be sure that the device is enabled */
1249 if (unlikely(pci_enable_device(pdev) < 0)) {
1250 i7core_printk(KERN_ERR,
1251 "Couldn't enable "
1252 "dev %02x:%02x.%d PCI ID %04x:%04x\n",
de06eeef
MCC
1253 bus, dev_descr->dev, dev_descr->func,
1254 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
c77720b9
MCC
1255 return -ENODEV;
1256 }
ef708b53 1257
d4c27795 1258 debugf0("Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n",
de06eeef
MCC
1259 socket, bus, dev_descr->dev,
1260 dev_descr->func,
1261 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
8f331907 1262
c77720b9 1263 *prev = pdev;
ef708b53 1264
c77720b9
MCC
1265 return 0;
1266}
a0c36a1f 1267
de06eeef 1268static int i7core_get_devices(struct pci_id_descr dev_descr[], unsigned n_devs)
c77720b9 1269{
de06eeef 1270 int i, rc;
c77720b9 1271 struct pci_dev *pdev = NULL;
ef708b53 1272
de06eeef 1273 for (i = 0; i < n_devs; i++) {
c77720b9
MCC
1274 pdev = NULL;
1275 do {
de06eeef
MCC
1276 rc = i7core_get_onedevice(&pdev, i, &dev_descr[i],
1277 n_devs);
1278 if (rc < 0) {
13d6e9b6 1279 i7core_put_all_devices();
c77720b9
MCC
1280 return -ENODEV;
1281 }
1282 } while (pdev);
1283 }
66607706 1284
ef708b53 1285 return 0;
ef708b53
MCC
1286}
1287
f4742949
MCC
/*
 * mci_bind_devs - cache the socket's PCI devices inside the mci private data
 *
 * Walks i7core_dev->pdev[] and stores each device into the tables used by
 * the rest of the driver, indexed by PCI slot/function:
 *   slot 3              -> pvt->pci_mcr[func]
 *   slots 4..4+NUM_CHANS-> pvt->pci_ch[slot - 4][func]
 *   slot 0, func 0      -> pvt->pci_noncore
 * Any other address is rejected with -EINVAL.
 *
 * The presence of device 3, function 2 marks registered (RDIMM) memory;
 * for unregistered memory the udimm counters group is plugged into the
 * reserved slot of the static i7core_sysfs_attrs table.
 */
static int mci_bind_devs(struct mem_ctl_info *mci,
			 struct i7core_dev *i7core_dev)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	struct pci_dev *pdev;
	int i, func, slot;

	/* Associates i7core_dev and mci for future usage */
	pvt->i7core_dev = i7core_dev;
	i7core_dev->mci = mci;

	pvt->is_registered = 0;
	for (i = 0; i < i7core_dev->n_devs; i++) {
		pdev = i7core_dev->pdev[i];
		if (!pdev)
			continue;

		func = PCI_FUNC(pdev->devfn);
		slot = PCI_SLOT(pdev->devfn);
		if (slot == 3) {
			if (unlikely(func > MAX_MCR_FUNC))
				goto error;
			pvt->pci_mcr[func] = pdev;
		} else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
			if (unlikely(func > MAX_CHAN_FUNC))
				goto error;
			pvt->pci_ch[slot - 4][func] = pdev;
		} else if (!slot && !func)
			pvt->pci_noncore = pdev;
		else
			goto error;

		debugf0("Associated fn %d.%d, dev = %p, socket %d\n",
			PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
			pdev, i7core_dev->socket);

		/* Function 3.2 only exists on registered-memory setups */
		if (PCI_SLOT(pdev->devfn) == 3 &&
			PCI_FUNC(pdev->devfn) == 2)
			pvt->is_registered = 1;
	}

	/*
	 * Add extra nodes to count errors on udimm
	 * For registered memory, this is not needed, since the counters
	 * are already displayed at the standard locations
	 */
	if (!pvt->is_registered)
		i7core_sysfs_attrs[ARRAY_SIZE(i7core_sysfs_attrs)-2].grp =
			&i7core_udimm_counters;

	return 0;

error:
	i7core_printk(KERN_ERR, "Device %d, function %d "
		      "is out of the expected range\n",
		      slot, func);
	return -EINVAL;
}
1346
442305b1
MCC
1347/****************************************************************************
1348 Error check routines
1349 ****************************************************************************/
f4742949 1350static void i7core_rdimm_update_csrow(struct mem_ctl_info *mci,
b4e8f0b6
MCC
1351 int chan, int dimm, int add)
1352{
1353 char *msg;
1354 struct i7core_pvt *pvt = mci->pvt_info;
f4742949 1355 int row = pvt->csrow_map[chan][dimm], i;
b4e8f0b6
MCC
1356
1357 for (i = 0; i < add; i++) {
1358 msg = kasprintf(GFP_KERNEL, "Corrected error "
f4742949
MCC
1359 "(Socket=%d channel=%d dimm=%d)",
1360 pvt->i7core_dev->socket, chan, dimm);
b4e8f0b6
MCC
1361
1362 edac_mc_handle_fbd_ce(mci, row, 0, msg);
1363 kfree (msg);
1364 }
1365}
1366
/*
 * i7core_rdimm_update_ce_count - fold fresh RDIMM counter readings into the
 * running per-dimm corrected-error counts for channel @chan.
 *
 * @new0..@new2 are the latest raw hardware counter values for dimms 0..2.
 * Deltas against the previously stored readings are computed, corrected
 * for counter wrap-around (+0x7fff on underflow, i.e. 15-bit counters),
 * accumulated, and forwarded to the EDAC core via
 * i7core_rdimm_update_csrow().  The very first call only snapshots the
 * counters (gated by ce_count_available), so stale boot-time counts are
 * not reported as new errors.
 */
static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
				      int chan, int new0, int new1, int new2)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	int add0 = 0, add1 = 0, add2 = 0;
	/* Updates CE counters if it is not the first time here */
	if (pvt->ce_count_available) {
		/* Updates CE counters */

		add2 = new2 - pvt->rdimm_last_ce_count[chan][2];
		add1 = new1 - pvt->rdimm_last_ce_count[chan][1];
		add0 = new0 - pvt->rdimm_last_ce_count[chan][0];

		/* Undo 15-bit hardware counter wrap-around */
		if (add2 < 0)
			add2 += 0x7fff;
		pvt->rdimm_ce_count[chan][2] += add2;

		if (add1 < 0)
			add1 += 0x7fff;
		pvt->rdimm_ce_count[chan][1] += add1;

		if (add0 < 0)
			add0 += 0x7fff;
		pvt->rdimm_ce_count[chan][0] += add0;
	} else
		pvt->ce_count_available = 1;

	/* Store the new values */
	pvt->rdimm_last_ce_count[chan][2] = new2;
	pvt->rdimm_last_ce_count[chan][1] = new1;
	pvt->rdimm_last_ce_count[chan][0] = new0;

	/*updated the edac core */
	if (add0 != 0)
		i7core_rdimm_update_csrow(mci, chan, 0, add0);
	if (add1 != 0)
		i7core_rdimm_update_csrow(mci, chan, 1, add1);
	if (add2 != 0)
		i7core_rdimm_update_csrow(mci, chan, 2, add2);

}
1408
f4742949 1409static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci)
b4e8f0b6
MCC
1410{
1411 struct i7core_pvt *pvt = mci->pvt_info;
1412 u32 rcv[3][2];
1413 int i, new0, new1, new2;
1414
1415 /*Read DEV 3: FUN 2: MC_COR_ECC_CNT regs directly*/
f4742949 1416 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_0,
b4e8f0b6 1417 &rcv[0][0]);
f4742949 1418 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_1,
b4e8f0b6 1419 &rcv[0][1]);
f4742949 1420 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_2,
b4e8f0b6 1421 &rcv[1][0]);
f4742949 1422 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_3,
b4e8f0b6 1423 &rcv[1][1]);
f4742949 1424 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_4,
b4e8f0b6 1425 &rcv[2][0]);
f4742949 1426 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_5,
b4e8f0b6
MCC
1427 &rcv[2][1]);
1428 for (i = 0 ; i < 3; i++) {
1429 debugf3("MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
1430 (i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]);
1431 /*if the channel has 3 dimms*/
f4742949 1432 if (pvt->channel[i].dimms > 2) {
b4e8f0b6
MCC
1433 new0 = DIMM_BOT_COR_ERR(rcv[i][0]);
1434 new1 = DIMM_TOP_COR_ERR(rcv[i][0]);
1435 new2 = DIMM_BOT_COR_ERR(rcv[i][1]);
1436 } else {
1437 new0 = DIMM_TOP_COR_ERR(rcv[i][0]) +
1438 DIMM_BOT_COR_ERR(rcv[i][0]);
1439 new1 = DIMM_TOP_COR_ERR(rcv[i][1]) +
1440 DIMM_BOT_COR_ERR(rcv[i][1]);
1441 new2 = 0;
1442 }
1443
f4742949 1444 i7core_rdimm_update_ce_count(mci, i, new0, new1, new2);
b4e8f0b6
MCC
1445 }
1446}
442305b1
MCC
1447
1448/* This function is based on the device 3 function 4 registers as described on:
1449 * Intel Xeon Processor 5500 Series Datasheet Volume 2
1450 * http://www.intel.com/Assets/PDF/datasheet/321322.pdf
1451 * also available at:
1452 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
1453 */
f4742949 1454static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci)
442305b1
MCC
1455{
1456 struct i7core_pvt *pvt = mci->pvt_info;
1457 u32 rcv1, rcv0;
1458 int new0, new1, new2;
1459
f4742949 1460 if (!pvt->pci_mcr[4]) {
b990538a 1461 debugf0("%s MCR registers not found\n", __func__);
442305b1
MCC
1462 return;
1463 }
1464
b4e8f0b6 1465 /* Corrected test errors */
f4742949
MCC
1466 pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV1, &rcv1);
1467 pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV0, &rcv0);
442305b1
MCC
1468
1469 /* Store the new values */
1470 new2 = DIMM2_COR_ERR(rcv1);
1471 new1 = DIMM1_COR_ERR(rcv0);
1472 new0 = DIMM0_COR_ERR(rcv0);
1473
442305b1 1474 /* Updates CE counters if it is not the first time here */
f4742949 1475 if (pvt->ce_count_available) {
442305b1
MCC
1476 /* Updates CE counters */
1477 int add0, add1, add2;
1478
f4742949
MCC
1479 add2 = new2 - pvt->udimm_last_ce_count[2];
1480 add1 = new1 - pvt->udimm_last_ce_count[1];
1481 add0 = new0 - pvt->udimm_last_ce_count[0];
442305b1
MCC
1482
1483 if (add2 < 0)
1484 add2 += 0x7fff;
f4742949 1485 pvt->udimm_ce_count[2] += add2;
442305b1
MCC
1486
1487 if (add1 < 0)
1488 add1 += 0x7fff;
f4742949 1489 pvt->udimm_ce_count[1] += add1;
442305b1
MCC
1490
1491 if (add0 < 0)
1492 add0 += 0x7fff;
f4742949 1493 pvt->udimm_ce_count[0] += add0;
b4e8f0b6
MCC
1494
1495 if (add0 | add1 | add2)
1496 i7core_printk(KERN_ERR, "New Corrected error(s): "
1497 "dimm0: +%d, dimm1: +%d, dimm2 +%d\n",
1498 add0, add1, add2);
442305b1 1499 } else
f4742949 1500 pvt->ce_count_available = 1;
442305b1
MCC
1501
1502 /* Store the new values */
f4742949
MCC
1503 pvt->udimm_last_ce_count[2] = new2;
1504 pvt->udimm_last_ce_count[1] = new1;
1505 pvt->udimm_last_ce_count[0] = new0;
442305b1
MCC
1506}
1507
8a2f118e
MCC
1508/*
1509 * According with tables E-11 and E-12 of chapter E.3.3 of Intel 64 and IA-32
1510 * Architectures Software Developer’s Manual Volume 3B.
f237fcf2
MCC
1511 * Nehalem are defined as family 0x06, model 0x1a
1512 *
1513 * The MCA registers used here are the following ones:
8a2f118e 1514 * struct mce field MCA Register
f237fcf2
MCC
1515 * m->status MSR_IA32_MC8_STATUS
1516 * m->addr MSR_IA32_MC8_ADDR
1517 * m->misc MSR_IA32_MC8_MISC
8a2f118e
MCC
1518 * In the case of Nehalem, the error information is masked at .status and .misc
1519 * fields
1520 */
d5381642
MCC
1521static void i7core_mce_output_error(struct mem_ctl_info *mci,
1522 struct mce *m)
1523{
b4e8f0b6 1524 struct i7core_pvt *pvt = mci->pvt_info;
a639539f 1525 char *type, *optype, *err, *msg;
8a2f118e 1526 unsigned long error = m->status & 0x1ff0000l;
a639539f 1527 u32 optypenum = (m->status >> 4) & 0x07;
8a2f118e
MCC
1528 u32 core_err_cnt = (m->status >> 38) && 0x7fff;
1529 u32 dimm = (m->misc >> 16) & 0x3;
1530 u32 channel = (m->misc >> 18) & 0x3;
1531 u32 syndrome = m->misc >> 32;
1532 u32 errnum = find_first_bit(&error, 32);
b4e8f0b6 1533 int csrow;
8a2f118e 1534
c5d34528
MCC
1535 if (m->mcgstatus & 1)
1536 type = "FATAL";
1537 else
1538 type = "NON_FATAL";
1539
a639539f 1540 switch (optypenum) {
b990538a
MCC
1541 case 0:
1542 optype = "generic undef request";
1543 break;
1544 case 1:
1545 optype = "read error";
1546 break;
1547 case 2:
1548 optype = "write error";
1549 break;
1550 case 3:
1551 optype = "addr/cmd error";
1552 break;
1553 case 4:
1554 optype = "scrubbing error";
1555 break;
1556 default:
1557 optype = "reserved";
1558 break;
a639539f
MCC
1559 }
1560
8a2f118e
MCC
1561 switch (errnum) {
1562 case 16:
1563 err = "read ECC error";
1564 break;
1565 case 17:
1566 err = "RAS ECC error";
1567 break;
1568 case 18:
1569 err = "write parity error";
1570 break;
1571 case 19:
1572 err = "redundacy loss";
1573 break;
1574 case 20:
1575 err = "reserved";
1576 break;
1577 case 21:
1578 err = "memory range error";
1579 break;
1580 case 22:
1581 err = "RTID out of range";
1582 break;
1583 case 23:
1584 err = "address parity error";
1585 break;
1586 case 24:
1587 err = "byte enable parity error";
1588 break;
1589 default:
1590 err = "unknown";
d5381642 1591 }
d5381642 1592
f237fcf2 1593 /* FIXME: should convert addr into bank and rank information */
8a2f118e 1594 msg = kasprintf(GFP_ATOMIC,
f4742949 1595 "%s (addr = 0x%08llx, cpu=%d, Dimm=%d, Channel=%d, "
a639539f 1596 "syndrome=0x%08x, count=%d, Err=%08llx:%08llx (%s: %s))\n",
f4742949 1597 type, (long long) m->addr, m->cpu, dimm, channel,
a639539f
MCC
1598 syndrome, core_err_cnt, (long long)m->status,
1599 (long long)m->misc, optype, err);
8a2f118e
MCC
1600
1601 debugf0("%s", msg);
d5381642 1602
f4742949 1603 csrow = pvt->csrow_map[channel][dimm];
b4e8f0b6 1604
d5381642 1605 /* Call the helper to output message */
b4e8f0b6
MCC
1606 if (m->mcgstatus & 1)
1607 edac_mc_handle_fbd_ue(mci, csrow, 0,
1608 0 /* FIXME: should be channel here */, msg);
f4742949 1609 else if (!pvt->is_registered)
b4e8f0b6
MCC
1610 edac_mc_handle_fbd_ce(mci, csrow,
1611 0 /* FIXME: should be channel here */, msg);
8a2f118e
MCC
1612
1613 kfree(msg);
d5381642
MCC
1614}
1615
87d1d272
MCC
1616/*
1617 * i7core_check_error Retrieve and process errors reported by the
1618 * hardware. Called by the Core module.
1619 */
static void i7core_check_error(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	int i;
	unsigned count = 0;
	struct mce *m;

	/*
	 * MCE first step: Copy all mce errors into a temporary buffer
	 * We use a double buffering here, to reduce the risk of
	 * loosing an error.
	 */
	smp_rmb();
	/* Distance between producer (mce_out) and consumer (mce_in) */
	count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
		% MCE_LOG_LEN;
	if (!count)
		return;

	/* Drain in up to two chunks when the data wraps around the ring */
	m = pvt->mce_outentry;
	if (pvt->mce_in + count > MCE_LOG_LEN) {
		unsigned l = MCE_LOG_LEN - pvt->mce_in;

		memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
		smp_wmb();	/* publish the copy before advancing mce_in */
		pvt->mce_in = 0;
		count -= l;
		m += l;
	}
	/*
	 * NOTE(review): in the wrap-around case `count` was reduced by `l`,
	 * so the parse loop below appears to process only the second chunk
	 * and skip the first `l` copied entries -- verify.
	 */
	memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
	smp_wmb();
	pvt->mce_in += count;

	smp_rmb();
	if (pvt->mce_overrun) {
		i7core_printk(KERN_ERR, "Lost %d memory errors\n",
			      pvt->mce_overrun);
		smp_wmb();
		pvt->mce_overrun = 0;
	}

	/*
	 * MCE second step: parse errors and display
	 */
	for (i = 0; i < count; i++)
		i7core_mce_output_error(mci, &pvt->mce_outentry[i]);

	/*
	 * Now, let's increment CE error counts
	 */
	if (!pvt->is_registered)
		i7core_udimm_check_mc_ecc_err(mci);
	else
		i7core_rdimm_check_mc_ecc_err(mci);
}
1674
d5381642
MCC
1675/*
1676 * i7core_mce_check_error Replicates mcelog routine to get errors
1677 * This routine simply queues mcelog errors, and
1678 * return. The error itself should be handled later
1679 * by i7core_check_error.
6e103be1
MCC
1680 * WARNING: As this routine should be called at NMI time, extra care should
1681 * be taken to avoid deadlocks, and to be as fast as possible.
d5381642
MCC
1682 */
static int i7core_mce_check_error(void *priv, struct mce *mce)
{
	struct mem_ctl_info *mci = priv;
	struct i7core_pvt *pvt = mci->pvt_info;

	/*
	 * Just let mcelog handle it if the error is
	 * outside the memory controller
	 */
	if (((mce->status & 0xffff) >> 7) != 1)
		return 0;

	/* Bank 8 registers are the only ones that we know how to handle */
	if (mce->bank != 8)
		return 0;

	/* Only handle if it is the right mc controller */
	if (cpu_data(mce->cpu).phys_proc_id != pvt->i7core_dev->socket)
		return 0;

	/* Ring full: count the lost event and drop it (lock-free, NMI-safe) */
	smp_rmb();
	if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
		smp_wmb();
		pvt->mce_overrun++;
		return 0;
	}

	/* Copy memory error at the ringbuffer */
	memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
	smp_wmb();	/* entry must be visible before the new mce_out */
	pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;

	/* Handle fatal errors immediately */
	if (mce->mcgstatus & 1)
		i7core_check_error(mci);

	/* Advice mcelog that the error were handled */
	return 1;
}
1722
f4742949
MCC
1723static int i7core_register_mci(struct i7core_dev *i7core_dev,
1724 int num_channels, int num_csrows)
a0c36a1f
MCC
1725{
1726 struct mem_ctl_info *mci;
1727 struct i7core_pvt *pvt;
ba6c5c62 1728 int csrow = 0;
f4742949 1729 int rc;
a0c36a1f 1730
a0c36a1f 1731 /* allocate a new MC control structure */
d4c27795
MCC
1732 mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels,
1733 i7core_dev->socket);
f4742949
MCC
1734 if (unlikely(!mci))
1735 return -ENOMEM;
a0c36a1f
MCC
1736
1737 debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci);
1738
f4742949
MCC
1739 /* record ptr to the generic device */
1740 mci->dev = &i7core_dev->pdev[0]->dev;
1741
a0c36a1f 1742 pvt = mci->pvt_info;
ef708b53 1743 memset(pvt, 0, sizeof(*pvt));
67166af4 1744
41fcb7fe
MCC
1745 /*
1746 * FIXME: how to handle RDDR3 at MCI level? It is possible to have
1747 * Mixed RDDR3/UDDR3 with Nehalem, provided that they are on different
1748 * memory channels
1749 */
1750 mci->mtype_cap = MEM_FLAG_DDR3;
a0c36a1f
MCC
1751 mci->edac_ctl_cap = EDAC_FLAG_NONE;
1752 mci->edac_cap = EDAC_FLAG_NONE;
1753 mci->mod_name = "i7core_edac.c";
1754 mci->mod_ver = I7CORE_REVISION;
f4742949
MCC
1755 mci->ctl_name = kasprintf(GFP_KERNEL, "i7 core #%d",
1756 i7core_dev->socket);
1757 mci->dev_name = pci_name(i7core_dev->pdev[0]);
a0c36a1f 1758 mci->ctl_page_to_phys = NULL;
a5538e53 1759 mci->mc_driver_sysfs_attributes = i7core_sysfs_attrs;
87d1d272
MCC
1760 /* Set the function pointer to an actual operation function */
1761 mci->edac_check = i7core_check_error;
8f331907 1762
ef708b53 1763 /* Store pci devices at mci for faster access */
f4742949 1764 rc = mci_bind_devs(mci, i7core_dev);
41fcb7fe 1765 if (unlikely(rc < 0))
f4742949 1766 goto fail;
ef708b53
MCC
1767
1768 /* Get dimm basic config */
f4742949 1769 get_dimm_config(mci, &csrow);
ef708b53 1770
a0c36a1f 1771 /* add this new MC control structure to EDAC's list of MCs */
b7c76151 1772 if (unlikely(edac_mc_add_mc(mci))) {
a0c36a1f
MCC
1773 debugf0("MC: " __FILE__
1774 ": %s(): failed edac_mc_add_mc()\n", __func__);
1775 /* FIXME: perhaps some code should go here that disables error
1776 * reporting if we just enabled it
1777 */
b7c76151
MCC
1778
1779 rc = -EINVAL;
f4742949 1780 goto fail;
a0c36a1f
MCC
1781 }
1782
1783 /* allocating generic PCI control info */
f4742949
MCC
1784 i7core_pci = edac_pci_create_generic_ctl(&i7core_dev->pdev[0]->dev,
1785 EDAC_MOD_STR);
41fcb7fe 1786 if (unlikely(!i7core_pci)) {
a0c36a1f
MCC
1787 printk(KERN_WARNING
1788 "%s(): Unable to create PCI control\n",
1789 __func__);
1790 printk(KERN_WARNING
1791 "%s(): PCI error report via EDAC not setup\n",
1792 __func__);
1793 }
1794
194a40fe 1795 /* Default error mask is any memory */
ef708b53 1796 pvt->inject.channel = 0;
194a40fe
MCC
1797 pvt->inject.dimm = -1;
1798 pvt->inject.rank = -1;
1799 pvt->inject.bank = -1;
1800 pvt->inject.page = -1;
1801 pvt->inject.col = -1;
1802
d5381642 1803 /* Registers on edac_mce in order to receive memory errors */
c5d34528 1804 pvt->edac_mce.priv = mci;
d5381642 1805 pvt->edac_mce.check_error = i7core_mce_check_error;
d5381642
MCC
1806
1807 rc = edac_mce_register(&pvt->edac_mce);
b990538a 1808 if (unlikely(rc < 0)) {
d5381642
MCC
1809 debugf0("MC: " __FILE__
1810 ": %s(): failed edac_mce_register()\n", __func__);
f4742949
MCC
1811 }
1812
1813fail:
1814 edac_mc_free(mci);
1815 return rc;
1816}
1817
1818/*
1819 * i7core_probe Probe for ONE instance of device to see if it is
1820 * present.
1821 * return:
1822 * 0 for FOUND a device
1823 * < 0 for error code
1824 */
static int __devinit i7core_probe(struct pci_dev *pdev,
				  const struct pci_device_id *id)
{
	int dev_idx = id->driver_data;
	int rc;
	struct i7core_dev *i7core_dev;

	/*
	 * All memory controllers are allocated at the first pass.
	 */
	if (unlikely(dev_idx >= 1))
		return -EINVAL;

	/* get the pci devices we want to reserve for our use */
	mutex_lock(&i7core_edac_lock);

	rc = i7core_get_devices(pci_dev_descr_i7core,
				ARRAY_SIZE(pci_dev_descr_i7core));
	if (unlikely(rc < 0))
		goto fail0;

	/* Register one MC instance per detected socket */
	list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
		int channels;
		int csrows;

		/* Check the number of active and not disabled channels */
		rc = i7core_get_active_channels(i7core_dev->socket,
						&channels, &csrows);
		if (unlikely(rc < 0))
			goto fail1;

		rc = i7core_register_mci(i7core_dev, channels, csrows);
		if (unlikely(rc < 0))
			goto fail1;
	}

	i7core_printk(KERN_INFO, "Driver loaded.\n");

	mutex_unlock(&i7core_edac_lock);
	return 0;

fail1:
	/* Drop every PCI reference taken by i7core_get_devices() */
	i7core_put_all_devices();
fail0:
	mutex_unlock(&i7core_edac_lock);
	return rc;
}
1872
1873/*
1874 * i7core_remove destructor for one instance of device
1875 *
1876 */
static void __devexit i7core_remove(struct pci_dev *pdev)
{
	struct mem_ctl_info *mci;
	struct i7core_dev *i7core_dev, *tmp;

	debugf0(__FILE__ ": %s()\n", __func__);

	if (i7core_pci)
		edac_pci_release_generic_ctl(i7core_pci);

	/*
	 * we have a trouble here: pdev value for removal will be wrong, since
	 * it will point to the X58 register used to detect that the machine
	 * is a Nehalem or upper design. However, due to the way several PCI
	 * devices are grouped together to provide MC functionality, we need
	 * to use a different method for releasing the devices
	 */

	mutex_lock(&i7core_edac_lock);
	/* Tear down one MC instance per socket descriptor */
	list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list) {
		mci = edac_mc_del_mc(&i7core_dev->pdev[0]->dev);
		if (mci) {
			struct i7core_pvt *pvt = mci->pvt_info;

			i7core_dev = pvt->i7core_dev;
			edac_mce_unregister(&pvt->edac_mce);
			kfree(mci->ctl_name);
			edac_mc_free(mci);
			i7core_put_devices(i7core_dev);
		} else {
			i7core_printk(KERN_ERR,
				      "Couldn't find mci for socket %d\n",
				      i7core_dev->socket);
		}
	}
	mutex_unlock(&i7core_edac_lock);
}
1914
a0c36a1f
MCC
1915MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);
1916
1917/*
1918 * i7core_driver pci_driver structure for this module
1919 *
1920 */
/* PCI driver glue: one probe for all sockets, remove tears everything down */
static struct pci_driver i7core_driver = {
	.name     = "i7core_edac",
	.probe    = i7core_probe,
	.remove   = __devexit_p(i7core_remove),
	.id_table = i7core_pci_tbl,
};
1927
1928/*
1929 * i7core_init Module entry function
1930 * Try to initialize this module for its devices
1931 */
1932static int __init i7core_init(void)
1933{
1934 int pci_rc;
1935
1936 debugf2("MC: " __FILE__ ": %s()\n", __func__);
1937
1938 /* Ensure that the OPSTATE is set correctly for POLL or NMI */
1939 opstate_init();
1940
de06eeef 1941 i7core_xeon_pci_fixup(pci_dev_descr_i7core[0].dev_id);
bc2d7245 1942
a0c36a1f
MCC
1943 pci_rc = pci_register_driver(&i7core_driver);
1944
3ef288a9
MCC
1945 if (pci_rc >= 0)
1946 return 0;
1947
1948 i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",
1949 pci_rc);
1950
1951 return pci_rc;
a0c36a1f
MCC
1952}
1953
1954/*
1955 * i7core_exit() Module exit function
1956 * Unregister the driver
1957 */
/* Module exit point: unregister the PCI driver (triggers i7core_remove). */
static void __exit i7core_exit(void)
{
	debugf2("MC: " __FILE__ ": %s()\n", __func__);
	pci_unregister_driver(&i7core_driver);
}
1963
1964module_init(i7core_init);
1965module_exit(i7core_exit);
1966
1967MODULE_LICENSE("GPL");
1968MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
1969MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
1970MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
1971 I7CORE_REVISION);
1972
1973module_param(edac_op_state, int, 0444);
1974MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
This page took 0.140151 seconds and 5 git commands to generate.