/* Intel i7 core Memory Controller kernel module (Nehalem)
 *
 * This file may be distributed under the terms of the
 * GNU General Public License version 2 only.
 *
 * Copyright (c) 2009 by:
 *	 Mauro Carvalho Chehab <mchehab@redhat.com>
 *
 * Red Hat Inc. http://www.redhat.com
 *
 * Forked and adapted from the i5400_edac driver
 *
 * Based on the following public Intel datasheets:
 * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
 * Datasheet, Volume 2:
 *	http://download.intel.com/design/processor/datashts/320835.pdf
 * Intel Xeon Processor 5500 Series Datasheet Volume 2
 *	http://www.intel.com/Assets/PDF/datasheet/321322.pdf
 * also available at:
 *	http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
 */
#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/pci_ids.h>
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/edac.h>
#include <linux/mmzone.h>
#include <linux/edac_mce.h>
#include <linux/smp.h>
#include <asm/processor.h>

#include "edac_core.h"
/*
 * This is used for Nehalem-EP and Nehalem-EX devices, where the non-core
 * registers start at bus 255, and are not reported by BIOS.
 * We currently find devices with only 2 sockets. In order to support more
 * QPI (QuickPath Interconnect) devices, just increment this number.
 */
#define MAX_SOCKET_BUSES	2
/*
 * Alter this version for the module when modifications are made
 */
#define I7CORE_REVISION    " Ver: 1.0.0 " __DATE__
#define EDAC_MOD_STR       "i7core_edac"

#define i7core_printk(level, fmt, arg...)			\
	edac_printk(level, "i7core", fmt, ##arg)

#define i7core_mc_printk(mci, level, fmt, arg...)		\
	edac_mc_chipset_printk(mci, level, "i7core", fmt, ##arg)
/*
 * i7core Memory Controller Registers
 */

/* OFFSETS for Device 0 Function 0 */
#define MC_CFG_CONTROL	0x90

/* OFFSETS for Device 3 Function 0 */
#define MC_CONTROL	0x48
#define MC_STATUS	0x4c
#define MC_MAX_DOD	0x64
/*
 * OFFSETS for Device 3 Function 4, as indicated in the Xeon 5500 datasheet:
 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
 */
#define MC_TEST_ERR_RCV1	0x60
  #define DIMM2_COR_ERR(r)	((r) & 0x7fff)

#define MC_TEST_ERR_RCV0	0x64
  #define DIMM1_COR_ERR(r)	(((r) >> 16) & 0x7fff)
  #define DIMM0_COR_ERR(r)	((r) & 0x7fff)
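/*
 * Each of the correctable-error fields above is a 15-bit counter:
 * MC_TEST_ERR_RCV0 packs the DIMM0 and DIMM1 counts, while the DIMM2
 * count lives in MC_TEST_ERR_RCV1. They are read by the udimm check
 * path (i7core_udimm_check_mc_ecc_err() below).
 */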
/* OFFSETS for Device 3 Function 2, as indicated in the Xeon 5500 datasheet */
#define MC_COR_ECC_CNT_0	0x80
#define MC_COR_ECC_CNT_1	0x84
#define MC_COR_ECC_CNT_2	0x88
#define MC_COR_ECC_CNT_3	0x8c
#define MC_COR_ECC_CNT_4	0x90
#define MC_COR_ECC_CNT_5	0x94

#define DIMM_TOP_COR_ERR(r)	(((r) >> 16) & 0x7fff)
#define DIMM_BOT_COR_ERR(r)	((r) & 0x7fff)
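/*
 * Each MC_COR_ECC_CNT register likewise packs two 15-bit counts: a
 * "bottom" count in bits 14:0 and a "top" count in bits 30:16. The six
 * registers cover two counts for each of the three channels, and are
 * read by the rdimm check path (i7core_rdimm_check_mc_ecc_err() below).
 */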
/* OFFSETS for Devices 4,5 and 6 Function 0 */

#define MC_CHANNEL_DIMM_INIT_PARAMS	0x58
  #define THREE_DIMMS_PRESENT		(1 << 24)
  #define SINGLE_QUAD_RANK_PRESENT	(1 << 23)
  #define QUAD_RANK_PRESENT		(1 << 22)
  #define REGISTERED_DIMM		(1 << 15)

#define MC_CHANNEL_MAPPER	0x60
  #define RDLCH(r, ch)		((((r) >> (3 + (ch * 6))) & 0x07) - 1)
  #define WRLCH(r, ch)		((((r) >> (ch * 6)) & 0x07) - 1)

#define MC_CHANNEL_RANK_PRESENT	0x7c
  #define RANK_PRESENT_MASK	0xffff

#define MC_CHANNEL_ADDR_MATCH	0xf0
#define MC_CHANNEL_ERROR_MASK	0xf8
#define MC_CHANNEL_ERROR_INJECT	0xfc
  #define INJECT_ADDR_PARITY	0x10
  #define INJECT_ECC		0x08
  #define MASK_CACHELINE	0x06
  #define MASK_FULL_CACHELINE	0x06
  #define MASK_MSB32_CACHELINE	0x04
  #define MASK_LSB32_CACHELINE	0x02
  #define NO_MASK_CACHELINE	0x00
  #define REPEAT_EN		0x01
/* OFFSETS for Devices 4,5 and 6 Function 1 */

#define MC_DOD_CH_DIMM0		0x48
#define MC_DOD_CH_DIMM1		0x4c
#define MC_DOD_CH_DIMM2		0x50
  #define RANKOFFSET_MASK	((1 << 12) | (1 << 11) | (1 << 10))
  #define RANKOFFSET(x)		((x & RANKOFFSET_MASK) >> 10)
  #define DIMM_PRESENT_MASK	(1 << 9)
  #define DIMM_PRESENT(x)	(((x) & DIMM_PRESENT_MASK) >> 9)
  #define MC_DOD_NUMBANK_MASK	((1 << 8) | (1 << 7))
  #define MC_DOD_NUMBANK(x)	(((x) & MC_DOD_NUMBANK_MASK) >> 7)
  #define MC_DOD_NUMRANK_MASK	((1 << 6) | (1 << 5))
  #define MC_DOD_NUMRANK(x)	(((x) & MC_DOD_NUMRANK_MASK) >> 5)
  #define MC_DOD_NUMROW_MASK	((1 << 4) | (1 << 3) | (1 << 2))
  #define MC_DOD_NUMROW(x)	(((x) & MC_DOD_NUMROW_MASK) >> 2)
  #define MC_DOD_NUMCOL_MASK	3
  #define MC_DOD_NUMCOL(x)	((x) & MC_DOD_NUMCOL_MASK)
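/*
 * Putting the masks above together, each MC_DOD_CH_DIMMx register
 * describes one DIMM as:
 *
 *	bits 12:10	rank offset
 *	bit      9	DIMM present
 *	bits   8:7	number of banks (encoded)
 *	bits   6:5	number of ranks (encoded)
 *	bits   4:2	number of rows (encoded)
 *	bits   1:0	number of columns (encoded)
 *
 * The numbank()/numrank()/numrow()/numcol() helpers below translate
 * the encoded fields into actual quantities.
 */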
#define MC_RANK_PRESENT		0x7c

#define MC_SAG_CH_0	0x80
#define MC_SAG_CH_1	0x84
#define MC_SAG_CH_2	0x88
#define MC_SAG_CH_3	0x8c
#define MC_SAG_CH_4	0x90
#define MC_SAG_CH_5	0x94
#define MC_SAG_CH_6	0x98
#define MC_SAG_CH_7	0x9c

#define MC_RIR_LIMIT_CH_0	0x40
#define MC_RIR_LIMIT_CH_1	0x44
#define MC_RIR_LIMIT_CH_2	0x48
#define MC_RIR_LIMIT_CH_3	0x4C
#define MC_RIR_LIMIT_CH_4	0x50
#define MC_RIR_LIMIT_CH_5	0x54
#define MC_RIR_LIMIT_CH_6	0x58
#define MC_RIR_LIMIT_CH_7	0x5C
#define MC_RIR_LIMIT_MASK	((1 << 10) - 1)

#define MC_RIR_WAY_CH		0x80
  #define MC_RIR_WAY_OFFSET_MASK	(((1 << 14) - 1) & ~0x7)
  #define MC_RIR_WAY_RANK_MASK		0x7
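/*
 * From the masks above, each MC_RIR_WAY_CH entry selects a rank in its
 * low three bits (MC_RIR_WAY_RANK_MASK) and carries the rank offset in
 * bits 13:3 (MC_RIR_WAY_OFFSET_MASK).
 */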
/*
 * i7core structs
 */

#define NUM_CHANS		3
#define MAX_DIMMS		3	/* Max DIMMS per channel */
#define MAX_MCR_FUNC		4
#define MAX_CHAN_FUNC		3
struct i7core_info {
	u32	mc_control;
	u32	mc_status;
	u32	max_dod;
	u32	ch_map;
};

struct i7core_inject {
	int	enable;

	u32	section;
	u32	type;
	u32	eccmask;

	/* Error address mask */
	int channel, dimm, rank, bank, page, col;
};
struct i7core_channel {
	u32	ranks;
	u32	dimms;
};
struct pci_id_descr {
	int		dev;
	int		func;
	int		dev_id;
	int		optional;
};

struct i7core_dev {
	struct list_head	list;
	u8			socket;
	struct pci_dev		**pdev;
	int			n_devs;
	struct mem_ctl_info	*mci;
};
struct i7core_pvt {
	struct pci_dev	*pci_noncore;
	struct pci_dev	*pci_mcr[MAX_MCR_FUNC + 1];
	struct pci_dev	*pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1];

	struct i7core_dev *i7core_dev;

	struct i7core_info	info;
	struct i7core_inject	inject;
	struct i7core_channel	channel[NUM_CHANS];

	int		channels;	/* Number of active channels */

	int		ce_count_available;
	int		csrow_map[NUM_CHANS][MAX_DIMMS];

	/* ECC corrected errors counts per udimm */
	unsigned long	udimm_ce_count[MAX_DIMMS];
	int		udimm_last_ce_count[MAX_DIMMS];
	/* ECC corrected errors counts per rdimm */
	unsigned long	rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
	int		rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];

	unsigned int	is_registered;

	/* mcelog glue */
	struct edac_mce	edac_mce;

	/* Fifo double buffers */
	struct mce	mce_entry[MCE_LOG_LEN];
	struct mce	mce_outentry[MCE_LOG_LEN];

	/* Fifo in/out counters */
	unsigned	mce_in, mce_out;

	/* Count of errors dropped while the fifo was full */
	unsigned	mce_overrun;
};
static LIST_HEAD(i7core_edac_list);
static DEFINE_MUTEX(i7core_edac_lock);

#define PCI_DESCR(device, function, device_id)	\
	.dev = (device),			\
	.func = (function),			\
	.dev_id = (device_id)
struct pci_id_descr pci_dev_descr_i7core[] = {
		/* Memory controller */
	{ PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR)     },
	{ PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD)  },
			/* Exists only for RDIMM */
	{ PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1 },
	{ PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },

		/* Channel 0 */
	{ PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) },
	{ PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) },
	{ PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) },
	{ PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC)   },

		/* Channel 1 */
	{ PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) },
	{ PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) },
	{ PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) },
	{ PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC)   },

		/* Channel 2 */
	{ PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) },
	{ PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
	{ PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
	{ PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC)   },

		/* Generic Non-core registers */
	/*
	 * This is the PCI device on i7core and on Xeon 35xx (8086:2c41).
	 * On Xeon 55xx, however, it has a different id (8086:2c40), so
	 * the probing code needs to test for the other address in case
	 * this one fails.
	 */
	{ PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NONCORE) },
};
struct pci_id_descr pci_dev_descr_lynnfield[] = {
	{ PCI_DESCR( 3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR)         },
	{ PCI_DESCR( 3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD)      },
	{ PCI_DESCR( 3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST)     },

	{ PCI_DESCR( 4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL) },
	{ PCI_DESCR( 4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR) },
	{ PCI_DESCR( 4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK) },
	{ PCI_DESCR( 4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC)   },

	{ PCI_DESCR( 5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL) },
	{ PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) },
	{ PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) },
	{ PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC)   },

	/*
	 * This PCI device has an alternate address on some
	 * processors, like the Core i7 860.
	 */
	{ PCI_DESCR( 0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE)     },
};
/*
 * pci_device_id table for which devices we are looking for
 */
static const struct pci_device_id i7core_pci_tbl[] __devinitdata = {
	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0)},
	{0,}			/* 0 terminated list. */
};
static struct edac_pci_ctl_info *i7core_pci;

/****************************************************************************
			Ancillary status routines
 ****************************************************************************/

/* MC_CONTROL bits */
#define CH_ACTIVE(pvt, ch)	((pvt)->info.mc_control & (1 << (8 + ch)))
#define ECCx8(pvt)		((pvt)->info.mc_control & (1 << 1))

/* MC_STATUS bits */
#define ECC_ENABLED(pvt)	((pvt)->info.mc_status & (1 << 4))
#define CH_DISABLED(pvt, ch)	((pvt)->info.mc_status & (1 << ch))
/* MC_MAX_DOD read functions */
static inline int numdimms(u32 dimms)
{
	return (dimms & 0x3) + 1;
}

static inline int numrank(u32 rank)
{
	static int ranks[4] = { 1, 2, 4, -EINVAL };

	return ranks[rank & 0x3];
}

static inline int numbank(u32 bank)
{
	static int banks[4] = { 4, 8, 16, -EINVAL };

	return banks[bank & 0x3];
}

static inline int numrow(u32 row)
{
	static int rows[8] = {
		1 << 12, 1 << 13, 1 << 14, 1 << 15,
		1 << 16, -EINVAL, -EINVAL, -EINVAL,
	};

	return rows[row & 0x7];
}

static inline int numcol(u32 col)
{
	static int cols[8] = {
		1 << 10, 1 << 11, 1 << 12, -EINVAL,
	};
	return cols[col & 0x3];
}
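/*
 * The -EINVAL entries in the lookup tables above mark reserved
 * encodings; callers currently don't check for them (see the FIXME
 * in get_dimm_config() below).
 */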
static struct i7core_dev *get_i7core_dev(u8 socket)
{
	struct i7core_dev *i7core_dev;

	list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
		if (i7core_dev->socket == socket)
			return i7core_dev;
	}

	return NULL;
}

/****************************************************************************
			Memory check routines
 ****************************************************************************/
static struct pci_dev *get_pdev_slot_func(u8 socket, unsigned slot,
					  unsigned func)
{
	struct i7core_dev *i7core_dev = get_i7core_dev(socket);
	int i;

	if (!i7core_dev)
		return NULL;

	for (i = 0; i < i7core_dev->n_devs; i++) {
		if (!i7core_dev->pdev[i])
			continue;

		if (PCI_SLOT(i7core_dev->pdev[i]->devfn) == slot &&
		    PCI_FUNC(i7core_dev->pdev[i]->devfn) == func) {
			return i7core_dev->pdev[i];
		}
	}

	return NULL;
}
/**
 * i7core_get_active_channels() - gets the number of channels and csrows
 * @socket:	Quick Path Interconnect socket
 * @channels:	Number of channels that will be returned
 * @csrows:	Number of csrows found
 *
 * Since the EDAC core needs to know in advance the number of available
 * channels and csrows, in order to allocate memory for them, two similar
 * steps are needed. The first step, implemented in this function, checks
 * the number of csrows/channels present on one socket, and is used to
 * properly allocate the size of the mci components.
 *
 * It should be noted that none of the currently available datasheets
 * explain, or even mention, how csrows are seen by the memory controller.
 * So, we need to add a fake description for csrows.
 * This driver attributes one DIMM memory to one csrow.
 */
static int i7core_get_active_channels(u8 socket, unsigned *channels,
				      unsigned *csrows)
{
	struct pci_dev *pdev = NULL;
	int i, j;
	u32 status, control;

	*channels = 0;
	*csrows = 0;

	pdev = get_pdev_slot_func(socket, 3, 0);
	if (!pdev) {
		i7core_printk(KERN_ERR, "Couldn't find socket %d fn 3.0!!!\n",
			      socket);
		return -ENODEV;
	}

	/* Device 3 function 0 reads */
	pci_read_config_dword(pdev, MC_STATUS, &status);
	pci_read_config_dword(pdev, MC_CONTROL, &control);

	for (i = 0; i < NUM_CHANS; i++) {
		u32 dimm_dod[3];
		/* Check if the channel is active */
		if (!(control & (1 << (8 + i))))
			continue;

		/* Check if the channel is disabled */
		if (status & (1 << i))
			continue;

		pdev = get_pdev_slot_func(socket, i + 4, 1);
		if (!pdev) {
			i7core_printk(KERN_ERR, "Couldn't find socket %d "
				      "fn %d.%d!!!\n",
				      socket, i + 4, 1);
			return -ENODEV;
		}
		/* Devices 4-6 function 1 */
		pci_read_config_dword(pdev,
				MC_DOD_CH_DIMM0, &dimm_dod[0]);
		pci_read_config_dword(pdev,
				MC_DOD_CH_DIMM1, &dimm_dod[1]);
		pci_read_config_dword(pdev,
				MC_DOD_CH_DIMM2, &dimm_dod[2]);

		(*channels)++;

		for (j = 0; j < 3; j++) {
			if (!DIMM_PRESENT(dimm_dod[j]))
				continue;
			(*csrows)++;
		}
	}

	debugf0("Number of active channels on socket %d: %d\n",
		socket, *channels);

	return 0;
}
static int get_dimm_config(struct mem_ctl_info *mci, int *csrow)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	struct csrow_info *csr;
	struct pci_dev *pdev;
	int i, j;
	unsigned long last_page = 0;
	enum edac_type mode;
	enum mem_type mtype;

	/* Get data from the MC register, function 0 */
	pdev = pvt->pci_mcr[0];
	if (!pdev)
		return -ENODEV;

	/* Device 3 function 0 reads */
	pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
	pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
	pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
	pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);

	debugf0("QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
		pvt->i7core_dev->socket, pvt->info.mc_control,
		pvt->info.mc_status, pvt->info.max_dod, pvt->info.ch_map);

	if (ECC_ENABLED(pvt)) {
		debugf0("ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
		if (ECCx8(pvt))
			mode = EDAC_S8ECD8ED;
		else
			mode = EDAC_S4ECD4ED;
	} else {
		debugf0("ECC disabled\n");
		mode = EDAC_NONE;
	}

	/* FIXME: need to handle the error codes */
	debugf0("DOD Max limits: DIMMS: %d, %d-ranked, %d-banked "
		"x%x x 0x%x\n",
		numdimms(pvt->info.max_dod),
		numrank(pvt->info.max_dod >> 2),
		numbank(pvt->info.max_dod >> 4),
		numrow(pvt->info.max_dod >> 6),
		numcol(pvt->info.max_dod >> 9));
	for (i = 0; i < NUM_CHANS; i++) {
		u32 data, dimm_dod[3], value[8];

		if (!pvt->pci_ch[i][0])
			continue;

		if (!CH_ACTIVE(pvt, i)) {
			debugf0("Channel %i is not active\n", i);
			continue;
		}
		if (CH_DISABLED(pvt, i)) {
			debugf0("Channel %i is disabled\n", i);
			continue;
		}

		/* Devices 4-6 function 0 */
		pci_read_config_dword(pvt->pci_ch[i][0],
				MC_CHANNEL_DIMM_INIT_PARAMS, &data);

		pvt->channel[i].ranks = (data & QUAD_RANK_PRESENT) ?
						4 : 2;

		if (data & REGISTERED_DIMM)
			mtype = MEM_RDDR3;
		else
			mtype = MEM_DDR3;
#if 0
		if (data & THREE_DIMMS_PRESENT)
			pvt->channel[i].dimms = 3;
		else if (data & SINGLE_QUAD_RANK_PRESENT)
			pvt->channel[i].dimms = 1;
		else
			pvt->channel[i].dimms = 2;
#endif

		/* Devices 4-6 function 1 */
		pci_read_config_dword(pvt->pci_ch[i][1],
				MC_DOD_CH_DIMM0, &dimm_dod[0]);
		pci_read_config_dword(pvt->pci_ch[i][1],
				MC_DOD_CH_DIMM1, &dimm_dod[1]);
		pci_read_config_dword(pvt->pci_ch[i][1],
				MC_DOD_CH_DIMM2, &dimm_dod[2]);

		debugf0("Ch%d phy rd%d, wr%d (0x%08x): "
			"%d ranks, %cDIMMs\n",
			i,
			RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
			data,
			pvt->channel[i].ranks,
			(data & REGISTERED_DIMM) ? 'R' : 'U');

		for (j = 0; j < 3; j++) {
			u32 banks, ranks, rows, cols;
			u32 size, npages;

			if (!DIMM_PRESENT(dimm_dod[j]))
				continue;

			banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
			ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
			rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
			cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));

			/* DDR3 has 8 I/O banks; each address selects
			 * 8 bytes (64-bit data bus), hence the shift by
			 * (20 - 3) to convert the address count to MB */
			size = (rows * cols * banks * ranks) >> (20 - 3);

			pvt->channel[i].dimms++;

			debugf0("\tdimm %d %d Mb offset: %x, "
				"bank: %d, rank: %d, row: %#x, col: %#x\n",
				j, size,
				RANKOFFSET(dimm_dod[j]),
				banks, ranks, rows, cols);

#if PAGE_SHIFT > 20
			npages = size >> (PAGE_SHIFT - 20);
#else
			npages = size << (20 - PAGE_SHIFT);
#endif
625 csr = &mci->csrows[*csrow];
626 csr->first_page = last_page + 1;
628 csr->last_page = last_page;
629 csr->nr_pages = npages;
633 csr->csrow_idx = *csrow;
634 csr->nr_channels = 1;
636 csr->channels[0].chan_idx = i;
637 csr->channels[0].ce_count = 0;
639 pvt->csrow_map[i][j] = *csrow;
649 csr->dtype = DEV_X16;
652 csr->dtype = DEV_UNKNOWN;
655 csr->edac_mode = mode;
		pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
		pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
		pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
		pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
		pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
		pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
		pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
		pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
		debugf1("\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
		for (j = 0; j < 8; j++)
			debugf1("\t\t%#x\t%#x\t%#x\n",
				(value[j] >> 27) & 0x1,
				(value[j] >> 24) & 0x7,
				(value[j] & ((1 << 24) - 1)));
	}

	return 0;
}
/****************************************************************************
			Error insertion routines
 ****************************************************************************/
/* The i7core has independent error injection features per channel.
   However, to keep the code simpler, we don't allow enabling error
   injection on more than one channel.
   Also, since a change to an inject parameter is only applied when
   injection is enabled, we're disabling error injection on all write
   calls to the sysfs nodes that control error code injection.
 */
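/*
 * A typical injection session through the sysfs nodes declared later in
 * this file could look like the sketch below (illustrative only; the mc0
 * path assumes this is the first EDAC memory controller in the system):
 *
 *	cd /sys/devices/system/edac/mc/mc0
 *	echo 3 > inject_section            # match both 32-byte halves
 *	echo 2 > inject_type               # inject an ECC error
 *	echo 1 > inject_eccmask            # flip one ECC bit (correctable)
 *	echo 0 > inject_addrmatch/channel  # only on channel 0
 *	echo any > inject_addrmatch/dimm   # don't match on the dimm
 *	echo 1 > inject_enable
 *
 * As noted above, writing to any of these nodes while injection is
 * enabled disables it again.
 */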
static int disable_inject(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;

	pvt->inject.enable = 0;

	if (!pvt->pci_ch[pvt->inject.channel][0])
		return -ENODEV;

	pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
				MC_CHANNEL_ERROR_INJECT, 0);

	return 0;
}
/*
 * i7core inject inject.section
 *
 *	accept and store error injection inject.section value
 *	bit 0 - refers to the lower 32-byte half cacheline
 *	bit 1 - refers to the upper 32-byte half cacheline
 */
static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci,
					   const char *data, size_t count)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	unsigned long value;
	int rc;

	if (pvt->inject.enable)
		disable_inject(mci);

	rc = strict_strtoul(data, 10, &value);
	if ((rc < 0) || (value > 3))
		return -EIO;

	pvt->inject.section = (u32) value;
	return count;
}

static ssize_t i7core_inject_section_show(struct mem_ctl_info *mci,
					  char *data)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	return sprintf(data, "0x%08x\n", pvt->inject.section);
}
/*
 * i7core inject inject.type
 *
 *	accept and store error injection inject.type value
 *	bit 0 - repeat enable - Enable error repetition
 *	bit 1 - inject ECC error
 *	bit 2 - inject parity error
 */
static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci,
					const char *data, size_t count)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	unsigned long value;
	int rc;

	if (pvt->inject.enable)
		disable_inject(mci);

	rc = strict_strtoul(data, 10, &value);
	if ((rc < 0) || (value > 7))
		return -EIO;

	pvt->inject.type = (u32) value;
	return count;
}

static ssize_t i7core_inject_type_show(struct mem_ctl_info *mci,
				       char *data)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	return sprintf(data, "0x%08x\n", pvt->inject.type);
}
/*
 * i7core inject inject.eccmask
 *
 * The type of error (UE/CE) will depend on the inject.eccmask value:
 *   Any bits set to a 1 will flip the corresponding ECC bit
 *   Correctable errors can be injected by flipping 1 bit or the bits within
 *   a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
 *   23:16 and 31:24). Flipping bits in two symbol pairs will cause an
 *   uncorrectable error to be injected.
 */
static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci,
					   const char *data, size_t count)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	unsigned long value;
	int rc;

	if (pvt->inject.enable)
		disable_inject(mci);

	rc = strict_strtoul(data, 10, &value);
	if (rc < 0)
		return -EIO;

	pvt->inject.eccmask = (u32) value;
	return count;
}

static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci,
					  char *data)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
}
/*
 * i7core_addrmatch
 *
 * The address-match sysfs nodes generated below (channel, dimm, rank,
 * bank, page, col) select which memory addresses the injection logic
 * acts on. Writing "any" to a node removes that field from the match
 * criteria.
 */
#define DECLARE_ADDR_MATCH(param, limit)			\
static ssize_t i7core_inject_store_##param(			\
		struct mem_ctl_info *mci,			\
		const char *data, size_t count)			\
{								\
	struct i7core_pvt *pvt;					\
	unsigned long value;					\
	int rc;							\
								\
	debugf1("%s()\n", __func__);				\
	pvt = mci->pvt_info;					\
								\
	if (pvt->inject.enable)					\
		disable_inject(mci);				\
								\
	if (!strcasecmp(data, "any") || !strcasecmp(data, "any\n"))\
		value = -1;					\
	else {							\
		rc = strict_strtoul(data, 10, &value);		\
		if ((rc < 0) || (value >= limit))		\
			return -EIO;				\
	}							\
								\
	pvt->inject.param = value;				\
								\
	return count;						\
}								\
								\
static ssize_t i7core_inject_show_##param(			\
		struct mem_ctl_info *mci,			\
		char *data)					\
{								\
	struct i7core_pvt *pvt;					\
								\
	pvt = mci->pvt_info;					\
	debugf1("%s() pvt=%p\n", __func__, pvt);		\
	if (pvt->inject.param < 0)				\
		return sprintf(data, "any\n");			\
	else							\
		return sprintf(data, "%d\n", pvt->inject.param);\
}
#define ATTR_ADDR_MATCH(param)				\
	{						\
		.attr = {				\
			.name = #param,			\
			.mode = (S_IRUGO | S_IWUSR)	\
		},					\
		.show  = i7core_inject_show_##param,	\
		.store = i7core_inject_store_##param,	\
	}

DECLARE_ADDR_MATCH(channel, 3);
DECLARE_ADDR_MATCH(dimm, 3);
DECLARE_ADDR_MATCH(rank, 4);
DECLARE_ADDR_MATCH(bank, 32);
DECLARE_ADDR_MATCH(page, 0x10000);
DECLARE_ADDR_MATCH(col, 0x4000);
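/*
 * Each DECLARE_ADDR_MATCH(param, limit) above expands to an
 * i7core_inject_store_##param()/i7core_inject_show_##param() pair, with
 * "limit" as the exclusive upper bound accepted from sysfs; e.g.
 * DECLARE_ADDR_MATCH(channel, 3) accepts channels 0-2, or "any".
 */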
static int write_and_test(struct pci_dev *dev, int where, u32 val)
{
	u32 read;
	int count;

	debugf0("setting pci %02x:%02x.%x reg=%02x value=%08x\n",
		dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
		where, val);

	for (count = 0; count < 10; count++) {
		if (count)
			msleep(100);
		pci_write_config_dword(dev, where, val);
		pci_read_config_dword(dev, where, &read);

		if (read == val)
			return 0;
	}

	i7core_printk(KERN_ERR, "Error during set pci %02x:%02x.%x reg=%02x "
		"write=%08x. Read=%08x\n",
		dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
		where, val, read);

	return -EINVAL;
}
/*
 * This routine prepares the Memory Controller for error injection.
 * The error will be injected when some process tries to write to the
 * memory that matches the given criteria.
 * The criteria can be set in terms of a mask where dimm, rank, bank, page
 * and col can be specified.
 * A -1 value for any of the mask items will make the MCU ignore
 * that matching criterion for error injection.
 *
 * It should be noted that the error will only happen after a write operation
 * on a memory address that matches the condition. If REPEAT_EN is not enabled
 * in the inject mask, then it will produce just one error. Otherwise, it will
 * repeat until the injectmask is cleared.
 *
 * FIXME: This routine assumes that the MAXNUMDIMMS value of MC_MAX_DOD
 * is reliable enough to check if the MC is using the
 * three channels. However, this is not clear in the datasheet.
 */
static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci,
					  const char *data, size_t count)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	u32 injectmask;
	u64 mask = 0;
	int rc;
	unsigned long enable;

	if (!pvt->pci_ch[pvt->inject.channel][0])
		return 0;

	rc = strict_strtoul(data, 10, &enable);
	if (rc < 0)
		return 0;

	if (enable) {
		pvt->inject.enable = 1;
	} else {
		disable_inject(mci);
		return count;
	}

	/* Sets pvt->inject.dimm mask */
	if (pvt->inject.dimm < 0)
		mask |= 1LL << 41;
	else {
		if (pvt->channel[pvt->inject.channel].dimms > 2)
			mask |= (pvt->inject.dimm & 0x3LL) << 35;
		else
			mask |= (pvt->inject.dimm & 0x1LL) << 36;
	}

	/* Sets pvt->inject.rank mask */
	if (pvt->inject.rank < 0)
		mask |= 1LL << 40;
	else {
		if (pvt->channel[pvt->inject.channel].dimms > 2)
			mask |= (pvt->inject.rank & 0x1LL) << 34;
		else
			mask |= (pvt->inject.rank & 0x3LL) << 34;
	}

	/* Sets pvt->inject.bank mask */
	if (pvt->inject.bank < 0)
		mask |= 1LL << 39;
	else
		mask |= (pvt->inject.bank & 0x15LL) << 30;

	/* Sets pvt->inject.page mask */
	if (pvt->inject.page < 0)
		mask |= 1LL << 38;
	else
		mask |= (pvt->inject.page & 0xffff) << 14;

	/* Sets pvt->inject.column mask */
	if (pvt->inject.col < 0)
		mask |= 1LL << 37;
	else
		mask |= (pvt->inject.col & 0x3fff);
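	/*
	 * Summarizing the MC_CHANNEL_ADDR_MATCH layout assembled above:
	 * col in bits 13:0, page in bits 29:14, bank from bit 30, rank
	 * from bit 34 and dimm from bit 35 (the dimm/rank widths depend
	 * on how many DIMMs the channel carries), while bits 37-41 are
	 * the per-field "ignore" flags set by the -1 cases.
	 */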
	/*
	 * Sets the error type and mask:
	 *	bit    0: REPEAT_EN
	 *	bits 1-2: MASK_HALF_CACHELINE
	 *	bit    3: INJECT_ECC
	 *	bit    4: INJECT_ADDR_PARITY
	 */
	injectmask = (pvt->inject.type & 1) |
		     (pvt->inject.section & 0x3) << 1 |
		     (pvt->inject.type & 0x6) << (3 - 1);

	/* Unlock writes to registers - this register is write only */
	pci_write_config_dword(pvt->pci_noncore,
			       MC_CFG_CONTROL, 0x2);

	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
			       MC_CHANNEL_ADDR_MATCH, mask);
	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
			       MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);

	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
			       MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);

	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
			       MC_CHANNEL_ERROR_INJECT, injectmask);
	/*
	 * This is something undocumented, based on my tests.
	 * Without writing 8 to this register, errors aren't injected. Not sure
	 * why.
	 */
	pci_write_config_dword(pvt->pci_noncore,
			       MC_CFG_CONTROL, 8);

	debugf0("Error inject addr match 0x%016llx, ecc 0x%08x,"
		" inject 0x%08x\n",
		mask, pvt->inject.eccmask, injectmask);

	return count;
}

static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci,
					 char *data)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	u32 injectmask;

	if (!pvt->pci_ch[pvt->inject.channel][0])
		return 0;

	pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
			       MC_CHANNEL_ERROR_INJECT, &injectmask);

	debugf0("Inject error read: 0x%08x\n", injectmask);

	if (injectmask & 0x0c)
		pvt->inject.enable = 1;

	return sprintf(data, "%d\n", pvt->inject.enable);
}
#define DECLARE_COUNTER(param)					\
static ssize_t i7core_show_counter_##param(			\
		struct mem_ctl_info *mci,			\
		char *data)					\
{								\
	struct i7core_pvt *pvt = mci->pvt_info;			\
								\
	debugf1("%s()\n", __func__);				\
	if (!pvt->ce_count_available || (pvt->is_registered))	\
		return sprintf(data, "data unavailable\n");	\
	return sprintf(data, "%lu\n",				\
			pvt->udimm_ce_count[param]);		\
}

#define ATTR_COUNTER(param)					\
	{							\
		.attr = {					\
			.name = __stringify(udimm##param),	\
			.mode = (S_IRUGO | S_IWUSR)		\
		},						\
		.show  = i7core_show_counter_##param		\
	}

DECLARE_COUNTER(0);
DECLARE_COUNTER(1);
DECLARE_COUNTER(2);
static struct mcidev_sysfs_attribute i7core_addrmatch_attrs[] = {
	ATTR_ADDR_MATCH(channel),
	ATTR_ADDR_MATCH(dimm),
	ATTR_ADDR_MATCH(rank),
	ATTR_ADDR_MATCH(bank),
	ATTR_ADDR_MATCH(page),
	ATTR_ADDR_MATCH(col),
	{ .attr = { .name = NULL } }
};

static struct mcidev_sysfs_group i7core_inject_addrmatch = {
	.name  = "inject_addrmatch",
	.mcidev_attr = i7core_addrmatch_attrs,
};

static struct mcidev_sysfs_attribute i7core_udimm_counters_attrs[] = {
	ATTR_COUNTER(0),
	ATTR_COUNTER(1),
	ATTR_COUNTER(2),
	{ .attr = { .name = NULL } }
};

static struct mcidev_sysfs_group i7core_udimm_counters = {
	.name  = "all_channel_counts",
	.mcidev_attr = i7core_udimm_counters_attrs,
};
static struct mcidev_sysfs_attribute i7core_sysfs_attrs[] = {
	{
		.attr = {
			.name = "inject_section",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_section_show,
		.store = i7core_inject_section_store,
	}, {
		.attr = {
			.name = "inject_type",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_type_show,
		.store = i7core_inject_type_store,
	}, {
		.attr = {
			.name = "inject_eccmask",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_eccmask_show,
		.store = i7core_inject_eccmask_store,
	}, {
		.grp = &i7core_inject_addrmatch,
	}, {
		.attr = {
			.name = "inject_enable",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_enable_show,
		.store = i7core_inject_enable_store,
	},
	{ .attr = { .name = NULL } },	/* Reserved for udimm counters */
	{ .attr = { .name = NULL } }
};
/****************************************************************************
	Device initialization routines: put/get, init/exit
 ****************************************************************************/

/*
 *	i7core_put_devices	'put' all the devices that we have
 *				reserved via 'get'
 */
static void i7core_put_devices(struct i7core_dev *i7core_dev)
{
	int i;

	debugf0(__FILE__ ": %s()\n", __func__);
	for (i = 0; i < i7core_dev->n_devs; i++) {
		struct pci_dev *pdev = i7core_dev->pdev[i];
		if (!pdev)
			continue;
		debugf0("Removing dev %02x:%02x.%d\n",
			pdev->bus->number,
			PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
		pci_dev_put(pdev);
	}
	kfree(i7core_dev->pdev);
	list_del(&i7core_dev->list);
	kfree(i7core_dev);
}
static void i7core_put_all_devices(void)
{
	struct i7core_dev *i7core_dev, *tmp;

	list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list)
		i7core_put_devices(i7core_dev);
}
static void i7core_xeon_pci_fixup(int dev_id)
{
	struct pci_dev *pdev = NULL;
	int i;

	/*
	 * On Xeon 55xx, the Intel QuickPath Arch Generic Non-core pci buses
	 * aren't announced by ACPI. So, we need to use a legacy scan probing
	 * to detect them.
	 */
	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, dev_id, NULL);
	if (unlikely(!pdev)) {
		for (i = 0; i < MAX_SOCKET_BUSES; i++)
			pcibios_scan_specific_bus(255-i);
	}
	/* Drop the reference taken by pci_get_device() */
	pci_dev_put(pdev);
}
/*
 *	i7core_get_devices	Find and perform 'get' operation on the MCH's
 *			device/functions we want to reference for this driver
 *
 *			Need to 'get' device 16 func 1 and func 2
 */
int i7core_get_onedevice(struct pci_dev **prev, int devno,
			 struct pci_id_descr *dev_descr, unsigned n_devs)
{
	struct i7core_dev *i7core_dev;

	struct pci_dev *pdev = NULL;
	u8 bus = 0;
	u8 socket = 0;

	pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
			      dev_descr->dev_id, *prev);

	/*
	 * On Xeon 55xx, the Intel QuickPath Arch Generic Non-core regs
	 * are at addr 8086:2c40, instead of 8086:2c41. So, we need
	 * to probe for the alternate address in case of failure
	 */
	if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev)
		pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
				      PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev);

	if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE && !pdev)
		pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
				      PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT,
				      *prev);

	if (!pdev) {
		if (*prev) {
			*prev = pdev;
			return 0;
		}

		if (dev_descr->optional)
			return 0;

		i7core_printk(KERN_ERR,
			"Device not found: dev %02x.%d PCI ID %04x:%04x\n",
			dev_descr->dev, dev_descr->func,
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);

		/* End of list, leave */
		return -ENODEV;
	}
	bus = pdev->bus->number;

	if (bus == 0x3f)
		socket = 0;
	else
		socket = 255 - bus;

	i7core_dev = get_i7core_dev(socket);
	if (!i7core_dev) {
		i7core_dev = kzalloc(sizeof(*i7core_dev), GFP_KERNEL);
		if (!i7core_dev)
			return -ENOMEM;
		i7core_dev->pdev = kzalloc(sizeof(*i7core_dev->pdev) * n_devs,
					   GFP_KERNEL);
		if (!i7core_dev->pdev)
			return -ENOMEM;
		i7core_dev->socket = socket;
		i7core_dev->n_devs = n_devs;
		list_add_tail(&i7core_dev->list, &i7core_edac_list);
	}

	if (i7core_dev->pdev[devno]) {
		i7core_printk(KERN_ERR,
			"Duplicated device for "
			"dev %02x:%02x.%d PCI ID %04x:%04x\n",
			bus, dev_descr->dev, dev_descr->func,
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
		pci_dev_put(pdev);
		return -ENODEV;
	}

	i7core_dev->pdev[devno] = pdev;
	/* Sanity check */
	if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev ||
		     PCI_FUNC(pdev->devfn) != dev_descr->func)) {
		i7core_printk(KERN_ERR,
			"Device PCI ID %04x:%04x "
			"has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id,
			bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
			bus, dev_descr->dev, dev_descr->func);
		return -ENODEV;
	}

	/* Be sure that the device is enabled */
	if (unlikely(pci_enable_device(pdev) < 0)) {
		i7core_printk(KERN_ERR,
			"Couldn't enable "
			"dev %02x:%02x.%d PCI ID %04x:%04x\n",
			bus, dev_descr->dev, dev_descr->func,
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
		return -ENODEV;
	}

	debugf0("Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n",
		socket, bus, dev_descr->dev,
		dev_descr->func,
		PCI_VENDOR_ID_INTEL, dev_descr->dev_id);

	*prev = pdev;

	return 0;
}
static int i7core_get_devices(struct pci_id_descr dev_descr[], unsigned n_devs)
{
	int i, rc;
	struct pci_dev *pdev = NULL;

	for (i = 0; i < n_devs; i++) {
		pdev = NULL;
		do {
			rc = i7core_get_onedevice(&pdev, i, &dev_descr[i],
						  n_devs);
			if (rc < 0) {
				i7core_put_all_devices();
				return -ENODEV;
			}
		} while (pdev);
	}

	return 0;
}
static int mci_bind_devs(struct mem_ctl_info *mci,
			 struct i7core_dev *i7core_dev)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	struct pci_dev *pdev;
	int i, func, slot;

	/* Associates i7core_dev and mci for future usage */
	pvt->i7core_dev = i7core_dev;
	i7core_dev->mci = mci;

	pvt->is_registered = 0;
	for (i = 0; i < i7core_dev->n_devs; i++) {
		pdev = i7core_dev->pdev[i];
		if (!pdev)
			continue;

		func = PCI_FUNC(pdev->devfn);
		slot = PCI_SLOT(pdev->devfn);
		if (slot == 3) {
			if (unlikely(func > MAX_MCR_FUNC))
				goto error;
			pvt->pci_mcr[func] = pdev;
		} else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
			if (unlikely(func > MAX_CHAN_FUNC))
				goto error;
			pvt->pci_ch[slot - 4][func] = pdev;
		} else if (!slot && !func)
			pvt->pci_noncore = pdev;
		else
			goto error;

		debugf0("Associated fn %d.%d, dev = %p, socket %d\n",
			PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
			pdev, i7core_dev->socket);

		if (PCI_SLOT(pdev->devfn) == 3 &&
		    PCI_FUNC(pdev->devfn) == 2)
			pvt->is_registered = 1;
	}

	/*
	 * Add extra nodes to count errors on udimm
	 * For registered memory, this is not needed, since the counters
	 * are already displayed at the standard locations
	 */
	if (!pvt->is_registered)
		i7core_sysfs_attrs[ARRAY_SIZE(i7core_sysfs_attrs)-2].grp =
			&i7core_udimm_counters;

	return 0;

error:
	i7core_printk(KERN_ERR, "Device %d, function %d "
		      "is out of the expected range\n",
		      slot, func);
	return -EINVAL;
}
/****************************************************************************
			Error check routines
 ****************************************************************************/
static void i7core_rdimm_update_csrow(struct mem_ctl_info *mci,
				      int chan, int dimm, int add)
{
	char *msg;
	struct i7core_pvt *pvt = mci->pvt_info;
	int row = pvt->csrow_map[chan][dimm], i;

	for (i = 0; i < add; i++) {
		msg = kasprintf(GFP_KERNEL, "Corrected error "
				"(Socket=%d channel=%d dimm=%d)",
				pvt->i7core_dev->socket, chan, dimm);

		edac_mc_handle_fbd_ce(mci, row, 0, msg);
		kfree(msg);
	}
}
static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
					 int chan, int new0, int new1, int new2)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	int add0 = 0, add1 = 0, add2 = 0;
	/* Updates CE counters if it is not the first time here */
	if (pvt->ce_count_available) {
		/* Updates CE counters */

		add2 = new2 - pvt->rdimm_last_ce_count[chan][2];
		add1 = new1 - pvt->rdimm_last_ce_count[chan][1];
		add0 = new0 - pvt->rdimm_last_ce_count[chan][0];

		if (add2 < 0)
			add2 += 0x7fff;
		pvt->rdimm_ce_count[chan][2] += add2;

		if (add1 < 0)
			add1 += 0x7fff;
		pvt->rdimm_ce_count[chan][1] += add1;

		if (add0 < 0)
			add0 += 0x7fff;
		pvt->rdimm_ce_count[chan][0] += add0;
	} else
		pvt->ce_count_available = 1;

	/* Store the new values */
	pvt->rdimm_last_ce_count[chan][2] = new2;
	pvt->rdimm_last_ce_count[chan][1] = new1;
	pvt->rdimm_last_ce_count[chan][0] = new0;

	/* Updates the edac core */
	if (add0 != 0)
		i7core_rdimm_update_csrow(mci, chan, 0, add0);
	if (add1 != 0)
		i7core_rdimm_update_csrow(mci, chan, 1, add1);
	if (add2 != 0)
		i7core_rdimm_update_csrow(mci, chan, 2, add2);
}
static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	u32 rcv[3][2];
	int i, new0, new1, new2;

	/* Read DEV 3: FUN 2: MC_COR_ECC_CNT regs directly */
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_0,
			      &rcv[0][0]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_1,
			      &rcv[0][1]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_2,
			      &rcv[1][0]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_3,
			      &rcv[1][1]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_4,
			      &rcv[2][0]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_5,
			      &rcv[2][1]);
	for (i = 0 ; i < 3; i++) {
		debugf3("MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
			(i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]);
		/* If the channel has 3 dimms */
		if (pvt->channel[i].dimms > 2) {
			new0 = DIMM_BOT_COR_ERR(rcv[i][0]);
			new1 = DIMM_TOP_COR_ERR(rcv[i][0]);
			new2 = DIMM_BOT_COR_ERR(rcv[i][1]);
		} else {
			new0 = DIMM_TOP_COR_ERR(rcv[i][0]) +
				DIMM_BOT_COR_ERR(rcv[i][0]);
			new1 = DIMM_TOP_COR_ERR(rcv[i][1]) +
				DIMM_BOT_COR_ERR(rcv[i][1]);
			new2 = 0;
		}

		i7core_rdimm_update_ce_count(mci, i, new0, new1, new2);
	}
}
/* This function is based on the device 3 function 4 registers as described on:
 * Intel Xeon Processor 5500 Series Datasheet Volume 2
 *	http://www.intel.com/Assets/PDF/datasheet/321322.pdf
 * also available at:
 *	http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
 */
static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	u32 rcv1, rcv0;
	int new0, new1, new2;

	if (!pvt->pci_mcr[4]) {
		debugf0("%s MCR registers not found\n", __func__);
		return;
	}

	/* Corrected test errors */
	pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV1, &rcv1);
	pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV0, &rcv0);

	/* Store the new values */
	new2 = DIMM2_COR_ERR(rcv1);
	new1 = DIMM1_COR_ERR(rcv0);
	new0 = DIMM0_COR_ERR(rcv0);

	/* Updates CE counters if it is not the first time here */
	if (pvt->ce_count_available) {
		/* Updates CE counters */
		int add0, add1, add2;

		add2 = new2 - pvt->udimm_last_ce_count[2];
		add1 = new1 - pvt->udimm_last_ce_count[1];
		add0 = new0 - pvt->udimm_last_ce_count[0];

		if (add2 < 0)
			add2 += 0x7fff;
		pvt->udimm_ce_count[2] += add2;

		if (add1 < 0)
			add1 += 0x7fff;
		pvt->udimm_ce_count[1] += add1;

		if (add0 < 0)
			add0 += 0x7fff;
		pvt->udimm_ce_count[0] += add0;

		if (add0 | add1 | add2)
			i7core_printk(KERN_ERR, "New Corrected error(s): "
				      "dimm0: +%d, dimm1: +%d, dimm2 +%d\n",
				      add0, add1, add2);
	} else
		pvt->ce_count_available = 1;

	/* Store the new values */
	pvt->udimm_last_ce_count[2] = new2;
	pvt->udimm_last_ce_count[1] = new1;
	pvt->udimm_last_ce_count[0] = new0;
}
/*
 * According to tables E-11 and E-12 of chapter E.3.3 of the Intel 64 and
 * IA-32 Architectures Software Developer's Manual Volume 3B,
 * Nehalem is defined as family 0x06, model 0x1a.
 *
 * The MCA registers used here are the following ones:
 *     struct mce field	MCA Register
 *     m->status	MSR_IA32_MC8_STATUS
 *     m->addr		MSR_IA32_MC8_ADDR
 *     m->misc		MSR_IA32_MC8_MISC
 * In the case of Nehalem, the error information is masked at the .status
 * and .misc fields.
 */
static void i7core_mce_output_error(struct mem_ctl_info *mci,
				    struct mce *m)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	char *type, *optype, *err, *msg;
	unsigned long error = m->status & 0x1ff0000l;
	u32 optypenum = (m->status >> 4) & 0x07;
	u32 core_err_cnt = (m->status >> 38) & 0x7fff;
	u32 dimm = (m->misc >> 16) & 0x3;
	u32 channel = (m->misc >> 18) & 0x3;
	u32 syndrome = m->misc >> 32;
	u32 errnum = find_first_bit(&error, 32);
	int csrow;

	if (m->mcgstatus & 1)
		type = "FATAL";
	else
		type = "NON_FATAL";

	switch (optypenum) {
	case 0: optype = "generic undef request"; break;
	case 1: optype = "read error"; break;
	case 2: optype = "write error"; break;
	case 3: optype = "addr/cmd error"; break;
	case 4: optype = "scrubbing error"; break;
	default: optype = "reserved"; break;
	}

	switch (errnum) {
	case 16: err = "read ECC error"; break;
	case 17: err = "RAS ECC error"; break;
	case 18: err = "write parity error"; break;
	case 19: err = "redundancy loss"; break;
	case 20: err = "reserved"; break;
	case 21: err = "memory range error"; break;
	case 22: err = "RTID out of range"; break;
	case 23: err = "address parity error"; break;
	case 24: err = "byte enable parity error"; break;
	default: err = "unknown"; break;
	}

	/* FIXME: should convert addr into bank and rank information */
	msg = kasprintf(GFP_ATOMIC,
		"%s (addr = 0x%08llx, cpu=%d, Dimm=%d, Channel=%d, "
		"syndrome=0x%08x, count=%d, Err=%08llx:%08llx (%s: %s))\n",
		type, (long long) m->addr, m->cpu, dimm, channel,
		syndrome, core_err_cnt, (long long)m->status,
		(long long)m->misc, optype, err);
	csrow = pvt->csrow_map[channel][dimm];

	/* Call the helper to output message */
	if (m->mcgstatus & 1)
		edac_mc_handle_fbd_ue(mci, csrow, 0,
				0 /* FIXME: should be channel here */, msg);
	else if (!pvt->is_registered)
		edac_mc_handle_fbd_ce(mci, csrow,
				0 /* FIXME: should be channel here */, msg);

	kfree(msg);
}
/*
 *	i7core_check_error	Retrieve and process errors reported by the
 *				hardware. Called by the Core module.
 */
static void i7core_check_error(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	int i;
	unsigned count = 0;
	struct mce *m;

	/*
	 * MCE first step: Copy all mce errors into a temporary buffer.
	 * We use a double buffering here, to reduce the risk of
	 * losing an error.
	 */
	smp_rmb();
	count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
		% MCE_LOG_LEN;
	if (!count)
		goto check_ce_error;

	m = pvt->mce_outentry;
	if (pvt->mce_in + count > MCE_LOG_LEN) {
		unsigned l = MCE_LOG_LEN - pvt->mce_in;

		memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
		smp_wmb();
		pvt->mce_in = 0;
		count -= l;
		m += l;
	}
	memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
	smp_wmb();
	pvt->mce_in += count;

	smp_rmb();
	if (pvt->mce_overrun) {
		i7core_printk(KERN_ERR, "Lost %d memory errors\n",
			      pvt->mce_overrun);
		smp_wmb();
		pvt->mce_overrun = 0;
	}

	/*
	 * MCE second step: parse errors and display
	 */
	for (i = 0; i < count; i++)
		i7core_mce_output_error(mci, &pvt->mce_outentry[i]);

	/*
	 * Now, let's increment CE error counts
	 */
check_ce_error:
	if (!pvt->is_registered)
		i7core_udimm_check_mc_ecc_err(mci);
	else
		i7core_rdimm_check_mc_ecc_err(mci);
}
/*
 *	i7core_mce_check_error	Replicates mcelog routine to get errors
 *				This routine simply queues mcelog errors, and
 *				returns. The error itself should be handled
 *				later by i7core_check_error.
 * WARNING: As this routine should be called at NMI time, extra care should
 * be taken to avoid deadlocks, and to be as fast as possible.
 */
static int i7core_mce_check_error(void *priv, struct mce *mce)
{
	struct mem_ctl_info *mci = priv;
	struct i7core_pvt *pvt = mci->pvt_info;

	/*
	 * Just let mcelog handle it if the error is
	 * outside the memory controller
	 */
	if (((mce->status & 0xffff) >> 7) != 1)
		return 0;

	/* Bank 8 registers are the only ones that we know how to handle */
	if (mce->bank != 8)
		return 0;

	/* Only handle if it is the right mc controller */
	if (cpu_data(mce->cpu).phys_proc_id != pvt->i7core_dev->socket)
		return 0;

	smp_rmb();
	if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
		smp_wmb();
		pvt->mce_overrun++;
		return 0;
	}

	/* Copy memory error at the ringbuffer */
	memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
	smp_wmb();
	pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;
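	/*
	 * Fifo discipline note: this NMI path is the only producer (it
	 * advances mce_out), and i7core_check_error() is the only consumer
	 * (it advances mce_in), so the ring buffer needs no locks; the
	 * smp_rmb()/smp_wmb() pairs order the index and data accesses.
	 */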
	/* Handle fatal errors immediately */
	if (mce->mcgstatus & 1)
		i7core_check_error(mci);

	/* Advise mcelog that the error was handled */
	return 1;
}
static int i7core_register_mci(struct i7core_dev *i7core_dev,
			       int num_channels, int num_csrows)
{
	struct mem_ctl_info *mci;
	struct i7core_pvt *pvt;
	int csrow = 0;
	int rc;

	/* allocate a new MC control structure */
	mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels,
			    i7core_dev->socket);
	if (unlikely(!mci))
		return -ENOMEM;

	debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci);

	/* record ptr to the generic device */
	mci->dev = &i7core_dev->pdev[0]->dev;

	pvt = mci->pvt_info;
	memset(pvt, 0, sizeof(*pvt));

	/*
	 * FIXME: how to handle RDDR3 at MCI level? It is possible to have
	 * Mixed RDDR3/UDDR3 with Nehalem, provided that they are on different
	 * memory channels
	 */
	mci->mtype_cap = MEM_FLAG_DDR3;
	mci->edac_ctl_cap = EDAC_FLAG_NONE;
	mci->edac_cap = EDAC_FLAG_NONE;
	mci->mod_name = "i7core_edac.c";
	mci->mod_ver = I7CORE_REVISION;
	mci->ctl_name = kasprintf(GFP_KERNEL, "i7 core #%d",
				  i7core_dev->socket);
	mci->dev_name = pci_name(i7core_dev->pdev[0]);
	mci->ctl_page_to_phys = NULL;
	mci->mc_driver_sysfs_attributes = i7core_sysfs_attrs;
	/* Set the function pointer to an actual operation function */
	mci->edac_check = i7core_check_error;

	/* Store pci devices at mci for faster access */
	rc = mci_bind_devs(mci, i7core_dev);
	if (unlikely(rc < 0))
		goto fail;

	/* Get dimm basic config */
	get_dimm_config(mci, &csrow);

	/* add this new MC control structure to EDAC's list of MCs */
	if (unlikely(edac_mc_add_mc(mci))) {
		debugf0("MC: " __FILE__
			": %s(): failed edac_mc_add_mc()\n", __func__);
		/* FIXME: perhaps some code should go here that disables error
		 * reporting if we just enabled it
		 */

		rc = -EINVAL;
		goto fail;
	}

	/* allocating generic PCI control info */
	i7core_pci = edac_pci_create_generic_ctl(&i7core_dev->pdev[0]->dev,
						 EDAC_MOD_STR);
	if (unlikely(!i7core_pci)) {
		printk(KERN_WARNING
			"%s(): Unable to create PCI control\n",
			__func__);
		printk(KERN_WARNING
			"%s(): PCI error report via EDAC not setup\n",
			__func__);
	}

	/* Default error mask is any memory */
	pvt->inject.channel = 0;
	pvt->inject.dimm = -1;
	pvt->inject.rank = -1;
	pvt->inject.bank = -1;
	pvt->inject.page = -1;
	pvt->inject.col = -1;

	/* Registers on edac_mce in order to receive memory errors */
	pvt->edac_mce.priv = mci;
	pvt->edac_mce.check_error = i7core_mce_check_error;

	rc = edac_mce_register(&pvt->edac_mce);
	if (unlikely(rc < 0)) {
		debugf0("MC: " __FILE__
			": %s(): failed edac_mce_register()\n", __func__);
		goto fail;
	}

	return 0;

fail:
	edac_mc_free(mci);
	return rc;
}
/*
 *	i7core_probe	Probe for ONE instance of device to see if it is
 *			present.
 *	return:
 *		0 for FOUND a device
 *		< 0 for error code
 */
static int __devinit i7core_probe(struct pci_dev *pdev,
				  const struct pci_device_id *id)
{
	int dev_idx = id->driver_data;
	int rc;
	struct i7core_dev *i7core_dev;

	/*
	 * All memory controllers are allocated at the first pass.
	 */
	if (unlikely(dev_idx >= 1))
		return -EINVAL;

	/* get the pci devices we want to reserve for our use */
	mutex_lock(&i7core_edac_lock);

	if (pdev->device == PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0) {
		printk(KERN_INFO "i7core_edac: detected a "
			"Lynnfield processor\n");
		rc = i7core_get_devices(pci_dev_descr_lynnfield,
				ARRAY_SIZE(pci_dev_descr_lynnfield));
	} else {
		printk(KERN_INFO "i7core_edac: detected a "
			"Nehalem/Nehalem-EP processor\n");
		rc = i7core_get_devices(pci_dev_descr_i7core,
				ARRAY_SIZE(pci_dev_descr_i7core));
	}

	if (unlikely(rc < 0))
		goto fail0;

	list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
		int channels;
		int csrows;

		/* Check the number of active and not disabled channels */
		rc = i7core_get_active_channels(i7core_dev->socket,
						&channels, &csrows);
		if (unlikely(rc < 0))
			goto fail1;

		rc = i7core_register_mci(i7core_dev, channels, csrows);
		if (unlikely(rc < 0))
			goto fail1;
	}

	i7core_printk(KERN_INFO, "Driver loaded.\n");

	mutex_unlock(&i7core_edac_lock);
	return 0;

fail1:
	i7core_put_all_devices();
fail0:
	mutex_unlock(&i7core_edac_lock);
	return rc;
}
/*
 *	i7core_remove	destructor for one instance of device
 *
 */
static void __devexit i7core_remove(struct pci_dev *pdev)
{
	struct mem_ctl_info *mci;
	struct i7core_dev *i7core_dev, *tmp;

	debugf0(__FILE__ ": %s()\n", __func__);

	if (i7core_pci)
		edac_pci_release_generic_ctl(i7core_pci);

	/*
	 * we have trouble here: the pdev value for removal will be wrong,
	 * since it will point to the X58 register used to detect that the
	 * machine is a Nehalem or newer design. However, due to the way
	 * several PCI devices are grouped together to provide MC
	 * functionality, we need a different method for releasing the
	 * devices
	 */
	mutex_lock(&i7core_edac_lock);
	list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list) {
		mci = edac_mc_del_mc(&i7core_dev->pdev[0]->dev);
		if (mci) {
			struct i7core_pvt *pvt = mci->pvt_info;

			i7core_dev = pvt->i7core_dev;
			edac_mce_unregister(&pvt->edac_mce);
			kfree(mci->ctl_name);
			edac_mc_free(mci);
			i7core_put_devices(i7core_dev);
		} else {
			i7core_printk(KERN_ERR,
				      "Couldn't find mci for socket %d\n",
				      i7core_dev->socket);
		}
	}
	mutex_unlock(&i7core_edac_lock);
}
MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);

/*
 *	i7core_driver	pci_driver structure for this module
 *
 */
static struct pci_driver i7core_driver = {
	.name     = "i7core_edac",
	.probe    = i7core_probe,
	.remove   = __devexit_p(i7core_remove),
	.id_table = i7core_pci_tbl,
};
/*
 *	i7core_init		Module entry function
 *			Try to initialize this module for its devices
 */
static int __init i7core_init(void)
{
	int pci_rc;

	debugf2("MC: " __FILE__ ": %s()\n", __func__);

	/* Ensure that the OPSTATE is set correctly for POLL or NMI */
	opstate_init();

	i7core_xeon_pci_fixup(pci_dev_descr_i7core[0].dev_id);

	pci_rc = pci_register_driver(&i7core_driver);

	if (pci_rc >= 0)
		return 0;

	i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",
		      pci_rc);

	return pci_rc;
}
/*
 *	i7core_exit()	Module exit function
 *			Unregister the driver
 */
static void __exit i7core_exit(void)
{
	debugf2("MC: " __FILE__ ": %s()\n", __func__);
	pci_unregister_driver(&i7core_driver);
}

module_init(i7core_init);
module_exit(i7core_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
		   I7CORE_REVISION);

module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
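/*
 * Usage sketch: the module would typically be loaded with something like
 * "modprobe i7core_edac edac_op_state=0" for polled operation, or with
 * edac_op_state=1 for NMI-based reporting.
 */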