i7core_edac: Fix a bug when printing error counts with RDIMMs
[net-next-2.6.git] / drivers / edac / i7core_edac.c
a0c36a1f
MCC
1/* Intel 7 core Memory Controller kernel module (Nehalem)
2 *
3 * This file may be distributed under the terms of the
4 * GNU General Public License version 2 only.
5 *
6 * Copyright (c) 2009 by:
7 * Mauro Carvalho Chehab <mchehab@redhat.com>
8 *
9 * Red Hat Inc. http://www.redhat.com
10 *
11 * Forked and adapted from the i5400_edac driver
12 *
13 * Based on the following public Intel datasheets:
14 * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
15 * Datasheet, Volume 2:
16 * http://download.intel.com/design/processor/datashts/320835.pdf
17 * Intel Xeon Processor 5500 Series Datasheet Volume 2
18 * http://www.intel.com/Assets/PDF/datasheet/321322.pdf
19 * also available at:
20 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
21 */
22
a0c36a1f
MCC
23#include <linux/module.h>
24#include <linux/init.h>
25#include <linux/pci.h>
26#include <linux/pci_ids.h>
27#include <linux/slab.h>
28#include <linux/edac.h>
29#include <linux/mmzone.h>
d5381642
MCC
30#include <linux/edac_mce.h>
31#include <linux/spinlock.h>
f4742949 32#include <linux/smp.h>
14d2c083 33#include <asm/processor.h>
a0c36a1f
MCC
34
35#include "edac_core.h"
36
f4742949
MCC
37/*
38 * This is used for Nehalem-EP and Nehalem-EX devices, where the non-core
39 * registers start at bus 255, and are not reported by BIOS.
40 * We currently only support systems with up to 2 sockets. In order to
41 * support more QPI (QuickPath Interconnect) sockets, just increase this number.
42 */
43#define MAX_SOCKET_BUSES 2
44
45
a0c36a1f
MCC
46/*
47 * Alter this version for the module when modifications are made
48 */
49#define I7CORE_REVISION " Ver: 1.0.0 " __DATE__
50#define EDAC_MOD_STR "i7core_edac"
51
a0c36a1f
MCC
52/*
53 * Debug macros
54 */
55#define i7core_printk(level, fmt, arg...) \
56 edac_printk(level, "i7core", fmt, ##arg)
57
58#define i7core_mc_printk(mci, level, fmt, arg...) \
59 edac_mc_chipset_printk(mci, level, "i7core", fmt, ##arg)
60
61/*
62 * i7core Memory Controller Registers
63 */
64
e9bd2e73
MCC
65 /* OFFSETS for Device 0 Function 0 */
66
67#define MC_CFG_CONTROL 0x90
68
a0c36a1f
MCC
69 /* OFFSETS for Device 3 Function 0 */
70
71#define MC_CONTROL 0x48
72#define MC_STATUS 0x4c
73#define MC_MAX_DOD 0x64
74
442305b1
MCC
75/*
76 * OFFSETS for Device 3 Function 4, as indicated on Xeon 5500 datasheet:
77 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
78 */
79
80#define MC_TEST_ERR_RCV1 0x60
81 #define DIMM2_COR_ERR(r) ((r) & 0x7fff)
82
83#define MC_TEST_ERR_RCV0 0x64
84 #define DIMM1_COR_ERR(r) (((r) >> 16) & 0x7fff)
85 #define DIMM0_COR_ERR(r) ((r) & 0x7fff)
86
b4e8f0b6
MCC
87/* OFFSETS for Device 3 Function 2, as indicated on Xeon 5500 datasheet */
88#define MC_COR_ECC_CNT_0 0x80
89#define MC_COR_ECC_CNT_1 0x84
90#define MC_COR_ECC_CNT_2 0x88
91#define MC_COR_ECC_CNT_3 0x8c
92#define MC_COR_ECC_CNT_4 0x90
93#define MC_COR_ECC_CNT_5 0x94
94
95#define DIMM_TOP_COR_ERR(r) (((r) >> 16) & 0x7fff)
96#define DIMM_BOT_COR_ERR(r) ((r) & 0x7fff)
97
98
a0c36a1f
MCC
99 /* OFFSETS for Devices 4,5 and 6 Function 0 */
100
0b2b7b7e
MCC
101#define MC_CHANNEL_DIMM_INIT_PARAMS 0x58
102 #define THREE_DIMMS_PRESENT (1 << 24)
103 #define SINGLE_QUAD_RANK_PRESENT (1 << 23)
104 #define QUAD_RANK_PRESENT (1 << 22)
105 #define REGISTERED_DIMM (1 << 15)
106
f122a892
MCC
107#define MC_CHANNEL_MAPPER 0x60
108 #define RDLCH(r, ch) ((((r) >> (3 + (ch * 6))) & 0x07) - 1)
109 #define WRLCH(r, ch) ((((r) >> (ch * 6)) & 0x07) - 1)
110
0b2b7b7e
MCC
111#define MC_CHANNEL_RANK_PRESENT 0x7c
112 #define RANK_PRESENT_MASK 0xffff
113
a0c36a1f 114#define MC_CHANNEL_ADDR_MATCH 0xf0
194a40fe
MCC
115#define MC_CHANNEL_ERROR_MASK 0xf8
116#define MC_CHANNEL_ERROR_INJECT 0xfc
117 #define INJECT_ADDR_PARITY 0x10
118 #define INJECT_ECC 0x08
119 #define MASK_CACHELINE 0x06
120 #define MASK_FULL_CACHELINE 0x06
121 #define MASK_MSB32_CACHELINE 0x04
122 #define MASK_LSB32_CACHELINE 0x02
123 #define NO_MASK_CACHELINE 0x00
124 #define REPEAT_EN 0x01
a0c36a1f 125
0b2b7b7e 126 /* OFFSETS for Devices 4,5 and 6 Function 1 */
b990538a 127
0b2b7b7e
MCC
128#define MC_DOD_CH_DIMM0 0x48
129#define MC_DOD_CH_DIMM1 0x4c
130#define MC_DOD_CH_DIMM2 0x50
131 #define RANKOFFSET_MASK ((1 << 12) | (1 << 11) | (1 << 10))
132 #define RANKOFFSET(x) ((x & RANKOFFSET_MASK) >> 10)
133 #define DIMM_PRESENT_MASK (1 << 9)
134 #define DIMM_PRESENT(x) (((x) & DIMM_PRESENT_MASK) >> 9)
854d3349
MCC
135 #define MC_DOD_NUMBANK_MASK ((1 << 8) | (1 << 7))
136 #define MC_DOD_NUMBANK(x) (((x) & MC_DOD_NUMBANK_MASK) >> 7)
137 #define MC_DOD_NUMRANK_MASK ((1 << 6) | (1 << 5))
138 #define MC_DOD_NUMRANK(x) (((x) & MC_DOD_NUMRANK_MASK) >> 5)
41fcb7fe 139 #define MC_DOD_NUMROW_MASK ((1 << 4) | (1 << 3) | (1 << 2))
5566cb7c 140 #define MC_DOD_NUMROW(x) (((x) & MC_DOD_NUMROW_MASK) >> 2)
854d3349
MCC
141 #define MC_DOD_NUMCOL_MASK 3
142 #define MC_DOD_NUMCOL(x) ((x) & MC_DOD_NUMCOL_MASK)
0b2b7b7e 143
f122a892
MCC
144#define MC_RANK_PRESENT 0x7c
145
0b2b7b7e
MCC
146#define MC_SAG_CH_0 0x80
147#define MC_SAG_CH_1 0x84
148#define MC_SAG_CH_2 0x88
149#define MC_SAG_CH_3 0x8c
150#define MC_SAG_CH_4 0x90
151#define MC_SAG_CH_5 0x94
152#define MC_SAG_CH_6 0x98
153#define MC_SAG_CH_7 0x9c
154
155#define MC_RIR_LIMIT_CH_0 0x40
156#define MC_RIR_LIMIT_CH_1 0x44
157#define MC_RIR_LIMIT_CH_2 0x48
158#define MC_RIR_LIMIT_CH_3 0x4C
159#define MC_RIR_LIMIT_CH_4 0x50
160#define MC_RIR_LIMIT_CH_5 0x54
161#define MC_RIR_LIMIT_CH_6 0x58
162#define MC_RIR_LIMIT_CH_7 0x5C
163#define MC_RIR_LIMIT_MASK ((1 << 10) - 1)
164
165#define MC_RIR_WAY_CH 0x80
166 #define MC_RIR_WAY_OFFSET_MASK (((1 << 14) - 1) & ~0x7)
167 #define MC_RIR_WAY_RANK_MASK 0x7
168
a0c36a1f
MCC
169/*
170 * i7core structs
171 */
172
173#define NUM_CHANS 3
442305b1
MCC
174#define MAX_DIMMS 3 /* Max DIMMS per channel */
175#define MAX_MCR_FUNC 4
176#define MAX_CHAN_FUNC 3
a0c36a1f
MCC
177
178struct i7core_info {
179 u32 mc_control;
180 u32 mc_status;
181 u32 max_dod;
f122a892 182 u32 ch_map;
a0c36a1f
MCC
183};
184
194a40fe
MCC
185
186struct i7core_inject {
187 int enable;
188
189 u32 section;
190 u32 type;
191 u32 eccmask;
192
193 /* Error address mask */
194 int channel, dimm, rank, bank, page, col;
195};
196
0b2b7b7e 197struct i7core_channel {
442305b1
MCC
198 u32 ranks;
199 u32 dimms;
0b2b7b7e
MCC
200};
201
8f331907 202struct pci_id_descr {
66607706
MCC
203 int dev;
204 int func;
205 int dev_id;
8f331907
MCC
206};
207
f4742949
MCC
208struct i7core_dev {
209 struct list_head list;
210 u8 socket;
211 struct pci_dev **pdev;
212 struct mem_ctl_info *mci;
213};
214
a0c36a1f 215struct i7core_pvt {
f4742949
MCC
216 struct pci_dev *pci_noncore;
217 struct pci_dev *pci_mcr[MAX_MCR_FUNC + 1];
218 struct pci_dev *pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1];
219
220 struct i7core_dev *i7core_dev;
67166af4 221
a0c36a1f 222 struct i7core_info info;
194a40fe 223 struct i7core_inject inject;
f4742949 224 struct i7core_channel channel[NUM_CHANS];
67166af4 225
f4742949 226 int channels; /* Number of active channels */
442305b1 227
f4742949
MCC
228 int ce_count_available;
229 int csrow_map[NUM_CHANS][MAX_DIMMS];
b4e8f0b6
MCC
230
231 /* ECC corrected error counts per udimm */
f4742949
MCC
232 unsigned long udimm_ce_count[MAX_DIMMS];
233 int udimm_last_ce_count[MAX_DIMMS];
b4e8f0b6 234 /* ECC corrected error counts per rdimm */
f4742949
MCC
235 unsigned long rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
236 int rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];
442305b1 237
f4742949 238 unsigned int is_registered;
14d2c083 239
d5381642
MCC
240 /* mcelog glue */
241 struct edac_mce edac_mce;
242 struct mce mce_entry[MCE_LOG_LEN];
243 unsigned mce_count;
244 spinlock_t mce_lock;
a0c36a1f
MCC
245};
246
66607706
MCC
247/* Static vars */
248static LIST_HEAD(i7core_edac_list);
249static DEFINE_MUTEX(i7core_edac_lock);
f4742949 250static u8 max_num_sockets;
a0c36a1f 251
8f331907
MCC
252#define PCI_DESCR(device, function, device_id) \
253 .dev = (device), \
254 .func = (function), \
255 .dev_id = (device_id)
256
66607706 257struct pci_id_descr pci_dev_descr[] = {
8f331907
MCC
258 /* Memory controller */
259 { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR) },
260 { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD) },
b990538a 261 { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS) }, /* if RDIMM */
8f331907
MCC
262 { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },
263
264 /* Channel 0 */
265 { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) },
266 { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) },
267 { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) },
268 { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC) },
269
270 /* Channel 1 */
271 { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) },
272 { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) },
273 { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) },
274 { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC) },
275
276 /* Channel 2 */
277 { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) },
278 { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
279 { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
280 { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC) },
310cbb72
MCC
281
282 /* Generic Non-core registers */
283 /*
284 * This is the PCI device on i7core and on Xeon 35xx (8086:2c41)
285 * On Xeon 55xx, however, it has a different id (8086:2c40). So,
286 * the probing code needs to try the alternate device ID if this one
287 * is not found.
288 */
289 { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NOCORE) },
290
a0c36a1f 291};
66607706 292#define N_DEVS ARRAY_SIZE(pci_dev_descr)
8f331907
MCC
293
294/*
295 * pci_device_id table for which devices we are looking for
8f331907
MCC
296 */
297static const struct pci_device_id i7core_pci_tbl[] __devinitdata = {
d1fd4fb6 298 {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
8f331907
MCC
299 {0,} /* 0 terminated list. */
300};
301
a0c36a1f
MCC
302static struct edac_pci_ctl_info *i7core_pci;
303
304/****************************************************************************
305 Ancillary status routines
306 ****************************************************************************/
307
308 /* MC_CONTROL bits */
ef708b53
MCC
309#define CH_ACTIVE(pvt, ch) ((pvt)->info.mc_control & (1 << (8 + ch)))
310#define ECCx8(pvt) ((pvt)->info.mc_control & (1 << 1))
a0c36a1f
MCC
311
312 /* MC_STATUS bits */
61053fde 313#define ECC_ENABLED(pvt) ((pvt)->info.mc_status & (1 << 4))
ef708b53 314#define CH_DISABLED(pvt, ch) ((pvt)->info.mc_status & (1 << ch))
a0c36a1f
MCC
315
316 /* MC_MAX_DOD read functions */
854d3349 317static inline int numdimms(u32 dimms)
a0c36a1f 318{
854d3349 319 return (dimms & 0x3) + 1;
a0c36a1f
MCC
320}
321
854d3349 322static inline int numrank(u32 rank)
a0c36a1f
MCC
323{
324 static int ranks[4] = { 1, 2, 4, -EINVAL };
325
854d3349 326 return ranks[rank & 0x3];
a0c36a1f
MCC
327}
328
854d3349 329static inline int numbank(u32 bank)
a0c36a1f
MCC
330{
331 static int banks[4] = { 4, 8, 16, -EINVAL };
332
854d3349 333 return banks[bank & 0x3];
a0c36a1f
MCC
334}
335
854d3349 336static inline int numrow(u32 row)
a0c36a1f
MCC
337{
338 static int rows[8] = {
339 1 << 12, 1 << 13, 1 << 14, 1 << 15,
340 1 << 16, -EINVAL, -EINVAL, -EINVAL,
341 };
342
854d3349 343 return rows[row & 0x7];
a0c36a1f
MCC
344}
345
854d3349 346static inline int numcol(u32 col)
a0c36a1f
MCC
347{
348 static int cols[8] = {
349 1 << 10, 1 << 11, 1 << 12, -EINVAL,
350 };
854d3349 351 return cols[col & 0x3];
a0c36a1f
MCC
352}
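/*
 * Informal sketch (not taken from the datasheet) of how the MC_MAX_DOD
 * fields are decoded by the helpers above, following the shifts used by
 * get_dimm_config() below:
 *
 *	bits [1:0]  -> numdimms() : DIMMs per channel (field value + 1)
 *	bits [3:2]  -> numrank()  : 1, 2 or 4 ranks
 *	bits [5:4]  -> numbank()  : 4, 8 or 16 banks
 *	bits [8:6]  -> numrow()   : 2^12 .. 2^16 rows
 *	bits [10:9] -> numcol()   : 2^10 .. 2^12 columns
 *
 * For example, a hypothetical max_dod of 0x296 would decode as 3 DIMMs,
 * 2 ranks, 8 banks, 2^14 rows and 2^11 columns.
 */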
353
f4742949 354static struct i7core_dev *get_i7core_dev(u8 socket)
66607706
MCC
355{
356 struct i7core_dev *i7core_dev;
357
358 list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
359 if (i7core_dev->socket == socket)
360 return i7core_dev;
361 }
362
363 return NULL;
364}
365
a0c36a1f
MCC
366/****************************************************************************
367 Memory check routines
368 ****************************************************************************/
67166af4
MCC
369static struct pci_dev *get_pdev_slot_func(u8 socket, unsigned slot,
370 unsigned func)
ef708b53 371{
66607706 372 struct i7core_dev *i7core_dev = get_i7core_dev(socket);
ef708b53 373 int i;
ef708b53 374
66607706
MCC
375 if (!i7core_dev)
376 return NULL;
377
ef708b53 378 for (i = 0; i < N_DEVS; i++) {
66607706 379 if (!i7core_dev->pdev[i])
ef708b53
MCC
380 continue;
381
66607706
MCC
382 if (PCI_SLOT(i7core_dev->pdev[i]->devfn) == slot &&
383 PCI_FUNC(i7core_dev->pdev[i]->devfn) == func) {
384 return i7core_dev->pdev[i];
ef708b53
MCC
385 }
386 }
387
eb94fc40
MCC
388 return NULL;
389}
390
ec6df24c
MCC
391/**
392 * i7core_get_active_channels() - gets the number of channels and csrows
393 * @socket: Quick Path Interconnect socket
394 * @channels: Number of channels that will be returned
395 * @csrows: Number of csrows found
396 *
397 * Since the EDAC core needs to know in advance the number of available
398 * channels and csrows in order to allocate memory for them, the probe is
399 * done in two similar steps. The first step, implemented by this function,
400 * counts the csrows/channels present on one socket, and is used to size
401 * the mci components properly.
402 *
403 * It should be noted that none of the currently available datasheets
404 * explain, or even mention, how csrows are seen by the memory controller.
405 * So, this driver uses a fake description for csrows, attributing one
406 * DIMM to one csrow.
407 */
67166af4
MCC
408static int i7core_get_active_channels(u8 socket, unsigned *channels,
409 unsigned *csrows)
eb94fc40
MCC
410{
411 struct pci_dev *pdev = NULL;
412 int i, j;
413 u32 status, control;
414
415 *channels = 0;
416 *csrows = 0;
417
67166af4 418 pdev = get_pdev_slot_func(socket, 3, 0);
b7c76151 419 if (!pdev) {
67166af4
MCC
420 i7core_printk(KERN_ERR, "Couldn't find socket %d fn 3.0!!!\n",
421 socket);
ef708b53 422 return -ENODEV;
b7c76151 423 }
ef708b53
MCC
424
425 /* Device 3 function 0 reads */
426 pci_read_config_dword(pdev, MC_STATUS, &status);
427 pci_read_config_dword(pdev, MC_CONTROL, &control);
428
429 for (i = 0; i < NUM_CHANS; i++) {
eb94fc40 430 u32 dimm_dod[3];
ef708b53
MCC
431 /* Check if the channel is active */
432 if (!(control & (1 << (8 + i))))
433 continue;
434
435 /* Check if the channel is disabled */
41fcb7fe 436 if (status & (1 << i))
ef708b53 437 continue;
ef708b53 438
67166af4 439 pdev = get_pdev_slot_func(socket, i + 4, 1);
eb94fc40 440 if (!pdev) {
67166af4
MCC
441 i7core_printk(KERN_ERR, "Couldn't find socket %d "
442 "fn %d.%d!!!\n",
443 socket, i + 4, 1);
eb94fc40
MCC
444 return -ENODEV;
445 }
446 /* Devices 4-6 function 1 */
447 pci_read_config_dword(pdev,
448 MC_DOD_CH_DIMM0, &dimm_dod[0]);
449 pci_read_config_dword(pdev,
450 MC_DOD_CH_DIMM1, &dimm_dod[1]);
451 pci_read_config_dword(pdev,
452 MC_DOD_CH_DIMM2, &dimm_dod[2]);
453
ef708b53 454 (*channels)++;
eb94fc40
MCC
455
456 for (j = 0; j < 3; j++) {
457 if (!DIMM_PRESENT(dimm_dod[j]))
458 continue;
459 (*csrows)++;
460 }
ef708b53
MCC
461 }
462
c77720b9 463 debugf0("Number of active channels on socket %d: %d\n",
67166af4 464 socket, *channels);
1c6fed80 465
ef708b53
MCC
466 return 0;
467}
468
f4742949 469static int get_dimm_config(struct mem_ctl_info *mci, int *csrow)
a0c36a1f
MCC
470{
471 struct i7core_pvt *pvt = mci->pvt_info;
1c6fed80 472 struct csrow_info *csr;
854d3349 473 struct pci_dev *pdev;
ba6c5c62 474 int i, j;
f4742949 475 u8 socket = pvt->i7core_dev->socket;
5566cb7c 476 unsigned long last_page = 0;
1c6fed80 477 enum edac_type mode;
854d3349 478 enum mem_type mtype;
a0c36a1f 479
854d3349 480 /* Get data from the MC register, function 0 */
f4742949 481 pdev = pvt->pci_mcr[0];
7dd6953c 482 if (!pdev)
8f331907
MCC
483 return -ENODEV;
484
f122a892 485 /* Device 3 function 0 reads */
7dd6953c
MCC
486 pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
487 pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
488 pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
489 pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);
f122a892 490
17cb7b0c
MCC
491 debugf0("QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
492 socket, pvt->info.mc_control, pvt->info.mc_status,
f122a892 493 pvt->info.max_dod, pvt->info.ch_map);
a0c36a1f 494
1c6fed80 495 if (ECC_ENABLED(pvt)) {
41fcb7fe 496 debugf0("ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
1c6fed80
MCC
497 if (ECCx8(pvt))
498 mode = EDAC_S8ECD8ED;
499 else
500 mode = EDAC_S4ECD4ED;
501 } else {
a0c36a1f 502 debugf0("ECC disabled\n");
1c6fed80
MCC
503 mode = EDAC_NONE;
504 }
a0c36a1f
MCC
505
506 /* FIXME: need to handle the error codes */
17cb7b0c
MCC
507 debugf0("DOD Max limits: DIMMS: %d, %d-ranked, %d-banked "
508 "x%x x 0x%x\n",
854d3349
MCC
509 numdimms(pvt->info.max_dod),
510 numrank(pvt->info.max_dod >> 2),
276b824c 511 numbank(pvt->info.max_dod >> 4),
854d3349
MCC
512 numrow(pvt->info.max_dod >> 6),
513 numcol(pvt->info.max_dod >> 9));
a0c36a1f 514
0b2b7b7e 515 for (i = 0; i < NUM_CHANS; i++) {
854d3349 516 u32 data, dimm_dod[3], value[8];
0b2b7b7e
MCC
517
518 if (!CH_ACTIVE(pvt, i)) {
519 debugf0("Channel %i is not active\n", i);
520 continue;
521 }
522 if (CH_DISABLED(pvt, i)) {
523 debugf0("Channel %i is disabled\n", i);
524 continue;
525 }
526
f122a892 527 /* Devices 4-6 function 0 */
f4742949 528 pci_read_config_dword(pvt->pci_ch[i][0],
0b2b7b7e
MCC
529 MC_CHANNEL_DIMM_INIT_PARAMS, &data);
530
f4742949 531 pvt->channel[i].ranks = (data & QUAD_RANK_PRESENT) ?
67166af4 532 4 : 2;
0b2b7b7e 533
854d3349
MCC
534 if (data & REGISTERED_DIMM)
535 mtype = MEM_RDDR3;
14d2c083 536 else
854d3349
MCC
537 mtype = MEM_DDR3;
538#if 0
0b2b7b7e
MCC
539 if (data & THREE_DIMMS_PRESENT)
540 pvt->channel[i].dimms = 3;
541 else if (data & SINGLE_QUAD_RANK_PRESENT)
542 pvt->channel[i].dimms = 1;
543 else
544 pvt->channel[i].dimms = 2;
854d3349
MCC
545#endif
546
547 /* Devices 4-6 function 1 */
f4742949 548 pci_read_config_dword(pvt->pci_ch[i][1],
854d3349 549 MC_DOD_CH_DIMM0, &dimm_dod[0]);
f4742949 550 pci_read_config_dword(pvt->pci_ch[i][1],
854d3349 551 MC_DOD_CH_DIMM1, &dimm_dod[1]);
f4742949 552 pci_read_config_dword(pvt->pci_ch[i][1],
854d3349 553 MC_DOD_CH_DIMM2, &dimm_dod[2]);
0b2b7b7e 554
1c6fed80 555 debugf0("Ch%d phy rd%d, wr%d (0x%08x): "
854d3349 556 "%d ranks, %cDIMMs\n",
1c6fed80
MCC
557 i,
558 RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
559 data,
f4742949 560 pvt->channel[i].ranks,
41fcb7fe 561 (data & REGISTERED_DIMM) ? 'R' : 'U');
854d3349
MCC
562
563 for (j = 0; j < 3; j++) {
564 u32 banks, ranks, rows, cols;
5566cb7c 565 u32 size, npages;
854d3349
MCC
566
567 if (!DIMM_PRESENT(dimm_dod[j]))
568 continue;
569
570 banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
571 ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
572 rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
573 cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));
574
5566cb7c
MCC
575 /* DDR3 has 8 I/O banks */
576 size = (rows * cols * banks * ranks) >> (20 - 3);
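/*
 * Worked example of the formula above, with hypothetical values:
 * rows = 2^14, cols = 2^11, banks = 8, ranks = 2 gives
 * (2^14 * 2^11 * 8 * 2) >> 17 = 4096, i.e. a 4 GB DIMM.
 */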
577
f4742949 578 pvt->channel[i].dimms++;
854d3349 579
17cb7b0c
MCC
580 debugf0("\tdimm %d %d Mb offset: %x, "
581 "bank: %d, rank: %d, row: %#x, col: %#x\n",
582 j, size,
854d3349
MCC
583 RANKOFFSET(dimm_dod[j]),
584 banks, ranks, rows, cols);
585
eb94fc40
MCC
586#if PAGE_SHIFT > 20
587 npages = size >> (PAGE_SHIFT - 20);
588#else
589 npages = size << (20 - PAGE_SHIFT);
590#endif
5566cb7c 591
ba6c5c62 592 csr = &mci->csrows[*csrow];
5566cb7c
MCC
593 csr->first_page = last_page + 1;
594 last_page += npages;
595 csr->last_page = last_page;
596 csr->nr_pages = npages;
597
854d3349 598 csr->page_mask = 0;
eb94fc40 599 csr->grain = 8;
ba6c5c62 600 csr->csrow_idx = *csrow;
eb94fc40
MCC
601 csr->nr_channels = 1;
602
603 csr->channels[0].chan_idx = i;
604 csr->channels[0].ce_count = 0;
854d3349 605
f4742949 606 pvt->csrow_map[i][j] = *csrow;
b4e8f0b6 607
854d3349
MCC
608 switch (banks) {
609 case 4:
610 csr->dtype = DEV_X4;
611 break;
612 case 8:
613 csr->dtype = DEV_X8;
614 break;
615 case 16:
616 csr->dtype = DEV_X16;
617 break;
618 default:
619 csr->dtype = DEV_UNKNOWN;
620 }
621
622 csr->edac_mode = mode;
623 csr->mtype = mtype;
624
ba6c5c62 625 (*csrow)++;
854d3349 626 }
1c6fed80 627
854d3349
MCC
628 pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
629 pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
630 pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
631 pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
632 pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
633 pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
634 pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
635 pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
17cb7b0c 636 debugf1("\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
854d3349 637 for (j = 0; j < 8; j++)
17cb7b0c 638 debugf1("\t\t%#x\t%#x\t%#x\n",
854d3349
MCC
639 (value[j] >> 27) & 0x1,
640 (value[j] >> 24) & 0x7,
641 (value[j] & ((1 << 24) - 1)));
0b2b7b7e
MCC
642 }
643
a0c36a1f
MCC
644 return 0;
645}
646
194a40fe
MCC
647/****************************************************************************
648 Error insertion routines
649 ****************************************************************************/
650
651/* The i7core has independent error injection features per channel.
652 However, to keep the code simpler, we don't allow enabling error injection
653 on more than one channel at a time.
654 Also, since a change to an injection parameter only takes effect when
655 injection is enabled, we disable error injection on every write to the
656 sysfs nodes that control error injection.
657 */
8f331907 658static int disable_inject(struct mem_ctl_info *mci)
194a40fe
MCC
659{
660 struct i7core_pvt *pvt = mci->pvt_info;
661
662 pvt->inject.enable = 0;
663
f4742949 664 if (!pvt->pci_ch[pvt->inject.channel][0])
8f331907
MCC
665 return -ENODEV;
666
f4742949 667 pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
4157d9f5 668 MC_CHANNEL_ERROR_INJECT, 0);
8f331907
MCC
669
670 return 0;
194a40fe
MCC
671}
672
673/*
674 * i7core inject inject.section
675 *
676 * accept and store error injection inject.section value
677 * bit 0 - refers to the lower 32-byte half cacheline
678 * bit 1 - refers to the upper 32-byte half cacheline
679 */
680static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci,
681 const char *data, size_t count)
682{
683 struct i7core_pvt *pvt = mci->pvt_info;
684 unsigned long value;
685 int rc;
686
687 if (pvt->inject.enable)
41fcb7fe 688 disable_inject(mci);
194a40fe
MCC
689
690 rc = strict_strtoul(data, 10, &value);
691 if ((rc < 0) || (value > 3))
2068def5 692 return -EIO;
194a40fe
MCC
693
694 pvt->inject.section = (u32) value;
695 return count;
696}
697
698static ssize_t i7core_inject_section_show(struct mem_ctl_info *mci,
699 char *data)
700{
701 struct i7core_pvt *pvt = mci->pvt_info;
702 return sprintf(data, "0x%08x\n", pvt->inject.section);
703}
704
705/*
706 * i7core inject.type
707 *
708 * accept and store error injection inject.type value
709 * bit 0 - repeat enable - Enable error repetition
710 * bit 1 - inject ECC error
711 * bit 2 - inject parity error
712 */
713static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci,
714 const char *data, size_t count)
715{
716 struct i7core_pvt *pvt = mci->pvt_info;
717 unsigned long value;
718 int rc;
719
720 if (pvt->inject.enable)
41fcb7fe 721 disable_inject(mci);
194a40fe
MCC
722
723 rc = strict_strtoul(data, 10, &value);
724 if ((rc < 0) || (value > 7))
2068def5 725 return -EIO;
194a40fe
MCC
726
727 pvt->inject.type = (u32) value;
728 return count;
729}
730
731static ssize_t i7core_inject_type_show(struct mem_ctl_info *mci,
732 char *data)
733{
734 struct i7core_pvt *pvt = mci->pvt_info;
735 return sprintf(data, "0x%08x\n", pvt->inject.type);
736}
737
738/*
739 * i7core_inject_eccmask_store
740 *
741 * The type of error (UE/CE) will depend on the inject.eccmask value:
742 * Any bits set to a 1 will flip the corresponding ECC bit
743 * Correctable errors can be injected by flipping 1 bit or the bits within
744 * a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
745 * 23:16 and 31:24). Flipping bits in two symbol pairs will cause an
746 * uncorrectable error to be injected.
747 */
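/*
 * Illustrative values only (derived from the rule above, not verified on
 * hardware): an eccmask of 0x00000001 flips a single ECC bit and should
 * produce a correctable error, while 0x00010001 flips bits in two different
 * symbol pairs (bits 0 and 16) and should produce an uncorrectable error.
 */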
748static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci,
749 const char *data, size_t count)
750{
751 struct i7core_pvt *pvt = mci->pvt_info;
752 unsigned long value;
753 int rc;
754
755 if (pvt->inject.enable)
41fcb7fe 756 disable_inject(mci);
194a40fe
MCC
757
758 rc = strict_strtoul(data, 10, &value);
759 if (rc < 0)
2068def5 760 return -EIO;
194a40fe
MCC
761
762 pvt->inject.eccmask = (u32) value;
763 return count;
764}
765
766static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci,
767 char *data)
768{
769 struct i7core_pvt *pvt = mci->pvt_info;
770 return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
771}
772
773/*
774 * i7core_addrmatch
775 *
776 * Accepts a series of "<field>:<value>" pairs separated by spaces, tabs or
777 * newlines, where <field> is one of channel, dimm, rank, bank, page or
778 * col/column, and <value> is either a decimal number or "any".
779 * "any" (stored internally as -1) removes that field from the matching
780 * criteria, so errors are injected regardless of its value.
781 * See i7core_inject_addrmatch_show() for the current settings.
782 */
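/*
 * Usage sketch; the sysfs location is an assumption and may vary with the
 * EDAC core version (typically /sys/devices/system/edac/mc/mc<N>/):
 *
 *	echo "channel:0 dimm:1 rank:any page:any col:any" > inject_addrmatch
 *
 * matches any write to DIMM 1 on channel 0, leaving rank, page and column
 * as wildcards.
 */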
783static ssize_t i7core_inject_addrmatch_store(struct mem_ctl_info *mci,
784 const char *data, size_t count)
785{
786 struct i7core_pvt *pvt = mci->pvt_info;
787 char *cmd, *val;
788 long value;
789 int rc;
790
791 if (pvt->inject.enable)
41fcb7fe 792 disable_inject(mci);
194a40fe
MCC
793
794 do {
795 cmd = strsep((char **) &data, ":");
796 if (!cmd)
797 break;
798 val = strsep((char **) &data, " \n\t");
799 if (!val)
800 return cmd - data;
801
41fcb7fe 802 if (!strcasecmp(val, "any"))
194a40fe
MCC
803 value = -1;
804 else {
805 rc = strict_strtol(val, 10, &value);
806 if ((rc < 0) || (value < 0))
807 return cmd - data;
808 }
809
41fcb7fe 810 if (!strcasecmp(cmd, "channel")) {
194a40fe
MCC
811 if (value < 3)
812 pvt->inject.channel = value;
813 else
814 return cmd - data;
41fcb7fe 815 } else if (!strcasecmp(cmd, "dimm")) {
276b824c 816 if (value < 3)
194a40fe
MCC
817 pvt->inject.dimm = value;
818 else
819 return cmd - data;
41fcb7fe 820 } else if (!strcasecmp(cmd, "rank")) {
194a40fe
MCC
821 if (value < 4)
822 pvt->inject.rank = value;
823 else
824 return cmd - data;
41fcb7fe 825 } else if (!strcasecmp(cmd, "bank")) {
276b824c 826 if (value < 32)
194a40fe
MCC
827 pvt->inject.bank = value;
828 else
829 return cmd - data;
41fcb7fe 830 } else if (!strcasecmp(cmd, "page")) {
194a40fe
MCC
831 if (value <= 0xffff)
832 pvt->inject.page = value;
833 else
834 return cmd - data;
41fcb7fe
MCC
835 } else if (!strcasecmp(cmd, "col") ||
836 !strcasecmp(cmd, "column")) {
194a40fe
MCC
837 if (value <= 0x3fff)
838 pvt->inject.col = value;
839 else
840 return cmd - data;
841 }
842 } while (1);
843
844 return count;
845}
846
847static ssize_t i7core_inject_addrmatch_show(struct mem_ctl_info *mci,
848 char *data)
849{
850 struct i7core_pvt *pvt = mci->pvt_info;
851 char channel[4], dimm[4], bank[4], rank[4], page[7], col[7];
852
853 if (pvt->inject.channel < 0)
854 sprintf(channel, "any");
855 else
856 sprintf(channel, "%d", pvt->inject.channel);
857 if (pvt->inject.dimm < 0)
858 sprintf(dimm, "any");
859 else
860 sprintf(dimm, "%d", pvt->inject.dimm);
861 if (pvt->inject.bank < 0)
862 sprintf(bank, "any");
863 else
864 sprintf(bank, "%d", pvt->inject.bank);
865 if (pvt->inject.rank < 0)
866 sprintf(rank, "any");
867 else
868 sprintf(rank, "%d", pvt->inject.rank);
869 if (pvt->inject.page < 0)
870 sprintf(page, "any");
871 else
872 sprintf(page, "0x%04x", pvt->inject.page);
873 if (pvt->inject.col < 0)
874 sprintf(col, "any");
875 else
876 sprintf(col, "0x%04x", pvt->inject.col);
877
878 return sprintf(data, "channel: %s\ndimm: %s\nbank: %s\n"
879 "rank: %s\npage: %s\ncolumn: %s\n",
880 channel, dimm, bank, rank, page, col);
881}
882
276b824c
MCC
883static int write_and_test(struct pci_dev *dev, int where, u32 val)
884{
885 u32 read;
886 int count;
887
4157d9f5
MCC
888 debugf0("setting pci %02x:%02x.%x reg=%02x value=%08x\n",
889 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
890 where, val);
891
276b824c
MCC
892 for (count = 0; count < 10; count++) {
893 if (count)
b990538a 894 msleep(100);
276b824c
MCC
895 pci_write_config_dword(dev, where, val);
896 pci_read_config_dword(dev, where, &read);
897
898 if (read == val)
899 return 0;
900 }
901
4157d9f5
MCC
902 i7core_printk(KERN_ERR, "Error during set pci %02x:%02x.%x reg=%02x "
903 "write=%08x. Read=%08x\n",
904 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
905 where, val, read);
276b824c
MCC
906
907 return -EINVAL;
908}
909
194a40fe
MCC
910/*
911 * This routine prepares the Memory Controller for error injection.
912 * The error will be injected when some process tries to write to the
913 * memory that matches the given criteria.
914 * The criteria can be set in terms of a mask where dimm, rank, bank, page
915 * and col can be specified.
916 * A -1 value for any of the mask items makes the MCU ignore that item
917 * when matching the error injection criteria.
918 *
919 * Note that the error only happens after a write operation to memory that
920 * matches the condition. If REPEAT_EN is not set in the inject mask, just
921 * one error is produced. Otherwise, errors keep being injected until the
922 * inject mask is cleared.
923 *
924 * FIXME: This routine assumes that the MAXNUMDIMMS value of MC_MAX_DOD
925 * is reliable enough to check whether the MC is using all
926 * three channels. However, this is not clear from the datasheet.
927 */
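/*
 * Illustrative example of the injectmask composed below (hypothetical
 * values): with inject.type = 3 (repeat + ECC) and inject.section = 3
 * (both cacheline halves), injectmask = (3 & 1) | ((3 & 0x3) << 1) |
 * ((3 & 0x6) << 2) = 0x0f, i.e. REPEAT_EN, both half-cacheline mask bits
 * and INJECT_ECC set.
 */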
928static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci,
929 const char *data, size_t count)
930{
931 struct i7core_pvt *pvt = mci->pvt_info;
932 u32 injectmask;
933 u64 mask = 0;
934 int rc;
935 long enable;
936
f4742949 937 if (!pvt->pci_ch[pvt->inject.channel][0])
8f331907
MCC
938 return 0;
939
194a40fe
MCC
940 rc = strict_strtoul(data, 10, &enable);
941 if ((rc < 0))
942 return 0;
943
944 if (enable) {
945 pvt->inject.enable = 1;
946 } else {
947 disable_inject(mci);
948 return count;
949 }
950
951 /* Sets pvt->inject.dimm mask */
952 if (pvt->inject.dimm < 0)
7b029d03 953 mask |= 1L << 41;
194a40fe 954 else {
f4742949 955 if (pvt->channel[pvt->inject.channel].dimms > 2)
7b029d03 956 mask |= (pvt->inject.dimm & 0x3L) << 35;
194a40fe 957 else
7b029d03 958 mask |= (pvt->inject.dimm & 0x1L) << 36;
194a40fe
MCC
959 }
960
961 /* Sets pvt->inject.rank mask */
962 if (pvt->inject.rank < 0)
7b029d03 963 mask |= 1L << 40;
194a40fe 964 else {
f4742949 965 if (pvt->channel[pvt->inject.channel].dimms > 2)
7b029d03 966 mask |= (pvt->inject.rank & 0x1L) << 34;
194a40fe 967 else
7b029d03 968 mask |= (pvt->inject.rank & 0x3L) << 34;
194a40fe
MCC
969 }
970
971 /* Sets pvt->inject.bank mask */
972 if (pvt->inject.bank < 0)
7b029d03 973 mask |= 1L << 39;
194a40fe 974 else
7b029d03 975 mask |= (pvt->inject.bank & 0x15L) << 30;
194a40fe
MCC
976
977 /* Sets pvt->inject.page mask */
978 if (pvt->inject.page < 0)
7b029d03 979 mask |= 1L << 38;
194a40fe 980 else
7b029d03 981 mask |= (pvt->inject.page & 0xffffL) << 14;
194a40fe
MCC
982
983 /* Sets pvt->inject.column mask */
984 if (pvt->inject.col < 0)
7b029d03 985 mask |= 1L << 37;
194a40fe 986 else
7b029d03 987 mask |= (pvt->inject.col & 0x3fffL);
194a40fe 988
276b824c
MCC
989 /*
990 * bit 0: REPEAT_EN
991 * bits 1-2: MASK_HALF_CACHELINE
992 * bit 3: INJECT_ECC
993 * bit 4: INJECT_ADDR_PARITY
994 */
995
996 injectmask = (pvt->inject.type & 1) |
997 (pvt->inject.section & 0x3) << 1 |
998 (pvt->inject.type & 0x6) << (3 - 1);
999
1000 /* Unlock writes to registers - this register is write only */
f4742949 1001 pci_write_config_dword(pvt->pci_noncore,
67166af4 1002 MC_CFG_CONTROL, 0x2);
e9bd2e73 1003
f4742949 1004 write_and_test(pvt->pci_ch[pvt->inject.channel][0],
194a40fe 1005 MC_CHANNEL_ADDR_MATCH, mask);
f4742949 1006 write_and_test(pvt->pci_ch[pvt->inject.channel][0],
7b029d03 1007 MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);
7b029d03 1008
f4742949 1009 write_and_test(pvt->pci_ch[pvt->inject.channel][0],
194a40fe
MCC
1010 MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);
1011
f4742949 1012 write_and_test(pvt->pci_ch[pvt->inject.channel][0],
4157d9f5 1013 MC_CHANNEL_ERROR_INJECT, injectmask);
276b824c 1014
194a40fe 1015 /*
276b824c
MCC
1016 * This is something undocumented, based on my tests
1017 * Without writing 8 to this register, errors aren't injected. Not sure
1018 * why.
194a40fe 1019 */
f4742949 1020 pci_write_config_dword(pvt->pci_noncore,
276b824c 1021 MC_CFG_CONTROL, 8);
194a40fe 1022
41fcb7fe
MCC
1023 debugf0("Error inject addr match 0x%016llx, ecc 0x%08x,"
1024 " inject 0x%08x\n",
194a40fe
MCC
1025 mask, pvt->inject.eccmask, injectmask);
1026
7b029d03 1027
194a40fe
MCC
1028 return count;
1029}
1030
1031static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci,
1032 char *data)
1033{
1034 struct i7core_pvt *pvt = mci->pvt_info;
7b029d03
MCC
1035 u32 injectmask;
1036
f4742949 1037 pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
4157d9f5 1038 MC_CHANNEL_ERROR_INJECT, &injectmask);
7b029d03
MCC
1039
1040 debugf0("Inject error read: 0x%018x\n", injectmask);
1041
1042 if (injectmask & 0x0c)
1043 pvt->inject.enable = 1;
1044
194a40fe
MCC
1045 return sprintf(data, "%d\n", pvt->inject.enable);
1046}
1047
442305b1
MCC
1048static ssize_t i7core_ce_regs_show(struct mem_ctl_info *mci, char *data)
1049{
f4742949 1050 unsigned i, count, total = 0;
442305b1
MCC
1051 struct i7core_pvt *pvt = mci->pvt_info;
1052
f4742949
MCC
1053 if (!pvt->ce_count_available) {
1054 count = sprintf(data, "data unavailable\n");
1055 return 0;
67166af4 1056 }
d88b8507 1057 if (!pvt->is_registered) {
f4742949
MCC
1058 count = sprintf(data, "all channels "
1059 "UDIMM0: %lu UDIMM1: %lu UDIMM2: %lu\n",
1060 pvt->udimm_ce_count[0],
1061 pvt->udimm_ce_count[1],
1062 pvt->udimm_ce_count[2]);
d88b8507
MCC
1063 data += count;
1064 total += count;
1065 } else {
f4742949
MCC
1066 for (i = 0; i < NUM_CHANS; i++) {
1067 count = sprintf(data, "channel %d RDIMM0: %lu "
1068 "RDIMM1: %lu RDIMM2: %lu\n",
1069 i,
1070 pvt->rdimm_ce_count[i][0],
1071 pvt->rdimm_ce_count[i][1],
1072 pvt->rdimm_ce_count[i][2]);
d88b8507
MCC
1073 data += count;
1074 total += count;
1075 }
1076 }
442305b1 1077
67166af4 1078 return total;
442305b1
MCC
1079}
1080
194a40fe
MCC
1081/*
1082 * Sysfs struct
1083 */
1084static struct mcidev_sysfs_attribute i7core_inj_attrs[] = {
194a40fe
MCC
1085 {
1086 .attr = {
1087 .name = "inject_section",
1088 .mode = (S_IRUGO | S_IWUSR)
1089 },
1090 .show = i7core_inject_section_show,
1091 .store = i7core_inject_section_store,
1092 }, {
1093 .attr = {
1094 .name = "inject_type",
1095 .mode = (S_IRUGO | S_IWUSR)
1096 },
1097 .show = i7core_inject_type_show,
1098 .store = i7core_inject_type_store,
1099 }, {
1100 .attr = {
1101 .name = "inject_eccmask",
1102 .mode = (S_IRUGO | S_IWUSR)
1103 },
1104 .show = i7core_inject_eccmask_show,
1105 .store = i7core_inject_eccmask_store,
1106 }, {
1107 .attr = {
1108 .name = "inject_addrmatch",
1109 .mode = (S_IRUGO | S_IWUSR)
1110 },
1111 .show = i7core_inject_addrmatch_show,
1112 .store = i7core_inject_addrmatch_store,
1113 }, {
1114 .attr = {
1115 .name = "inject_enable",
1116 .mode = (S_IRUGO | S_IWUSR)
1117 },
1118 .show = i7core_inject_enable_show,
1119 .store = i7core_inject_enable_store,
442305b1
MCC
1120 }, {
1121 .attr = {
1122 .name = "corrected_error_counts",
1123 .mode = (S_IRUGO | S_IWUSR)
1124 },
1125 .show = i7core_ce_regs_show,
1126 .store = NULL,
194a40fe
MCC
1127 },
1128};
1129
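/*
 * Rough usage sketch for the injection attributes above. The sysfs paths
 * are assumptions and may differ with the EDAC core version; they normally
 * show up under /sys/devices/system/edac/mc/mc<N>/:
 *
 *	echo "channel:0 dimm:0 rank:any page:any col:any" > inject_addrmatch
 *	echo 3 > inject_section		# both 32-byte cacheline halves
 *	echo 3 > inject_type		# repeat enable + ECC error
 *	echo 1 > inject_eccmask		# flip one ECC bit (correctable)
 *	echo 1 > inject_enable		# arm the injection
 *
 * After a write hits the matching address, the error should show up through
 * the normal EDAC counters and in corrected_error_counts.
 */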
a0c36a1f
MCC
1130/****************************************************************************
1131 Device initialization routines: put/get, init/exit
1132 ****************************************************************************/
1133
1134/*
1135 * i7core_put_devices 'put' all the devices that we have
1136 * reserved via 'get'
1137 */
8f331907 1138static void i7core_put_devices(void)
a0c36a1f 1139{
67166af4 1140 int i, j;
a0c36a1f 1141
f4742949 1142 for (i = 0; i < max_num_sockets; i++) {
66607706
MCC
1143 struct i7core_dev *i7core_dev = get_i7core_dev(i);
1144 if (!i7core_dev)
1145 continue;
1146
67166af4 1147 for (j = 0; j < N_DEVS; j++)
66607706
MCC
1148 pci_dev_put(i7core_dev->pdev[j]);
1149
1150 list_del(&i7core_dev->list);
1151 kfree(i7core_dev->pdev);
1152 kfree(i7core_dev);
1153 }
a0c36a1f
MCC
1154}
1155
bc2d7245
KM
1156static void i7core_xeon_pci_fixup(void)
1157{
1158 struct pci_dev *pdev = NULL;
1159 int i;
1160 /*
1161 * On Xeon 55xx, the Intel QuickPath Arch Generic Non-core PCI buses
1162 * aren't announced by ACPI, so we need to use a legacy bus scan to
1163 * detect them.
1164 */
1165 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
66607706 1166 pci_dev_descr[0].dev_id, NULL);
bc2d7245 1167 if (unlikely(!pdev)) {
f4742949 1168 for (i = 0; i < MAX_SOCKET_BUSES; i++)
bc2d7245
KM
1169 pcibios_scan_specific_bus(255-i);
1170 }
1171}
1172
a0c36a1f
MCC
1173/*
1174 * i7core_get_devices Find and perform 'get' operation on the MCH's
1175 * device/functions we want to reference for this driver
1176 *
1177 * Need to 'get' device 16 func 1 and func 2
1178 */
c77720b9 1179int i7core_get_onedevice(struct pci_dev **prev, int devno)
a0c36a1f 1180{
66607706
MCC
1181 struct i7core_dev *i7core_dev;
1182
8f331907 1183 struct pci_dev *pdev = NULL;
67166af4
MCC
1184 u8 bus = 0;
1185 u8 socket = 0;
a0c36a1f 1186
c77720b9 1187 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
66607706 1188 pci_dev_descr[devno].dev_id, *prev);
c77720b9 1189
c77720b9
MCC
1190 /*
1191 * On Xeon 55xx, the Intel QuickPath Arch Generic Non-core regs
1192 * are at PCI ID 8086:2c40, instead of 8086:2c41, so we need
1193 * to probe for the alternate ID if the first one is not found
1194 */
66607706 1195 if (pci_dev_descr[devno].dev_id == PCI_DEVICE_ID_INTEL_I7_NOCORE && !pdev)
c77720b9
MCC
1196 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1197 PCI_DEVICE_ID_INTEL_I7_NOCORE_ALT, *prev);
d1fd4fb6 1198
c77720b9
MCC
1199 if (!pdev) {
1200 if (*prev) {
1201 *prev = pdev;
1202 return 0;
d1fd4fb6
MCC
1203 }
1204
310cbb72 1205 /*
c77720b9
MCC
1206 * Dev 3 function 2 only exists on chips with RDIMMs
1207 * so it is OK if it is not found
310cbb72 1208 */
66607706 1209 if ((pci_dev_descr[devno].dev == 3) && (pci_dev_descr[devno].func == 2)) {
c77720b9
MCC
1210 *prev = pdev;
1211 return 0;
1212 }
310cbb72 1213
c77720b9
MCC
1214 i7core_printk(KERN_ERR,
1215 "Device not found: dev %02x.%d PCI ID %04x:%04x\n",
66607706
MCC
1216 pci_dev_descr[devno].dev, pci_dev_descr[devno].func,
1217 PCI_VENDOR_ID_INTEL, pci_dev_descr[devno].dev_id);
67166af4 1218
c77720b9
MCC
1219 /* End of list, leave */
1220 return -ENODEV;
1221 }
1222 bus = pdev->bus->number;
67166af4 1223
c77720b9
MCC
1224 if (bus == 0x3f)
1225 socket = 0;
1226 else
1227 socket = 255 - bus;
1228
66607706
MCC
1229 i7core_dev = get_i7core_dev(socket);
1230 if (!i7core_dev) {
1231 i7core_dev = kzalloc(sizeof(*i7core_dev), GFP_KERNEL);
1232 if (!i7core_dev)
1233 return -ENOMEM;
1234 i7core_dev->pdev = kzalloc(sizeof(*i7core_dev->pdev) * N_DEVS,
1235 GFP_KERNEL);
1236 if (!i7core_dev->pdev)
1237 return -ENOMEM;
1238 i7core_dev->socket = socket;
1239 list_add_tail(&i7core_dev->list, &i7core_edac_list);
c77720b9 1240 }
67166af4 1241
66607706 1242 if (i7core_dev->pdev[devno]) {
c77720b9
MCC
1243 i7core_printk(KERN_ERR,
1244 "Duplicated device for "
1245 "dev %02x:%02x.%d PCI ID %04x:%04x\n",
66607706
MCC
1246 bus, pci_dev_descr[devno].dev, pci_dev_descr[devno].func,
1247 PCI_VENDOR_ID_INTEL, pci_dev_descr[devno].dev_id);
c77720b9
MCC
1248 pci_dev_put(pdev);
1249 return -ENODEV;
1250 }
67166af4 1251
66607706 1252 i7core_dev->pdev[devno] = pdev;
c77720b9
MCC
1253
1254 /* Sanity check */
66607706
MCC
1255 if (unlikely(PCI_SLOT(pdev->devfn) != pci_dev_descr[devno].dev ||
1256 PCI_FUNC(pdev->devfn) != pci_dev_descr[devno].func)) {
c77720b9
MCC
1257 i7core_printk(KERN_ERR,
1258 "Device PCI ID %04x:%04x "
1259 "has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
66607706 1260 PCI_VENDOR_ID_INTEL, pci_dev_descr[devno].dev_id,
c77720b9 1261 bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
66607706 1262 bus, pci_dev_descr[devno].dev, pci_dev_descr[devno].func);
c77720b9
MCC
1263 return -ENODEV;
1264 }
ef708b53 1265
c77720b9
MCC
1266 /* Be sure that the device is enabled */
1267 if (unlikely(pci_enable_device(pdev) < 0)) {
1268 i7core_printk(KERN_ERR,
1269 "Couldn't enable "
1270 "dev %02x:%02x.%d PCI ID %04x:%04x\n",
66607706
MCC
1271 bus, pci_dev_descr[devno].dev, pci_dev_descr[devno].func,
1272 PCI_VENDOR_ID_INTEL, pci_dev_descr[devno].dev_id);
c77720b9
MCC
1273 return -ENODEV;
1274 }
ef708b53 1275
d4c27795
MCC
1276 debugf0("Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n",
1277 socket, bus, pci_dev_descr[devno].dev,
1278 pci_dev_descr[devno].func,
1279 PCI_VENDOR_ID_INTEL, pci_dev_descr[devno].dev_id);
8f331907 1280
c77720b9 1281 *prev = pdev;
ef708b53 1282
c77720b9
MCC
1283 return 0;
1284}
a0c36a1f 1285
f4742949 1286static int i7core_get_devices(void)
c77720b9
MCC
1287{
1288 int i;
1289 struct pci_dev *pdev = NULL;
ef708b53 1290
c77720b9
MCC
1291 for (i = 0; i < N_DEVS; i++) {
1292 pdev = NULL;
1293 do {
1294 if (i7core_get_onedevice(&pdev, i) < 0) {
1295 i7core_put_devices();
1296 return -ENODEV;
1297 }
1298 } while (pdev);
1299 }
66607706 1300
ef708b53 1301 return 0;
ef708b53
MCC
1302}
1303
f4742949
MCC
1304static int mci_bind_devs(struct mem_ctl_info *mci,
1305 struct i7core_dev *i7core_dev)
ef708b53
MCC
1306{
1307 struct i7core_pvt *pvt = mci->pvt_info;
1308 struct pci_dev *pdev;
f4742949 1309 int i, func, slot;
ef708b53 1310
f4742949
MCC
1311 /* Associates i7core_dev and mci for future usage */
1312 pvt->i7core_dev = i7core_dev;
1313 i7core_dev->mci = mci;
66607706 1314
f4742949
MCC
1315 pvt->is_registered = 0;
1316 for (i = 0; i < N_DEVS; i++) {
1317 pdev = i7core_dev->pdev[i];
1318 if (!pdev)
66607706
MCC
1319 continue;
1320
f4742949
MCC
1321 func = PCI_FUNC(pdev->devfn);
1322 slot = PCI_SLOT(pdev->devfn);
1323 if (slot == 3) {
1324 if (unlikely(func > MAX_MCR_FUNC))
1325 goto error;
1326 pvt->pci_mcr[func] = pdev;
1327 } else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
1328 if (unlikely(func > MAX_CHAN_FUNC))
ef708b53 1329 goto error;
f4742949
MCC
1330 pvt->pci_ch[slot - 4][func] = pdev;
1331 } else if (!slot && !func)
1332 pvt->pci_noncore = pdev;
1333 else
1334 goto error;
ef708b53 1335
f4742949
MCC
1336 debugf0("Associated fn %d.%d, dev = %p, socket %d\n",
1337 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1338 pdev, i7core_dev->socket);
14d2c083 1339
f4742949
MCC
1340 if (PCI_SLOT(pdev->devfn) == 3 &&
1341 PCI_FUNC(pdev->devfn) == 2)
1342 pvt->is_registered = 1;
a0c36a1f 1343 }
e9bd2e73 1344
a0c36a1f 1345 return 0;
ef708b53
MCC
1346
1347error:
1348 i7core_printk(KERN_ERR, "Device %d, function %d "
1349 "is out of the expected range\n",
1350 slot, func);
1351 return -EINVAL;
a0c36a1f
MCC
1352}
1353
442305b1
MCC
1354/****************************************************************************
1355 Error check routines
1356 ****************************************************************************/
f4742949 1357static void i7core_rdimm_update_csrow(struct mem_ctl_info *mci,
b4e8f0b6
MCC
1358 int chan, int dimm, int add)
1359{
1360 char *msg;
1361 struct i7core_pvt *pvt = mci->pvt_info;
f4742949 1362 int row = pvt->csrow_map[chan][dimm], i;
b4e8f0b6
MCC
1363
1364 for (i = 0; i < add; i++) {
1365 msg = kasprintf(GFP_KERNEL, "Corrected error "
f4742949
MCC
1366 "(Socket=%d channel=%d dimm=%d)",
1367 pvt->i7core_dev->socket, chan, dimm);
b4e8f0b6
MCC
1368
1369 edac_mc_handle_fbd_ce(mci, row, 0, msg);
1370 kfree (msg);
1371 }
1372}
1373
1374static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
f4742949 1375 int chan, int new0, int new1, int new2)
b4e8f0b6
MCC
1376{
1377 struct i7core_pvt *pvt = mci->pvt_info;
1378 int add0 = 0, add1 = 0, add2 = 0;
1379 /* Updates CE counters if it is not the first time here */
f4742949 1380 if (pvt->ce_count_available) {
b4e8f0b6
MCC
1381 /* Updates CE counters */
1382
f4742949
MCC
1383 add2 = new2 - pvt->rdimm_last_ce_count[chan][2];
1384 add1 = new1 - pvt->rdimm_last_ce_count[chan][1];
1385 add0 = new0 - pvt->rdimm_last_ce_count[chan][0];
b4e8f0b6
MCC
1386
1387 if (add2 < 0)
1388 add2 += 0x7fff;
f4742949 1389 pvt->rdimm_ce_count[chan][2] += add2;
b4e8f0b6
MCC
1390
1391 if (add1 < 0)
1392 add1 += 0x7fff;
f4742949 1393 pvt->rdimm_ce_count[chan][1] += add1;
b4e8f0b6
MCC
1394
1395 if (add0 < 0)
1396 add0 += 0x7fff;
f4742949 1397 pvt->rdimm_ce_count[chan][0] += add0;
b4e8f0b6 1398 } else
f4742949 1399 pvt->ce_count_available = 1;
b4e8f0b6
MCC
1400
1401 /* Store the new values */
f4742949
MCC
1402 pvt->rdimm_last_ce_count[chan][2] = new2;
1403 pvt->rdimm_last_ce_count[chan][1] = new1;
1404 pvt->rdimm_last_ce_count[chan][0] = new0;
b4e8f0b6
MCC
1405
1406 /*updated the edac core */
1407 if (add0 != 0)
f4742949 1408 i7core_rdimm_update_csrow(mci, chan, 0, add0);
b4e8f0b6 1409 if (add1 != 0)
f4742949 1410 i7core_rdimm_update_csrow(mci, chan, 1, add1);
b4e8f0b6 1411 if (add2 != 0)
f4742949 1412 i7core_rdimm_update_csrow(mci, chan, 2, add2);
b4e8f0b6
MCC
1413
1414}
1415
f4742949 1416static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci)
b4e8f0b6
MCC
1417{
1418 struct i7core_pvt *pvt = mci->pvt_info;
1419 u32 rcv[3][2];
1420 int i, new0, new1, new2;
1421
1422 /*Read DEV 3: FUN 2: MC_COR_ECC_CNT regs directly*/
f4742949 1423 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_0,
b4e8f0b6 1424 &rcv[0][0]);
f4742949 1425 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_1,
b4e8f0b6 1426 &rcv[0][1]);
f4742949 1427 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_2,
b4e8f0b6 1428 &rcv[1][0]);
f4742949 1429 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_3,
b4e8f0b6 1430 &rcv[1][1]);
f4742949 1431 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_4,
b4e8f0b6 1432 &rcv[2][0]);
f4742949 1433 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_5,
b4e8f0b6
MCC
1434 &rcv[2][1]);
1435 for (i = 0 ; i < 3; i++) {
1436 debugf3("MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
1437 (i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]);
1438 /*if the channel has 3 dimms*/
f4742949 1439 if (pvt->channel[i].dimms > 2) {
b4e8f0b6
MCC
1440 new0 = DIMM_BOT_COR_ERR(rcv[i][0]);
1441 new1 = DIMM_TOP_COR_ERR(rcv[i][0]);
1442 new2 = DIMM_BOT_COR_ERR(rcv[i][1]);
1443 } else {
1444 new0 = DIMM_TOP_COR_ERR(rcv[i][0]) +
1445 DIMM_BOT_COR_ERR(rcv[i][0]);
1446 new1 = DIMM_TOP_COR_ERR(rcv[i][1]) +
1447 DIMM_BOT_COR_ERR(rcv[i][1]);
1448 new2 = 0;
1449 }
1450
f4742949 1451 i7core_rdimm_update_ce_count(mci, i, new0, new1, new2);
b4e8f0b6
MCC
1452 }
1453}
442305b1
MCC
1454
1455/* This function is based on the device 3 function 4 registers as described on:
1456 * Intel Xeon Processor 5500 Series Datasheet Volume 2
1457 * http://www.intel.com/Assets/PDF/datasheet/321322.pdf
1458 * also available at:
1459 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
1460 */
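/*
 * Note on the delta computation below (also used by the RDIMM variant
 * above): the hardware counters are 15 bits wide (see the 0x7fff masks in
 * the DIMM*_COR_ERR() macros), so when a new reading is smaller than the
 * previous one the counter has wrapped and 0x7fff is added back.
 * Illustrative (hypothetical) values: last = 0x7ff0, new = 0x0010 gives
 * add = 0x0010 - 0x7ff0 + 0x7fff = 0x001f, i.e. 31 new corrected errors.
 */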
f4742949 1461static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci)
442305b1
MCC
1462{
1463 struct i7core_pvt *pvt = mci->pvt_info;
1464 u32 rcv1, rcv0;
1465 int new0, new1, new2;
1466
f4742949 1467 if (!pvt->pci_mcr[4]) {
b990538a 1468 debugf0("%s MCR registers not found\n", __func__);
442305b1
MCC
1469 return;
1470 }
1471
b4e8f0b6 1472 /* Corrected test errors */
f4742949
MCC
1473 pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV1, &rcv1);
1474 pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV0, &rcv0);
442305b1
MCC
1475
1476 /* Store the new values */
1477 new2 = DIMM2_COR_ERR(rcv1);
1478 new1 = DIMM1_COR_ERR(rcv0);
1479 new0 = DIMM0_COR_ERR(rcv0);
1480
442305b1 1481 /* Updates CE counters if it is not the first time here */
f4742949 1482 if (pvt->ce_count_available) {
442305b1
MCC
1483 /* Updates CE counters */
1484 int add0, add1, add2;
1485
f4742949
MCC
1486 add2 = new2 - pvt->udimm_last_ce_count[2];
1487 add1 = new1 - pvt->udimm_last_ce_count[1];
1488 add0 = new0 - pvt->udimm_last_ce_count[0];
442305b1
MCC
1489
1490 if (add2 < 0)
1491 add2 += 0x7fff;
f4742949 1492 pvt->udimm_ce_count[2] += add2;
442305b1
MCC
1493
1494 if (add1 < 0)
1495 add1 += 0x7fff;
f4742949 1496 pvt->udimm_ce_count[1] += add1;
442305b1
MCC
1497
1498 if (add0 < 0)
1499 add0 += 0x7fff;
f4742949 1500 pvt->udimm_ce_count[0] += add0;
b4e8f0b6
MCC
1501
1502 if (add0 | add1 | add2)
1503 i7core_printk(KERN_ERR, "New Corrected error(s): "
1504 "dimm0: +%d, dimm1: +%d, dimm2 +%d\n",
1505 add0, add1, add2);
442305b1 1506 } else
f4742949 1507 pvt->ce_count_available = 1;
442305b1
MCC
1508
1509 /* Store the new values */
f4742949
MCC
1510 pvt->udimm_last_ce_count[2] = new2;
1511 pvt->udimm_last_ce_count[1] = new1;
1512 pvt->udimm_last_ce_count[0] = new0;
442305b1
MCC
1513}
1514
8a2f118e
MCC
1515/*
1516 * According to tables E-11 and E-12 of chapter E.3.3 of the Intel 64 and IA-32
1517 * Architectures Software Developer’s Manual Volume 3B.
f237fcf2
MCC
1518 * Nehalem is defined as family 0x06, model 0x1a
1519 *
1520 * The MCA registers used here are the following ones:
8a2f118e 1521 * struct mce field MCA Register
f237fcf2
MCC
1522 * m->status MSR_IA32_MC8_STATUS
1523 * m->addr MSR_IA32_MC8_ADDR
1524 * m->misc MSR_IA32_MC8_MISC
8a2f118e
MCC
1525 * In the case of Nehalem, the error information is encoded in the .status
1526 * and .misc fields.
1527 */
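/*
 * Informal summary of the fields actually decoded by
 * i7core_mce_output_error() below (read from the code, not from the
 * datasheet):
 *	status[24:16]	memory error type (see the errnum switch)
 *	status[6:4]	operation type (read/write/scrub/...)
 *	status[52:38]	corrected error count
 *	misc[17:16]	DIMM
 *	misc[19:18]	channel
 *	misc[63:32]	syndrome
 */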
d5381642
MCC
1528static void i7core_mce_output_error(struct mem_ctl_info *mci,
1529 struct mce *m)
1530{
b4e8f0b6 1531 struct i7core_pvt *pvt = mci->pvt_info;
a639539f 1532 char *type, *optype, *err, *msg;
8a2f118e 1533 unsigned long error = m->status & 0x1ff0000l;
a639539f 1534 u32 optypenum = (m->status >> 4) & 0x07;
8a2f118e
MCC
1535 u32 core_err_cnt = (m->status >> 38) & 0x7fff;
1536 u32 dimm = (m->misc >> 16) & 0x3;
1537 u32 channel = (m->misc >> 18) & 0x3;
1538 u32 syndrome = m->misc >> 32;
1539 u32 errnum = find_first_bit(&error, 32);
b4e8f0b6 1540 int csrow;
8a2f118e 1541
c5d34528
MCC
1542 if (m->mcgstatus & 1)
1543 type = "FATAL";
1544 else
1545 type = "NON_FATAL";
1546
a639539f 1547 switch (optypenum) {
b990538a
MCC
1548 case 0:
1549 optype = "generic undef request";
1550 break;
1551 case 1:
1552 optype = "read error";
1553 break;
1554 case 2:
1555 optype = "write error";
1556 break;
1557 case 3:
1558 optype = "addr/cmd error";
1559 break;
1560 case 4:
1561 optype = "scrubbing error";
1562 break;
1563 default:
1564 optype = "reserved";
1565 break;
a639539f
MCC
1566 }
1567
8a2f118e
MCC
1568 switch (errnum) {
1569 case 16:
1570 err = "read ECC error";
1571 break;
1572 case 17:
1573 err = "RAS ECC error";
1574 break;
1575 case 18:
1576 err = "write parity error";
1577 break;
1578 case 19:
1579 err = "redundacy loss";
1580 break;
1581 case 20:
1582 err = "reserved";
1583 break;
1584 case 21:
1585 err = "memory range error";
1586 break;
1587 case 22:
1588 err = "RTID out of range";
1589 break;
1590 case 23:
1591 err = "address parity error";
1592 break;
1593 case 24:
1594 err = "byte enable parity error";
1595 break;
1596 default:
1597 err = "unknown";
d5381642 1598 }
d5381642 1599
f237fcf2 1600 /* FIXME: should convert addr into bank and rank information */
8a2f118e 1601 msg = kasprintf(GFP_ATOMIC,
f4742949 1602 "%s (addr = 0x%08llx, cpu=%d, Dimm=%d, Channel=%d, "
a639539f 1603 "syndrome=0x%08x, count=%d, Err=%08llx:%08llx (%s: %s))\n",
f4742949 1604 type, (long long) m->addr, m->cpu, dimm, channel,
a639539f
MCC
1605 syndrome, core_err_cnt, (long long)m->status,
1606 (long long)m->misc, optype, err);
8a2f118e
MCC
1607
1608 debugf0("%s", msg);
d5381642 1609
f4742949 1610 csrow = pvt->csrow_map[channel][dimm];
b4e8f0b6 1611
d5381642 1612 /* Call the helper to output message */
b4e8f0b6
MCC
1613 if (m->mcgstatus & 1)
1614 edac_mc_handle_fbd_ue(mci, csrow, 0,
1615 0 /* FIXME: should be channel here */, msg);
f4742949 1616 else if (!pvt->is_registered)
b4e8f0b6
MCC
1617 edac_mc_handle_fbd_ce(mci, csrow,
1618 0 /* FIXME: should be channel here */, msg);
8a2f118e
MCC
1619
1620 kfree(msg);
d5381642
MCC
1621}
1622
87d1d272
MCC
1623/*
1624 * i7core_check_error Retrieve and process errors reported by the
1625 * hardware. Called by the Core module.
1626 */
1627static void i7core_check_error(struct mem_ctl_info *mci)
1628{
d5381642
MCC
1629 struct i7core_pvt *pvt = mci->pvt_info;
1630 int i;
1631 unsigned count = 0;
1632 struct mce *m = NULL;
1633 unsigned long flags;
1634
d5381642
MCC
1635 /* Copy all mce errors into a temporary buffer */
1636 spin_lock_irqsave(&pvt->mce_lock, flags);
1637 if (pvt->mce_count) {
1638 m = kmalloc(sizeof(*m) * pvt->mce_count, GFP_ATOMIC);
f4742949 1639
d5381642
MCC
1640 if (m) {
1641 count = pvt->mce_count;
1642 memcpy(m, &pvt->mce_entry, sizeof(*m) * count);
1643 }
1644 pvt->mce_count = 0;
1645 }
f4742949 1646
d5381642
MCC
1647 spin_unlock_irqrestore(&pvt->mce_lock, flags);
1648
1649 /* process mcelog errors */
1650 for (i = 0; i < count; i++)
1651 i7core_mce_output_error(mci, &m[i]);
1652
1653 kfree(m);
1654
1655 /* check memory count errors */
f4742949
MCC
1656 if (!pvt->is_registered)
1657 i7core_udimm_check_mc_ecc_err(mci);
1658 else
1659 i7core_rdimm_check_mc_ecc_err(mci);
87d1d272
MCC
1660}
1661
d5381642
MCC
1662/*
1663 * i7core_mce_check_error Replicates mcelog routine to get errors
1664 * This routine simply queues mcelog errors, and
1665 * returns. The error itself should be handled later
1666 * by i7core_check_error.
1667 */
1668static int i7core_mce_check_error(void *priv, struct mce *mce)
1669{
c5d34528
MCC
1670 struct mem_ctl_info *mci = priv;
1671 struct i7core_pvt *pvt = mci->pvt_info;
d5381642
MCC
1672 unsigned long flags;
1673
8a2f118e
MCC
1674 /*
1675 * Just let mcelog handle it if the error is
1676 * outside the memory controller
1677 */
1678 if (((mce->status & 0xffff) >> 7) != 1)
1679 return 0;
1680
f237fcf2
MCC
1681 /* Bank 8 registers are the only ones that we know how to handle */
1682 if (mce->bank != 8)
1683 return 0;
1684
f4742949 1685 /* Only handle if it is the right mc controller */
6c6aa3af
MCC
1686 if (cpu_data(mce->cpu).phys_proc_id != pvt->i7core_dev->socket) {
1687 debugf0("mc%d: ignoring mce log for socket %d. "
1688 "Another mc should get it.\n",
1689 pvt->i7core_dev->socket,
1690 cpu_data(mce->cpu).phys_proc_id);
f4742949 1691 return 0;
6c6aa3af 1692 }
f4742949 1693
d5381642
MCC
1694 spin_lock_irqsave(&pvt->mce_lock, flags);
1695 if (pvt->mce_count < MCE_LOG_LEN) {
1696 memcpy(&pvt->mce_entry[pvt->mce_count], mce, sizeof(*mce));
1697 pvt->mce_count++;
1698 }
1699 spin_unlock_irqrestore(&pvt->mce_lock, flags);
1700
c5d34528
MCC
1701 /* Handle fatal errors immediately */
1702 if (mce->mcgstatus & 1)
1703 i7core_check_error(mci);
1704
d5381642 1705 /* Advise mcelog that the error was handled */
8a2f118e 1706 return 1;
d5381642
MCC
1707}
1708
f4742949
MCC
1709static int i7core_register_mci(struct i7core_dev *i7core_dev,
1710 int num_channels, int num_csrows)
a0c36a1f
MCC
1711{
1712 struct mem_ctl_info *mci;
1713 struct i7core_pvt *pvt;
ba6c5c62 1714 int csrow = 0;
f4742949 1715 int rc;
a0c36a1f 1716
a0c36a1f 1717 /* allocate a new MC control structure */
d4c27795
MCC
1718 mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels,
1719 i7core_dev->socket);
f4742949
MCC
1720 if (unlikely(!mci))
1721 return -ENOMEM;
a0c36a1f
MCC
1722
1723 debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci);
1724
f4742949
MCC
1725 /* record ptr to the generic device */
1726 mci->dev = &i7core_dev->pdev[0]->dev;
1727
a0c36a1f 1728 pvt = mci->pvt_info;
ef708b53 1729 memset(pvt, 0, sizeof(*pvt));
67166af4 1730
41fcb7fe
MCC
1731 /*
1732 * FIXME: how to handle RDDR3 at MCI level? It is possible to have
1733 * Mixed RDDR3/UDDR3 with Nehalem, provided that they are on different
1734 * memory channels
1735 */
1736 mci->mtype_cap = MEM_FLAG_DDR3;
a0c36a1f
MCC
1737 mci->edac_ctl_cap = EDAC_FLAG_NONE;
1738 mci->edac_cap = EDAC_FLAG_NONE;
1739 mci->mod_name = "i7core_edac.c";
1740 mci->mod_ver = I7CORE_REVISION;
f4742949
MCC
1741 mci->ctl_name = kasprintf(GFP_KERNEL, "i7 core #%d",
1742 i7core_dev->socket);
1743 mci->dev_name = pci_name(i7core_dev->pdev[0]);
a0c36a1f 1744 mci->ctl_page_to_phys = NULL;
194a40fe 1745 mci->mc_driver_sysfs_attributes = i7core_inj_attrs;
87d1d272
MCC
1746 /* Set the function pointer to an actual operation function */
1747 mci->edac_check = i7core_check_error;
8f331907 1748
ef708b53 1749 /* Store pci devices at mci for faster access */
f4742949 1750 rc = mci_bind_devs(mci, i7core_dev);
41fcb7fe 1751 if (unlikely(rc < 0))
f4742949 1752 goto fail;
ef708b53
MCC
1753
1754 /* Get dimm basic config */
f4742949 1755 get_dimm_config(mci, &csrow);
ef708b53 1756
a0c36a1f 1757 /* add this new MC control structure to EDAC's list of MCs */
b7c76151 1758 if (unlikely(edac_mc_add_mc(mci))) {
a0c36a1f
MCC
1759 debugf0("MC: " __FILE__
1760 ": %s(): failed edac_mc_add_mc()\n", __func__);
1761 /* FIXME: perhaps some code should go here that disables error
1762 * reporting if we just enabled it
1763 */
b7c76151
MCC
1764
1765 rc = -EINVAL;
f4742949 1766 goto fail;
a0c36a1f
MCC
1767 }
1768
1769 /* allocating generic PCI control info */
f4742949
MCC
1770 i7core_pci = edac_pci_create_generic_ctl(&i7core_dev->pdev[0]->dev,
1771 EDAC_MOD_STR);
41fcb7fe 1772 if (unlikely(!i7core_pci)) {
a0c36a1f
MCC
1773 printk(KERN_WARNING
1774 "%s(): Unable to create PCI control\n",
1775 __func__);
1776 printk(KERN_WARNING
1777 "%s(): PCI error report via EDAC not setup\n",
1778 __func__);
1779 }
1780
194a40fe 1781 /* Default error mask is any memory */
ef708b53 1782 pvt->inject.channel = 0;
194a40fe
MCC
1783 pvt->inject.dimm = -1;
1784 pvt->inject.rank = -1;
1785 pvt->inject.bank = -1;
1786 pvt->inject.page = -1;
1787 pvt->inject.col = -1;
1788
d5381642 1789 /* Register with edac_mce in order to receive memory errors */
c5d34528 1790 pvt->edac_mce.priv = mci;
d5381642
MCC
1791 pvt->edac_mce.check_error = i7core_mce_check_error;
1792 spin_lock_init(&pvt->mce_lock);
1793
1794 rc = edac_mce_register(&pvt->edac_mce);
b990538a 1795 if (unlikely(rc < 0)) {
d5381642
MCC
1796 debugf0("MC: " __FILE__
1797 ": %s(): failed edac_mce_register()\n", __func__);
f4742949
MCC
1798 }
1799 return 0;
1800fail:
1801 edac_mc_free(mci);
1802 return rc;
1803}
1804
1805/*
1806 * i7core_probe Probe for ONE instance of device to see if it is
1807 * present.
1808 * return:
1809 * 0 for FOUND a device
1810 * < 0 for error code
1811 */
1812static int __devinit i7core_probe(struct pci_dev *pdev,
1813 const struct pci_device_id *id)
1814{
1815 int dev_idx = id->driver_data;
1816 int rc;
1817 struct i7core_dev *i7core_dev;
1818
1819 /*
d4c27795 1820 * All memory controllers are allocated at the first pass.
f4742949
MCC
1821 */
1822 if (unlikely(dev_idx >= 1))
1823 return -EINVAL;
1824
1825 /* get the pci devices we want to reserve for our use */
1826 mutex_lock(&i7core_edac_lock);
1827 rc = i7core_get_devices();
1828 if (unlikely(rc < 0))
1829 goto fail0;
1830
1831 list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
1832 int channels;
1833 int csrows;
1834
1835 /* Check the number of active and not disabled channels */
1836 rc = i7core_get_active_channels(i7core_dev->socket,
1837 &channels, &csrows);
1838 if (unlikely(rc < 0))
1839 goto fail1;
1840
d4c27795
MCC
1841 rc = i7core_register_mci(i7core_dev, channels, csrows);
1842 if (unlikely(rc < 0))
1843 goto fail1;
d5381642
MCC
1844 }
1845
ef708b53 1846 i7core_printk(KERN_INFO, "Driver loaded.\n");
8f331907 1847
66607706 1848 mutex_unlock(&i7core_edac_lock);
a0c36a1f
MCC
1849 return 0;
1850
66607706 1851fail1:
b7c76151 1852 i7core_put_devices();
66607706
MCC
1853fail0:
1854 mutex_unlock(&i7core_edac_lock);
b7c76151 1855 return rc;
a0c36a1f
MCC
1856}
1857
1858/*
1859 * i7core_remove destructor for one instance of device
1860 *
1861 */
1862static void __devexit i7core_remove(struct pci_dev *pdev)
1863{
1864 struct mem_ctl_info *mci;
d5381642 1865 struct i7core_pvt *pvt;
a0c36a1f
MCC
1866
1867 debugf0(__FILE__ ": %s()\n", __func__);
1868
1869 if (i7core_pci)
1870 edac_pci_release_generic_ctl(i7core_pci);
1871
87d1d272 1872
d5381642 1873 mci = edac_mc_del_mc(&pdev->dev);
a0c36a1f
MCC
1874 if (!mci)
1875 return;
1876
d5381642
MCC
1877 /* Unregister from edac_mce so we stop receiving memory errors */
1878 pvt = mci->pvt_info;
1879 edac_mce_unregister(&pvt->edac_mce);
1880
a0c36a1f 1881 /* retrieve references to resources, and free those resources */
66607706 1882 mutex_lock(&i7core_edac_lock);
d4c27795
MCC
1883
1884 /* FIXME: This should put the devices only for this mci!!! */
8f331907 1885 i7core_put_devices();
66607706 1886 mutex_unlock(&i7core_edac_lock);
a0c36a1f 1887
f4742949 1888 kfree(mci->ctl_name);
a0c36a1f
MCC
1889 edac_mc_free(mci);
1890}
1891
a0c36a1f
MCC
1892MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);
1893
1894/*
1895 * i7core_driver pci_driver structure for this module
1896 *
1897 */
1898static struct pci_driver i7core_driver = {
1899 .name = "i7core_edac",
1900 .probe = i7core_probe,
1901 .remove = __devexit_p(i7core_remove),
1902 .id_table = i7core_pci_tbl,
1903};
1904
1905/*
1906 * i7core_init Module entry function
1907 * Try to initialize this module for its devices
1908 */
1909static int __init i7core_init(void)
1910{
1911 int pci_rc;
1912
1913 debugf2("MC: " __FILE__ ": %s()\n", __func__);
1914
1915 /* Ensure that the OPSTATE is set correctly for POLL or NMI */
1916 opstate_init();
1917
bc2d7245
KM
1918 i7core_xeon_pci_fixup();
1919
a0c36a1f
MCC
1920 pci_rc = pci_register_driver(&i7core_driver);
1921
3ef288a9
MCC
1922 if (pci_rc >= 0)
1923 return 0;
1924
1925 i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",
1926 pci_rc);
1927
1928 return pci_rc;
a0c36a1f
MCC
1929}
1930
1931/*
1932 * i7core_exit() Module exit function
1933 * Unregister the driver
1934 */
1935static void __exit i7core_exit(void)
1936{
1937 debugf2("MC: " __FILE__ ": %s()\n", __func__);
1938 pci_unregister_driver(&i7core_driver);
1939}
1940
1941module_init(i7core_init);
1942module_exit(i7core_exit);
1943
1944MODULE_LICENSE("GPL");
1945MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
1946MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
1947MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
1948 I7CORE_REVISION);
1949
1950module_param(edac_op_state, int, 0444);
1951MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");