]> bbs.cooldavid.org Git - net-next-2.6.git/blame - drivers/edac/i7core_edac.c
i7core_edac: We need to use list_for_each_entry_safe to avoid errors
[net-next-2.6.git] / drivers / edac / i7core_edac.c
CommitLineData
a0c36a1f
MCC
1/* Intel 7 core Memory Controller kernel module (Nehalem)
2 *
3 * This file may be distributed under the terms of the
4 * GNU General Public License version 2 only.
5 *
6 * Copyright (c) 2009 by:
7 * Mauro Carvalho Chehab <mchehab@redhat.com>
8 *
9 * Red Hat Inc. http://www.redhat.com
10 *
11 * Forked and adapted from the i5400_edac driver
12 *
13 * Based on the following public Intel datasheets:
14 * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
15 * Datasheet, Volume 2:
16 * http://download.intel.com/design/processor/datashts/320835.pdf
17 * Intel Xeon Processor 5500 Series Datasheet Volume 2
18 * http://www.intel.com/Assets/PDF/datasheet/321322.pdf
19 * also available at:
20 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
21 */
22
a0c36a1f
MCC
23#include <linux/module.h>
24#include <linux/init.h>
25#include <linux/pci.h>
26#include <linux/pci_ids.h>
27#include <linux/slab.h>
28#include <linux/edac.h>
29#include <linux/mmzone.h>
d5381642
MCC
30#include <linux/edac_mce.h>
31#include <linux/spinlock.h>
f4742949 32#include <linux/smp.h>
14d2c083 33#include <asm/processor.h>
a0c36a1f
MCC
34
35#include "edac_core.h"
36
f4742949
MCC
37/*
38 * This is used for Nehalem-EP and Nehalem-EX devices, where the non-core
39 * registers start at bus 255, and are not reported by BIOS.
40 * We currently find devices with only 2 sockets. In order to support more QPI
41 * Quick Path Interconnect, just increment this number.
42 */
43#define MAX_SOCKET_BUSES 2
44
45
a0c36a1f
MCC
46/*
47 * Alter this version for the module when modifications are made
48 */
49#define I7CORE_REVISION " Ver: 1.0.0 " __DATE__
50#define EDAC_MOD_STR "i7core_edac"
51
a0c36a1f
MCC
52/*
53 * Debug macros
54 */
55#define i7core_printk(level, fmt, arg...) \
56 edac_printk(level, "i7core", fmt, ##arg)
57
58#define i7core_mc_printk(mci, level, fmt, arg...) \
59 edac_mc_chipset_printk(mci, level, "i7core", fmt, ##arg)
60
61/*
62 * i7core Memory Controller Registers
63 */
64
e9bd2e73
MCC
65 /* OFFSETS for Device 0 Function 0 */
66
67#define MC_CFG_CONTROL 0x90
68
a0c36a1f
MCC
69 /* OFFSETS for Device 3 Function 0 */
70
71#define MC_CONTROL 0x48
72#define MC_STATUS 0x4c
73#define MC_MAX_DOD 0x64
74
442305b1
MCC
75/*
76 * OFFSETS for Device 3 Function 4, as inicated on Xeon 5500 datasheet:
77 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
78 */
79
80#define MC_TEST_ERR_RCV1 0x60
81 #define DIMM2_COR_ERR(r) ((r) & 0x7fff)
82
83#define MC_TEST_ERR_RCV0 0x64
84 #define DIMM1_COR_ERR(r) (((r) >> 16) & 0x7fff)
85 #define DIMM0_COR_ERR(r) ((r) & 0x7fff)
86
b4e8f0b6
MCC
87/* OFFSETS for Device 3 Function 2, as inicated on Xeon 5500 datasheet */
88#define MC_COR_ECC_CNT_0 0x80
89#define MC_COR_ECC_CNT_1 0x84
90#define MC_COR_ECC_CNT_2 0x88
91#define MC_COR_ECC_CNT_3 0x8c
92#define MC_COR_ECC_CNT_4 0x90
93#define MC_COR_ECC_CNT_5 0x94
94
95#define DIMM_TOP_COR_ERR(r) (((r) >> 16) & 0x7fff)
96#define DIMM_BOT_COR_ERR(r) ((r) & 0x7fff)
97
98
a0c36a1f
MCC
99 /* OFFSETS for Devices 4,5 and 6 Function 0 */
100
0b2b7b7e
MCC
101#define MC_CHANNEL_DIMM_INIT_PARAMS 0x58
102 #define THREE_DIMMS_PRESENT (1 << 24)
103 #define SINGLE_QUAD_RANK_PRESENT (1 << 23)
104 #define QUAD_RANK_PRESENT (1 << 22)
105 #define REGISTERED_DIMM (1 << 15)
106
f122a892
MCC
107#define MC_CHANNEL_MAPPER 0x60
108 #define RDLCH(r, ch) ((((r) >> (3 + (ch * 6))) & 0x07) - 1)
109 #define WRLCH(r, ch) ((((r) >> (ch * 6)) & 0x07) - 1)
110
0b2b7b7e
MCC
111#define MC_CHANNEL_RANK_PRESENT 0x7c
112 #define RANK_PRESENT_MASK 0xffff
113
a0c36a1f 114#define MC_CHANNEL_ADDR_MATCH 0xf0
194a40fe
MCC
115#define MC_CHANNEL_ERROR_MASK 0xf8
116#define MC_CHANNEL_ERROR_INJECT 0xfc
117 #define INJECT_ADDR_PARITY 0x10
118 #define INJECT_ECC 0x08
119 #define MASK_CACHELINE 0x06
120 #define MASK_FULL_CACHELINE 0x06
121 #define MASK_MSB32_CACHELINE 0x04
122 #define MASK_LSB32_CACHELINE 0x02
123 #define NO_MASK_CACHELINE 0x00
124 #define REPEAT_EN 0x01
a0c36a1f 125
0b2b7b7e 126 /* OFFSETS for Devices 4,5 and 6 Function 1 */
b990538a 127
0b2b7b7e
MCC
128#define MC_DOD_CH_DIMM0 0x48
129#define MC_DOD_CH_DIMM1 0x4c
130#define MC_DOD_CH_DIMM2 0x50
131 #define RANKOFFSET_MASK ((1 << 12) | (1 << 11) | (1 << 10))
132 #define RANKOFFSET(x) ((x & RANKOFFSET_MASK) >> 10)
133 #define DIMM_PRESENT_MASK (1 << 9)
134 #define DIMM_PRESENT(x) (((x) & DIMM_PRESENT_MASK) >> 9)
854d3349
MCC
135 #define MC_DOD_NUMBANK_MASK ((1 << 8) | (1 << 7))
136 #define MC_DOD_NUMBANK(x) (((x) & MC_DOD_NUMBANK_MASK) >> 7)
137 #define MC_DOD_NUMRANK_MASK ((1 << 6) | (1 << 5))
138 #define MC_DOD_NUMRANK(x) (((x) & MC_DOD_NUMRANK_MASK) >> 5)
41fcb7fe 139 #define MC_DOD_NUMROW_MASK ((1 << 4) | (1 << 3) | (1 << 2))
5566cb7c 140 #define MC_DOD_NUMROW(x) (((x) & MC_DOD_NUMROW_MASK) >> 2)
854d3349
MCC
141 #define MC_DOD_NUMCOL_MASK 3
142 #define MC_DOD_NUMCOL(x) ((x) & MC_DOD_NUMCOL_MASK)
0b2b7b7e 143
f122a892
MCC
144#define MC_RANK_PRESENT 0x7c
145
0b2b7b7e
MCC
146#define MC_SAG_CH_0 0x80
147#define MC_SAG_CH_1 0x84
148#define MC_SAG_CH_2 0x88
149#define MC_SAG_CH_3 0x8c
150#define MC_SAG_CH_4 0x90
151#define MC_SAG_CH_5 0x94
152#define MC_SAG_CH_6 0x98
153#define MC_SAG_CH_7 0x9c
154
155#define MC_RIR_LIMIT_CH_0 0x40
156#define MC_RIR_LIMIT_CH_1 0x44
157#define MC_RIR_LIMIT_CH_2 0x48
158#define MC_RIR_LIMIT_CH_3 0x4C
159#define MC_RIR_LIMIT_CH_4 0x50
160#define MC_RIR_LIMIT_CH_5 0x54
161#define MC_RIR_LIMIT_CH_6 0x58
162#define MC_RIR_LIMIT_CH_7 0x5C
163#define MC_RIR_LIMIT_MASK ((1 << 10) - 1)
164
165#define MC_RIR_WAY_CH 0x80
166 #define MC_RIR_WAY_OFFSET_MASK (((1 << 14) - 1) & ~0x7)
167 #define MC_RIR_WAY_RANK_MASK 0x7
168
a0c36a1f
MCC
169/*
170 * i7core structs
171 */
172
173#define NUM_CHANS 3
442305b1
MCC
174#define MAX_DIMMS 3 /* Max DIMMS per channel */
175#define MAX_MCR_FUNC 4
176#define MAX_CHAN_FUNC 3
a0c36a1f
MCC
177
178struct i7core_info {
179 u32 mc_control;
180 u32 mc_status;
181 u32 max_dod;
f122a892 182 u32 ch_map;
a0c36a1f
MCC
183};
184
194a40fe
MCC
185
186struct i7core_inject {
187 int enable;
188
189 u32 section;
190 u32 type;
191 u32 eccmask;
192
193 /* Error address mask */
194 int channel, dimm, rank, bank, page, col;
195};
196
0b2b7b7e 197struct i7core_channel {
442305b1
MCC
198 u32 ranks;
199 u32 dimms;
0b2b7b7e
MCC
200};
201
8f331907 202struct pci_id_descr {
66607706
MCC
203 int dev;
204 int func;
205 int dev_id;
8f331907
MCC
206};
207
f4742949
MCC
208struct i7core_dev {
209 struct list_head list;
210 u8 socket;
211 struct pci_dev **pdev;
212 struct mem_ctl_info *mci;
213};
214
a0c36a1f 215struct i7core_pvt {
f4742949
MCC
216 struct pci_dev *pci_noncore;
217 struct pci_dev *pci_mcr[MAX_MCR_FUNC + 1];
218 struct pci_dev *pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1];
219
220 struct i7core_dev *i7core_dev;
67166af4 221
a0c36a1f 222 struct i7core_info info;
194a40fe 223 struct i7core_inject inject;
f4742949 224 struct i7core_channel channel[NUM_CHANS];
67166af4 225
f4742949 226 int channels; /* Number of active channels */
442305b1 227
f4742949
MCC
228 int ce_count_available;
229 int csrow_map[NUM_CHANS][MAX_DIMMS];
b4e8f0b6
MCC
230
231 /* ECC corrected errors counts per udimm */
f4742949
MCC
232 unsigned long udimm_ce_count[MAX_DIMMS];
233 int udimm_last_ce_count[MAX_DIMMS];
b4e8f0b6 234 /* ECC corrected errors counts per rdimm */
f4742949
MCC
235 unsigned long rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
236 int rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];
442305b1 237
f4742949 238 unsigned int is_registered;
14d2c083 239
d5381642
MCC
240 /* mcelog glue */
241 struct edac_mce edac_mce;
242 struct mce mce_entry[MCE_LOG_LEN];
243 unsigned mce_count;
244 spinlock_t mce_lock;
a0c36a1f
MCC
245};
246
66607706
MCC
247/* Static vars */
248static LIST_HEAD(i7core_edac_list);
249static DEFINE_MUTEX(i7core_edac_lock);
a0c36a1f 250
8f331907
MCC
251#define PCI_DESCR(device, function, device_id) \
252 .dev = (device), \
253 .func = (function), \
254 .dev_id = (device_id)
255
66607706 256struct pci_id_descr pci_dev_descr[] = {
8f331907
MCC
257 /* Memory controller */
258 { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR) },
259 { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD) },
b990538a 260 { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS) }, /* if RDIMM */
8f331907
MCC
261 { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },
262
263 /* Channel 0 */
264 { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) },
265 { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) },
266 { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) },
267 { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC) },
268
269 /* Channel 1 */
270 { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) },
271 { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) },
272 { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) },
273 { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC) },
274
275 /* Channel 2 */
276 { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) },
277 { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
278 { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
279 { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC) },
310cbb72
MCC
280
281 /* Generic Non-core registers */
282 /*
283 * This is the PCI device on i7core and on Xeon 35xx (8086:2c41)
284 * On Xeon 55xx, however, it has a different id (8086:2c40). So,
285 * the probing code needs to test for the other address in case of
286 * failure of this one
287 */
288 { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NOCORE) },
289
a0c36a1f 290};
66607706 291#define N_DEVS ARRAY_SIZE(pci_dev_descr)
8f331907
MCC
292
293/*
294 * pci_device_id table for which devices we are looking for
8f331907
MCC
295 */
296static const struct pci_device_id i7core_pci_tbl[] __devinitdata = {
d1fd4fb6 297 {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
8f331907
MCC
298 {0,} /* 0 terminated list. */
299};
300
a0c36a1f
MCC
301static struct edac_pci_ctl_info *i7core_pci;
302
303/****************************************************************************
304 Anciliary status routines
305 ****************************************************************************/
306
307 /* MC_CONTROL bits */
ef708b53
MCC
308#define CH_ACTIVE(pvt, ch) ((pvt)->info.mc_control & (1 << (8 + ch)))
309#define ECCx8(pvt) ((pvt)->info.mc_control & (1 << 1))
a0c36a1f
MCC
310
311 /* MC_STATUS bits */
61053fde 312#define ECC_ENABLED(pvt) ((pvt)->info.mc_status & (1 << 4))
ef708b53 313#define CH_DISABLED(pvt, ch) ((pvt)->info.mc_status & (1 << ch))
a0c36a1f
MCC
314
315 /* MC_MAX_DOD read functions */
854d3349 316static inline int numdimms(u32 dimms)
a0c36a1f 317{
854d3349 318 return (dimms & 0x3) + 1;
a0c36a1f
MCC
319}
320
854d3349 321static inline int numrank(u32 rank)
a0c36a1f
MCC
322{
323 static int ranks[4] = { 1, 2, 4, -EINVAL };
324
854d3349 325 return ranks[rank & 0x3];
a0c36a1f
MCC
326}
327
854d3349 328static inline int numbank(u32 bank)
a0c36a1f
MCC
329{
330 static int banks[4] = { 4, 8, 16, -EINVAL };
331
854d3349 332 return banks[bank & 0x3];
a0c36a1f
MCC
333}
334
854d3349 335static inline int numrow(u32 row)
a0c36a1f
MCC
336{
337 static int rows[8] = {
338 1 << 12, 1 << 13, 1 << 14, 1 << 15,
339 1 << 16, -EINVAL, -EINVAL, -EINVAL,
340 };
341
854d3349 342 return rows[row & 0x7];
a0c36a1f
MCC
343}
344
854d3349 345static inline int numcol(u32 col)
a0c36a1f
MCC
346{
347 static int cols[8] = {
348 1 << 10, 1 << 11, 1 << 12, -EINVAL,
349 };
854d3349 350 return cols[col & 0x3];
a0c36a1f
MCC
351}
352
f4742949 353static struct i7core_dev *get_i7core_dev(u8 socket)
66607706
MCC
354{
355 struct i7core_dev *i7core_dev;
356
357 list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
358 if (i7core_dev->socket == socket)
359 return i7core_dev;
360 }
361
362 return NULL;
363}
364
a0c36a1f
MCC
365/****************************************************************************
366 Memory check routines
367 ****************************************************************************/
67166af4
MCC
368static struct pci_dev *get_pdev_slot_func(u8 socket, unsigned slot,
369 unsigned func)
ef708b53 370{
66607706 371 struct i7core_dev *i7core_dev = get_i7core_dev(socket);
ef708b53 372 int i;
ef708b53 373
66607706
MCC
374 if (!i7core_dev)
375 return NULL;
376
ef708b53 377 for (i = 0; i < N_DEVS; i++) {
66607706 378 if (!i7core_dev->pdev[i])
ef708b53
MCC
379 continue;
380
66607706
MCC
381 if (PCI_SLOT(i7core_dev->pdev[i]->devfn) == slot &&
382 PCI_FUNC(i7core_dev->pdev[i]->devfn) == func) {
383 return i7core_dev->pdev[i];
ef708b53
MCC
384 }
385 }
386
eb94fc40
MCC
387 return NULL;
388}
389
ec6df24c
MCC
390/**
391 * i7core_get_active_channels() - gets the number of channels and csrows
392 * @socket: Quick Path Interconnect socket
393 * @channels: Number of channels that will be returned
394 * @csrows: Number of csrows found
395 *
396 * Since EDAC core needs to know in advance the number of available channels
397 * and csrows, in order to allocate memory for csrows/channels, it is needed
398 * to run two similar steps. At the first step, implemented on this function,
399 * it checks the number of csrows/channels present at one socket.
400 * this is used in order to properly allocate the size of mci components.
401 *
402 * It should be noticed that none of the current available datasheets explain
403 * or even mention how csrows are seen by the memory controller. So, we need
404 * to add a fake description for csrows.
405 * So, this driver is attributing one DIMM memory for one csrow.
406 */
67166af4
MCC
407static int i7core_get_active_channels(u8 socket, unsigned *channels,
408 unsigned *csrows)
eb94fc40
MCC
409{
410 struct pci_dev *pdev = NULL;
411 int i, j;
412 u32 status, control;
413
414 *channels = 0;
415 *csrows = 0;
416
67166af4 417 pdev = get_pdev_slot_func(socket, 3, 0);
b7c76151 418 if (!pdev) {
67166af4
MCC
419 i7core_printk(KERN_ERR, "Couldn't find socket %d fn 3.0!!!\n",
420 socket);
ef708b53 421 return -ENODEV;
b7c76151 422 }
ef708b53
MCC
423
424 /* Device 3 function 0 reads */
425 pci_read_config_dword(pdev, MC_STATUS, &status);
426 pci_read_config_dword(pdev, MC_CONTROL, &control);
427
428 for (i = 0; i < NUM_CHANS; i++) {
eb94fc40 429 u32 dimm_dod[3];
ef708b53
MCC
430 /* Check if the channel is active */
431 if (!(control & (1 << (8 + i))))
432 continue;
433
434 /* Check if the channel is disabled */
41fcb7fe 435 if (status & (1 << i))
ef708b53 436 continue;
ef708b53 437
67166af4 438 pdev = get_pdev_slot_func(socket, i + 4, 1);
eb94fc40 439 if (!pdev) {
67166af4
MCC
440 i7core_printk(KERN_ERR, "Couldn't find socket %d "
441 "fn %d.%d!!!\n",
442 socket, i + 4, 1);
eb94fc40
MCC
443 return -ENODEV;
444 }
445 /* Devices 4-6 function 1 */
446 pci_read_config_dword(pdev,
447 MC_DOD_CH_DIMM0, &dimm_dod[0]);
448 pci_read_config_dword(pdev,
449 MC_DOD_CH_DIMM1, &dimm_dod[1]);
450 pci_read_config_dword(pdev,
451 MC_DOD_CH_DIMM2, &dimm_dod[2]);
452
ef708b53 453 (*channels)++;
eb94fc40
MCC
454
455 for (j = 0; j < 3; j++) {
456 if (!DIMM_PRESENT(dimm_dod[j]))
457 continue;
458 (*csrows)++;
459 }
ef708b53
MCC
460 }
461
c77720b9 462 debugf0("Number of active channels on socket %d: %d\n",
67166af4 463 socket, *channels);
1c6fed80 464
ef708b53
MCC
465 return 0;
466}
467
f4742949 468static int get_dimm_config(struct mem_ctl_info *mci, int *csrow)
a0c36a1f
MCC
469{
470 struct i7core_pvt *pvt = mci->pvt_info;
1c6fed80 471 struct csrow_info *csr;
854d3349 472 struct pci_dev *pdev;
ba6c5c62 473 int i, j;
f4742949 474 u8 socket = pvt->i7core_dev->socket;
5566cb7c 475 unsigned long last_page = 0;
1c6fed80 476 enum edac_type mode;
854d3349 477 enum mem_type mtype;
a0c36a1f 478
854d3349 479 /* Get data from the MC register, function 0 */
f4742949 480 pdev = pvt->pci_mcr[0];
7dd6953c 481 if (!pdev)
8f331907
MCC
482 return -ENODEV;
483
f122a892 484 /* Device 3 function 0 reads */
7dd6953c
MCC
485 pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
486 pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
487 pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
488 pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);
f122a892 489
17cb7b0c
MCC
490 debugf0("QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
491 socket, pvt->info.mc_control, pvt->info.mc_status,
f122a892 492 pvt->info.max_dod, pvt->info.ch_map);
a0c36a1f 493
1c6fed80 494 if (ECC_ENABLED(pvt)) {
41fcb7fe 495 debugf0("ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
1c6fed80
MCC
496 if (ECCx8(pvt))
497 mode = EDAC_S8ECD8ED;
498 else
499 mode = EDAC_S4ECD4ED;
500 } else {
a0c36a1f 501 debugf0("ECC disabled\n");
1c6fed80
MCC
502 mode = EDAC_NONE;
503 }
a0c36a1f
MCC
504
505 /* FIXME: need to handle the error codes */
17cb7b0c
MCC
506 debugf0("DOD Max limits: DIMMS: %d, %d-ranked, %d-banked "
507 "x%x x 0x%x\n",
854d3349
MCC
508 numdimms(pvt->info.max_dod),
509 numrank(pvt->info.max_dod >> 2),
276b824c 510 numbank(pvt->info.max_dod >> 4),
854d3349
MCC
511 numrow(pvt->info.max_dod >> 6),
512 numcol(pvt->info.max_dod >> 9));
a0c36a1f 513
0b2b7b7e 514 for (i = 0; i < NUM_CHANS; i++) {
854d3349 515 u32 data, dimm_dod[3], value[8];
0b2b7b7e
MCC
516
517 if (!CH_ACTIVE(pvt, i)) {
518 debugf0("Channel %i is not active\n", i);
519 continue;
520 }
521 if (CH_DISABLED(pvt, i)) {
522 debugf0("Channel %i is disabled\n", i);
523 continue;
524 }
525
f122a892 526 /* Devices 4-6 function 0 */
f4742949 527 pci_read_config_dword(pvt->pci_ch[i][0],
0b2b7b7e
MCC
528 MC_CHANNEL_DIMM_INIT_PARAMS, &data);
529
f4742949 530 pvt->channel[i].ranks = (data & QUAD_RANK_PRESENT) ?
67166af4 531 4 : 2;
0b2b7b7e 532
854d3349
MCC
533 if (data & REGISTERED_DIMM)
534 mtype = MEM_RDDR3;
14d2c083 535 else
854d3349
MCC
536 mtype = MEM_DDR3;
537#if 0
0b2b7b7e
MCC
538 if (data & THREE_DIMMS_PRESENT)
539 pvt->channel[i].dimms = 3;
540 else if (data & SINGLE_QUAD_RANK_PRESENT)
541 pvt->channel[i].dimms = 1;
542 else
543 pvt->channel[i].dimms = 2;
854d3349
MCC
544#endif
545
546 /* Devices 4-6 function 1 */
f4742949 547 pci_read_config_dword(pvt->pci_ch[i][1],
854d3349 548 MC_DOD_CH_DIMM0, &dimm_dod[0]);
f4742949 549 pci_read_config_dword(pvt->pci_ch[i][1],
854d3349 550 MC_DOD_CH_DIMM1, &dimm_dod[1]);
f4742949 551 pci_read_config_dword(pvt->pci_ch[i][1],
854d3349 552 MC_DOD_CH_DIMM2, &dimm_dod[2]);
0b2b7b7e 553
1c6fed80 554 debugf0("Ch%d phy rd%d, wr%d (0x%08x): "
854d3349 555 "%d ranks, %cDIMMs\n",
1c6fed80
MCC
556 i,
557 RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
558 data,
f4742949 559 pvt->channel[i].ranks,
41fcb7fe 560 (data & REGISTERED_DIMM) ? 'R' : 'U');
854d3349
MCC
561
562 for (j = 0; j < 3; j++) {
563 u32 banks, ranks, rows, cols;
5566cb7c 564 u32 size, npages;
854d3349
MCC
565
566 if (!DIMM_PRESENT(dimm_dod[j]))
567 continue;
568
569 banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
570 ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
571 rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
572 cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));
573
5566cb7c
MCC
574 /* DDR3 has 8 I/O banks */
575 size = (rows * cols * banks * ranks) >> (20 - 3);
576
f4742949 577 pvt->channel[i].dimms++;
854d3349 578
17cb7b0c
MCC
579 debugf0("\tdimm %d %d Mb offset: %x, "
580 "bank: %d, rank: %d, row: %#x, col: %#x\n",
581 j, size,
854d3349
MCC
582 RANKOFFSET(dimm_dod[j]),
583 banks, ranks, rows, cols);
584
eb94fc40
MCC
585#if PAGE_SHIFT > 20
586 npages = size >> (PAGE_SHIFT - 20);
587#else
588 npages = size << (20 - PAGE_SHIFT);
589#endif
5566cb7c 590
ba6c5c62 591 csr = &mci->csrows[*csrow];
5566cb7c
MCC
592 csr->first_page = last_page + 1;
593 last_page += npages;
594 csr->last_page = last_page;
595 csr->nr_pages = npages;
596
854d3349 597 csr->page_mask = 0;
eb94fc40 598 csr->grain = 8;
ba6c5c62 599 csr->csrow_idx = *csrow;
eb94fc40
MCC
600 csr->nr_channels = 1;
601
602 csr->channels[0].chan_idx = i;
603 csr->channels[0].ce_count = 0;
854d3349 604
f4742949 605 pvt->csrow_map[i][j] = *csrow;
b4e8f0b6 606
854d3349
MCC
607 switch (banks) {
608 case 4:
609 csr->dtype = DEV_X4;
610 break;
611 case 8:
612 csr->dtype = DEV_X8;
613 break;
614 case 16:
615 csr->dtype = DEV_X16;
616 break;
617 default:
618 csr->dtype = DEV_UNKNOWN;
619 }
620
621 csr->edac_mode = mode;
622 csr->mtype = mtype;
623
ba6c5c62 624 (*csrow)++;
854d3349 625 }
1c6fed80 626
854d3349
MCC
627 pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
628 pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
629 pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
630 pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
631 pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
632 pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
633 pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
634 pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
17cb7b0c 635 debugf1("\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
854d3349 636 for (j = 0; j < 8; j++)
17cb7b0c 637 debugf1("\t\t%#x\t%#x\t%#x\n",
854d3349
MCC
638 (value[j] >> 27) & 0x1,
639 (value[j] >> 24) & 0x7,
640 (value[j] && ((1 << 24) - 1)));
0b2b7b7e
MCC
641 }
642
a0c36a1f
MCC
643 return 0;
644}
645
194a40fe
MCC
646/****************************************************************************
647 Error insertion routines
648 ****************************************************************************/
649
650/* The i7core has independent error injection features per channel.
651 However, to have a simpler code, we don't allow enabling error injection
652 on more than one channel.
653 Also, since a change at an inject parameter will be applied only at enable,
654 we're disabling error injection on all write calls to the sysfs nodes that
655 controls the error code injection.
656 */
8f331907 657static int disable_inject(struct mem_ctl_info *mci)
194a40fe
MCC
658{
659 struct i7core_pvt *pvt = mci->pvt_info;
660
661 pvt->inject.enable = 0;
662
f4742949 663 if (!pvt->pci_ch[pvt->inject.channel][0])
8f331907
MCC
664 return -ENODEV;
665
f4742949 666 pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
4157d9f5 667 MC_CHANNEL_ERROR_INJECT, 0);
8f331907
MCC
668
669 return 0;
194a40fe
MCC
670}
671
672/*
673 * i7core inject inject.section
674 *
675 * accept and store error injection inject.section value
676 * bit 0 - refers to the lower 32-byte half cacheline
677 * bit 1 - refers to the upper 32-byte half cacheline
678 */
679static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci,
680 const char *data, size_t count)
681{
682 struct i7core_pvt *pvt = mci->pvt_info;
683 unsigned long value;
684 int rc;
685
686 if (pvt->inject.enable)
41fcb7fe 687 disable_inject(mci);
194a40fe
MCC
688
689 rc = strict_strtoul(data, 10, &value);
690 if ((rc < 0) || (value > 3))
2068def5 691 return -EIO;
194a40fe
MCC
692
693 pvt->inject.section = (u32) value;
694 return count;
695}
696
697static ssize_t i7core_inject_section_show(struct mem_ctl_info *mci,
698 char *data)
699{
700 struct i7core_pvt *pvt = mci->pvt_info;
701 return sprintf(data, "0x%08x\n", pvt->inject.section);
702}
703
704/*
705 * i7core inject.type
706 *
707 * accept and store error injection inject.section value
708 * bit 0 - repeat enable - Enable error repetition
709 * bit 1 - inject ECC error
710 * bit 2 - inject parity error
711 */
712static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci,
713 const char *data, size_t count)
714{
715 struct i7core_pvt *pvt = mci->pvt_info;
716 unsigned long value;
717 int rc;
718
719 if (pvt->inject.enable)
41fcb7fe 720 disable_inject(mci);
194a40fe
MCC
721
722 rc = strict_strtoul(data, 10, &value);
723 if ((rc < 0) || (value > 7))
2068def5 724 return -EIO;
194a40fe
MCC
725
726 pvt->inject.type = (u32) value;
727 return count;
728}
729
730static ssize_t i7core_inject_type_show(struct mem_ctl_info *mci,
731 char *data)
732{
733 struct i7core_pvt *pvt = mci->pvt_info;
734 return sprintf(data, "0x%08x\n", pvt->inject.type);
735}
736
737/*
738 * i7core_inject_inject.eccmask_store
739 *
740 * The type of error (UE/CE) will depend on the inject.eccmask value:
741 * Any bits set to a 1 will flip the corresponding ECC bit
742 * Correctable errors can be injected by flipping 1 bit or the bits within
743 * a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
744 * 23:16 and 31:24). Flipping bits in two symbol pairs will cause an
745 * uncorrectable error to be injected.
746 */
747static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci,
748 const char *data, size_t count)
749{
750 struct i7core_pvt *pvt = mci->pvt_info;
751 unsigned long value;
752 int rc;
753
754 if (pvt->inject.enable)
41fcb7fe 755 disable_inject(mci);
194a40fe
MCC
756
757 rc = strict_strtoul(data, 10, &value);
758 if (rc < 0)
2068def5 759 return -EIO;
194a40fe
MCC
760
761 pvt->inject.eccmask = (u32) value;
762 return count;
763}
764
765static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci,
766 char *data)
767{
768 struct i7core_pvt *pvt = mci->pvt_info;
769 return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
770}
771
772/*
773 * i7core_addrmatch
774 *
775 * The type of error (UE/CE) will depend on the inject.eccmask value:
776 * Any bits set to a 1 will flip the corresponding ECC bit
777 * Correctable errors can be injected by flipping 1 bit or the bits within
778 * a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
779 * 23:16 and 31:24). Flipping bits in two symbol pairs will cause an
780 * uncorrectable error to be injected.
781 */
782static ssize_t i7core_inject_addrmatch_store(struct mem_ctl_info *mci,
783 const char *data, size_t count)
784{
785 struct i7core_pvt *pvt = mci->pvt_info;
786 char *cmd, *val;
787 long value;
788 int rc;
789
790 if (pvt->inject.enable)
41fcb7fe 791 disable_inject(mci);
194a40fe
MCC
792
793 do {
794 cmd = strsep((char **) &data, ":");
795 if (!cmd)
796 break;
797 val = strsep((char **) &data, " \n\t");
798 if (!val)
799 return cmd - data;
800
41fcb7fe 801 if (!strcasecmp(val, "any"))
194a40fe
MCC
802 value = -1;
803 else {
804 rc = strict_strtol(val, 10, &value);
805 if ((rc < 0) || (value < 0))
806 return cmd - data;
807 }
808
41fcb7fe 809 if (!strcasecmp(cmd, "channel")) {
194a40fe
MCC
810 if (value < 3)
811 pvt->inject.channel = value;
812 else
813 return cmd - data;
41fcb7fe 814 } else if (!strcasecmp(cmd, "dimm")) {
276b824c 815 if (value < 3)
194a40fe
MCC
816 pvt->inject.dimm = value;
817 else
818 return cmd - data;
41fcb7fe 819 } else if (!strcasecmp(cmd, "rank")) {
194a40fe
MCC
820 if (value < 4)
821 pvt->inject.rank = value;
822 else
823 return cmd - data;
41fcb7fe 824 } else if (!strcasecmp(cmd, "bank")) {
276b824c 825 if (value < 32)
194a40fe
MCC
826 pvt->inject.bank = value;
827 else
828 return cmd - data;
41fcb7fe 829 } else if (!strcasecmp(cmd, "page")) {
194a40fe
MCC
830 if (value <= 0xffff)
831 pvt->inject.page = value;
832 else
833 return cmd - data;
41fcb7fe
MCC
834 } else if (!strcasecmp(cmd, "col") ||
835 !strcasecmp(cmd, "column")) {
194a40fe
MCC
836 if (value <= 0x3fff)
837 pvt->inject.col = value;
838 else
839 return cmd - data;
840 }
841 } while (1);
842
843 return count;
844}
845
846static ssize_t i7core_inject_addrmatch_show(struct mem_ctl_info *mci,
847 char *data)
848{
849 struct i7core_pvt *pvt = mci->pvt_info;
850 char channel[4], dimm[4], bank[4], rank[4], page[7], col[7];
851
852 if (pvt->inject.channel < 0)
853 sprintf(channel, "any");
854 else
855 sprintf(channel, "%d", pvt->inject.channel);
856 if (pvt->inject.dimm < 0)
857 sprintf(dimm, "any");
858 else
859 sprintf(dimm, "%d", pvt->inject.dimm);
860 if (pvt->inject.bank < 0)
861 sprintf(bank, "any");
862 else
863 sprintf(bank, "%d", pvt->inject.bank);
864 if (pvt->inject.rank < 0)
865 sprintf(rank, "any");
866 else
867 sprintf(rank, "%d", pvt->inject.rank);
868 if (pvt->inject.page < 0)
869 sprintf(page, "any");
870 else
871 sprintf(page, "0x%04x", pvt->inject.page);
872 if (pvt->inject.col < 0)
873 sprintf(col, "any");
874 else
875 sprintf(col, "0x%04x", pvt->inject.col);
876
877 return sprintf(data, "channel: %s\ndimm: %s\nbank: %s\n"
878 "rank: %s\npage: %s\ncolumn: %s\n",
879 channel, dimm, bank, rank, page, col);
880}
881
276b824c
MCC
882static int write_and_test(struct pci_dev *dev, int where, u32 val)
883{
884 u32 read;
885 int count;
886
4157d9f5
MCC
887 debugf0("setting pci %02x:%02x.%x reg=%02x value=%08x\n",
888 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
889 where, val);
890
276b824c
MCC
891 for (count = 0; count < 10; count++) {
892 if (count)
b990538a 893 msleep(100);
276b824c
MCC
894 pci_write_config_dword(dev, where, val);
895 pci_read_config_dword(dev, where, &read);
896
897 if (read == val)
898 return 0;
899 }
900
4157d9f5
MCC
901 i7core_printk(KERN_ERR, "Error during set pci %02x:%02x.%x reg=%02x "
902 "write=%08x. Read=%08x\n",
903 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
904 where, val, read);
276b824c
MCC
905
906 return -EINVAL;
907}
908
194a40fe
MCC
909/*
910 * This routine prepares the Memory Controller for error injection.
911 * The error will be injected when some process tries to write to the
912 * memory that matches the given criteria.
913 * The criteria can be set in terms of a mask where dimm, rank, bank, page
914 * and col can be specified.
915 * A -1 value for any of the mask items will make the MCU to ignore
916 * that matching criteria for error injection.
917 *
918 * It should be noticed that the error will only happen after a write operation
919 * on a memory that matches the condition. if REPEAT_EN is not enabled at
920 * inject mask, then it will produce just one error. Otherwise, it will repeat
921 * until the injectmask would be cleaned.
922 *
923 * FIXME: This routine assumes that MAXNUMDIMMS value of MC_MAX_DOD
924 * is reliable enough to check if the MC is using the
925 * three channels. However, this is not clear at the datasheet.
926 */
927static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci,
928 const char *data, size_t count)
929{
930 struct i7core_pvt *pvt = mci->pvt_info;
931 u32 injectmask;
932 u64 mask = 0;
933 int rc;
934 long enable;
935
f4742949 936 if (!pvt->pci_ch[pvt->inject.channel][0])
8f331907
MCC
937 return 0;
938
194a40fe
MCC
939 rc = strict_strtoul(data, 10, &enable);
940 if ((rc < 0))
941 return 0;
942
943 if (enable) {
944 pvt->inject.enable = 1;
945 } else {
946 disable_inject(mci);
947 return count;
948 }
949
950 /* Sets pvt->inject.dimm mask */
951 if (pvt->inject.dimm < 0)
7b029d03 952 mask |= 1L << 41;
194a40fe 953 else {
f4742949 954 if (pvt->channel[pvt->inject.channel].dimms > 2)
7b029d03 955 mask |= (pvt->inject.dimm & 0x3L) << 35;
194a40fe 956 else
7b029d03 957 mask |= (pvt->inject.dimm & 0x1L) << 36;
194a40fe
MCC
958 }
959
960 /* Sets pvt->inject.rank mask */
961 if (pvt->inject.rank < 0)
7b029d03 962 mask |= 1L << 40;
194a40fe 963 else {
f4742949 964 if (pvt->channel[pvt->inject.channel].dimms > 2)
7b029d03 965 mask |= (pvt->inject.rank & 0x1L) << 34;
194a40fe 966 else
7b029d03 967 mask |= (pvt->inject.rank & 0x3L) << 34;
194a40fe
MCC
968 }
969
970 /* Sets pvt->inject.bank mask */
971 if (pvt->inject.bank < 0)
7b029d03 972 mask |= 1L << 39;
194a40fe 973 else
7b029d03 974 mask |= (pvt->inject.bank & 0x15L) << 30;
194a40fe
MCC
975
976 /* Sets pvt->inject.page mask */
977 if (pvt->inject.page < 0)
7b029d03 978 mask |= 1L << 38;
194a40fe 979 else
7b029d03 980 mask |= (pvt->inject.page & 0xffffL) << 14;
194a40fe
MCC
981
982 /* Sets pvt->inject.column mask */
983 if (pvt->inject.col < 0)
7b029d03 984 mask |= 1L << 37;
194a40fe 985 else
7b029d03 986 mask |= (pvt->inject.col & 0x3fffL);
194a40fe 987
276b824c
MCC
988 /*
989 * bit 0: REPEAT_EN
990 * bits 1-2: MASK_HALF_CACHELINE
991 * bit 3: INJECT_ECC
992 * bit 4: INJECT_ADDR_PARITY
993 */
994
995 injectmask = (pvt->inject.type & 1) |
996 (pvt->inject.section & 0x3) << 1 |
997 (pvt->inject.type & 0x6) << (3 - 1);
998
999 /* Unlock writes to registers - this register is write only */
f4742949 1000 pci_write_config_dword(pvt->pci_noncore,
67166af4 1001 MC_CFG_CONTROL, 0x2);
e9bd2e73 1002
f4742949 1003 write_and_test(pvt->pci_ch[pvt->inject.channel][0],
194a40fe 1004 MC_CHANNEL_ADDR_MATCH, mask);
f4742949 1005 write_and_test(pvt->pci_ch[pvt->inject.channel][0],
7b029d03 1006 MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);
7b029d03 1007
f4742949 1008 write_and_test(pvt->pci_ch[pvt->inject.channel][0],
194a40fe
MCC
1009 MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);
1010
f4742949 1011 write_and_test(pvt->pci_ch[pvt->inject.channel][0],
4157d9f5 1012 MC_CHANNEL_ERROR_INJECT, injectmask);
276b824c 1013
194a40fe 1014 /*
276b824c
MCC
1015 * This is something undocumented, based on my tests
1016 * Without writing 8 to this register, errors aren't injected. Not sure
1017 * why.
194a40fe 1018 */
f4742949 1019 pci_write_config_dword(pvt->pci_noncore,
276b824c 1020 MC_CFG_CONTROL, 8);
194a40fe 1021
41fcb7fe
MCC
1022 debugf0("Error inject addr match 0x%016llx, ecc 0x%08x,"
1023 " inject 0x%08x\n",
194a40fe
MCC
1024 mask, pvt->inject.eccmask, injectmask);
1025
7b029d03 1026
194a40fe
MCC
1027 return count;
1028}
1029
/*
 * i7core_inject_enable_show	Sysfs "show" callback for inject_enable.
 *
 * Reads MC_CHANNEL_ERROR_INJECT back from the currently selected channel
 * and reports whether error injection is active.
 *
 * NOTE(review): only mask 0x0c of the register is tested here, and
 * inject.enable is only ever set, never cleared, by this path - confirm
 * that is the intended sysfs semantics.
 */
static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci,
					char *data)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	u32 injectmask;

	pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
			      MC_CHANNEL_ERROR_INJECT, &injectmask);

	debugf0("Inject error read: 0x%018x\n", injectmask);

	if (injectmask & 0x0c)
		pvt->inject.enable = 1;

	return sprintf(data, "%d\n", pvt->inject.enable);
}
1046
442305b1
MCC
1047static ssize_t i7core_ce_regs_show(struct mem_ctl_info *mci, char *data)
1048{
f4742949 1049 unsigned i, count, total = 0;
442305b1
MCC
1050 struct i7core_pvt *pvt = mci->pvt_info;
1051
f4742949
MCC
1052 if (!pvt->ce_count_available) {
1053 count = sprintf(data, "data unavailable\n");
1054 return 0;
67166af4 1055 }
d88b8507 1056 if (!pvt->is_registered) {
f4742949
MCC
1057 count = sprintf(data, "all channels "
1058 "UDIMM0: %lu UDIMM1: %lu UDIMM2: %lu\n",
1059 pvt->udimm_ce_count[0],
1060 pvt->udimm_ce_count[1],
1061 pvt->udimm_ce_count[2]);
d88b8507
MCC
1062 data += count;
1063 total += count;
1064 } else {
f4742949
MCC
1065 for (i = 0; i < NUM_CHANS; i++) {
1066 count = sprintf(data, "channel %d RDIMM0: %lu "
1067 "RDIMM1: %lu RDIMM2: %lu\n",
1068 i,
1069 pvt->rdimm_ce_count[i][0],
1070 pvt->rdimm_ce_count[i][1],
1071 pvt->rdimm_ce_count[i][2]);
d88b8507
MCC
1072 data += count;
1073 total += count;
1074 }
1075 }
442305b1 1076
67166af4 1077 return total;
442305b1
MCC
1078}
1079
194a40fe
MCC
1080/*
1081 * Sysfs struct
1082 */
1083static struct mcidev_sysfs_attribute i7core_inj_attrs[] = {
194a40fe
MCC
1084 {
1085 .attr = {
1086 .name = "inject_section",
1087 .mode = (S_IRUGO | S_IWUSR)
1088 },
1089 .show = i7core_inject_section_show,
1090 .store = i7core_inject_section_store,
1091 }, {
1092 .attr = {
1093 .name = "inject_type",
1094 .mode = (S_IRUGO | S_IWUSR)
1095 },
1096 .show = i7core_inject_type_show,
1097 .store = i7core_inject_type_store,
1098 }, {
1099 .attr = {
1100 .name = "inject_eccmask",
1101 .mode = (S_IRUGO | S_IWUSR)
1102 },
1103 .show = i7core_inject_eccmask_show,
1104 .store = i7core_inject_eccmask_store,
1105 }, {
1106 .attr = {
1107 .name = "inject_addrmatch",
1108 .mode = (S_IRUGO | S_IWUSR)
1109 },
1110 .show = i7core_inject_addrmatch_show,
1111 .store = i7core_inject_addrmatch_store,
1112 }, {
1113 .attr = {
1114 .name = "inject_enable",
1115 .mode = (S_IRUGO | S_IWUSR)
1116 },
1117 .show = i7core_inject_enable_show,
1118 .store = i7core_inject_enable_store,
442305b1
MCC
1119 }, {
1120 .attr = {
1121 .name = "corrected_error_counts",
1122 .mode = (S_IRUGO | S_IWUSR)
1123 },
1124 .show = i7core_ce_regs_show,
1125 .store = NULL,
194a40fe 1126 },
42538680 1127 { .attr = { .name = NULL } }
194a40fe
MCC
1128};
1129
a0c36a1f
MCC
1130/****************************************************************************
1131 Device initialization routines: put/get, init/exit
1132 ****************************************************************************/
1133
/*
 * i7core_put_devices	'put' all the devices that we have
 *			reserved via 'get'
 *
 * Drops the PCI reference of every device attached to @i7core_dev, then
 * unlinks the descriptor from i7core_edac_list and frees it. Note the
 * ordering: list_del() must run before kfree(i7core_dev), and callers
 * iterating the list must use the _safe variant.
 */
static void i7core_put_devices(struct i7core_dev *i7core_dev)
{
	int i;

	debugf0(__FILE__ ": %s()\n", __func__);
	/* Release every PCI reference taken by i7core_get_onedevice() */
	for (i = 0; i < N_DEVS; i++) {
		struct pci_dev *pdev = i7core_dev->pdev[i];
		if (!pdev)
			continue;
		debugf0("Removing dev %02x:%02x.%d\n",
			pdev->bus->number,
			PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
		pci_dev_put(pdev);
	}
	kfree(i7core_dev->pdev);
	/* Unlink from the global list before the descriptor goes away */
	list_del(&i7core_dev->list);
	kfree(i7core_dev);
}
66607706 1156
13d6e9b6
MCC
/*
 * i7core_put_all_devices	Release every reserved socket descriptor.
 *
 * i7core_put_devices() removes the current entry from i7core_edac_list
 * and frees it, so the _safe iterator is required: a plain
 * list_for_each_entry() would advance through a just-freed node.
 */
static void i7core_put_all_devices(void)
{
	struct i7core_dev *i7core_dev, *tmp;

	list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list)
		i7core_put_devices(i7core_dev);
}
1164
bc2d7245
KM
1165static void i7core_xeon_pci_fixup(void)
1166{
1167 struct pci_dev *pdev = NULL;
1168 int i;
1169 /*
1170 * On Xeon 55xx, the Intel Quckpath Arch Generic Non-core pci buses
1171 * aren't announced by acpi. So, we need to use a legacy scan probing
1172 * to detect them
1173 */
1174 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
66607706 1175 pci_dev_descr[0].dev_id, NULL);
bc2d7245 1176 if (unlikely(!pdev)) {
f4742949 1177 for (i = 0; i < MAX_SOCKET_BUSES; i++)
bc2d7245
KM
1178 pcibios_scan_specific_bus(255-i);
1179 }
1180}
1181
a0c36a1f
MCC
1182/*
1183 * i7core_get_devices Find and perform 'get' operation on the MCH's
1184 * device/functions we want to reference for this driver
1185 *
1186 * Need to 'get' device 16 func 1 and func 2
1187 */
c77720b9 1188int i7core_get_onedevice(struct pci_dev **prev, int devno)
a0c36a1f 1189{
66607706
MCC
1190 struct i7core_dev *i7core_dev;
1191
8f331907 1192 struct pci_dev *pdev = NULL;
67166af4
MCC
1193 u8 bus = 0;
1194 u8 socket = 0;
a0c36a1f 1195
c77720b9 1196 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
66607706 1197 pci_dev_descr[devno].dev_id, *prev);
c77720b9 1198
c77720b9
MCC
1199 /*
1200 * On Xeon 55xx, the Intel Quckpath Arch Generic Non-core regs
1201 * is at addr 8086:2c40, instead of 8086:2c41. So, we need
1202 * to probe for the alternate address in case of failure
1203 */
66607706 1204 if (pci_dev_descr[devno].dev_id == PCI_DEVICE_ID_INTEL_I7_NOCORE && !pdev)
c77720b9
MCC
1205 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1206 PCI_DEVICE_ID_INTEL_I7_NOCORE_ALT, *prev);
d1fd4fb6 1207
c77720b9
MCC
1208 if (!pdev) {
1209 if (*prev) {
1210 *prev = pdev;
1211 return 0;
d1fd4fb6
MCC
1212 }
1213
310cbb72 1214 /*
c77720b9
MCC
1215 * Dev 3 function 2 only exists on chips with RDIMMs
1216 * so, it is ok to not found it
310cbb72 1217 */
66607706 1218 if ((pci_dev_descr[devno].dev == 3) && (pci_dev_descr[devno].func == 2)) {
c77720b9
MCC
1219 *prev = pdev;
1220 return 0;
1221 }
310cbb72 1222
c77720b9
MCC
1223 i7core_printk(KERN_ERR,
1224 "Device not found: dev %02x.%d PCI ID %04x:%04x\n",
66607706
MCC
1225 pci_dev_descr[devno].dev, pci_dev_descr[devno].func,
1226 PCI_VENDOR_ID_INTEL, pci_dev_descr[devno].dev_id);
67166af4 1227
c77720b9
MCC
1228 /* End of list, leave */
1229 return -ENODEV;
1230 }
1231 bus = pdev->bus->number;
67166af4 1232
c77720b9
MCC
1233 if (bus == 0x3f)
1234 socket = 0;
1235 else
1236 socket = 255 - bus;
1237
66607706
MCC
1238 i7core_dev = get_i7core_dev(socket);
1239 if (!i7core_dev) {
1240 i7core_dev = kzalloc(sizeof(*i7core_dev), GFP_KERNEL);
1241 if (!i7core_dev)
1242 return -ENOMEM;
1243 i7core_dev->pdev = kzalloc(sizeof(*i7core_dev->pdev) * N_DEVS,
1244 GFP_KERNEL);
1245 if (!i7core_dev->pdev)
1246 return -ENOMEM;
1247 i7core_dev->socket = socket;
1248 list_add_tail(&i7core_dev->list, &i7core_edac_list);
c77720b9 1249 }
67166af4 1250
66607706 1251 if (i7core_dev->pdev[devno]) {
c77720b9
MCC
1252 i7core_printk(KERN_ERR,
1253 "Duplicated device for "
1254 "dev %02x:%02x.%d PCI ID %04x:%04x\n",
66607706
MCC
1255 bus, pci_dev_descr[devno].dev, pci_dev_descr[devno].func,
1256 PCI_VENDOR_ID_INTEL, pci_dev_descr[devno].dev_id);
c77720b9
MCC
1257 pci_dev_put(pdev);
1258 return -ENODEV;
1259 }
67166af4 1260
66607706 1261 i7core_dev->pdev[devno] = pdev;
c77720b9
MCC
1262
1263 /* Sanity check */
66607706
MCC
1264 if (unlikely(PCI_SLOT(pdev->devfn) != pci_dev_descr[devno].dev ||
1265 PCI_FUNC(pdev->devfn) != pci_dev_descr[devno].func)) {
c77720b9
MCC
1266 i7core_printk(KERN_ERR,
1267 "Device PCI ID %04x:%04x "
1268 "has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
66607706 1269 PCI_VENDOR_ID_INTEL, pci_dev_descr[devno].dev_id,
c77720b9 1270 bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
66607706 1271 bus, pci_dev_descr[devno].dev, pci_dev_descr[devno].func);
c77720b9
MCC
1272 return -ENODEV;
1273 }
ef708b53 1274
c77720b9
MCC
1275 /* Be sure that the device is enabled */
1276 if (unlikely(pci_enable_device(pdev) < 0)) {
1277 i7core_printk(KERN_ERR,
1278 "Couldn't enable "
1279 "dev %02x:%02x.%d PCI ID %04x:%04x\n",
66607706
MCC
1280 bus, pci_dev_descr[devno].dev, pci_dev_descr[devno].func,
1281 PCI_VENDOR_ID_INTEL, pci_dev_descr[devno].dev_id);
c77720b9
MCC
1282 return -ENODEV;
1283 }
ef708b53 1284
d4c27795
MCC
1285 debugf0("Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n",
1286 socket, bus, pci_dev_descr[devno].dev,
1287 pci_dev_descr[devno].func,
1288 PCI_VENDOR_ID_INTEL, pci_dev_descr[devno].dev_id);
8f331907 1289
c77720b9 1290 *prev = pdev;
ef708b53 1291
c77720b9
MCC
1292 return 0;
1293}
a0c36a1f 1294
/*
 * i7core_get_devices	Reserve every PCI device the driver needs.
 *
 * For each entry of pci_dev_descr[], iterate over all matching devices
 * (one per socket) via i7core_get_onedevice(); the inner loop stops when
 * the cursor comes back NULL. On any failure, everything reserved so far
 * is released before returning.
 *
 * Returns 0 on success, -ENODEV if a required device is missing.
 */
static int i7core_get_devices(void)
{
	int i;
	struct pci_dev *pdev = NULL;

	for (i = 0; i < N_DEVS; i++) {
		pdev = NULL;
		do {
			if (i7core_get_onedevice(&pdev, i) < 0) {
				i7core_put_all_devices();
				return -ENODEV;
			}
		} while (pdev);
	}

	return 0;
}
1312
f4742949
MCC
/*
 * mci_bind_devs	Map the reserved PCI devices of @i7core_dev into the
 *			per-MC private data for fast access.
 *
 * Layout (by PCI slot/function of each reserved device):
 *   slot 3              -> pvt->pci_mcr[func]      (MC registers)
 *   slots 4..4+NUM_CHANS-1 -> pvt->pci_ch[chan][func] (per-channel regs)
 *   slot 0 func 0       -> pvt->pci_noncore
 * The presence of dev 3 func 2 marks a registered-DIMM (RDIMM) setup.
 *
 * Returns 0 on success, -EINVAL if a device falls outside this layout.
 */
static int mci_bind_devs(struct mem_ctl_info *mci,
			 struct i7core_dev *i7core_dev)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	struct pci_dev *pdev;
	int i, func, slot;

	/* Associates i7core_dev and mci for future usage */
	pvt->i7core_dev = i7core_dev;
	i7core_dev->mci = mci;

	pvt->is_registered = 0;
	for (i = 0; i < N_DEVS; i++) {
		pdev = i7core_dev->pdev[i];
		if (!pdev)
			continue;

		func = PCI_FUNC(pdev->devfn);
		slot = PCI_SLOT(pdev->devfn);
		if (slot == 3) {
			if (unlikely(func > MAX_MCR_FUNC))
				goto error;
			pvt->pci_mcr[func] = pdev;
		} else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
			if (unlikely(func > MAX_CHAN_FUNC))
				goto error;
			pvt->pci_ch[slot - 4][func] = pdev;
		} else if (!slot && !func)
			pvt->pci_noncore = pdev;
		else
			goto error;

		debugf0("Associated fn %d.%d, dev = %p, socket %d\n",
			PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
			pdev, i7core_dev->socket);

		/* dev 3 func 2 only exists on RDIMM-populated machines */
		if (PCI_SLOT(pdev->devfn) == 3 &&
		    PCI_FUNC(pdev->devfn) == 2)
			pvt->is_registered = 1;
	}

	return 0;

error:
	i7core_printk(KERN_ERR, "Device %d, function %d "
		      "is out of the expected range\n",
		      slot, func);
	return -EINVAL;
}
1362
442305b1
MCC
1363/****************************************************************************
1364 Error check routines
1365 ****************************************************************************/
/*
 * i7core_rdimm_update_csrow	Report @add newly seen corrected errors on
 *				one RDIMM to the EDAC core.
 *
 * Maps (chan, dimm) to its csrow via csrow_map and logs one CE per new
 * error count so the EDAC accounting stays exact.
 *
 * NOTE(review): kasprintf(GFP_KERNEL) here assumes we are never called
 * from atomic context - confirm against the polling-only call path.
 */
static void i7core_rdimm_update_csrow(struct mem_ctl_info *mci,
				      int chan, int dimm, int add)
{
	char *msg;
	struct i7core_pvt *pvt = mci->pvt_info;
	int row = pvt->csrow_map[chan][dimm], i;

	for (i = 0; i < add; i++) {
		msg = kasprintf(GFP_KERNEL, "Corrected error "
				"(Socket=%d channel=%d dimm=%d)",
				pvt->i7core_dev->socket, chan, dimm);

		edac_mc_handle_fbd_ce(mci, row, 0, msg);
		kfree (msg);
	}
}
1382
/*
 * i7core_rdimm_update_ce_count	Fold freshly read per-DIMM hardware
 *				counters into the running totals of @chan.
 *
 * @new0..@new2 are the current raw counter values for DIMMs 0..2. The
 * hardware counters are 15 bits wide, so a negative delta means the
 * counter wrapped and 0x7fff is added back. On the very first call only
 * a snapshot is taken (ce_count_available not yet set), so pre-existing
 * counts are not reported as new errors.
 */
static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
					 int chan, int new0, int new1, int new2)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	int add0 = 0, add1 = 0, add2 = 0;
	/* Updates CE counters if it is not the first time here */
	if (pvt->ce_count_available) {
		/* Updates CE counters */

		add2 = new2 - pvt->rdimm_last_ce_count[chan][2];
		add1 = new1 - pvt->rdimm_last_ce_count[chan][1];
		add0 = new0 - pvt->rdimm_last_ce_count[chan][0];

		/* 15-bit counters: compensate for wraparound */
		if (add2 < 0)
			add2 += 0x7fff;
		pvt->rdimm_ce_count[chan][2] += add2;

		if (add1 < 0)
			add1 += 0x7fff;
		pvt->rdimm_ce_count[chan][1] += add1;

		if (add0 < 0)
			add0 += 0x7fff;
		pvt->rdimm_ce_count[chan][0] += add0;
	} else
		pvt->ce_count_available = 1;

	/* Store the new values */
	pvt->rdimm_last_ce_count[chan][2] = new2;
	pvt->rdimm_last_ce_count[chan][1] = new1;
	pvt->rdimm_last_ce_count[chan][0] = new0;

	/* update the edac core with whatever is new */
	if (add0 != 0)
		i7core_rdimm_update_csrow(mci, chan, 0, add0);
	if (add1 != 0)
		i7core_rdimm_update_csrow(mci, chan, 1, add1);
	if (add2 != 0)
		i7core_rdimm_update_csrow(mci, chan, 2, add2);

}
1424
/*
 * i7core_rdimm_check_mc_ecc_err	Poll the MC_COR_ECC_CNT registers
 *					(dev 3, func 2) for RDIMM CE counts.
 *
 * Each 32-bit register carries two packed per-DIMM counters; which DIMM
 * each half maps to depends on whether the channel has more than two
 * DIMMs populated.
 *
 * NOTE(review): pvt->pci_mcr[2] is dereferenced without a NULL check -
 * presumably guaranteed because this path only runs when is_registered
 * is set (dev 3 func 2 present); confirm against i7core_check_error().
 */
static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	u32 rcv[3][2];
	int i, new0, new1, new2;

	/* Read DEV 3: FUN 2: MC_COR_ECC_CNT regs directly */
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_0,
			      &rcv[0][0]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_1,
			      &rcv[0][1]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_2,
			      &rcv[1][0]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_3,
			      &rcv[1][1]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_4,
			      &rcv[2][0]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_5,
			      &rcv[2][1]);
	for (i = 0 ; i < 3; i++) {
		debugf3("MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
			(i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]);
		/* if the channel has 3 dimms */
		if (pvt->channel[i].dimms > 2) {
			new0 = DIMM_BOT_COR_ERR(rcv[i][0]);
			new1 = DIMM_TOP_COR_ERR(rcv[i][0]);
			new2 = DIMM_BOT_COR_ERR(rcv[i][1]);
		} else {
			new0 = DIMM_TOP_COR_ERR(rcv[i][0]) +
					DIMM_BOT_COR_ERR(rcv[i][0]);
			new1 = DIMM_TOP_COR_ERR(rcv[i][1]) +
					DIMM_BOT_COR_ERR(rcv[i][1]);
			new2 = 0;
		}

		i7core_rdimm_update_ce_count(mci, i, new0, new1, new2);
	}
}
442305b1
MCC
1463
/* This function is based on the device 3 function 4 registers as described on:
 * Intel Xeon Processor 5500 Series Datasheet Volume 2
 *	http://www.intel.com/Assets/PDF/datasheet/321322.pdf
 * also available at:
 * 	http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
 *
 * Polls MC_TEST_ERR_RCV0/RCV1 for the three global UDIMM corrected-error
 * counters, applies 15-bit wraparound correction, and accumulates the
 * deltas (first call only snapshots, like the RDIMM variant).
 */
static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	u32 rcv1, rcv0;
	int new0, new1, new2;

	if (!pvt->pci_mcr[4]) {
		/* dev 3 func 4 absent: nothing to poll on this setup */
		debugf0("%s MCR registers not found\n", __func__);
		return;
	}

	/* Corrected test errors */
	pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV1, &rcv1);
	pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV0, &rcv0);

	/* Store the new values */
	new2 = DIMM2_COR_ERR(rcv1);
	new1 = DIMM1_COR_ERR(rcv0);
	new0 = DIMM0_COR_ERR(rcv0);

	/* Updates CE counters if it is not the first time here */
	if (pvt->ce_count_available) {
		/* Updates CE counters */
		int add0, add1, add2;

		add2 = new2 - pvt->udimm_last_ce_count[2];
		add1 = new1 - pvt->udimm_last_ce_count[1];
		add0 = new0 - pvt->udimm_last_ce_count[0];

		/* 15-bit counters: compensate for wraparound */
		if (add2 < 0)
			add2 += 0x7fff;
		pvt->udimm_ce_count[2] += add2;

		if (add1 < 0)
			add1 += 0x7fff;
		pvt->udimm_ce_count[1] += add1;

		if (add0 < 0)
			add0 += 0x7fff;
		pvt->udimm_ce_count[0] += add0;

		if (add0 | add1 | add2)
			i7core_printk(KERN_ERR, "New Corrected error(s): "
				      "dimm0: +%d, dimm1: +%d, dimm2 +%d\n",
				      add0, add1, add2);
	} else
		pvt->ce_count_available = 1;

	/* Store the new values */
	pvt->udimm_last_ce_count[2] = new2;
	pvt->udimm_last_ce_count[1] = new1;
	pvt->udimm_last_ce_count[0] = new0;
}
1523
8a2f118e
MCC
1524/*
1525 * According with tables E-11 and E-12 of chapter E.3.3 of Intel 64 and IA-32
1526 * Architectures Software Developer’s Manual Volume 3B.
f237fcf2
MCC
1527 * Nehalem are defined as family 0x06, model 0x1a
1528 *
1529 * The MCA registers used here are the following ones:
8a2f118e 1530 * struct mce field MCA Register
f237fcf2
MCC
1531 * m->status MSR_IA32_MC8_STATUS
1532 * m->addr MSR_IA32_MC8_ADDR
1533 * m->misc MSR_IA32_MC8_MISC
8a2f118e
MCC
1534 * In the case of Nehalem, the error information is masked at .status and .misc
1535 * fields
1536 */
d5381642
MCC
1537static void i7core_mce_output_error(struct mem_ctl_info *mci,
1538 struct mce *m)
1539{
b4e8f0b6 1540 struct i7core_pvt *pvt = mci->pvt_info;
a639539f 1541 char *type, *optype, *err, *msg;
8a2f118e 1542 unsigned long error = m->status & 0x1ff0000l;
a639539f 1543 u32 optypenum = (m->status >> 4) & 0x07;
8a2f118e
MCC
1544 u32 core_err_cnt = (m->status >> 38) && 0x7fff;
1545 u32 dimm = (m->misc >> 16) & 0x3;
1546 u32 channel = (m->misc >> 18) & 0x3;
1547 u32 syndrome = m->misc >> 32;
1548 u32 errnum = find_first_bit(&error, 32);
b4e8f0b6 1549 int csrow;
8a2f118e 1550
c5d34528
MCC
1551 if (m->mcgstatus & 1)
1552 type = "FATAL";
1553 else
1554 type = "NON_FATAL";
1555
a639539f 1556 switch (optypenum) {
b990538a
MCC
1557 case 0:
1558 optype = "generic undef request";
1559 break;
1560 case 1:
1561 optype = "read error";
1562 break;
1563 case 2:
1564 optype = "write error";
1565 break;
1566 case 3:
1567 optype = "addr/cmd error";
1568 break;
1569 case 4:
1570 optype = "scrubbing error";
1571 break;
1572 default:
1573 optype = "reserved";
1574 break;
a639539f
MCC
1575 }
1576
8a2f118e
MCC
1577 switch (errnum) {
1578 case 16:
1579 err = "read ECC error";
1580 break;
1581 case 17:
1582 err = "RAS ECC error";
1583 break;
1584 case 18:
1585 err = "write parity error";
1586 break;
1587 case 19:
1588 err = "redundacy loss";
1589 break;
1590 case 20:
1591 err = "reserved";
1592 break;
1593 case 21:
1594 err = "memory range error";
1595 break;
1596 case 22:
1597 err = "RTID out of range";
1598 break;
1599 case 23:
1600 err = "address parity error";
1601 break;
1602 case 24:
1603 err = "byte enable parity error";
1604 break;
1605 default:
1606 err = "unknown";
d5381642 1607 }
d5381642 1608
f237fcf2 1609 /* FIXME: should convert addr into bank and rank information */
8a2f118e 1610 msg = kasprintf(GFP_ATOMIC,
f4742949 1611 "%s (addr = 0x%08llx, cpu=%d, Dimm=%d, Channel=%d, "
a639539f 1612 "syndrome=0x%08x, count=%d, Err=%08llx:%08llx (%s: %s))\n",
f4742949 1613 type, (long long) m->addr, m->cpu, dimm, channel,
a639539f
MCC
1614 syndrome, core_err_cnt, (long long)m->status,
1615 (long long)m->misc, optype, err);
8a2f118e
MCC
1616
1617 debugf0("%s", msg);
d5381642 1618
f4742949 1619 csrow = pvt->csrow_map[channel][dimm];
b4e8f0b6 1620
d5381642 1621 /* Call the helper to output message */
b4e8f0b6
MCC
1622 if (m->mcgstatus & 1)
1623 edac_mc_handle_fbd_ue(mci, csrow, 0,
1624 0 /* FIXME: should be channel here */, msg);
f4742949 1625 else if (!pvt->is_registered)
b4e8f0b6
MCC
1626 edac_mc_handle_fbd_ce(mci, csrow,
1627 0 /* FIXME: should be channel here */, msg);
8a2f118e
MCC
1628
1629 kfree(msg);
d5381642
MCC
1630}
1631
87d1d272
MCC
/*
 * i7core_check_error	Retrieve and process errors reported by the
 *			hardware. Called by the Core module.
 *
 * Drains the MCE ring filled by i7core_mce_check_error() into a private
 * copy under mce_lock, decodes each entry outside the lock, then polls
 * the corrected-error counter registers (UDIMM or RDIMM variant).
 *
 * NOTE(review): if the GFP_ATOMIC allocation fails, mce_count is still
 * reset, so the queued events are silently dropped - confirm acceptable.
 */
static void i7core_check_error(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	int i;
	unsigned count = 0;
	struct mce *m = NULL;
	unsigned long flags;

	/* Copy all mce errors into a temporary buffer */
	spin_lock_irqsave(&pvt->mce_lock, flags);
	if (pvt->mce_count) {
		/* GFP_ATOMIC: we hold a spinlock with interrupts disabled */
		m = kmalloc(sizeof(*m) * pvt->mce_count, GFP_ATOMIC);

		if (m) {
			count = pvt->mce_count;
			memcpy(m, &pvt->mce_entry, sizeof(*m) * count);
		}
		pvt->mce_count = 0;
	}

	spin_unlock_irqrestore(&pvt->mce_lock, flags);

	/* proccess mcelog errors */
	for (i = 0; i < count; i++)
		i7core_mce_output_error(mci, &m[i]);

	kfree(m);

	/* check memory count errors */
	if (!pvt->is_registered)
		i7core_udimm_check_mc_ecc_err(mci);
	else
		i7core_rdimm_check_mc_ecc_err(mci);
}
1670
d5381642
MCC
/*
 * i7core_mce_check_error	Replicates mcelog routine to get errors
 *				This routine simply queues mcelog errors, and
 *				return. The error itself should be handled later
 *				by i7core_check_error.
 *
 * @priv: the mem_ctl_info registered in pvt->edac_mce.priv
 * @mce:  the machine-check event being dispatched
 *
 * Returns 1 when the event belongs to this MC and was queued (telling
 * mcelog it is handled), 0 to let mcelog process it instead.
 */
static int i7core_mce_check_error(void *priv, struct mce *mce)
{
	struct mem_ctl_info *mci = priv;
	struct i7core_pvt *pvt = mci->pvt_info;
	unsigned long flags;

	/*
	 * Just let mcelog handle it if the error is
	 * outside the memory controller
	 */
	if (((mce->status & 0xffff) >> 7) != 1)
		return 0;

	/* Bank 8 registers are the only ones that we know how to handle */
	if (mce->bank != 8)
		return 0;

	/* Only handle if it is the right mc controller */
	if (cpu_data(mce->cpu).phys_proc_id != pvt->i7core_dev->socket) {
		debugf0("mc%d: ignoring mce log for socket %d. "
			"Another mc should get it.\n",
			pvt->i7core_dev->socket,
			cpu_data(mce->cpu).phys_proc_id);
		return 0;
	}

	/* Queue the event; drop it silently if the ring is full */
	spin_lock_irqsave(&pvt->mce_lock, flags);
	if (pvt->mce_count < MCE_LOG_LEN) {
		memcpy(&pvt->mce_entry[pvt->mce_count], mce, sizeof(*mce));
		pvt->mce_count++;
	}
	spin_unlock_irqrestore(&pvt->mce_lock, flags);

	/* Handle fatal errors immediately */
	if (mce->mcgstatus & 1)
		i7core_check_error(mci);

	/* Advice mcelog that the error were handled */
	return 1;
}
1717
f4742949
MCC
1718static int i7core_register_mci(struct i7core_dev *i7core_dev,
1719 int num_channels, int num_csrows)
a0c36a1f
MCC
1720{
1721 struct mem_ctl_info *mci;
1722 struct i7core_pvt *pvt;
ba6c5c62 1723 int csrow = 0;
f4742949 1724 int rc;
a0c36a1f 1725
a0c36a1f 1726 /* allocate a new MC control structure */
d4c27795
MCC
1727 mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels,
1728 i7core_dev->socket);
f4742949
MCC
1729 if (unlikely(!mci))
1730 return -ENOMEM;
a0c36a1f
MCC
1731
1732 debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci);
1733
f4742949
MCC
1734 /* record ptr to the generic device */
1735 mci->dev = &i7core_dev->pdev[0]->dev;
1736
a0c36a1f 1737 pvt = mci->pvt_info;
ef708b53 1738 memset(pvt, 0, sizeof(*pvt));
67166af4 1739
41fcb7fe
MCC
1740 /*
1741 * FIXME: how to handle RDDR3 at MCI level? It is possible to have
1742 * Mixed RDDR3/UDDR3 with Nehalem, provided that they are on different
1743 * memory channels
1744 */
1745 mci->mtype_cap = MEM_FLAG_DDR3;
a0c36a1f
MCC
1746 mci->edac_ctl_cap = EDAC_FLAG_NONE;
1747 mci->edac_cap = EDAC_FLAG_NONE;
1748 mci->mod_name = "i7core_edac.c";
1749 mci->mod_ver = I7CORE_REVISION;
f4742949
MCC
1750 mci->ctl_name = kasprintf(GFP_KERNEL, "i7 core #%d",
1751 i7core_dev->socket);
1752 mci->dev_name = pci_name(i7core_dev->pdev[0]);
a0c36a1f 1753 mci->ctl_page_to_phys = NULL;
194a40fe 1754 mci->mc_driver_sysfs_attributes = i7core_inj_attrs;
87d1d272
MCC
1755 /* Set the function pointer to an actual operation function */
1756 mci->edac_check = i7core_check_error;
8f331907 1757
ef708b53 1758 /* Store pci devices at mci for faster access */
f4742949 1759 rc = mci_bind_devs(mci, i7core_dev);
41fcb7fe 1760 if (unlikely(rc < 0))
f4742949 1761 goto fail;
ef708b53
MCC
1762
1763 /* Get dimm basic config */
f4742949 1764 get_dimm_config(mci, &csrow);
ef708b53 1765
a0c36a1f 1766 /* add this new MC control structure to EDAC's list of MCs */
b7c76151 1767 if (unlikely(edac_mc_add_mc(mci))) {
a0c36a1f
MCC
1768 debugf0("MC: " __FILE__
1769 ": %s(): failed edac_mc_add_mc()\n", __func__);
1770 /* FIXME: perhaps some code should go here that disables error
1771 * reporting if we just enabled it
1772 */
b7c76151
MCC
1773
1774 rc = -EINVAL;
f4742949 1775 goto fail;
a0c36a1f
MCC
1776 }
1777
1778 /* allocating generic PCI control info */
f4742949
MCC
1779 i7core_pci = edac_pci_create_generic_ctl(&i7core_dev->pdev[0]->dev,
1780 EDAC_MOD_STR);
41fcb7fe 1781 if (unlikely(!i7core_pci)) {
a0c36a1f
MCC
1782 printk(KERN_WARNING
1783 "%s(): Unable to create PCI control\n",
1784 __func__);
1785 printk(KERN_WARNING
1786 "%s(): PCI error report via EDAC not setup\n",
1787 __func__);
1788 }
1789
194a40fe 1790 /* Default error mask is any memory */
ef708b53 1791 pvt->inject.channel = 0;
194a40fe
MCC
1792 pvt->inject.dimm = -1;
1793 pvt->inject.rank = -1;
1794 pvt->inject.bank = -1;
1795 pvt->inject.page = -1;
1796 pvt->inject.col = -1;
1797
d5381642 1798 /* Registers on edac_mce in order to receive memory errors */
c5d34528 1799 pvt->edac_mce.priv = mci;
d5381642
MCC
1800 pvt->edac_mce.check_error = i7core_mce_check_error;
1801 spin_lock_init(&pvt->mce_lock);
1802
1803 rc = edac_mce_register(&pvt->edac_mce);
b990538a 1804 if (unlikely(rc < 0)) {
d5381642
MCC
1805 debugf0("MC: " __FILE__
1806 ": %s(): failed edac_mce_register()\n", __func__);
f4742949
MCC
1807 }
1808
1809fail:
1810 edac_mc_free(mci);
1811 return rc;
1812}
1813
/*
 * i7core_probe	Probe for ONE instance of device to see if it is
 *		present.
 *	return:
 *		0 for FOUND a device
 *		< 0 for error code
 *
 * Runs once (dev_idx 0 only): reserves every socket's PCI devices, then
 * registers one MC instance per detected socket. The whole sequence is
 * serialized with i7core_remove() via i7core_edac_lock.
 */
static int __devinit i7core_probe(struct pci_dev *pdev,
				  const struct pci_device_id *id)
{
	int dev_idx = id->driver_data;
	int rc;
	struct i7core_dev *i7core_dev;

	/*
	 * All memory controllers are allocated at the first pass.
	 */
	if (unlikely(dev_idx >= 1))
		return -EINVAL;

	/* get the pci devices we want to reserve for our use */
	mutex_lock(&i7core_edac_lock);
	rc = i7core_get_devices();
	if (unlikely(rc < 0))
		goto fail0;

	list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
		int channels;
		int csrows;

		/* Check the number of active and not disabled channels */
		rc = i7core_get_active_channels(i7core_dev->socket,
						&channels, &csrows);
		if (unlikely(rc < 0))
			goto fail1;

		rc = i7core_register_mci(i7core_dev, channels, csrows);
		if (unlikely(rc < 0))
			goto fail1;
	}

	i7core_printk(KERN_INFO, "Driver loaded.\n");

	mutex_unlock(&i7core_edac_lock);
	return 0;

fail1:
	/* Releases every device grabbed so far, across all sockets */
	i7core_put_all_devices();
fail0:
	mutex_unlock(&i7core_edac_lock);
	return rc;
}
1866
/*
 * i7core_remove	destructor for one instance of device
 *
 * Tears down every MC instance: unregisters from edac_mce, frees the
 * mci, and releases the socket's PCI devices. Uses the _safe list
 * iterator because i7core_put_devices() unlinks and frees the node.
 */
static void __devexit i7core_remove(struct pci_dev *pdev)
{
	struct mem_ctl_info *mci;
	struct i7core_dev *i7core_dev, *tmp;

	debugf0(__FILE__ ": %s()\n", __func__);

	if (i7core_pci)
		edac_pci_release_generic_ctl(i7core_pci);

	/*
	 * we have a trouble here: pdev value for removal will be wrong, since
	 * it will point to the X58 register used to detect that the machine
	 * is a Nehalem or upper design. However, due to the way several PCI
	 * devices are grouped together to provide MC functionality, we need
	 * to use a different method for releasing the devices
	 */

	mutex_lock(&i7core_edac_lock);
	list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list) {
		mci = edac_mc_del_mc(&i7core_dev->pdev[0]->dev);
		if (mci) {
			struct i7core_pvt *pvt = mci->pvt_info;

			/*
			 * NOTE(review): the loop variable is reassigned from
			 * pvt here; presumably always the same node we are
			 * iterating on - confirm, since tmp already guards
			 * the advance.
			 */
			i7core_dev = pvt->i7core_dev;
			edac_mce_unregister(&pvt->edac_mce);
			kfree(mci->ctl_name);
			edac_mc_free(mci);
			i7core_put_devices(i7core_dev);
		} else {
			i7core_printk(KERN_ERR,
				      "Couldn't find mci for socket %d\n",
				      i7core_dev->socket);
		}
	}
	mutex_unlock(&i7core_edac_lock);
}
1908
a0c36a1f
MCC
/* Export the PCI ID table so the module autoloads on matching hardware */
MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);

/*
 * i7core_driver	pci_driver structure for this module
 *
 */
static struct pci_driver i7core_driver = {
	.name = "i7core_edac",
	.probe = i7core_probe,
	.remove = __devexit_p(i7core_remove),
	.id_table = i7core_pci_tbl,
};
1921
1922/*
1923 * i7core_init Module entry function
1924 * Try to initialize this module for its devices
1925 */
1926static int __init i7core_init(void)
1927{
1928 int pci_rc;
1929
1930 debugf2("MC: " __FILE__ ": %s()\n", __func__);
1931
1932 /* Ensure that the OPSTATE is set correctly for POLL or NMI */
1933 opstate_init();
1934
bc2d7245
KM
1935 i7core_xeon_pci_fixup();
1936
a0c36a1f
MCC
1937 pci_rc = pci_register_driver(&i7core_driver);
1938
3ef288a9
MCC
1939 if (pci_rc >= 0)
1940 return 0;
1941
1942 i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",
1943 pci_rc);
1944
1945 return pci_rc;
a0c36a1f
MCC
1946}
1947
/*
 * i7core_exit()	Module exit function
 *			Unregister the driver
 *
 * pci_unregister_driver() invokes i7core_remove() for the bound device,
 * which performs the full MC teardown.
 */
static void __exit i7core_exit(void)
{
	debugf2("MC: " __FILE__ ": %s()\n", __func__);
	pci_unregister_driver(&i7core_driver);
}
1957
module_init(i7core_init);
module_exit(i7core_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
		   I7CORE_REVISION);

/* Allow choosing the error-reporting mode at module load time */
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");