author		Tony Luck <tony.luck@intel.com>		2016-08-20 19:27:58 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-08-21 13:58:34 -0400
commit		4ec656bdf43a13a655a8259b79dd63bc1f0b1e41
tree		e0c941b9985718eca0313830ed0702ffb216f1a9
parent		6040e57658eee6eb1315a26119101ca832d1f854
EDAC, skx_edac: Add EDAC driver for Skylake

This is an entirely new driver instead of yet another set of patches
to sb_edac.c because:

1) The mapping from PCI devices to socket/memory controller is
   significantly different. Skylake scatters devices on a socket
   across a number of PCI buses.
2) There is an extra level of interleaving via the "mcroute" register
   that would be a little messy to squeeze into the old driver.
3) Validation is getting too expensive. Changes to sb_edac need to be
   checked against Sandy Bridge, Ivy Bridge, Haswell, Broadwell and
   Knights Landing.

Acked-by: Aristeu Rozanski <aris@redhat.com>
Acked-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
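The "mcroute" interleave mentioned in (2) works as follows: SAD decode first
produces a logical channel number (lchan), then two bitfields of the
per-socket mcroute register select the physical memory controller and
channel. A minimal sketch of that lookup, mirroring skx_sad_decode() in the
new file (the helper names here are illustrative only, not part of the patch):

    /* Illustrative only; the driver open-codes this in skx_sad_decode() */
    static unsigned int mcroute_imc(u32 mcroute, int lchan)
    {
    	return (mcroute >> (lchan * 3)) & 0x7;	/* 3-bit MC select */
    }

    static unsigned int mcroute_chan(u32 mcroute, int lchan)
    {
    	return (mcroute >> (lchan * 2 + 18)) & 0x3;	/* 2-bit channel select */
    }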

Diffstat:
 MAINTAINERS             |    6 +
 drivers/edac/Kconfig    |    8 +
 drivers/edac/Makefile   |    1 +
 drivers/edac/skx_edac.c | 1121 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 1136 insertions(+), 0 deletions(-)
diff --git a/MAINTAINERS b/MAINTAINERS
index a306795a7450..0bbe4b105c34 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4525,6 +4525,12 @@ L:	linux-edac@vger.kernel.org
 S:	Maintained
 F:	drivers/edac/sb_edac.c
 
+EDAC-SKYLAKE
+M:	Tony Luck <tony.luck@intel.com>
+L:	linux-edac@vger.kernel.org
+S:	Maintained
+F:	drivers/edac/skx_edac.c
+
 EDAC-XGENE
 APPLIED MICRO (APM) X-GENE SOC EDAC
 M:	Loc Ho <lho@apm.com>
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index d0c1dab9b435..dff1a4a6dc1b 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -251,6 +251,14 @@ config EDAC_SBRIDGE
 	  Support for error detection and correction the Intel
 	  Sandy Bridge, Ivy Bridge and Haswell Integrated Memory Controllers.
 
+config EDAC_SKX
+	tristate "Intel Skylake server Integrated MC"
+	depends on EDAC_MM_EDAC && PCI && X86_64 && X86_MCE_INTEL
+	depends on PCI_MMCONFIG
+	help
+	  Support for error detection and correction on the Intel
+	  Skylake server Integrated Memory Controllers.
+
 config EDAC_MPC85XX
 	tristate "Freescale MPC83xx / MPC85xx"
 	depends on EDAC_MM_EDAC && FSL_SOC
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index f9e4a3e0e6e9..986049925b08 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -31,6 +31,7 @@ obj-$(CONFIG_EDAC_I5400)		+= i5400_edac.o
 obj-$(CONFIG_EDAC_I7300)		+= i7300_edac.o
 obj-$(CONFIG_EDAC_I7CORE)		+= i7core_edac.o
 obj-$(CONFIG_EDAC_SBRIDGE)		+= sb_edac.o
+obj-$(CONFIG_EDAC_SKX)			+= skx_edac.o
 obj-$(CONFIG_EDAC_E7XXX)		+= e7xxx_edac.o
 obj-$(CONFIG_EDAC_E752X)		+= e752x_edac.o
 obj-$(CONFIG_EDAC_I82443BXGX)		+= i82443bxgx_edac.o
diff --git a/drivers/edac/skx_edac.c b/drivers/edac/skx_edac.c
new file mode 100644
index 000000000000..0ff4878c2aa1
--- /dev/null
+++ b/drivers/edac/skx_edac.c
@@ -0,0 +1,1121 @@
/*
 * EDAC driver for Intel(R) Xeon(R) Skylake processors
 * Copyright (c) 2016, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/pci_ids.h>
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/edac.h>
#include <linux/debugfs.h>
#include <linux/mmzone.h>
#include <linux/smp.h>
#include <linux/bitmap.h>
#include <linux/math64.h>
#include <linux/mod_devicetable.h>
#include <asm/cpu_device_id.h>
#include <asm/processor.h>
#include <asm/mce.h>

#include "edac_core.h"

#define SKX_REVISION    " Ver: 1.0 "

/*
 * Debug macros
 */
#define skx_printk(level, fmt, arg...)			\
	edac_printk(level, "skx", fmt, ##arg)

#define skx_mc_printk(mci, level, fmt, arg...)		\
	edac_mc_chipset_printk(mci, level, "skx", fmt, ##arg)

/*
 * Get a bit field at register value <v>, from bit <lo> to bit <hi>
 */
#define GET_BITFIELD(v, lo, hi) \
	(((v) & GENMASK_ULL((hi), (lo))) >> (lo))
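/*
 * e.g. GET_BITFIELD(0xABCD, 4, 7) == 0xC: mask with GENMASK_ULL(7, 4)
 * (i.e. 0xF0), then shift right by 4.
 */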

static LIST_HEAD(skx_edac_list);

static u64 skx_tolm, skx_tohm;

#define NUM_IMC		2	/* memory controllers per socket */
#define NUM_CHANNELS	3	/* channels per memory controller */
#define NUM_DIMMS	2	/* max DIMMs per channel */

#define MASK26	0x3FFFFFF	/* Mask for 2^26 */
#define MASK29	0x1FFFFFFF	/* Mask for 2^29 */

/*
 * Each CPU socket contains some PCI devices that provide global
 * information, and also some that are local to each of the two
 * memory controllers on the die.
 */
struct skx_dev {
	struct list_head list;
	u8 bus[4];
	struct pci_dev *sad_all;
	struct pci_dev *util_all;
	u32 mcroute;
	struct skx_imc {
		struct mem_ctl_info *mci;
		u8 mc;	/* system wide mc# */
		u8 lmc;	/* socket relative mc# */
		u8 src_id, node_id;
		struct skx_channel {
			struct pci_dev *cdev;
			struct skx_dimm {
				u8 close_pg;
				u8 bank_xor_enable;
				u8 fine_grain_bank;
				u8 rowbits;
				u8 colbits;
			} dimms[NUM_DIMMS];
		} chan[NUM_CHANNELS];
	} imc[NUM_IMC];
};
static int skx_num_sockets;

struct skx_pvt {
	struct skx_imc *imc;
};

struct decoded_addr {
	struct skx_dev *dev;
	u64 addr;
	int socket;
	int imc;
	int channel;
	u64 chan_addr;
	int sktways;
	int chanways;
	int dimm;
	int rank;
	int channel_rank;
	u64 rank_address;
	int row;
	int column;
	int bank_address;
	int bank_group;
};

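/*
 * A system address is decoded in four stages, each narrowing the result
 * and filling in more of the structure above (see skx_decode() below):
 *	SAD (source address decode):	socket, memory controller, channel
 *	TAD (target address decode):	channel address
 *	RIR (rank interleave):		DIMM, rank, rank address
 *	MAD (DRAM address mapping):	row, column, bank address/group
 */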
static struct skx_dev *get_skx_dev(u8 bus, u8 idx)
{
	struct skx_dev *d;

	list_for_each_entry(d, &skx_edac_list, list) {
		if (d->bus[idx] == bus)
			return d;
	}

	return NULL;
}

enum munittype {
	CHAN0, CHAN1, CHAN2, SAD_ALL, UTIL_ALL, SAD
};

struct munit {
	u16 did;
	u16 devfn[NUM_IMC];
	u8 busidx;
	u8 per_socket;
	enum munittype mtype;
};

/*
 * List of PCI device IDs that we need, together with the device and
 * function numbers, to tell which memory controller each device
 * belongs to.
 */
static const struct munit skx_all_munits[] = {
	{ 0x2054, { }, 1, 1, SAD_ALL },
	{ 0x2055, { }, 1, 1, UTIL_ALL },
	{ 0x2040, { PCI_DEVFN(10, 0), PCI_DEVFN(12, 0) }, 2, 2, CHAN0 },
	{ 0x2044, { PCI_DEVFN(10, 4), PCI_DEVFN(12, 4) }, 2, 2, CHAN1 },
	{ 0x2048, { PCI_DEVFN(11, 0), PCI_DEVFN(13, 0) }, 2, 2, CHAN2 },
	{ 0x208e, { }, 1, 0, SAD },
	{ }
};

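/*
 * Example: channel 0 (device id 0x2040) appears twice per socket, at
 * devfn 10.0 for IMC 0 and devfn 12.0 for IMC 1, and is matched to its
 * socket through bus[2] (busidx == 2) via get_skx_dev() above.
 */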
/*
 * We use the per-socket device 0x2016 to count how many sockets are present,
 * and to determine which PCI buses are associated with each socket. Allocate
 * and build the full list of all the skx_dev structures that we need here.
 */
static int get_all_bus_mappings(void)
{
	struct pci_dev *pdev, *prev;
	struct skx_dev *d;
	u32 reg;
	int ndev = 0;

	prev = NULL;
	for (;;) {
		pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x2016, prev);
		if (!pdev)
			break;
		ndev++;
		d = kzalloc(sizeof(*d), GFP_KERNEL);
		if (!d) {
			pci_dev_put(pdev);
			return -ENOMEM;
		}
		pci_read_config_dword(pdev, 0xCC, &reg);
		d->bus[0] = GET_BITFIELD(reg, 0, 7);
		d->bus[1] = GET_BITFIELD(reg, 8, 15);
		d->bus[2] = GET_BITFIELD(reg, 16, 23);
		d->bus[3] = GET_BITFIELD(reg, 24, 31);
		edac_dbg(2, "busses: %x, %x, %x, %x\n",
			 d->bus[0], d->bus[1], d->bus[2], d->bus[3]);
		list_add_tail(&d->list, &skx_edac_list);
		skx_num_sockets++;
		prev = pdev;
	}

	return ndev;
}

static int get_all_munits(const struct munit *m)
{
	struct pci_dev *pdev, *prev;
	struct skx_dev *d;
	u32 reg;
	int i = 0, ndev = 0;

	prev = NULL;
	for (;;) {
		pdev = pci_get_device(PCI_VENDOR_ID_INTEL, m->did, prev);
		if (!pdev)
			break;
		ndev++;
		if (m->per_socket == NUM_IMC) {
			for (i = 0; i < NUM_IMC; i++)
				if (m->devfn[i] == pdev->devfn)
					break;
			if (i == NUM_IMC)
				goto fail;
		}
		d = get_skx_dev(pdev->bus->number, m->busidx);
		if (!d)
			goto fail;

		/* Be sure that the device is enabled */
		if (unlikely(pci_enable_device(pdev) < 0)) {
			skx_printk(KERN_ERR,
				   "Couldn't enable %04x:%04x\n", PCI_VENDOR_ID_INTEL, m->did);
			goto fail;
		}

		switch (m->mtype) {
		case CHAN0: case CHAN1: case CHAN2:
			pci_dev_get(pdev);
			d->imc[i].chan[m->mtype].cdev = pdev;
			break;
		case SAD_ALL:
			pci_dev_get(pdev);
			d->sad_all = pdev;
			break;
		case UTIL_ALL:
			pci_dev_get(pdev);
			d->util_all = pdev;
			break;
		case SAD:
			/*
			 * There is one of these devices per core, including
			 * cores that don't exist on this SKU. Ignore any that
			 * read a route table of zero, and make sure all the
			 * non-zero values match.
			 */
			pci_read_config_dword(pdev, 0xB4, &reg);
			if (reg != 0) {
				if (d->mcroute == 0)
					d->mcroute = reg;
				else if (d->mcroute != reg) {
					skx_printk(KERN_ERR,
						   "mcroute mismatch\n");
					goto fail;
				}
			}
			ndev--;
			break;
		}

		prev = pdev;
	}

	return ndev;
fail:
	pci_dev_put(pdev);
	return -ENODEV;
}

static const struct x86_cpu_id skx_cpuids[] = {
	{ X86_VENDOR_INTEL, 6, 0x55, 0, 0 },	/* Skylake */
	{ }
};
MODULE_DEVICE_TABLE(x86cpu, skx_cpuids);

static u8 get_src_id(struct skx_dev *d)
{
	u32 reg;

	pci_read_config_dword(d->util_all, 0xF0, &reg);

	return GET_BITFIELD(reg, 12, 14);
}

static u8 skx_get_node_id(struct skx_dev *d)
{
	u32 reg;

	pci_read_config_dword(d->util_all, 0xF4, &reg);

	return GET_BITFIELD(reg, 0, 2);
}

static int get_dimm_attr(u32 reg, int lobit, int hibit, int add, int minval,
			 int maxval, char *name)
{
	u32 val = GET_BITFIELD(reg, lobit, hibit);

	if (val < minval || val > maxval) {
		edac_dbg(2, "bad %s = %d (raw=%x)\n", name, val, reg);
		return -EINVAL;
	}
	return val + add;
}

#define IS_DIMM_PRESENT(mtr)	GET_BITFIELD((mtr), 15, 15)

#define numrank(reg) get_dimm_attr((reg), 12, 13, 0, 1, 2, "ranks")
#define numrow(reg) get_dimm_attr((reg), 2, 4, 12, 1, 6, "rows")
#define numcol(reg) get_dimm_attr((reg), 0, 1, 10, 0, 2, "cols")

static int get_width(u32 mtr)
{
	switch (GET_BITFIELD(mtr, 8, 9)) {
	case 0:
		return DEV_X4;
	case 1:
		return DEV_X8;
	case 2:
		return DEV_X16;
	}
	return DEV_UNKNOWN;
}

static int skx_get_hi_lo(void)
{
	struct pci_dev *pdev;
	u32 reg;

	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x2034, NULL);
	if (!pdev) {
		edac_dbg(0, "Can't get tolm/tohm\n");
		return -ENODEV;
	}

	pci_read_config_dword(pdev, 0xD0, &reg);
	skx_tolm = reg;
	pci_read_config_dword(pdev, 0xD4, &reg);
	skx_tohm = reg;
	pci_read_config_dword(pdev, 0xD8, &reg);
	skx_tohm |= (u64)reg << 32;

	pci_dev_put(pdev);
	edac_dbg(2, "tolm=%llx tohm=%llx\n", skx_tolm, skx_tohm);

	return 0;
}

static int get_dimm_info(u32 mtr, u32 amap, struct dimm_info *dimm,
			 struct skx_imc *imc, int chan, int dimmno)
{
	int banks = 16, ranks, rows, cols, npages;
	u64 size;

	if (!IS_DIMM_PRESENT(mtr))
		return 0;
	ranks = numrank(mtr);
	rows = numrow(mtr);
	cols = numcol(mtr);

	/*
	 * Compute size in 8-byte (2^3) words, then shift to MiB (2^20)
	 */
	size = ((1ull << (rows + cols + ranks)) * banks) >> (20 - 3);
	npages = MiB_TO_PAGES(size);
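	/*
	 * e.g. rows = 15, cols = 10, ranks = 1, banks = 16:
	 * ((1 << 26) * 16) >> 17 == 8192 MiB.
	 */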

	edac_dbg(0, "mc#%d: channel %d, dimm %d, %lld MiB (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n",
		 imc->mc, chan, dimmno, size, npages,
		 banks, ranks, rows, cols);

	imc->chan[chan].dimms[dimmno].close_pg = GET_BITFIELD(mtr, 0, 0);
	imc->chan[chan].dimms[dimmno].bank_xor_enable = GET_BITFIELD(mtr, 9, 9);
	imc->chan[chan].dimms[dimmno].fine_grain_bank = GET_BITFIELD(amap, 0, 0);
	imc->chan[chan].dimms[dimmno].rowbits = rows;
	imc->chan[chan].dimms[dimmno].colbits = cols;

	dimm->nr_pages = npages;
	dimm->grain = 32;
	dimm->dtype = get_width(mtr);
	dimm->mtype = MEM_DDR4;
	dimm->edac_mode = EDAC_SECDED; /* likely better than this */
	snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u",
		 imc->src_id, imc->lmc, chan, dimmno);

	return 1;
}

#define SKX_GET_MTMTR(dev, reg) \
	pci_read_config_dword((dev), 0x87c, &reg)

static bool skx_check_ecc(struct pci_dev *pdev)
{
	u32 mtmtr;

	SKX_GET_MTMTR(pdev, mtmtr);

	return !!GET_BITFIELD(mtmtr, 2, 2);
}

static int skx_get_dimm_config(struct mem_ctl_info *mci)
{
	struct skx_pvt *pvt = mci->pvt_info;
	struct skx_imc *imc = pvt->imc;
	struct dimm_info *dimm;
	int i, j;
	u32 mtr, amap;
	int ndimms;

	for (i = 0; i < NUM_CHANNELS; i++) {
		ndimms = 0;
		pci_read_config_dword(imc->chan[i].cdev, 0x8C, &amap);
		for (j = 0; j < NUM_DIMMS; j++) {
			dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
					     mci->n_layers, i, j, 0);
			pci_read_config_dword(imc->chan[i].cdev,
					      0x80 + 4*j, &mtr);
			ndimms += get_dimm_info(mtr, amap, dimm, imc, i, j);
		}
		if (ndimms && !skx_check_ecc(imc->chan[0].cdev)) {
			skx_printk(KERN_ERR, "ECC is disabled on imc %d\n", imc->mc);
			return -ENODEV;
		}
	}

	return 0;
}

static void skx_unregister_mci(struct skx_imc *imc)
{
	struct mem_ctl_info *mci = imc->mci;

	if (!mci)
		return;

	edac_dbg(0, "MC%d: mci = %p\n", imc->mc, mci);

	/* Remove MC sysfs nodes */
	edac_mc_del_mc(mci->pdev);

	edac_dbg(1, "%s: free mci struct\n", mci->ctl_name);
	kfree(mci->ctl_name);
	edac_mc_free(mci);
}

static int skx_register_mci(struct skx_imc *imc)
{
	struct mem_ctl_info *mci;
	struct edac_mc_layer layers[2];
	struct pci_dev *pdev = imc->chan[0].cdev;
	struct skx_pvt *pvt;
	int rc;

	/* allocate a new MC control structure */
	layers[0].type = EDAC_MC_LAYER_CHANNEL;
	layers[0].size = NUM_CHANNELS;
	layers[0].is_virt_csrow = false;
	layers[1].type = EDAC_MC_LAYER_SLOT;
	layers[1].size = NUM_DIMMS;
	layers[1].is_virt_csrow = true;
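	/* Two-level topology: 3 channels, each with 2 DIMM slots */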
	mci = edac_mc_alloc(imc->mc, ARRAY_SIZE(layers), layers,
			    sizeof(struct skx_pvt));

	if (unlikely(!mci))
		return -ENOMEM;

	edac_dbg(0, "MC#%d: mci = %p\n", imc->mc, mci);

	/* Associate skx_dev and mci for future usage */
	imc->mci = mci;
	pvt = mci->pvt_info;
	pvt->imc = imc;

	mci->ctl_name = kasprintf(GFP_KERNEL, "Skylake Socket#%d IMC#%d",
				  imc->node_id, imc->lmc);
	mci->mtype_cap = MEM_FLAG_DDR4;
	mci->edac_ctl_cap = EDAC_FLAG_NONE;
	mci->edac_cap = EDAC_FLAG_NONE;
	mci->mod_name = "skx_edac.c";
	mci->dev_name = pci_name(imc->chan[0].cdev);
	mci->mod_ver = SKX_REVISION;
	mci->ctl_page_to_phys = NULL;

	rc = skx_get_dimm_config(mci);
	if (rc < 0)
		goto fail;

	/* record ptr to the generic device */
	mci->pdev = &pdev->dev;

	/* add this new MC control structure to EDAC's list of MCs */
	if (unlikely(edac_mc_add_mc(mci))) {
		edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
		rc = -EINVAL;
		goto fail;
	}

	return 0;

fail:
	kfree(mci->ctl_name);
	edac_mc_free(mci);
	imc->mci = NULL;
	return rc;
}

#define SKX_MAX_SAD 24

#define SKX_GET_SAD(d, i, reg) \
	pci_read_config_dword((d)->sad_all, 0x60 + 8 * (i), &reg)
#define SKX_GET_ILV(d, i, reg) \
	pci_read_config_dword((d)->sad_all, 0x64 + 8 * (i), &reg)

#define SKX_SAD_MOD3MODE(sad)	GET_BITFIELD((sad), 30, 31)
#define SKX_SAD_MOD3(sad)	GET_BITFIELD((sad), 27, 27)
#define SKX_SAD_LIMIT(sad)	(((u64)GET_BITFIELD((sad), 7, 26) << 26) | MASK26)
#define SKX_SAD_MOD3ASMOD2(sad)	GET_BITFIELD((sad), 5, 6)
#define SKX_SAD_ATTR(sad)	GET_BITFIELD((sad), 3, 4)
#define SKX_SAD_INTERLEAVE(sad)	GET_BITFIELD((sad), 1, 2)
#define SKX_SAD_ENABLE(sad)	GET_BITFIELD((sad), 0, 0)

#define SKX_ILV_REMOTE(tgt)	(((tgt) & 8) == 0)
#define SKX_ILV_TARGET(tgt)	((tgt) & 7)

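/*
 * Walk the SAD table: entries are sorted by ascending limit, so an
 * address belongs to the first enabled entry whose
 * [previous limit + 1, limit] range contains it.
 */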
static bool skx_sad_decode(struct decoded_addr *res)
{
	struct skx_dev *d = list_first_entry(&skx_edac_list, typeof(*d), list);
	u64 addr = res->addr;
	int i, idx, tgt, lchan, shift;
	u32 sad, ilv;
	u64 limit, prev_limit;
	int remote = 0;

	/* Simple sanity check for I/O space or out of range */
	if (addr >= skx_tohm || (addr >= skx_tolm && addr < BIT_ULL(32))) {
		edac_dbg(0, "Address %llx out of range\n", addr);
		return false;
	}

restart:
	prev_limit = 0;
	for (i = 0; i < SKX_MAX_SAD; i++) {
		SKX_GET_SAD(d, i, sad);
		limit = SKX_SAD_LIMIT(sad);
		if (SKX_SAD_ENABLE(sad)) {
			if (addr >= prev_limit && addr <= limit)
				goto sad_found;
		}
		prev_limit = limit + 1;
	}
	edac_dbg(0, "No SAD entry for %llx\n", addr);
	return false;

sad_found:
	SKX_GET_ILV(d, i, ilv);

	switch (SKX_SAD_INTERLEAVE(sad)) {
	case 0:
		idx = GET_BITFIELD(addr, 6, 8);
		break;
	case 1:
		idx = GET_BITFIELD(addr, 8, 10);
		break;
	case 2:
		idx = GET_BITFIELD(addr, 12, 14);
		break;
	case 3:
		idx = GET_BITFIELD(addr, 30, 32);
		break;
	}

	tgt = GET_BITFIELD(ilv, 4 * idx, 4 * idx + 3);

	/* If the target points to another node, find it and start over */
	if (SKX_ILV_REMOTE(tgt)) {
		if (remote) {
			edac_dbg(0, "Double remote!\n");
			return false;
		}
		remote = 1;
		list_for_each_entry(d, &skx_edac_list, list) {
			if (d->imc[0].src_id == SKX_ILV_TARGET(tgt))
				goto restart;
		}
		edac_dbg(0, "Can't find node %d\n", SKX_ILV_TARGET(tgt));
		return false;
	}

	if (SKX_SAD_MOD3(sad) == 0)
		lchan = SKX_ILV_TARGET(tgt);
	else {
		switch (SKX_SAD_MOD3MODE(sad)) {
		case 0:
			shift = 6;
			break;
		case 1:
			shift = 8;
			break;
		case 2:
			shift = 12;
			break;
		default:
			edac_dbg(0, "illegal mod3mode\n");
			return false;
		}
		switch (SKX_SAD_MOD3ASMOD2(sad)) {
		case 0:
			lchan = (addr >> shift) % 3;
			break;
		case 1:
			lchan = (addr >> shift) % 2;
			break;
		case 2:
			lchan = (addr >> shift) % 2;
			lchan = (lchan << 1) | !lchan;	/* map {0,1} to {1,2} */
			break;
		case 3:
			lchan = ((addr >> shift) % 2) << 1;
			break;
		}
		lchan = (lchan << 1) | (SKX_ILV_TARGET(tgt) & 1);
	}

	res->dev = d;
	res->socket = d->imc[0].src_id;
	res->imc = GET_BITFIELD(d->mcroute, lchan * 3, lchan * 3 + 2);
	res->channel = GET_BITFIELD(d->mcroute, lchan * 2 + 18, lchan * 2 + 19);

	edac_dbg(2, "%llx: socket=%d imc=%d channel=%d\n",
		 res->addr, res->socket, res->imc, res->channel);
	return true;
}

#define SKX_MAX_TAD 8

#define SKX_GET_TADBASE(d, mc, i, reg)			\
	pci_read_config_dword((d)->imc[mc].chan[0].cdev, 0x850 + 4 * (i), &reg)
#define SKX_GET_TADWAYNESS(d, mc, i, reg)		\
	pci_read_config_dword((d)->imc[mc].chan[0].cdev, 0x880 + 4 * (i), &reg)
#define SKX_GET_TADCHNILVOFFSET(d, mc, ch, i, reg)	\
	pci_read_config_dword((d)->imc[mc].chan[ch].cdev, 0x90 + 4 * (i), &reg)

#define SKX_TAD_BASE(b)		((u64)GET_BITFIELD((b), 12, 31) << 26)
#define SKX_TAD_SKT_GRAN(b)	GET_BITFIELD((b), 4, 5)
#define SKX_TAD_CHN_GRAN(b)	GET_BITFIELD((b), 6, 7)
#define SKX_TAD_LIMIT(b)	(((u64)GET_BITFIELD((b), 12, 31) << 26) | MASK26)
#define SKX_TAD_OFFSET(b)	((u64)GET_BITFIELD((b), 4, 23) << 26)
#define SKX_TAD_SKTWAYS(b)	(1 << GET_BITFIELD((b), 10, 11))
#define SKX_TAD_CHNWAYS(b)	(GET_BITFIELD((b), 8, 9) + 1)

/* Which bit is used for both socket and channel interleaving */
static int skx_granularity[] = { 6, 8, 12, 30 };

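/*
 * Undo an N-way interleave on the bit at position "shift": divide the
 * selection made by interleaving out of the bits above "shift", then
 * re-supply the untouched low bits from "lowbits".
 * e.g. addr = 0x1840, shift = 6, ways = 3:
 * 0x1840 >> 6 == 0x61, 0x61 / 3 == 0x20, 0x20 << 6 == 0x800,
 * then OR in the low six bits of "lowbits".
 */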
static u64 skx_do_interleave(u64 addr, int shift, int ways, u64 lowbits)
{
	addr >>= shift;
	addr /= ways;
	addr <<= shift;

	return addr | (lowbits & ((1ull << shift) - 1));
}

static bool skx_tad_decode(struct decoded_addr *res)
{
	int i;
	u32 base, wayness, chnilvoffset;
	int skt_interleave_bit, chn_interleave_bit;
	u64 channel_addr;

	for (i = 0; i < SKX_MAX_TAD; i++) {
		SKX_GET_TADBASE(res->dev, res->imc, i, base);
		SKX_GET_TADWAYNESS(res->dev, res->imc, i, wayness);
		if (SKX_TAD_BASE(base) <= res->addr && res->addr <= SKX_TAD_LIMIT(wayness))
			goto tad_found;
	}
	edac_dbg(0, "No TAD entry for %llx\n", res->addr);
	return false;

tad_found:
	res->sktways = SKX_TAD_SKTWAYS(wayness);
	res->chanways = SKX_TAD_CHNWAYS(wayness);
	skt_interleave_bit = skx_granularity[SKX_TAD_SKT_GRAN(base)];
	chn_interleave_bit = skx_granularity[SKX_TAD_CHN_GRAN(base)];

	SKX_GET_TADCHNILVOFFSET(res->dev, res->imc, res->channel, i, chnilvoffset);
	channel_addr = res->addr - SKX_TAD_OFFSET(chnilvoffset);

	if (res->chanways == 3 && skt_interleave_bit > chn_interleave_bit) {
		/* Must handle channel first, then socket */
		channel_addr = skx_do_interleave(channel_addr, chn_interleave_bit,
						 res->chanways, channel_addr);
		channel_addr = skx_do_interleave(channel_addr, skt_interleave_bit,
						 res->sktways, channel_addr);
	} else {
		/* Handle socket then channel. Preserve low bits from original address */
		channel_addr = skx_do_interleave(channel_addr, skt_interleave_bit,
						 res->sktways, res->addr);
		channel_addr = skx_do_interleave(channel_addr, chn_interleave_bit,
						 res->chanways, res->addr);
	}

	res->chan_addr = channel_addr;

	edac_dbg(2, "%llx: chan_addr=%llx sktways=%d chanways=%d\n",
		 res->addr, res->chan_addr, res->sktways, res->chanways);
	return true;
}

#define SKX_MAX_RIR 4

#define SKX_GET_RIRWAYNESS(d, mc, ch, i, reg)		\
	pci_read_config_dword((d)->imc[mc].chan[ch].cdev,	\
			      0x108 + 4 * (i), &reg)
#define SKX_GET_RIRILV(d, mc, ch, idx, i, reg)		\
	pci_read_config_dword((d)->imc[mc].chan[ch].cdev,	\
			      0x120 + 16 * idx + 4 * (i), &reg)

#define SKX_RIR_VALID(b) GET_BITFIELD((b), 31, 31)
#define SKX_RIR_LIMIT(b) (((u64)GET_BITFIELD((b), 1, 11) << 29) | MASK29)
#define SKX_RIR_WAYS(b) (1 << GET_BITFIELD((b), 28, 29))
#define SKX_RIR_CHAN_RANK(b) GET_BITFIELD((b), 16, 19)
#define SKX_RIR_OFFSET(b) ((u64)(GET_BITFIELD((b), 2, 15) << 26))

static bool skx_rir_decode(struct decoded_addr *res)
{
	int i, idx, chan_rank;
	int shift;
	u32 rirway, rirlv;
	u64 rank_addr, prev_limit = 0, limit;

	if (res->dev->imc[res->imc].chan[res->channel].dimms[0].close_pg)
		shift = 6;
	else
		shift = 13;

	for (i = 0; i < SKX_MAX_RIR; i++) {
		SKX_GET_RIRWAYNESS(res->dev, res->imc, res->channel, i, rirway);
		limit = SKX_RIR_LIMIT(rirway);
		if (SKX_RIR_VALID(rirway)) {
			if (prev_limit <= res->chan_addr &&
			    res->chan_addr <= limit)
				goto rir_found;
		}
		prev_limit = limit;
	}
	edac_dbg(0, "No RIR entry for %llx\n", res->addr);
	return false;

rir_found:
	rank_addr = res->chan_addr >> shift;
	rank_addr /= SKX_RIR_WAYS(rirway);
	rank_addr <<= shift;
	rank_addr |= res->chan_addr & GENMASK_ULL(shift - 1, 0);
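	/* Above: the same divide-out-the-ways trick as skx_do_interleave() */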

	res->rank_address = rank_addr;
	idx = (res->chan_addr >> shift) % SKX_RIR_WAYS(rirway);

	SKX_GET_RIRILV(res->dev, res->imc, res->channel, idx, i, rirlv);
	res->rank_address = rank_addr - SKX_RIR_OFFSET(rirlv);
	chan_rank = SKX_RIR_CHAN_RANK(rirlv);
	res->channel_rank = chan_rank;
	res->dimm = chan_rank / 4;
	res->rank = chan_rank % 4;

	edac_dbg(2, "%llx: dimm=%d rank=%d chan_rank=%d rank_addr=%llx\n",
		 res->addr, res->dimm, res->rank,
		 res->channel_rank, res->rank_address);
	return true;
}

static u8 skx_close_row[] = {
	15, 16, 17, 18, 20, 21, 22, 28, 10, 11, 12, 13, 29, 30, 31, 32, 33
};
static u8 skx_close_column[] = {
	3, 4, 5, 14, 19, 23, 24, 25, 26, 27
};
static u8 skx_open_row[] = {
	14, 15, 16, 20, 28, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33
};
static u8 skx_open_column[] = {
	3, 4, 5, 6, 7, 8, 9, 10, 11, 12
};
static u8 skx_open_fine_column[] = {
	3, 4, 5, 7, 8, 9, 10, 11, 12, 13
};

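/*
 * The tables above list, for each row/column bit of the DRAM address,
 * which rank-address bit supplies it. skx_bits() below does the gather:
 * with bits[] == skx_close_row, rank-address bit 15 becomes row bit 0,
 * bit 16 becomes row bit 1, and so on.
 */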
static int skx_bits(u64 addr, int nbits, u8 *bits)
{
	int i, res = 0;

	for (i = 0; i < nbits; i++)
		res |= ((addr >> bits[i]) & 1) << i;
	return res;
}

static int skx_bank_bits(u64 addr, int b0, int b1, int do_xor, int x0, int x1)
{
	int ret = GET_BITFIELD(addr, b0, b0) | (GET_BITFIELD(addr, b1, b1) << 1);

	if (do_xor)
		ret ^= GET_BITFIELD(addr, x0, x0) | (GET_BITFIELD(addr, x1, x1) << 1);

	return ret;
}

static bool skx_mad_decode(struct decoded_addr *r)
{
	struct skx_dimm *dimm = &r->dev->imc[r->imc].chan[r->channel].dimms[r->dimm];
	int bg0 = dimm->fine_grain_bank ? 6 : 13;

	if (dimm->close_pg) {
		r->row = skx_bits(r->rank_address, dimm->rowbits, skx_close_row);
		r->column = skx_bits(r->rank_address, dimm->colbits, skx_close_column);
		r->column |= 0x400; /* C10 is autoprecharge, always set */
		r->bank_address = skx_bank_bits(r->rank_address, 8, 9, dimm->bank_xor_enable, 22, 28);
		r->bank_group = skx_bank_bits(r->rank_address, 6, 7, dimm->bank_xor_enable, 20, 21);
	} else {
		r->row = skx_bits(r->rank_address, dimm->rowbits, skx_open_row);
		if (dimm->fine_grain_bank)
			r->column = skx_bits(r->rank_address, dimm->colbits, skx_open_fine_column);
		else
			r->column = skx_bits(r->rank_address, dimm->colbits, skx_open_column);
		r->bank_address = skx_bank_bits(r->rank_address, 18, 19, dimm->bank_xor_enable, 22, 23);
		r->bank_group = skx_bank_bits(r->rank_address, bg0, 17, dimm->bank_xor_enable, 20, 21);
	}
	r->row &= (1u << dimm->rowbits) - 1;

	edac_dbg(2, "%llx: row=%x col=%x bank_addr=%d bank_group=%d\n",
		 r->addr, r->row, r->column, r->bank_address,
		 r->bank_group);
	return true;
}

static bool skx_decode(struct decoded_addr *res)
{
	return skx_sad_decode(res) && skx_tad_decode(res) &&
		skx_rir_decode(res) && skx_mad_decode(res);
}

#ifdef CONFIG_EDAC_DEBUG
/*
 * Debug feature. Make /sys/kernel/debug/skx_edac_test/addr.
 * Write an address to this file to exercise the address decode
 * logic in this driver.
 */
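/*
 * Illustrative use (assuming debugfs is mounted at /sys/kernel/debug
 * and EDAC debug output is enabled so edac_dbg() is visible):
 *	# echo 0x12345678 > /sys/kernel/debug/skx_edac_test/addr
 * The decoded socket/imc/channel/rank/row/column are then printed by
 * the decode functions above.
 */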
static struct dentry *skx_test;
static u64 skx_fake_addr;

static int debugfs_u64_set(void *data, u64 val)
{
	struct decoded_addr res;

	res.addr = val;
	skx_decode(&res);

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");

static struct dentry *mydebugfs_create(const char *name, umode_t mode,
				       struct dentry *parent, u64 *value)
{
	return debugfs_create_file(name, mode, parent, value, &fops_u64_wo);
}

static void setup_skx_debug(void)
{
	skx_test = debugfs_create_dir("skx_edac_test", NULL);
	mydebugfs_create("addr", S_IWUSR, skx_test, &skx_fake_addr);
}

static void teardown_skx_debug(void)
{
	debugfs_remove_recursive(skx_test);
}
#else
static void setup_skx_debug(void)
{
}

static void teardown_skx_debug(void)
{
}
#endif /*CONFIG_EDAC_DEBUG*/

static void skx_mce_output_error(struct mem_ctl_info *mci,
				 const struct mce *m,
				 struct decoded_addr *res)
{
	enum hw_event_mc_err_type tp_event;
	char *type, *optype, msg[256];
	bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
	bool overflow = GET_BITFIELD(m->status, 62, 62);
	bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
	bool recoverable;
	u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52);
	u32 mscod = GET_BITFIELD(m->status, 16, 31);
	u32 errcode = GET_BITFIELD(m->status, 0, 15);
	u32 optypenum = GET_BITFIELD(m->status, 4, 6);

	recoverable = GET_BITFIELD(m->status, 56, 56);

	if (uncorrected_error) {
		if (ripv) {
			type = "FATAL";
			tp_event = HW_EVENT_ERR_FATAL;
		} else {
			type = "NON_FATAL";
			tp_event = HW_EVENT_ERR_UNCORRECTED;
		}
	} else {
		type = "CORRECTED";
		tp_event = HW_EVENT_ERR_CORRECTED;
	}

	/*
	 * According to Table 15-9 of the Intel Architecture spec vol 3A,
	 * memory errors should fit in this mask:
	 *	000f 0000 1mmm cccc (binary)
	 * where:
	 *	f = Correction Report Filtering Bit. If 1, subsequent errors
	 *	    won't be shown
	 *	mmm = error type
	 *	cccc = channel
	 * If the mask doesn't match, report an error to the parsing logic
	 */
	if (!((errcode & 0xef80) == 0x80)) {
		optype = "Can't parse: it is not a mem";
	} else {
		switch (optypenum) {
		case 0:
			optype = "generic undef request error";
			break;
		case 1:
			optype = "memory read error";
			break;
		case 2:
			optype = "memory write error";
			break;
		case 3:
			optype = "addr/cmd error";
			break;
		case 4:
			optype = "memory scrubbing error";
			break;
		default:
			optype = "reserved";
			break;
		}
	}

	snprintf(msg, sizeof(msg),
		 "%s%s err_code:%04x:%04x socket:%d imc:%d rank:%d bg:%d ba:%d row:%x col:%x",
		 overflow ? " OVERFLOW" : "",
		 (uncorrected_error && recoverable) ? " recoverable" : "",
		 mscod, errcode,
		 res->socket, res->imc, res->rank,
		 res->bank_group, res->bank_address, res->row, res->column);

	edac_dbg(0, "%s\n", msg);

	/* Call the helper to output message */
	edac_mc_handle_error(tp_event, mci, core_err_cnt,
			     m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0,
			     res->channel, res->dimm, -1,
			     optype, msg);
}

static int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
			       void *data)
{
	struct mce *mce = (struct mce *)data;
	struct decoded_addr res;
	struct mem_ctl_info *mci;
	char *type;

	if (get_edac_report_status() == EDAC_REPORTING_DISABLED)
		return NOTIFY_DONE;

	/* Ignore the event unless it is memory-related and has an address */
	if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV))
		return NOTIFY_DONE;

	res.addr = mce->addr;
	if (!skx_decode(&res))
		return NOTIFY_DONE;
	mci = res.dev->imc[res.imc].mci;

	if (mce->mcgstatus & MCG_STATUS_MCIP)
		type = "Exception";
	else
		type = "Event";

	skx_mc_printk(mci, KERN_DEBUG, "HANDLING MCE MEMORY ERROR\n");

	skx_mc_printk(mci, KERN_DEBUG, "CPU %d: Machine Check %s: %Lx "
		      "Bank %d: %016Lx\n", mce->extcpu, type,
		      mce->mcgstatus, mce->bank, mce->status);
	skx_mc_printk(mci, KERN_DEBUG, "TSC %llx ", mce->tsc);
	skx_mc_printk(mci, KERN_DEBUG, "ADDR %llx ", mce->addr);
	skx_mc_printk(mci, KERN_DEBUG, "MISC %llx ", mce->misc);

	skx_mc_printk(mci, KERN_DEBUG, "PROCESSOR %u:%x TIME %llu SOCKET "
		      "%u APIC %x\n", mce->cpuvendor, mce->cpuid,
		      mce->time, mce->socketid, mce->apicid);

	skx_mce_output_error(mci, mce, &res);

	return NOTIFY_DONE;
}

static struct notifier_block skx_mce_dec = {
	.notifier_call = skx_mce_check_error,
};

static void skx_remove(void)
{
	int i, j;
	struct skx_dev *d, *tmp;

	edac_dbg(0, "\n");

	list_for_each_entry_safe(d, tmp, &skx_edac_list, list) {
		list_del(&d->list);
		for (i = 0; i < NUM_IMC; i++) {
			skx_unregister_mci(&d->imc[i]);
			for (j = 0; j < NUM_CHANNELS; j++)
				pci_dev_put(d->imc[i].chan[j].cdev);
		}
		pci_dev_put(d->util_all);
		pci_dev_put(d->sad_all);

		kfree(d);
	}
}

/*
 * skx_init:
 *	make sure we are running on the correct cpu model
 *	search for all the devices we need
 *	check which DIMMs are present.
 */
static int __init skx_init(void)
{
	const struct x86_cpu_id *id;
	const struct munit *m;
	int rc = 0, i;
	u8 mc = 0, src_id, node_id;
	struct skx_dev *d;

	edac_dbg(2, "\n");

	id = x86_match_cpu(skx_cpuids);
	if (!id)
		return -ENODEV;

	rc = skx_get_hi_lo();
	if (rc)
		return rc;

	rc = get_all_bus_mappings();
	if (rc < 0)
		goto fail;
	if (rc == 0) {
		edac_dbg(2, "No memory controllers found\n");
		return -ENODEV;
	}

	for (m = skx_all_munits; m->did; m++) {
		rc = get_all_munits(m);
		if (rc < 0)
			goto fail;
		if (rc != m->per_socket * skx_num_sockets) {
			edac_dbg(2, "Expected %d, got %d of %x\n",
				 m->per_socket * skx_num_sockets, rc, m->did);
			rc = -ENODEV;
			goto fail;
		}
	}

	list_for_each_entry(d, &skx_edac_list, list) {
		src_id = get_src_id(d);
		node_id = skx_get_node_id(d);
		edac_dbg(2, "src_id=%d node_id=%d\n", src_id, node_id);
		for (i = 0; i < NUM_IMC; i++) {
			d->imc[i].mc = mc++;
			d->imc[i].lmc = i;
			d->imc[i].src_id = src_id;
			d->imc[i].node_id = node_id;
			rc = skx_register_mci(&d->imc[i]);
			if (rc < 0)
				goto fail;
		}
	}

	/* Ensure that the OPSTATE is set correctly for POLL or NMI */
	opstate_init();

	setup_skx_debug();

	mce_register_decode_chain(&skx_mce_dec);

	return 0;
fail:
	skx_remove();
	return rc;
}

static void __exit skx_exit(void)
{
	edac_dbg(2, "\n");
	mce_unregister_decode_chain(&skx_mce_dec);
	skx_remove();
	teardown_skx_debug();
}

module_init(skx_init);
module_exit(skx_exit);

module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Tony Luck");
MODULE_DESCRIPTION("MC Driver for Intel Skylake server processors");