about summary refs log tree commit diff stats
path: root/drivers/edac/edac_mc.c
diff options
context:
space:
mode:
author Mauro Carvalho Chehab <mchehab@redhat.com> 2012-07-29 20:11:05 -0400
committer Mauro Carvalho Chehab <mchehab@redhat.com> 2012-07-29 20:11:05 -0400
commit c2078e4c9120e7b38b1a02cd9fc6dd4f792110bf (patch)
tree a30b29c0bf8cf2288a32ceaeb75013cb0b5d5865 /drivers/edac/edac_mc.c
parent 73bcc49959e4e40911dd0dd634bf1b353827df66 (diff)
parent f58d0dee07fe6328f775669eb6aa3a123efad6c2 (diff)
Merge branch 'devel'
* devel: (33 commits)
  edac i5000, i5400: fix pointer math in i5000_get_mc_regs()
  edac: allow specifying the error count with fake_inject
  edac: add support for Calxeda highbank L2 cache ecc
  edac: add support for Calxeda highbank memory controller
  edac: create top-level debugfs directory
  sb_edac: properly handle error count
  i7core_edac: properly handle error count
  edac: edac_mc_handle_error(): add an error_count parameter
  edac: remove arch-specific parameter for the error handler
  amd64_edac: Don't pass driver name as an error parameter
  edac_mc: check for allocation failure in edac_mc_alloc()
  edac: Increase version to 3.0.0
  edac_mc: Cleanup per-dimm_info debug messages
  edac: Convert debugfX to edac_dbg(X,
  edac: Use more normal debugging macro style
  edac: Don't add __func__ or __FILE__ for debugf[0-9] msgs
  Edac: Add ABI Documentation for the new device nodes
  edac: move documentation ABI to ABI/testing/sysfs-devices-edac
  i7core_edac: change the mem allocation scheme to make Documentation/kobject.txt happy
  edac: change the mem allocation scheme to make Documentation/kobject.txt happy
  ...
Diffstat (limited to 'drivers/edac/edac_mc.c')
-rw-r--r-- drivers/edac/edac_mc.c 395
1 files changed, 241 insertions, 154 deletions
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index de5ba86e8b89..616d90bcb3a4 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -27,70 +27,95 @@
27#include <linux/list.h> 27#include <linux/list.h>
28#include <linux/ctype.h> 28#include <linux/ctype.h>
29#include <linux/edac.h> 29#include <linux/edac.h>
30#include <linux/bitops.h>
30#include <asm/uaccess.h> 31#include <asm/uaccess.h>
31#include <asm/page.h> 32#include <asm/page.h>
32#include <asm/edac.h> 33#include <asm/edac.h>
33#include "edac_core.h" 34#include "edac_core.h"
34#include "edac_module.h" 35#include "edac_module.h"
35 36
37#define CREATE_TRACE_POINTS
38#define TRACE_INCLUDE_PATH ../../include/ras
39#include <ras/ras_event.h>
40
36/* lock to memory controller's control array */ 41/* lock to memory controller's control array */
37static DEFINE_MUTEX(mem_ctls_mutex); 42static DEFINE_MUTEX(mem_ctls_mutex);
38static LIST_HEAD(mc_devices); 43static LIST_HEAD(mc_devices);
39 44
45unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf,
46 unsigned len)
47{
48 struct mem_ctl_info *mci = dimm->mci;
49 int i, n, count = 0;
50 char *p = buf;
51
52 for (i = 0; i < mci->n_layers; i++) {
53 n = snprintf(p, len, "%s %d ",
54 edac_layer_name[mci->layers[i].type],
55 dimm->location[i]);
56 p += n;
57 len -= n;
58 count += n;
59 if (!len)
60 break;
61 }
62
63 return count;
64}
65
40#ifdef CONFIG_EDAC_DEBUG 66#ifdef CONFIG_EDAC_DEBUG
41 67
42static void edac_mc_dump_channel(struct rank_info *chan) 68static void edac_mc_dump_channel(struct rank_info *chan)
43{ 69{
44 debugf4("\tchannel = %p\n", chan); 70 edac_dbg(4, " channel->chan_idx = %d\n", chan->chan_idx);
45 debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx); 71 edac_dbg(4, " channel = %p\n", chan);
46 debugf4("\tchannel->csrow = %p\n\n", chan->csrow); 72 edac_dbg(4, " channel->csrow = %p\n", chan->csrow);
47 debugf4("\tchannel->dimm = %p\n", chan->dimm); 73 edac_dbg(4, " channel->dimm = %p\n", chan->dimm);
48} 74}
49 75
50static void edac_mc_dump_dimm(struct dimm_info *dimm) 76static void edac_mc_dump_dimm(struct dimm_info *dimm, int number)
51{ 77{
52 int i; 78 char location[80];
53 79
54 debugf4("\tdimm = %p\n", dimm); 80 edac_dimm_info_location(dimm, location, sizeof(location));
55 debugf4("\tdimm->label = '%s'\n", dimm->label); 81
56 debugf4("\tdimm->nr_pages = 0x%x\n", dimm->nr_pages); 82 edac_dbg(4, "%s%i: %smapped as virtual row %d, chan %d\n",
57 debugf4("\tdimm location "); 83 dimm->mci->mem_is_per_rank ? "rank" : "dimm",
58 for (i = 0; i < dimm->mci->n_layers; i++) { 84 number, location, dimm->csrow, dimm->cschannel);
59 printk(KERN_CONT "%d", dimm->location[i]); 85 edac_dbg(4, " dimm = %p\n", dimm);
60 if (i < dimm->mci->n_layers - 1) 86 edac_dbg(4, " dimm->label = '%s'\n", dimm->label);
61 printk(KERN_CONT "."); 87 edac_dbg(4, " dimm->nr_pages = 0x%x\n", dimm->nr_pages);
62 } 88 edac_dbg(4, " dimm->grain = %d\n", dimm->grain);
63 printk(KERN_CONT "\n"); 89 edac_dbg(4, " dimm->nr_pages = 0x%x\n", dimm->nr_pages);
64 debugf4("\tdimm->grain = %d\n", dimm->grain);
65 debugf4("\tdimm->nr_pages = 0x%x\n", dimm->nr_pages);
66} 90}
67 91
68static void edac_mc_dump_csrow(struct csrow_info *csrow) 92static void edac_mc_dump_csrow(struct csrow_info *csrow)
69{ 93{
70 debugf4("\tcsrow = %p\n", csrow); 94 edac_dbg(4, "csrow->csrow_idx = %d\n", csrow->csrow_idx);
71 debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx); 95 edac_dbg(4, " csrow = %p\n", csrow);
72 debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page); 96 edac_dbg(4, " csrow->first_page = 0x%lx\n", csrow->first_page);
73 debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page); 97 edac_dbg(4, " csrow->last_page = 0x%lx\n", csrow->last_page);
74 debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask); 98 edac_dbg(4, " csrow->page_mask = 0x%lx\n", csrow->page_mask);
75 debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels); 99 edac_dbg(4, " csrow->nr_channels = %d\n", csrow->nr_channels);
76 debugf4("\tcsrow->channels = %p\n", csrow->channels); 100 edac_dbg(4, " csrow->channels = %p\n", csrow->channels);
77 debugf4("\tcsrow->mci = %p\n\n", csrow->mci); 101 edac_dbg(4, " csrow->mci = %p\n", csrow->mci);
78} 102}
79 103
80static void edac_mc_dump_mci(struct mem_ctl_info *mci) 104static void edac_mc_dump_mci(struct mem_ctl_info *mci)
81{ 105{
82 debugf3("\tmci = %p\n", mci); 106 edac_dbg(3, "\tmci = %p\n", mci);
83 debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap); 107 edac_dbg(3, "\tmci->mtype_cap = %lx\n", mci->mtype_cap);
84 debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap); 108 edac_dbg(3, "\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
85 debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap); 109 edac_dbg(3, "\tmci->edac_cap = %lx\n", mci->edac_cap);
86 debugf4("\tmci->edac_check = %p\n", mci->edac_check); 110 edac_dbg(4, "\tmci->edac_check = %p\n", mci->edac_check);
87 debugf3("\tmci->nr_csrows = %d, csrows = %p\n", 111 edac_dbg(3, "\tmci->nr_csrows = %d, csrows = %p\n",
88 mci->nr_csrows, mci->csrows); 112 mci->nr_csrows, mci->csrows);
89 debugf3("\tmci->nr_dimms = %d, dimms = %p\n", 113 edac_dbg(3, "\tmci->nr_dimms = %d, dimms = %p\n",
90 mci->tot_dimms, mci->dimms); 114 mci->tot_dimms, mci->dimms);
91 debugf3("\tdev = %p\n", mci->dev); 115 edac_dbg(3, "\tdev = %p\n", mci->pdev);
92 debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name); 116 edac_dbg(3, "\tmod_name:ctl_name = %s:%s\n",
93 debugf3("\tpvt_info = %p\n\n", mci->pvt_info); 117 mci->mod_name, mci->ctl_name);
118 edac_dbg(3, "\tpvt_info = %p\n\n", mci->pvt_info);
94} 119}
95 120
96#endif /* CONFIG_EDAC_DEBUG */ 121#endif /* CONFIG_EDAC_DEBUG */
@@ -205,15 +230,15 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
205{ 230{
206 struct mem_ctl_info *mci; 231 struct mem_ctl_info *mci;
207 struct edac_mc_layer *layer; 232 struct edac_mc_layer *layer;
208 struct csrow_info *csi, *csr; 233 struct csrow_info *csr;
209 struct rank_info *chi, *chp, *chan; 234 struct rank_info *chan;
210 struct dimm_info *dimm; 235 struct dimm_info *dimm;
211 u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS]; 236 u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
212 unsigned pos[EDAC_MAX_LAYERS]; 237 unsigned pos[EDAC_MAX_LAYERS];
213 unsigned size, tot_dimms = 1, count = 1; 238 unsigned size, tot_dimms = 1, count = 1;
214 unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0; 239 unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
215 void *pvt, *p, *ptr = NULL; 240 void *pvt, *p, *ptr = NULL;
216 int i, j, err, row, chn, n, len; 241 int i, j, row, chn, n, len, off;
217 bool per_rank = false; 242 bool per_rank = false;
218 243
219 BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0); 244 BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0);
@@ -239,26 +264,24 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
239 */ 264 */
240 mci = edac_align_ptr(&ptr, sizeof(*mci), 1); 265 mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
241 layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers); 266 layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
242 csi = edac_align_ptr(&ptr, sizeof(*csi), tot_csrows);
243 chi = edac_align_ptr(&ptr, sizeof(*chi), tot_csrows * tot_channels);
244 dimm = edac_align_ptr(&ptr, sizeof(*dimm), tot_dimms);
245 for (i = 0; i < n_layers; i++) { 267 for (i = 0; i < n_layers; i++) {
246 count *= layers[i].size; 268 count *= layers[i].size;
247 debugf4("%s: errcount layer %d size %d\n", __func__, i, count); 269 edac_dbg(4, "errcount layer %d size %d\n", i, count);
248 ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count); 270 ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
249 ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count); 271 ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
250 tot_errcount += 2 * count; 272 tot_errcount += 2 * count;
251 } 273 }
252 274
253 debugf4("%s: allocating %d error counters\n", __func__, tot_errcount); 275 edac_dbg(4, "allocating %d error counters\n", tot_errcount);
254 pvt = edac_align_ptr(&ptr, sz_pvt, 1); 276 pvt = edac_align_ptr(&ptr, sz_pvt, 1);
255 size = ((unsigned long)pvt) + sz_pvt; 277 size = ((unsigned long)pvt) + sz_pvt;
256 278
257 debugf1("%s(): allocating %u bytes for mci data (%d %s, %d csrows/channels)\n", 279 edac_dbg(1, "allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
258 __func__, size, 280 size,
259 tot_dimms, 281 tot_dimms,
260 per_rank ? "ranks" : "dimms", 282 per_rank ? "ranks" : "dimms",
261 tot_csrows * tot_channels); 283 tot_csrows * tot_channels);
284
262 mci = kzalloc(size, GFP_KERNEL); 285 mci = kzalloc(size, GFP_KERNEL);
263 if (mci == NULL) 286 if (mci == NULL)
264 return NULL; 287 return NULL;
@@ -267,9 +290,6 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
267 * rather than an imaginary chunk of memory located at address 0. 290 * rather than an imaginary chunk of memory located at address 0.
268 */ 291 */
269 layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer)); 292 layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
270 csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
271 chi = (struct rank_info *)(((char *)mci) + ((unsigned long)chi));
272 dimm = (struct dimm_info *)(((char *)mci) + ((unsigned long)dimm));
273 for (i = 0; i < n_layers; i++) { 293 for (i = 0; i < n_layers; i++) {
274 mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i])); 294 mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i]));
275 mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i])); 295 mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i]));
@@ -278,8 +298,6 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
278 298
279 /* setup index and various internal pointers */ 299 /* setup index and various internal pointers */
280 mci->mc_idx = mc_num; 300 mci->mc_idx = mc_num;
281 mci->csrows = csi;
282 mci->dimms = dimm;
283 mci->tot_dimms = tot_dimms; 301 mci->tot_dimms = tot_dimms;
284 mci->pvt_info = pvt; 302 mci->pvt_info = pvt;
285 mci->n_layers = n_layers; 303 mci->n_layers = n_layers;
@@ -290,40 +308,57 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
290 mci->mem_is_per_rank = per_rank; 308 mci->mem_is_per_rank = per_rank;
291 309
292 /* 310 /*
293 * Fill the csrow struct 311 * Alocate and fill the csrow/channels structs
294 */ 312 */
313 mci->csrows = kcalloc(sizeof(*mci->csrows), tot_csrows, GFP_KERNEL);
314 if (!mci->csrows)
315 goto error;
295 for (row = 0; row < tot_csrows; row++) { 316 for (row = 0; row < tot_csrows; row++) {
296 csr = &csi[row]; 317 csr = kzalloc(sizeof(**mci->csrows), GFP_KERNEL);
318 if (!csr)
319 goto error;
320 mci->csrows[row] = csr;
297 csr->csrow_idx = row; 321 csr->csrow_idx = row;
298 csr->mci = mci; 322 csr->mci = mci;
299 csr->nr_channels = tot_channels; 323 csr->nr_channels = tot_channels;
300 chp = &chi[row * tot_channels]; 324 csr->channels = kcalloc(sizeof(*csr->channels), tot_channels,
301 csr->channels = chp; 325 GFP_KERNEL);
326 if (!csr->channels)
327 goto error;
302 328
303 for (chn = 0; chn < tot_channels; chn++) { 329 for (chn = 0; chn < tot_channels; chn++) {
304 chan = &chp[chn]; 330 chan = kzalloc(sizeof(**csr->channels), GFP_KERNEL);
331 if (!chan)
332 goto error;
333 csr->channels[chn] = chan;
305 chan->chan_idx = chn; 334 chan->chan_idx = chn;
306 chan->csrow = csr; 335 chan->csrow = csr;
307 } 336 }
308 } 337 }
309 338
310 /* 339 /*
311 * Fill the dimm struct 340 * Allocate and fill the dimm structs
312 */ 341 */
342 mci->dimms = kcalloc(sizeof(*mci->dimms), tot_dimms, GFP_KERNEL);
343 if (!mci->dimms)
344 goto error;
345
313 memset(&pos, 0, sizeof(pos)); 346 memset(&pos, 0, sizeof(pos));
314 row = 0; 347 row = 0;
315 chn = 0; 348 chn = 0;
316 debugf4("%s: initializing %d %s\n", __func__, tot_dimms,
317 per_rank ? "ranks" : "dimms");
318 for (i = 0; i < tot_dimms; i++) { 349 for (i = 0; i < tot_dimms; i++) {
319 chan = &csi[row].channels[chn]; 350 chan = mci->csrows[row]->channels[chn];
320 dimm = EDAC_DIMM_PTR(layer, mci->dimms, n_layers, 351 off = EDAC_DIMM_OFF(layer, n_layers, pos[0], pos[1], pos[2]);
321 pos[0], pos[1], pos[2]); 352 if (off < 0 || off >= tot_dimms) {
322 dimm->mci = mci; 353 edac_mc_printk(mci, KERN_ERR, "EDAC core bug: EDAC_DIMM_OFF is trying to do an illegal data access\n");
354 goto error;
355 }
323 356
324 debugf2("%s: %d: %s%zd (%d:%d:%d): row %d, chan %d\n", __func__, 357 dimm = kzalloc(sizeof(**mci->dimms), GFP_KERNEL);
325 i, per_rank ? "rank" : "dimm", (dimm - mci->dimms), 358 if (!dimm)
326 pos[0], pos[1], pos[2], row, chn); 359 goto error;
360 mci->dimms[off] = dimm;
361 dimm->mci = mci;
327 362
328 /* 363 /*
329 * Copy DIMM location and initialize it. 364 * Copy DIMM location and initialize it.
@@ -367,16 +402,6 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
367 } 402 }
368 403
369 mci->op_state = OP_ALLOC; 404 mci->op_state = OP_ALLOC;
370 INIT_LIST_HEAD(&mci->grp_kobj_list);
371
372 /*
373 * Initialize the 'root' kobj for the edac_mc controller
374 */
375 err = edac_mc_register_sysfs_main_kobj(mci);
376 if (err) {
377 kfree(mci);
378 return NULL;
379 }
380 405
381 /* at this point, the root kobj is valid, and in order to 406 /* at this point, the root kobj is valid, and in order to
382 * 'free' the object, then the function: 407 * 'free' the object, then the function:
@@ -384,7 +409,30 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
384 * which will perform kobj unregistration and the actual free 409 * which will perform kobj unregistration and the actual free
385 * will occur during the kobject callback operation 410 * will occur during the kobject callback operation
386 */ 411 */
412
387 return mci; 413 return mci;
414
415error:
416 if (mci->dimms) {
417 for (i = 0; i < tot_dimms; i++)
418 kfree(mci->dimms[i]);
419 kfree(mci->dimms);
420 }
421 if (mci->csrows) {
422 for (chn = 0; chn < tot_channels; chn++) {
423 csr = mci->csrows[chn];
424 if (csr) {
425 for (chn = 0; chn < tot_channels; chn++)
426 kfree(csr->channels[chn]);
427 kfree(csr);
428 }
429 kfree(mci->csrows[i]);
430 }
431 kfree(mci->csrows);
432 }
433 kfree(mci);
434
435 return NULL;
388} 436}
389EXPORT_SYMBOL_GPL(edac_mc_alloc); 437EXPORT_SYMBOL_GPL(edac_mc_alloc);
390 438
@@ -395,12 +443,10 @@ EXPORT_SYMBOL_GPL(edac_mc_alloc);
395 */ 443 */
396void edac_mc_free(struct mem_ctl_info *mci) 444void edac_mc_free(struct mem_ctl_info *mci)
397{ 445{
398 debugf1("%s()\n", __func__); 446 edac_dbg(1, "\n");
399 447
400 edac_mc_unregister_sysfs_main_kobj(mci); 448 /* the mci instance is freed here, when the sysfs object is dropped */
401 449 edac_unregister_sysfs(mci);
402 /* free the mci instance memory here */
403 kfree(mci);
404} 450}
405EXPORT_SYMBOL_GPL(edac_mc_free); 451EXPORT_SYMBOL_GPL(edac_mc_free);
406 452
@@ -417,12 +463,12 @@ struct mem_ctl_info *find_mci_by_dev(struct device *dev)
417 struct mem_ctl_info *mci; 463 struct mem_ctl_info *mci;
418 struct list_head *item; 464 struct list_head *item;
419 465
420 debugf3("%s()\n", __func__); 466 edac_dbg(3, "\n");
421 467
422 list_for_each(item, &mc_devices) { 468 list_for_each(item, &mc_devices) {
423 mci = list_entry(item, struct mem_ctl_info, link); 469 mci = list_entry(item, struct mem_ctl_info, link);
424 470
425 if (mci->dev == dev) 471 if (mci->pdev == dev)
426 return mci; 472 return mci;
427 } 473 }
428 474
@@ -485,7 +531,7 @@ static void edac_mc_workq_function(struct work_struct *work_req)
485 */ 531 */
486static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec) 532static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
487{ 533{
488 debugf0("%s()\n", __func__); 534 edac_dbg(0, "\n");
489 535
490 /* if this instance is not in the POLL state, then simply return */ 536 /* if this instance is not in the POLL state, then simply return */
491 if (mci->op_state != OP_RUNNING_POLL) 537 if (mci->op_state != OP_RUNNING_POLL)
@@ -512,8 +558,7 @@ static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
512 558
513 status = cancel_delayed_work(&mci->work); 559 status = cancel_delayed_work(&mci->work);
514 if (status == 0) { 560 if (status == 0) {
515 debugf0("%s() not canceled, flush the queue\n", 561 edac_dbg(0, "not canceled, flush the queue\n");
516 __func__);
517 562
518 /* workq instance might be running, wait for it */ 563 /* workq instance might be running, wait for it */
519 flush_workqueue(edac_workqueue); 564 flush_workqueue(edac_workqueue);
@@ -574,7 +619,7 @@ static int add_mc_to_global_list(struct mem_ctl_info *mci)
574 619
575 insert_before = &mc_devices; 620 insert_before = &mc_devices;
576 621
577 p = find_mci_by_dev(mci->dev); 622 p = find_mci_by_dev(mci->pdev);
578 if (unlikely(p != NULL)) 623 if (unlikely(p != NULL))
579 goto fail0; 624 goto fail0;
580 625
@@ -596,7 +641,7 @@ static int add_mc_to_global_list(struct mem_ctl_info *mci)
596 641
597fail0: 642fail0:
598 edac_printk(KERN_WARNING, EDAC_MC, 643 edac_printk(KERN_WARNING, EDAC_MC,
599 "%s (%s) %s %s already assigned %d\n", dev_name(p->dev), 644 "%s (%s) %s %s already assigned %d\n", dev_name(p->pdev),
600 edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx); 645 edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
601 return 1; 646 return 1;
602 647
@@ -660,7 +705,7 @@ EXPORT_SYMBOL(edac_mc_find);
660/* FIXME - should a warning be printed if no error detection? correction? */ 705/* FIXME - should a warning be printed if no error detection? correction? */
661int edac_mc_add_mc(struct mem_ctl_info *mci) 706int edac_mc_add_mc(struct mem_ctl_info *mci)
662{ 707{
663 debugf0("%s()\n", __func__); 708 edac_dbg(0, "\n");
664 709
665#ifdef CONFIG_EDAC_DEBUG 710#ifdef CONFIG_EDAC_DEBUG
666 if (edac_debug_level >= 3) 711 if (edac_debug_level >= 3)
@@ -670,15 +715,22 @@ int edac_mc_add_mc(struct mem_ctl_info *mci)
670 int i; 715 int i;
671 716
672 for (i = 0; i < mci->nr_csrows; i++) { 717 for (i = 0; i < mci->nr_csrows; i++) {
718 struct csrow_info *csrow = mci->csrows[i];
719 u32 nr_pages = 0;
673 int j; 720 int j;
674 721
675 edac_mc_dump_csrow(&mci->csrows[i]); 722 for (j = 0; j < csrow->nr_channels; j++)
676 for (j = 0; j < mci->csrows[i].nr_channels; j++) 723 nr_pages += csrow->channels[j]->dimm->nr_pages;
677 edac_mc_dump_channel(&mci->csrows[i]. 724 if (!nr_pages)
678 channels[j]); 725 continue;
726 edac_mc_dump_csrow(csrow);
727 for (j = 0; j < csrow->nr_channels; j++)
728 if (csrow->channels[j]->dimm->nr_pages)
729 edac_mc_dump_channel(csrow->channels[j]);
679 } 730 }
680 for (i = 0; i < mci->tot_dimms; i++) 731 for (i = 0; i < mci->tot_dimms; i++)
681 edac_mc_dump_dimm(&mci->dimms[i]); 732 if (mci->dimms[i]->nr_pages)
733 edac_mc_dump_dimm(mci->dimms[i], i);
682 } 734 }
683#endif 735#endif
684 mutex_lock(&mem_ctls_mutex); 736 mutex_lock(&mem_ctls_mutex);
@@ -732,7 +784,7 @@ struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
732{ 784{
733 struct mem_ctl_info *mci; 785 struct mem_ctl_info *mci;
734 786
735 debugf0("%s()\n", __func__); 787 edac_dbg(0, "\n");
736 788
737 mutex_lock(&mem_ctls_mutex); 789 mutex_lock(&mem_ctls_mutex);
738 790
@@ -770,7 +822,7 @@ static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
770 void *virt_addr; 822 void *virt_addr;
771 unsigned long flags = 0; 823 unsigned long flags = 0;
772 824
773 debugf3("%s()\n", __func__); 825 edac_dbg(3, "\n");
774 826
775 /* ECC error page was not in our memory. Ignore it. */ 827 /* ECC error page was not in our memory. Ignore it. */
776 if (!pfn_valid(page)) 828 if (!pfn_valid(page))
@@ -797,26 +849,26 @@ static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
797/* FIXME - should return -1 */ 849/* FIXME - should return -1 */
798int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page) 850int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
799{ 851{
800 struct csrow_info *csrows = mci->csrows; 852 struct csrow_info **csrows = mci->csrows;
801 int row, i, j, n; 853 int row, i, j, n;
802 854
803 debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page); 855 edac_dbg(1, "MC%d: 0x%lx\n", mci->mc_idx, page);
804 row = -1; 856 row = -1;
805 857
806 for (i = 0; i < mci->nr_csrows; i++) { 858 for (i = 0; i < mci->nr_csrows; i++) {
807 struct csrow_info *csrow = &csrows[i]; 859 struct csrow_info *csrow = csrows[i];
808 n = 0; 860 n = 0;
809 for (j = 0; j < csrow->nr_channels; j++) { 861 for (j = 0; j < csrow->nr_channels; j++) {
810 struct dimm_info *dimm = csrow->channels[j].dimm; 862 struct dimm_info *dimm = csrow->channels[j]->dimm;
811 n += dimm->nr_pages; 863 n += dimm->nr_pages;
812 } 864 }
813 if (n == 0) 865 if (n == 0)
814 continue; 866 continue;
815 867
816 debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) " 868 edac_dbg(3, "MC%d: first(0x%lx) page(0x%lx) last(0x%lx) mask(0x%lx)\n",
817 "mask(0x%lx)\n", mci->mc_idx, __func__, 869 mci->mc_idx,
818 csrow->first_page, page, csrow->last_page, 870 csrow->first_page, page, csrow->last_page,
819 csrow->page_mask); 871 csrow->page_mask);
820 872
821 if ((page >= csrow->first_page) && 873 if ((page >= csrow->first_page) &&
822 (page <= csrow->last_page) && 874 (page <= csrow->last_page) &&
@@ -845,15 +897,16 @@ const char *edac_layer_name[] = {
845EXPORT_SYMBOL_GPL(edac_layer_name); 897EXPORT_SYMBOL_GPL(edac_layer_name);
846 898
847static void edac_inc_ce_error(struct mem_ctl_info *mci, 899static void edac_inc_ce_error(struct mem_ctl_info *mci,
848 bool enable_per_layer_report, 900 bool enable_per_layer_report,
849 const int pos[EDAC_MAX_LAYERS]) 901 const int pos[EDAC_MAX_LAYERS],
902 const u16 count)
850{ 903{
851 int i, index = 0; 904 int i, index = 0;
852 905
853 mci->ce_mc++; 906 mci->ce_mc += count;
854 907
855 if (!enable_per_layer_report) { 908 if (!enable_per_layer_report) {
856 mci->ce_noinfo_count++; 909 mci->ce_noinfo_count += count;
857 return; 910 return;
858 } 911 }
859 912
@@ -861,7 +914,7 @@ static void edac_inc_ce_error(struct mem_ctl_info *mci,
861 if (pos[i] < 0) 914 if (pos[i] < 0)
862 break; 915 break;
863 index += pos[i]; 916 index += pos[i];
864 mci->ce_per_layer[i][index]++; 917 mci->ce_per_layer[i][index] += count;
865 918
866 if (i < mci->n_layers - 1) 919 if (i < mci->n_layers - 1)
867 index *= mci->layers[i + 1].size; 920 index *= mci->layers[i + 1].size;
@@ -870,14 +923,15 @@ static void edac_inc_ce_error(struct mem_ctl_info *mci,
870 923
871static void edac_inc_ue_error(struct mem_ctl_info *mci, 924static void edac_inc_ue_error(struct mem_ctl_info *mci,
872 bool enable_per_layer_report, 925 bool enable_per_layer_report,
873 const int pos[EDAC_MAX_LAYERS]) 926 const int pos[EDAC_MAX_LAYERS],
927 const u16 count)
874{ 928{
875 int i, index = 0; 929 int i, index = 0;
876 930
877 mci->ue_mc++; 931 mci->ue_mc += count;
878 932
879 if (!enable_per_layer_report) { 933 if (!enable_per_layer_report) {
880 mci->ce_noinfo_count++; 934 mci->ce_noinfo_count += count;
881 return; 935 return;
882 } 936 }
883 937
@@ -885,7 +939,7 @@ static void edac_inc_ue_error(struct mem_ctl_info *mci,
885 if (pos[i] < 0) 939 if (pos[i] < 0)
886 break; 940 break;
887 index += pos[i]; 941 index += pos[i];
888 mci->ue_per_layer[i][index]++; 942 mci->ue_per_layer[i][index] += count;
889 943
890 if (i < mci->n_layers - 1) 944 if (i < mci->n_layers - 1)
891 index *= mci->layers[i + 1].size; 945 index *= mci->layers[i + 1].size;
@@ -893,6 +947,7 @@ static void edac_inc_ue_error(struct mem_ctl_info *mci,
893} 947}
894 948
895static void edac_ce_error(struct mem_ctl_info *mci, 949static void edac_ce_error(struct mem_ctl_info *mci,
950 const u16 error_count,
896 const int pos[EDAC_MAX_LAYERS], 951 const int pos[EDAC_MAX_LAYERS],
897 const char *msg, 952 const char *msg,
898 const char *location, 953 const char *location,
@@ -902,23 +957,25 @@ static void edac_ce_error(struct mem_ctl_info *mci,
902 const bool enable_per_layer_report, 957 const bool enable_per_layer_report,
903 const unsigned long page_frame_number, 958 const unsigned long page_frame_number,
904 const unsigned long offset_in_page, 959 const unsigned long offset_in_page,
905 u32 grain) 960 long grain)
906{ 961{
907 unsigned long remapped_page; 962 unsigned long remapped_page;
908 963
909 if (edac_mc_get_log_ce()) { 964 if (edac_mc_get_log_ce()) {
910 if (other_detail && *other_detail) 965 if (other_detail && *other_detail)
911 edac_mc_printk(mci, KERN_WARNING, 966 edac_mc_printk(mci, KERN_WARNING,
912 "CE %s on %s (%s%s - %s)\n", 967 "%d CE %s on %s (%s %s - %s)\n",
968 error_count,
913 msg, label, location, 969 msg, label, location,
914 detail, other_detail); 970 detail, other_detail);
915 else 971 else
916 edac_mc_printk(mci, KERN_WARNING, 972 edac_mc_printk(mci, KERN_WARNING,
917 "CE %s on %s (%s%s)\n", 973 "%d CE %s on %s (%s %s)\n",
974 error_count,
918 msg, label, location, 975 msg, label, location,
919 detail); 976 detail);
920 } 977 }
921 edac_inc_ce_error(mci, enable_per_layer_report, pos); 978 edac_inc_ce_error(mci, enable_per_layer_report, pos, error_count);
922 979
923 if (mci->scrub_mode & SCRUB_SW_SRC) { 980 if (mci->scrub_mode & SCRUB_SW_SRC) {
924 /* 981 /*
@@ -942,6 +999,7 @@ static void edac_ce_error(struct mem_ctl_info *mci,
942} 999}
943 1000
944static void edac_ue_error(struct mem_ctl_info *mci, 1001static void edac_ue_error(struct mem_ctl_info *mci,
1002 const u16 error_count,
945 const int pos[EDAC_MAX_LAYERS], 1003 const int pos[EDAC_MAX_LAYERS],
946 const char *msg, 1004 const char *msg,
947 const char *location, 1005 const char *location,
@@ -953,12 +1011,14 @@ static void edac_ue_error(struct mem_ctl_info *mci,
953 if (edac_mc_get_log_ue()) { 1011 if (edac_mc_get_log_ue()) {
954 if (other_detail && *other_detail) 1012 if (other_detail && *other_detail)
955 edac_mc_printk(mci, KERN_WARNING, 1013 edac_mc_printk(mci, KERN_WARNING,
956 "UE %s on %s (%s%s - %s)\n", 1014 "%d UE %s on %s (%s %s - %s)\n",
1015 error_count,
957 msg, label, location, detail, 1016 msg, label, location, detail,
958 other_detail); 1017 other_detail);
959 else 1018 else
960 edac_mc_printk(mci, KERN_WARNING, 1019 edac_mc_printk(mci, KERN_WARNING,
961 "UE %s on %s (%s%s)\n", 1020 "%d UE %s on %s (%s %s)\n",
1021 error_count,
962 msg, label, location, detail); 1022 msg, label, location, detail);
963 } 1023 }
964 1024
@@ -971,33 +1031,53 @@ static void edac_ue_error(struct mem_ctl_info *mci,
971 msg, label, location, detail); 1031 msg, label, location, detail);
972 } 1032 }
973 1033
974 edac_inc_ue_error(mci, enable_per_layer_report, pos); 1034 edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count);
975} 1035}
976 1036
977#define OTHER_LABEL " or " 1037#define OTHER_LABEL " or "
1038
1039/**
1040 * edac_mc_handle_error - reports a memory event to userspace
1041 *
1042 * @type: severity of the error (CE/UE/Fatal)
1043 * @mci: a struct mem_ctl_info pointer
1044 * @error_count: Number of errors of the same type
1045 * @page_frame_number: mem page where the error occurred
1046 * @offset_in_page: offset of the error inside the page
1047 * @syndrome: ECC syndrome
1048 * @top_layer: Memory layer[0] position
1049 * @mid_layer: Memory layer[1] position
1050 * @low_layer: Memory layer[2] position
1051 * @msg: Message meaningful to the end users that
1052 * explains the event
1053 * @other_detail: Technical details about the event that
1054 * may help hardware manufacturers and
1055 * EDAC developers to analyse the event
1056 */
978void edac_mc_handle_error(const enum hw_event_mc_err_type type, 1057void edac_mc_handle_error(const enum hw_event_mc_err_type type,
979 struct mem_ctl_info *mci, 1058 struct mem_ctl_info *mci,
1059 const u16 error_count,
980 const unsigned long page_frame_number, 1060 const unsigned long page_frame_number,
981 const unsigned long offset_in_page, 1061 const unsigned long offset_in_page,
982 const unsigned long syndrome, 1062 const unsigned long syndrome,
983 const int layer0, 1063 const int top_layer,
984 const int layer1, 1064 const int mid_layer,
985 const int layer2, 1065 const int low_layer,
986 const char *msg, 1066 const char *msg,
987 const char *other_detail, 1067 const char *other_detail)
988 const void *mcelog)
989{ 1068{
990 /* FIXME: too much for stack: move it to some pre-alocated area */ 1069 /* FIXME: too much for stack: move it to some pre-alocated area */
991 char detail[80], location[80]; 1070 char detail[80], location[80];
992 char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms]; 1071 char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms];
993 char *p; 1072 char *p;
994 int row = -1, chan = -1; 1073 int row = -1, chan = -1;
995 int pos[EDAC_MAX_LAYERS] = { layer0, layer1, layer2 }; 1074 int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
996 int i; 1075 int i;
997 u32 grain; 1076 long grain;
998 bool enable_per_layer_report = false; 1077 bool enable_per_layer_report = false;
1078 u8 grain_bits;
999 1079
1000 debugf3("MC%d: %s()\n", mci->mc_idx, __func__); 1080 edac_dbg(3, "MC%d\n", mci->mc_idx);
1001 1081
1002 /* 1082 /*
1003 * Check if the event report is consistent and if the memory 1083 * Check if the event report is consistent and if the memory
@@ -1043,13 +1123,13 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
1043 p = label; 1123 p = label;
1044 *p = '\0'; 1124 *p = '\0';
1045 for (i = 0; i < mci->tot_dimms; i++) { 1125 for (i = 0; i < mci->tot_dimms; i++) {
1046 struct dimm_info *dimm = &mci->dimms[i]; 1126 struct dimm_info *dimm = mci->dimms[i];
1047 1127
1048 if (layer0 >= 0 && layer0 != dimm->location[0]) 1128 if (top_layer >= 0 && top_layer != dimm->location[0])
1049 continue; 1129 continue;
1050 if (layer1 >= 0 && layer1 != dimm->location[1]) 1130 if (mid_layer >= 0 && mid_layer != dimm->location[1])
1051 continue; 1131 continue;
1052 if (layer2 >= 0 && layer2 != dimm->location[2]) 1132 if (low_layer >= 0 && low_layer != dimm->location[2])
1053 continue; 1133 continue;
1054 1134
1055 /* get the max grain, over the error match range */ 1135 /* get the max grain, over the error match range */
@@ -1075,11 +1155,9 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
1075 * get csrow/channel of the DIMM, in order to allow 1155 * get csrow/channel of the DIMM, in order to allow
1076 * incrementing the compat API counters 1156 * incrementing the compat API counters
1077 */ 1157 */
1078 debugf4("%s: %s csrows map: (%d,%d)\n", 1158 edac_dbg(4, "%s csrows map: (%d,%d)\n",
1079 __func__, 1159 mci->mem_is_per_rank ? "rank" : "dimm",
1080 mci->mem_is_per_rank ? "rank" : "dimm", 1160 dimm->csrow, dimm->cschannel);
1081 dimm->csrow, dimm->cschannel);
1082
1083 if (row == -1) 1161 if (row == -1)
1084 row = dimm->csrow; 1162 row = dimm->csrow;
1085 else if (row >= 0 && row != dimm->csrow) 1163 else if (row >= 0 && row != dimm->csrow)
@@ -1095,19 +1173,18 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
1095 if (!enable_per_layer_report) { 1173 if (!enable_per_layer_report) {
1096 strcpy(label, "any memory"); 1174 strcpy(label, "any memory");
1097 } else { 1175 } else {
1098 debugf4("%s: csrow/channel to increment: (%d,%d)\n", 1176 edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan);
1099 __func__, row, chan);
1100 if (p == label) 1177 if (p == label)
1101 strcpy(label, "unknown memory"); 1178 strcpy(label, "unknown memory");
1102 if (type == HW_EVENT_ERR_CORRECTED) { 1179 if (type == HW_EVENT_ERR_CORRECTED) {
1103 if (row >= 0) { 1180 if (row >= 0) {
1104 mci->csrows[row].ce_count++; 1181 mci->csrows[row]->ce_count += error_count;
1105 if (chan >= 0) 1182 if (chan >= 0)
1106 mci->csrows[row].channels[chan].ce_count++; 1183 mci->csrows[row]->channels[chan]->ce_count += error_count;
1107 } 1184 }
1108 } else 1185 } else
1109 if (row >= 0) 1186 if (row >= 0)
1110 mci->csrows[row].ue_count++; 1187 mci->csrows[row]->ue_count += error_count;
1111 } 1188 }
1112 1189
1113 /* Fill the RAM location data */ 1190 /* Fill the RAM location data */
@@ -1120,23 +1197,33 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
1120 edac_layer_name[mci->layers[i].type], 1197 edac_layer_name[mci->layers[i].type],
1121 pos[i]); 1198 pos[i]);
1122 } 1199 }
1200 if (p > location)
1201 *(p - 1) = '\0';
1202
1203 /* Report the error via the trace interface */
1204
1205 grain_bits = fls_long(grain) + 1;
1206 trace_mc_event(type, msg, label, error_count,
1207 mci->mc_idx, top_layer, mid_layer, low_layer,
1208 PAGES_TO_MiB(page_frame_number) | offset_in_page,
1209 grain_bits, syndrome, other_detail);
1123 1210
1124 /* Memory type dependent details about the error */ 1211 /* Memory type dependent details about the error */
1125 if (type == HW_EVENT_ERR_CORRECTED) { 1212 if (type == HW_EVENT_ERR_CORRECTED) {
1126 snprintf(detail, sizeof(detail), 1213 snprintf(detail, sizeof(detail),
1127 "page:0x%lx offset:0x%lx grain:%d syndrome:0x%lx", 1214 "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
1128 page_frame_number, offset_in_page, 1215 page_frame_number, offset_in_page,
1129 grain, syndrome); 1216 grain, syndrome);
1130 edac_ce_error(mci, pos, msg, location, label, detail, 1217 edac_ce_error(mci, error_count, pos, msg, location, label,
1131 other_detail, enable_per_layer_report, 1218 detail, other_detail, enable_per_layer_report,
1132 page_frame_number, offset_in_page, grain); 1219 page_frame_number, offset_in_page, grain);
1133 } else { 1220 } else {
1134 snprintf(detail, sizeof(detail), 1221 snprintf(detail, sizeof(detail),
1135 "page:0x%lx offset:0x%lx grain:%d", 1222 "page:0x%lx offset:0x%lx grain:%ld",
1136 page_frame_number, offset_in_page, grain); 1223 page_frame_number, offset_in_page, grain);
1137 1224
1138 edac_ue_error(mci, pos, msg, location, label, detail, 1225 edac_ue_error(mci, error_count, pos, msg, location, label,
1139 other_detail, enable_per_layer_report); 1226 detail, other_detail, enable_per_layer_report);
1140 } 1227 }
1141} 1228}
1142EXPORT_SYMBOL_GPL(edac_mc_handle_error); 1229EXPORT_SYMBOL_GPL(edac_mc_handle_error);