path: root/arch/x86_64/kernel/mce_amd.c
author    Jacob Shin <jacob.shin@amd.com>        2005-11-05 11:25:53 -0500
committer Linus Torvalds <torvalds@g5.osdl.org>  2005-11-14 22:55:13 -0500
commit    89b831ef8bf5cfbb357dbc0a2e07700d7f20eec5 (patch)
tree      25118081599eab69bd20d1a1b34ba0f8f679f24f /arch/x86_64/kernel/mce_amd.c
parent    979edfadbae2286eec5b46143c00e81bca96498e (diff)
[PATCH] x86_64: Support for AMD specific MCE Threshold.

MC4_MISC - DRAM Errors Threshold Register, introduced with AMD K8 Rev F.
This register counts correctable and uncorrectable ECC errors that occur
during DRAM read operations. The user may change the threshold
configuration through sysfs files:

bank%d/error_count      - reads current error count, write to clear.
bank%d/interrupt_enable - set/clear interrupt enable.
bank%d/threshold_limit  - read/write the threshold limit.

APIC vector 0xF9 in hw_irq.h.
5 software-defined bank ids in mce.h.
New apic.c function to set up the threshold APIC LVT.
Defaults to interrupt off, count enabled, and threshold limit max.
Sysfs interface created under /sys/devices/system/threshold.

AK: added some ifdefs to make it compile on UP

Signed-off-by: Jacob Shin <jacob.shin@amd.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
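For illustration only, a minimal userspace sketch of the sysfs interface
above. The bank path is an assumption derived from the sysdev class name
and per-CPU device ids ("threshold0" for CPU 0, bank 4 being the shared
northbridge bank); it is not something this commit documents, so verify
the layout on a real system first.

/*
 * Hypothetical usage sketch (not part of the patch): read and reprogram
 * one threshold bank through the sysfs files described above.
 */
#include <stdio.h>

#define BANK "/sys/devices/system/threshold/threshold0/bank4/"

static int write_attr(const char *file, const char *val)
{
	FILE *f = fopen(file, "w");
	if (!f)
		return -1;
	fprintf(f, "%s\n", val);
	return fclose(f);
}

int main(void)
{
	char buf[16];
	FILE *f = fopen(BANK "error_count", "r");

	if (!f)
		return 1;
	if (!fgets(buf, sizeof(buf), f)) {
		fclose(f);
		return 1;
	}
	printf("error_count: %s", buf);
	fclose(f);

	/* lower the limit to 10 errors, then enable the threshold interrupt */
	if (write_attr(BANK "threshold_limit", "10"))
		return 1;
	if (write_attr(BANK "interrupt_enable", "1"))
		return 1;
	return 0;
}
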
Diffstat (limited to 'arch/x86_64/kernel/mce_amd.c')
-rw-r--r--  arch/x86_64/kernel/mce_amd.c  538
1 files changed, 538 insertions, 0 deletions
diff --git a/arch/x86_64/kernel/mce_amd.c b/arch/x86_64/kernel/mce_amd.c
new file mode 100644
index 000000000000..1f76175ace02
--- /dev/null
+++ b/arch/x86_64/kernel/mce_amd.c
@@ -0,0 +1,538 @@
/*
 *  (c) 2005 Advanced Micro Devices, Inc.
 *  Your use of this code is subject to the terms and conditions of the
 *  GNU general public license version 2. See "COPYING" or
 *  http://www.gnu.org/licenses/gpl.html
 *
 *  Written by Jacob Shin - AMD, Inc.
 *
 *  Support : jacob.shin@amd.com
 *
 *  MC4_MISC0 DRAM ECC Error Threshold available under AMD K8 Rev F.
 *  MC4_MISC0 exists per physical processor.
 *
 */

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/kobject.h>
#include <linux/notifier.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/sysdev.h>
#include <linux/sysfs.h>
#include <asm/apic.h>
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/percpu.h>

#define PFX               "mce_threshold: "
#define VERSION           "version 1.00.9"
#define NR_BANKS          5
#define THRESHOLD_MAX     0xFFF
#define INT_TYPE_APIC     0x00020000
#define MASK_VALID_HI     0x80000000
#define MASK_LVTOFF_HI    0x00F00000
#define MASK_COUNT_EN_HI  0x00080000
#define MASK_INT_TYPE_HI  0x00060000
#define MASK_OVERFLOW_HI  0x00010000
#define MASK_ERR_COUNT_HI 0x00000FFF
#define MASK_OVERFLOW     0x0001000000000000L
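
/*
 * The masks above select fields in the high half of MCi_MISC. Going by
 * the AMD BKDG for revision F: bit 63 is Valid, bit 62 counter present,
 * bit 61 locked by BIOS, bits 55:52 the LVT offset, bit 51 counter
 * enable, bits 50:49 the interrupt type, bit 48 overflow, and bits
 * 43:32 the 12-bit error count.
 */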

struct threshold_bank {
	unsigned int cpu;
	u8 bank;
	u8 interrupt_enable;
	u16 threshold_limit;
	struct kobject kobj;
};

static struct threshold_bank threshold_defaults = {
	.interrupt_enable = 0,
	.threshold_limit = THRESHOLD_MAX,
};

#ifdef CONFIG_SMP
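/* bank 4 (MC4) is the northbridge bank, shared by all cores on a node */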
static unsigned char shared_bank[NR_BANKS] = {
	0, 0, 0, 0, 1
};
#endif

static DEFINE_PER_CPU(unsigned char, bank_map);	/* see which banks are on */

/*
 * CPU Initialization
 */

/* must be called with correct cpu affinity */
static void threshold_restart_bank(struct threshold_bank *b,
				   int reset, u16 old_limit)
{
	u32 mci_misc_hi, mci_misc_lo;

	rdmsr(MSR_IA32_MC0_MISC + b->bank * 4, mci_misc_lo, mci_misc_hi);

	if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX))
		reset = 1;	/* limit cannot be lower than err count */

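	/*
	 * The hardware counter counts up and sets the overflow bit when
	 * it passes THRESHOLD_MAX, so presetting it to THRESHOLD_MAX -
	 * threshold_limit makes the interrupt fire after threshold_limit
	 * further errors.
	 */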
	if (reset) {		/* reset err count and overflow bit */
		mci_misc_hi =
		    (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
		    (THRESHOLD_MAX - b->threshold_limit);
	} else if (old_limit) {	/* change limit w/o reset */
		int new_count = (mci_misc_hi & THRESHOLD_MAX) +
		    (old_limit - b->threshold_limit);
		mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) |
		    (new_count & THRESHOLD_MAX);
	}

	if (b->interrupt_enable)
		mci_misc_hi =
		    (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC;
	else
		mci_misc_hi &= ~MASK_INT_TYPE_HI;

	mci_misc_hi |= MASK_COUNT_EN_HI;
	wrmsr(MSR_IA32_MC0_MISC + b->bank * 4, mci_misc_lo, mci_misc_hi);
}

void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
{
	int bank;
	u32 mci_misc_lo, mci_misc_hi;
	unsigned int cpu = smp_processor_id();

	for (bank = 0; bank < NR_BANKS; ++bank) {
		rdmsr(MSR_IA32_MC0_MISC + bank * 4, mci_misc_lo, mci_misc_hi);

		/* !valid, !counter present, bios locked */
		if (!(mci_misc_hi & MASK_VALID_HI) ||
		    !(mci_misc_hi & (MASK_VALID_HI >> 1)) ||
		    (mci_misc_hi & (MASK_VALID_HI >> 2)))
			continue;

		per_cpu(bank_map, cpu) |= (1 << bank);

#ifdef CONFIG_SMP
		if (shared_bank[bank] && cpu_core_id[cpu])
			continue;
#endif

		setup_threshold_lvt((mci_misc_hi & MASK_LVTOFF_HI) >> 20);
		threshold_defaults.cpu = cpu;
		threshold_defaults.bank = bank;
		threshold_restart_bank(&threshold_defaults, 0, 0);
	}
}

/*
 * APIC Interrupt Handler
 */

/*
 * threshold interrupt handler will service THRESHOLD_APIC_VECTOR.
 * the interrupt goes off when error_count reaches threshold_limit.
 * the handler will simply log mcelog w/ software defined bank number.
 */
asmlinkage void mce_threshold_interrupt(void)
{
	int bank;
	struct mce m;

	ack_APIC_irq();
	irq_enter();

	memset(&m, 0, sizeof(m));
	rdtscll(m.tsc);
	m.cpu = smp_processor_id();

	/* assume first bank caused it */
	for (bank = 0; bank < NR_BANKS; ++bank) {
		m.bank = MCE_THRESHOLD_BASE + bank;
		rdmsrl(MSR_IA32_MC0_MISC + bank * 4, m.misc);

		if (m.misc & MASK_OVERFLOW) {
			mce_log(&m);
			goto out;
		}
	}
      out:
	irq_exit();
}

/*
 * Sysfs Interface
 */

static struct sysdev_class threshold_sysclass = {
	set_kset_name("threshold"),
};

static DEFINE_PER_CPU(struct sys_device, device_threshold);

struct threshold_attr {
	struct attribute attr;
	ssize_t(*show) (struct threshold_bank *, char *);
	ssize_t(*store) (struct threshold_bank *, const char *, size_t count);
};

static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]);

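/*
 * rdmsr/wrmsr only reach the current CPU's registers, so the sysfs
 * handlers below temporarily pin the calling task to the CPU that owns
 * the bank before touching its MSRs.
 */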
static cpumask_t affinity_set(unsigned int cpu)
{
	cpumask_t oldmask = current->cpus_allowed;
	cpumask_t newmask = CPU_MASK_NONE;
	cpu_set(cpu, newmask);
	set_cpus_allowed(current, newmask);
	return oldmask;
}

static void affinity_restore(cpumask_t oldmask)
{
	set_cpus_allowed(current, oldmask);
}

#define SHOW_FIELDS(name) \
	static ssize_t show_ ## name(struct threshold_bank * b, char *buf) \
	{ \
		return sprintf(buf, "%lx\n", (unsigned long) b->name); \
	}
SHOW_FIELDS(interrupt_enable)
SHOW_FIELDS(threshold_limit)

static ssize_t store_interrupt_enable(struct threshold_bank *b,
				      const char *buf, size_t count)
{
	char *end;
	cpumask_t oldmask;
	unsigned long new = simple_strtoul(buf, &end, 0);
	if (end == buf)
		return -EINVAL;
	b->interrupt_enable = !!new;

	oldmask = affinity_set(b->cpu);
	threshold_restart_bank(b, 0, 0);
	affinity_restore(oldmask);

	return end - buf;
}

static ssize_t store_threshold_limit(struct threshold_bank *b,
				     const char *buf, size_t count)
{
	char *end;
	cpumask_t oldmask;
	u16 old;
	unsigned long new = simple_strtoul(buf, &end, 0);
	if (end == buf)
		return -EINVAL;
	if (new > THRESHOLD_MAX)
		new = THRESHOLD_MAX;
	if (new < 1)
		new = 1;
	old = b->threshold_limit;
	b->threshold_limit = new;

	oldmask = affinity_set(b->cpu);
	threshold_restart_bank(b, 0, old);
	affinity_restore(oldmask);

	return end - buf;
}

static ssize_t show_error_count(struct threshold_bank *b, char *buf)
{
	u32 high, low;
	cpumask_t oldmask;
	oldmask = affinity_set(b->cpu);
	rdmsr(MSR_IA32_MC0_MISC + b->bank * 4, low, high);	/* ignore low 32 */
	affinity_restore(oldmask);
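	/* the counter was preset to THRESHOLD_MAX - threshold_limit, so
	 * subtract the preset to report only errors seen since reset */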
	return sprintf(buf, "%x\n",
		       (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit));
}

static ssize_t store_error_count(struct threshold_bank *b,
				 const char *buf, size_t count)
{
	cpumask_t oldmask;
	oldmask = affinity_set(b->cpu);
	threshold_restart_bank(b, 1, 0);
	affinity_restore(oldmask);
	return 1;
}

#define THRESHOLD_ATTR(_name,_mode,_show,_store) {            \
	.attr = {.name = __stringify(_name), .mode = _mode }, \
	.show = _show,                                        \
	.store = _store,                                      \
};

#define ATTR_FIELDS(name) \
	static struct threshold_attr name = \
	THRESHOLD_ATTR(name, 0644, show_## name, store_## name)

ATTR_FIELDS(interrupt_enable);
ATTR_FIELDS(threshold_limit);
ATTR_FIELDS(error_count);

static struct attribute *default_attrs[] = {
	&interrupt_enable.attr,
	&threshold_limit.attr,
	&error_count.attr,
	NULL
};

#define to_bank(k) container_of(k,struct threshold_bank,kobj)
#define to_attr(a) container_of(a,struct threshold_attr,attr)

static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
{
	struct threshold_bank *b = to_bank(kobj);
	struct threshold_attr *a = to_attr(attr);
	ssize_t ret;
	ret = a->show ? a->show(b, buf) : -EIO;
	return ret;
}

static ssize_t store(struct kobject *kobj, struct attribute *attr,
		     const char *buf, size_t count)
{
	struct threshold_bank *b = to_bank(kobj);
	struct threshold_attr *a = to_attr(attr);
	ssize_t ret;
	ret = a->store ? a->store(b, buf, count) : -EIO;
	return ret;
}

static struct sysfs_ops threshold_ops = {
	.show = show,
	.store = store,
};

static struct kobj_type threshold_ktype = {
	.sysfs_ops = &threshold_ops,
	.default_attrs = default_attrs,
};

/* symlinks sibling shared banks to first core. first core owns dir/files. */
static __cpuinit int threshold_create_bank(unsigned int cpu, int bank)
{
	int err = 0;
	struct threshold_bank *b = 0;

#ifdef CONFIG_SMP
	if (cpu_core_id[cpu] && shared_bank[bank]) {	/* symlink */
		char name[16];
		unsigned lcpu = first_cpu(cpu_core_map[cpu]);
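		/*
		 * lcpu is the lowest-numbered core in this package, i.e.
		 * the core that owns the real bank directory the symlink
		 * will point at.
		 */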
		if (cpu_core_id[lcpu])
			goto out;	/* first core not up yet */

		b = per_cpu(threshold_banks, lcpu)[bank];
		if (!b)
			goto out;
		sprintf(name, "bank%i", bank);
		err = sysfs_create_link(&per_cpu(device_threshold, cpu).kobj,
					&b->kobj, name);
		if (err)
			goto out;
		per_cpu(threshold_banks, cpu)[bank] = b;
		goto out;
	}
#endif

	b = kmalloc(sizeof(struct threshold_bank), GFP_KERNEL);
	if (!b) {
		err = -ENOMEM;
		goto out;
	}
	memset(b, 0, sizeof(struct threshold_bank));

	b->cpu = cpu;
	b->bank = bank;
	b->interrupt_enable = 0;
	b->threshold_limit = THRESHOLD_MAX;
	kobject_set_name(&b->kobj, "bank%i", bank);
	b->kobj.parent = &per_cpu(device_threshold, cpu).kobj;
	b->kobj.ktype = &threshold_ktype;

	err = kobject_register(&b->kobj);
	if (err) {
		kfree(b);
		goto out;
	}
	per_cpu(threshold_banks, cpu)[bank] = b;
      out:
	return err;
}

/* create dir/files for all valid threshold banks */
static __cpuinit int threshold_create_device(unsigned int cpu)
{
	int bank;
	int err = 0;

	per_cpu(device_threshold, cpu).id = cpu;
	per_cpu(device_threshold, cpu).cls = &threshold_sysclass;
	err = sysdev_register(&per_cpu(device_threshold, cpu));
	if (err)
		goto out;

	for (bank = 0; bank < NR_BANKS; ++bank) {
		if (!(per_cpu(bank_map, cpu) & 1 << bank))
			continue;
		err = threshold_create_bank(cpu, bank);
		if (err)
			goto out;
	}
      out:
	return err;
}

#ifdef CONFIG_HOTPLUG_CPU
/*
 * let's be hotplug friendly.
 * in case of multiple core processors, the first core always takes ownership
 * of shared sysfs dir/files, and rest of the cores will be symlinked to it.
 */

/* cpu hotplug call removes all symlinks before first core dies */
static __cpuinit void threshold_remove_bank(unsigned int cpu, int bank)
{
	struct threshold_bank *b;
	char name[16];

	b = per_cpu(threshold_banks, cpu)[bank];
	if (!b)
		return;
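	/*
	 * while sibling symlinks still hold references to this kobject,
	 * only drop this CPU's link; the owning core's removal unregisters
	 * and frees the bank itself.
	 */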
	if (shared_bank[bank] && atomic_read(&b->kobj.kref.refcount) > 2) {
		sprintf(name, "bank%i", bank);
		sysfs_remove_link(&per_cpu(device_threshold, cpu).kobj, name);
		per_cpu(threshold_banks, cpu)[bank] = 0;
	} else {
		kobject_unregister(&b->kobj);
		kfree(per_cpu(threshold_banks, cpu)[bank]);
	}
}

static __cpuinit void threshold_remove_device(unsigned int cpu)
{
	int bank;

	for (bank = 0; bank < NR_BANKS; ++bank) {
		if (!(per_cpu(bank_map, cpu) & 1 << bank))
			continue;
		threshold_remove_bank(cpu, bank);
	}
	sysdev_unregister(&per_cpu(device_threshold, cpu));
}

/* link all existing siblings when first core comes up */
static __cpuinit int threshold_create_symlinks(unsigned int cpu)
{
	int bank, err = 0;
	unsigned int lcpu = 0;

	if (cpu_core_id[cpu])
		return 0;
	for_each_cpu_mask(lcpu, cpu_core_map[cpu]) {
		if (lcpu == cpu)
			continue;
		for (bank = 0; bank < NR_BANKS; ++bank) {
			if (!(per_cpu(bank_map, cpu) & 1 << bank))
				continue;
			if (!shared_bank[bank])
				continue;
			err = threshold_create_bank(lcpu, bank);
		}
	}
	return err;
}

/* remove all symlinks before first core dies. */
static __cpuinit void threshold_remove_symlinks(unsigned int cpu)
{
	int bank;
	unsigned int lcpu = 0;
	if (cpu_core_id[cpu])
		return;
	for_each_cpu_mask(lcpu, cpu_core_map[cpu]) {
		if (lcpu == cpu)
			continue;
		for (bank = 0; bank < NR_BANKS; ++bank) {
			if (!(per_cpu(bank_map, cpu) & 1 << bank))
				continue;
			if (!shared_bank[bank])
				continue;
			threshold_remove_bank(lcpu, bank);
		}
	}
}
#else /* !CONFIG_HOTPLUG_CPU */
static __cpuinit void threshold_create_symlinks(unsigned int cpu)
{
}
static __cpuinit void threshold_remove_symlinks(unsigned int cpu)
{
}
static void threshold_remove_device(unsigned int cpu)
{
}
#endif

/* get notified when a cpu comes on/off */
static __cpuinit int threshold_cpu_callback(struct notifier_block *nfb,
					    unsigned long action, void *hcpu)
{
	/* cpu was unsigned int to begin with */
	unsigned int cpu = (unsigned long)hcpu;

	if (cpu >= NR_CPUS)
		goto out;

	switch (action) {
	case CPU_ONLINE:
		threshold_create_device(cpu);
		threshold_create_symlinks(cpu);
		break;
	case CPU_DOWN_PREPARE:
		threshold_remove_symlinks(cpu);
		break;
	case CPU_DOWN_FAILED:
		threshold_create_symlinks(cpu);
		break;
	case CPU_DEAD:
		threshold_remove_device(cpu);
		break;
	default:
		break;
	}
      out:
	return NOTIFY_OK;
}

static struct notifier_block threshold_cpu_notifier = {
	.notifier_call = threshold_cpu_callback,
};

static __init int threshold_init_device(void)
{
	int err;
	int lcpu = 0;

	err = sysdev_class_register(&threshold_sysclass);
	if (err)
		goto out;

	/* to hit CPUs online before the notifier is up */
	for_each_online_cpu(lcpu) {
		err = threshold_create_device(lcpu);
		if (err)
			goto out;
	}
	register_cpu_notifier(&threshold_cpu_notifier);

      out:
	return err;
}

device_initcall(threshold_init_device);