aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2012-07-22 19:07:45 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2012-07-22 19:07:45 -0400
commit 5b160bd426946c85f32b15e5d34d62d2618a5a87 (patch)
tree d12e53fc438587d726f5dbdb0e7f2f4742d51300 /arch/x86
parent 7100e505b76b4e2efd88b2459d1a932214e29f8a (diff)
parent bb65a764de59b76323e0b72abbd9fc31401a53fa (diff)
Merge branch 'x86-mce-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86/mce changes from Ingo Molnar:
 "This tree improves the AMD thresholding bank code and includes a
  memory fault signal handling fixlet."

* 'x86-mce-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mce: Fix siginfo_t->si_addr value for non-recoverable memory faults
  x86, MCE, AMD: Update copyrights and boilerplate
  x86, MCE, AMD: Give proper names to the thresholding banks
  x86, MCE, AMD: Make error_count read only
  x86, MCE, AMD: Cleanup reading of error_count
  x86, MCE, AMD: Print decimal thresholding values
  x86, MCE, AMD: Move shared bank to node descriptor
  x86, MCE, AMD: Remove local_allocate_... wrapper
  x86, MCE, AMD: Remove shared banks sysfs linking
  x86, amd_nb: Export model 0x10 and later PCI id
Diffstat (limited to 'arch/x86')
-rw-r--r-- arch/x86/include/asm/amd_nb.h 21
-rw-r--r-- arch/x86/kernel/amd_nb.c 1
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce.c 6
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce_amd.c 264
4 files changed, 149 insertions, 143 deletions
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 49ad773f4b9f..b3341e9cd8fd 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -26,10 +26,31 @@ struct amd_l3_cache {
26 u8 subcaches[4]; 26 u8 subcaches[4];
27}; 27};
28 28
29struct threshold_block {
30 unsigned int block;
31 unsigned int bank;
32 unsigned int cpu;
33 u32 address;
34 u16 interrupt_enable;
35 bool interrupt_capable;
36 u16 threshold_limit;
37 struct kobject kobj;
38 struct list_head miscj;
39};
40
41struct threshold_bank {
42 struct kobject *kobj;
43 struct threshold_block *blocks;
44
45 /* initialized to the number of CPUs on the node sharing this bank */
46 atomic_t cpus;
47};
48
29struct amd_northbridge { 49struct amd_northbridge {
30 struct pci_dev *misc; 50 struct pci_dev *misc;
31 struct pci_dev *link; 51 struct pci_dev *link;
32 struct amd_l3_cache l3_cache; 52 struct amd_l3_cache l3_cache;
53 struct threshold_bank *bank4;
33}; 54};
34 55
35struct amd_northbridge_info { 56struct amd_northbridge_info {
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index f29f6dd6bc08..aadf3359e2a7 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -19,6 +19,7 @@ const struct pci_device_id amd_nb_misc_ids[] = {
19 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, 19 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
20 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, 20 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
21 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3) }, 21 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3) },
22 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M10H_F3) },
22 {} 23 {}
23}; 24};
24EXPORT_SYMBOL(amd_nb_misc_ids); 25EXPORT_SYMBOL(amd_nb_misc_ids);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 5a5a5dc1ff15..9473e8772fd1 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1190,6 +1190,7 @@ void mce_notify_process(void)
1190{ 1190{
1191 unsigned long pfn; 1191 unsigned long pfn;
1192 struct mce_info *mi = mce_find_info(); 1192 struct mce_info *mi = mce_find_info();
1193 int flags = MF_ACTION_REQUIRED;
1193 1194
1194 if (!mi) 1195 if (!mi)
1195 mce_panic("Lost physical address for unconsumed uncorrectable error", NULL, NULL); 1196 mce_panic("Lost physical address for unconsumed uncorrectable error", NULL, NULL);
@@ -1204,8 +1205,9 @@ void mce_notify_process(void)
1204 * doomed. We still need to mark the page as poisoned and alert any 1205 * doomed. We still need to mark the page as poisoned and alert any
1205 * other users of the page. 1206 * other users of the page.
1206 */ 1207 */
1207 if (memory_failure(pfn, MCE_VECTOR, MF_ACTION_REQUIRED) < 0 || 1208 if (!mi->restartable)
1208 mi->restartable == 0) { 1209 flags |= MF_MUST_KILL;
1210 if (memory_failure(pfn, MCE_VECTOR, flags) < 0) {
1209 pr_err("Memory error not recovered"); 1211 pr_err("Memory error not recovered");
1210 force_sig(SIGBUS, current); 1212 force_sig(SIGBUS, current);
1211 } 1213 }
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index f4873a64f46d..671b95a2ffb5 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -1,15 +1,17 @@
1/* 1/*
2 * (c) 2005, 2006 Advanced Micro Devices, Inc. 2 * (c) 2005-2012 Advanced Micro Devices, Inc.
3 * Your use of this code is subject to the terms and conditions of the 3 * Your use of this code is subject to the terms and conditions of the
4 * GNU general public license version 2. See "COPYING" or 4 * GNU general public license version 2. See "COPYING" or
5 * http://www.gnu.org/licenses/gpl.html 5 * http://www.gnu.org/licenses/gpl.html
6 * 6 *
7 * Written by Jacob Shin - AMD, Inc. 7 * Written by Jacob Shin - AMD, Inc.
8 * 8 *
9 * Support : jacob.shin@amd.com 9 * Support: borislav.petkov@amd.com
10 * 10 *
11 * April 2006 11 * April 2006
12 * - added support for AMD Family 0x10 processors 12 * - added support for AMD Family 0x10 processors
13 * May 2012
14 * - major scrubbing
13 * 15 *
14 * All MC4_MISCi registers are shared between multi-cores 16 * All MC4_MISCi registers are shared between multi-cores
15 */ 17 */
@@ -25,6 +27,7 @@
25#include <linux/cpu.h> 27#include <linux/cpu.h>
26#include <linux/smp.h> 28#include <linux/smp.h>
27 29
30#include <asm/amd_nb.h>
28#include <asm/apic.h> 31#include <asm/apic.h>
29#include <asm/idle.h> 32#include <asm/idle.h>
30#include <asm/mce.h> 33#include <asm/mce.h>
@@ -45,23 +48,15 @@
45#define MASK_BLKPTR_LO 0xFF000000 48#define MASK_BLKPTR_LO 0xFF000000
46#define MCG_XBLK_ADDR 0xC0000400 49#define MCG_XBLK_ADDR 0xC0000400
47 50
48struct threshold_block { 51static const char * const th_names[] = {
49 unsigned int block; 52 "load_store",
50 unsigned int bank; 53 "insn_fetch",
51 unsigned int cpu; 54 "combined_unit",
52 u32 address; 55 "",
53 u16 interrupt_enable; 56 "northbridge",
54 bool interrupt_capable; 57 "execution_unit",
55 u16 threshold_limit;
56 struct kobject kobj;
57 struct list_head miscj;
58}; 58};
59 59
60struct threshold_bank {
61 struct kobject *kobj;
62 struct threshold_block *blocks;
63 cpumask_var_t cpus;
64};
65static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks); 60static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks);
66 61
67static unsigned char shared_bank[NR_BANKS] = { 62static unsigned char shared_bank[NR_BANKS] = {
@@ -84,6 +79,26 @@ struct thresh_restart {
84 u16 old_limit; 79 u16 old_limit;
85}; 80};
86 81
82static const char * const bank4_names(struct threshold_block *b)
83{
84 switch (b->address) {
85 /* MSR4_MISC0 */
86 case 0x00000413:
87 return "dram";
88
89 case 0xc0000408:
90 return "ht_links";
91
92 case 0xc0000409:
93 return "l3_cache";
94
95 default:
96 WARN(1, "Funny MSR: 0x%08x\n", b->address);
97 return "";
98 }
99};
100
101
87static bool lvt_interrupt_supported(unsigned int bank, u32 msr_high_bits) 102static bool lvt_interrupt_supported(unsigned int bank, u32 msr_high_bits)
88{ 103{
89 /* 104 /*
@@ -224,8 +239,6 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
224 239
225 if (!block) 240 if (!block)
226 per_cpu(bank_map, cpu) |= (1 << bank); 241 per_cpu(bank_map, cpu) |= (1 << bank);
227 if (shared_bank[bank] && c->cpu_core_id)
228 break;
229 242
230 memset(&b, 0, sizeof(b)); 243 memset(&b, 0, sizeof(b));
231 b.cpu = cpu; 244 b.cpu = cpu;
@@ -326,7 +339,7 @@ struct threshold_attr {
326#define SHOW_FIELDS(name) \ 339#define SHOW_FIELDS(name) \
327static ssize_t show_ ## name(struct threshold_block *b, char *buf) \ 340static ssize_t show_ ## name(struct threshold_block *b, char *buf) \
328{ \ 341{ \
329 return sprintf(buf, "%lx\n", (unsigned long) b->name); \ 342 return sprintf(buf, "%lu\n", (unsigned long) b->name); \
330} 343}
331SHOW_FIELDS(interrupt_enable) 344SHOW_FIELDS(interrupt_enable)
332SHOW_FIELDS(threshold_limit) 345SHOW_FIELDS(threshold_limit)
@@ -377,38 +390,21 @@ store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
377 return size; 390 return size;
378} 391}
379 392
380struct threshold_block_cross_cpu {
381 struct threshold_block *tb;
382 long retval;
383};
384
385static void local_error_count_handler(void *_tbcc)
386{
387 struct threshold_block_cross_cpu *tbcc = _tbcc;
388 struct threshold_block *b = tbcc->tb;
389 u32 low, high;
390
391 rdmsr(b->address, low, high);
392 tbcc->retval = (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit);
393}
394
395static ssize_t show_error_count(struct threshold_block *b, char *buf) 393static ssize_t show_error_count(struct threshold_block *b, char *buf)
396{ 394{
397 struct threshold_block_cross_cpu tbcc = { .tb = b, }; 395 u32 lo, hi;
398 396
399 smp_call_function_single(b->cpu, local_error_count_handler, &tbcc, 1); 397 rdmsr_on_cpu(b->cpu, b->address, &lo, &hi);
400 return sprintf(buf, "%lx\n", tbcc.retval);
401}
402 398
403static ssize_t store_error_count(struct threshold_block *b, 399 return sprintf(buf, "%u\n", ((hi & THRESHOLD_MAX) -
404 const char *buf, size_t count) 400 (THRESHOLD_MAX - b->threshold_limit)));
405{
406 struct thresh_restart tr = { .b = b, .reset = 1, .old_limit = 0 };
407
408 smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
409 return 1;
410} 401}
411 402
403static struct threshold_attr error_count = {
404 .attr = {.name = __stringify(error_count), .mode = 0444 },
405 .show = show_error_count,
406};
407
412#define RW_ATTR(val) \ 408#define RW_ATTR(val) \
413static struct threshold_attr val = { \ 409static struct threshold_attr val = { \
414 .attr = {.name = __stringify(val), .mode = 0644 }, \ 410 .attr = {.name = __stringify(val), .mode = 0644 }, \
@@ -418,7 +414,6 @@ static struct threshold_attr val = { \
418 414
419RW_ATTR(interrupt_enable); 415RW_ATTR(interrupt_enable);
420RW_ATTR(threshold_limit); 416RW_ATTR(threshold_limit);
421RW_ATTR(error_count);
422 417
423static struct attribute *default_attrs[] = { 418static struct attribute *default_attrs[] = {
424 &threshold_limit.attr, 419 &threshold_limit.attr,
@@ -517,7 +512,7 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu,
517 512
518 err = kobject_init_and_add(&b->kobj, &threshold_ktype, 513 err = kobject_init_and_add(&b->kobj, &threshold_ktype,
519 per_cpu(threshold_banks, cpu)[bank]->kobj, 514 per_cpu(threshold_banks, cpu)[bank]->kobj,
520 "misc%i", block); 515 (bank == 4 ? bank4_names(b) : th_names[bank]));
521 if (err) 516 if (err)
522 goto out_free; 517 goto out_free;
523recurse: 518recurse:
@@ -548,98 +543,91 @@ out_free:
548 return err; 543 return err;
549} 544}
550 545
551static __cpuinit long 546static __cpuinit int __threshold_add_blocks(struct threshold_bank *b)
552local_allocate_threshold_blocks(int cpu, unsigned int bank)
553{ 547{
554 return allocate_threshold_blocks(cpu, bank, 0, 548 struct list_head *head = &b->blocks->miscj;
555 MSR_IA32_MC0_MISC + bank * 4); 549 struct threshold_block *pos = NULL;
550 struct threshold_block *tmp = NULL;
551 int err = 0;
552
553 err = kobject_add(&b->blocks->kobj, b->kobj, b->blocks->kobj.name);
554 if (err)
555 return err;
556
557 list_for_each_entry_safe(pos, tmp, head, miscj) {
558
559 err = kobject_add(&pos->kobj, b->kobj, pos->kobj.name);
560 if (err) {
561 list_for_each_entry_safe_reverse(pos, tmp, head, miscj)
562 kobject_del(&pos->kobj);
563
564 return err;
565 }
566 }
567 return err;
556} 568}
557 569
558/* symlinks sibling shared banks to first core. first core owns dir/files. */
559static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) 570static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
560{ 571{
561 int i, err = 0;
562 struct threshold_bank *b = NULL;
563 struct device *dev = per_cpu(mce_device, cpu); 572 struct device *dev = per_cpu(mce_device, cpu);
564 char name[32]; 573 struct amd_northbridge *nb = NULL;
565 574 struct threshold_bank *b = NULL;
566 sprintf(name, "threshold_bank%i", bank); 575 const char *name = th_names[bank];
576 int err = 0;
567 577
568#ifdef CONFIG_SMP 578 if (shared_bank[bank]) {
569 if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */
570 i = cpumask_first(cpu_llc_shared_mask(cpu));
571 579
572 /* first core not up yet */ 580 nb = node_to_amd_nb(amd_get_nb_id(cpu));
573 if (cpu_data(i).cpu_core_id) 581 WARN_ON(!nb);
574 goto out;
575 582
576 /* already linked */ 583 /* threshold descriptor already initialized on this node? */
577 if (per_cpu(threshold_banks, cpu)[bank]) 584 if (nb->bank4) {
578 goto out; 585 /* yes, use it */
586 b = nb->bank4;
587 err = kobject_add(b->kobj, &dev->kobj, name);
588 if (err)
589 goto out;
579 590
580 b = per_cpu(threshold_banks, i)[bank]; 591 per_cpu(threshold_banks, cpu)[bank] = b;
592 atomic_inc(&b->cpus);
581 593
582 if (!b) 594 err = __threshold_add_blocks(b);
583 goto out;
584 595
585 err = sysfs_create_link(&dev->kobj, b->kobj, name);
586 if (err)
587 goto out; 596 goto out;
588 597 }
589 cpumask_copy(b->cpus, cpu_llc_shared_mask(cpu));
590 per_cpu(threshold_banks, cpu)[bank] = b;
591
592 goto out;
593 } 598 }
594#endif
595 599
596 b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL); 600 b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL);
597 if (!b) { 601 if (!b) {
598 err = -ENOMEM; 602 err = -ENOMEM;
599 goto out; 603 goto out;
600 } 604 }
601 if (!zalloc_cpumask_var(&b->cpus, GFP_KERNEL)) {
602 kfree(b);
603 err = -ENOMEM;
604 goto out;
605 }
606 605
607 b->kobj = kobject_create_and_add(name, &dev->kobj); 606 b->kobj = kobject_create_and_add(name, &dev->kobj);
608 if (!b->kobj) 607 if (!b->kobj) {
608 err = -EINVAL;
609 goto out_free; 609 goto out_free;
610 610 }
611#ifndef CONFIG_SMP
612 cpumask_setall(b->cpus);
613#else
614 cpumask_set_cpu(cpu, b->cpus);
615#endif
616 611
617 per_cpu(threshold_banks, cpu)[bank] = b; 612 per_cpu(threshold_banks, cpu)[bank] = b;
618 613
619 err = local_allocate_threshold_blocks(cpu, bank); 614 if (shared_bank[bank]) {
620 if (err) 615 atomic_set(&b->cpus, 1);
621 goto out_free;
622
623 for_each_cpu(i, b->cpus) {
624 if (i == cpu)
625 continue;
626 616
627 dev = per_cpu(mce_device, i); 617 /* nb is already initialized, see above */
628 if (dev) 618 WARN_ON(nb->bank4);
629 err = sysfs_create_link(&dev->kobj,b->kobj, name); 619 nb->bank4 = b;
630 if (err)
631 goto out;
632
633 per_cpu(threshold_banks, i)[bank] = b;
634 } 620 }
635 621
636 goto out; 622 err = allocate_threshold_blocks(cpu, bank, 0,
623 MSR_IA32_MC0_MISC + bank * 4);
624 if (!err)
625 goto out;
637 626
638out_free: 627 out_free:
639 per_cpu(threshold_banks, cpu)[bank] = NULL;
640 free_cpumask_var(b->cpus);
641 kfree(b); 628 kfree(b);
642out: 629
630 out:
643 return err; 631 return err;
644} 632}
645 633
@@ -660,12 +648,6 @@ static __cpuinit int threshold_create_device(unsigned int cpu)
660 return err; 648 return err;
661} 649}
662 650
663/*
664 * let's be hotplug friendly.
665 * in case of multiple core processors, the first core always takes ownership
666 * of shared sysfs dir/files, and rest of the cores will be symlinked to it.
667 */
668
669static void deallocate_threshold_block(unsigned int cpu, 651static void deallocate_threshold_block(unsigned int cpu,
670 unsigned int bank) 652 unsigned int bank)
671{ 653{
@@ -686,41 +668,42 @@ static void deallocate_threshold_block(unsigned int cpu,
686 per_cpu(threshold_banks, cpu)[bank]->blocks = NULL; 668 per_cpu(threshold_banks, cpu)[bank]->blocks = NULL;
687} 669}
688 670
671static void __threshold_remove_blocks(struct threshold_bank *b)
672{
673 struct threshold_block *pos = NULL;
674 struct threshold_block *tmp = NULL;
675
676 kobject_del(b->kobj);
677
678 list_for_each_entry_safe(pos, tmp, &b->blocks->miscj, miscj)
679 kobject_del(&pos->kobj);
680}
681
689static void threshold_remove_bank(unsigned int cpu, int bank) 682static void threshold_remove_bank(unsigned int cpu, int bank)
690{ 683{
684 struct amd_northbridge *nb;
691 struct threshold_bank *b; 685 struct threshold_bank *b;
692 struct device *dev;
693 char name[32];
694 int i = 0;
695 686
696 b = per_cpu(threshold_banks, cpu)[bank]; 687 b = per_cpu(threshold_banks, cpu)[bank];
697 if (!b) 688 if (!b)
698 return; 689 return;
690
699 if (!b->blocks) 691 if (!b->blocks)
700 goto free_out; 692 goto free_out;
701 693
702 sprintf(name, "threshold_bank%i", bank); 694 if (shared_bank[bank]) {
703 695 if (!atomic_dec_and_test(&b->cpus)) {
704#ifdef CONFIG_SMP 696 __threshold_remove_blocks(b);
705 /* sibling symlink */ 697 per_cpu(threshold_banks, cpu)[bank] = NULL;
706 if (shared_bank[bank] && b->blocks->cpu != cpu) { 698 return;
707 dev = per_cpu(mce_device, cpu); 699 } else {
708 sysfs_remove_link(&dev->kobj, name); 700 /*
709 per_cpu(threshold_banks, cpu)[bank] = NULL; 701 * the last CPU on this node using the shared bank is
710 702 * going away, remove that bank now.
711 return; 703 */
712 } 704 nb = node_to_amd_nb(amd_get_nb_id(cpu));
713#endif 705 nb->bank4 = NULL;
714 706 }
715 /* remove all sibling symlinks before unregistering */
716 for_each_cpu(i, b->cpus) {
717 if (i == cpu)
718 continue;
719
720 dev = per_cpu(mce_device, i);
721 if (dev)
722 sysfs_remove_link(&dev->kobj, name);
723 per_cpu(threshold_banks, i)[bank] = NULL;
724 } 707 }
725 708
726 deallocate_threshold_block(cpu, bank); 709 deallocate_threshold_block(cpu, bank);
@@ -728,7 +711,6 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
728free_out: 711free_out:
729 kobject_del(b->kobj); 712 kobject_del(b->kobj);
730 kobject_put(b->kobj); 713 kobject_put(b->kobj);
731 free_cpumask_var(b->cpus);
732 kfree(b); 714 kfree(b);
733 per_cpu(threshold_banks, cpu)[bank] = NULL; 715 per_cpu(threshold_banks, cpu)[bank] = NULL;
734} 716}