authorYazen Ghannam <Yazen.Ghannam@amd.com>2016-09-12 03:59:34 -0400
committerThomas Gleixner <tglx@linutronix.de>2016-09-13 09:23:10 -0400
commit5896820e0aa32572ad03b30563c539655b6c6375 (patch)
tree5ba0adc4ecbf9cca585cb1eeb8c4c53a7b5bdc7a
parent856095b1794be487527771dbd2fe28e34e94b266 (diff)
x86/mce/AMD, EDAC/mce_amd: Define and use tables for known SMCA IP types
Scalable MCA defines a number of IP types. An MCA bank on an SMCA
system is defined as one of these IP types. A bank's type is uniquely
identified by the combination of the HWID and MCATYPE values read from
its MCA_IPID register.

Add the required tables in order to be able to look up error
descriptions based on a bank's type and the error's extended error
code.

[ bp: Align comments, simplify a bit. ]

Signed-off-by: Yazen Ghannam <Yazen.Ghannam@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: http://lkml.kernel.org/r/1472741832-1690-1-git-send-email-Yazen.Ghannam@amd.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-rw-r--r--  arch/x86/include/asm/mce.h             61
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_amd.c  104
-rw-r--r--  drivers/edac/mce_amd.c                194
3 files changed, 147 insertions(+), 212 deletions(-)
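
The tables this patch adds key each MCA bank on a packed (HWID, MCATYPE) tuple and then index a per-type array of error descriptions with the extended error code. A minimal standalone C sketch of that lookup idea follows; the HWID_MCATYPE() packing mirrors the patch, but the table entries, names and demo values are an illustrative subset, not the kernel structures themselves.

#include <stdint.h>
#include <stdio.h>

/* Pack HWID and MCATYPE into one key, as the patch's HWID_MCATYPE() does. */
#define HWID_MCATYPE(hwid, mcatype) (((uint32_t)(hwid) << 16) | (mcatype))

struct hwid_mcatype_entry {
	const char *name;
	uint32_t hwid_mcatype;	/* (hwid, mcatype) tuple */
};

/* Illustrative subset; see smca_hwid_mcatypes[] in the patch for the real table. */
static const struct hwid_mcatype_entry table[] = {
	{ "Load Store",			HWID_MCATYPE(0xB0, 0x0) },
	{ "Coherent Slave",		HWID_MCATYPE(0x2E, 0x0) },
	{ "Unified Memory Controller",	HWID_MCATYPE(0x96, 0x0) },
};

static const char *lookup(uint16_t hwid, uint16_t mcatype)
{
	uint32_t key = HWID_MCATYPE(hwid, mcatype);

	for (size_t i = 0; i < sizeof(table) / sizeof(table[0]); i++)
		if (table[i].hwid_mcatype == key)
			return table[i].name;
	return "unknown bank type";
}

int main(void)
{
	/* (0x2E, 0x0) should resolve to the Coherent Slave bank type. */
	printf("%s\n", lookup(0x2E, 0x0));
	return 0;
}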
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 21bc5a3a4c89..9bd7ff5ffbcc 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -337,44 +337,47 @@ extern void apei_mce_report_mem_error(int corrected,
  * Scalable MCA.
  */
 #ifdef CONFIG_X86_MCE_AMD
-enum amd_ip_types {
-	SMCA_F17H_CORE = 0,	/* Core errors */
-	SMCA_DF,		/* Data Fabric */
-	SMCA_UMC,		/* Unified Memory Controller */
-	SMCA_PB,		/* Parameter Block */
-	SMCA_PSP,		/* Platform Security Processor */
-	SMCA_SMU,		/* System Management Unit */
-	N_AMD_IP_TYPES
-};
-
-struct amd_hwid {
-	const char *name;
-	unsigned int hwid;
-};
 
-extern struct amd_hwid amd_hwids[N_AMD_IP_TYPES];
-
-enum amd_core_mca_blocks {
+/* These may be used by multiple smca_hwid_mcatypes */
+enum smca_bank_types {
 	SMCA_LS = 0,	/* Load Store */
 	SMCA_IF,	/* Instruction Fetch */
-	SMCA_L2_CACHE,	/* L2 cache */
-	SMCA_DE,	/* Decoder unit */
-	RES,		/* Reserved */
-	SMCA_EX,	/* Execution unit */
+	SMCA_L2_CACHE,	/* L2 Cache */
+	SMCA_DE,	/* Decoder Unit */
+	SMCA_EX,	/* Execution Unit */
 	SMCA_FP,	/* Floating Point */
-	SMCA_L3_CACHE,	/* L3 cache */
-	N_CORE_MCA_BLOCKS
+	SMCA_L3_CACHE,	/* L3 Cache */
+	SMCA_CS,	/* Coherent Slave */
+	SMCA_PIE,	/* Power, Interrupts, etc. */
+	SMCA_UMC,	/* Unified Memory Controller */
+	SMCA_PB,	/* Parameter Block */
+	SMCA_PSP,	/* Platform Security Processor */
+	SMCA_SMU,	/* System Management Unit */
+	N_SMCA_BANK_TYPES
+};
+
+struct smca_bank_name {
+	const char *name;	/* Short name for sysfs */
+	const char *long_name;	/* Long name for pretty-printing */
 };
 
-extern const char * const amd_core_mcablock_names[N_CORE_MCA_BLOCKS];
+extern struct smca_bank_name smca_bank_names[N_SMCA_BANK_TYPES];
+
+#define HWID_MCATYPE(hwid, mcatype) ((hwid << 16) | mcatype)
 
-enum amd_df_mca_blocks {
-	SMCA_CS = 0,	/* Coherent Slave */
-	SMCA_PIE,	/* Power management, Interrupts, etc */
-	N_DF_BLOCKS
-};
+struct smca_hwid_mcatype {
+	unsigned int bank_type;	/* Use with smca_bank_types for easy indexing. */
+	u32 hwid_mcatype;	/* (hwid,mcatype) tuple */
+	u32 xec_bitmap;		/* Bitmap of valid ExtErrorCodes; current max is 21. */
+};
 
-extern const char * const amd_df_mcablock_names[N_DF_BLOCKS];
+struct smca_bank_info {
+	struct smca_hwid_mcatype *type;
+	u32 type_instance;
+};
+
+extern struct smca_bank_info smca_banks[MAX_NR_BANKS];
+
 #endif
 
 #endif /* _ASM_X86_MCE_H */
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 9da92fb2e073..3b74b62d0808 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -63,34 +63,55 @@ static const char * const th_names[] = {
63 "execution_unit", 63 "execution_unit",
64}; 64};
65 65
66/* Define HWID to IP type mappings for Scalable MCA */ 66struct smca_bank_name smca_bank_names[] = {
67struct amd_hwid amd_hwids[] = { 67 [SMCA_LS] = { "load_store", "Load Store Unit" },
68 [SMCA_F17H_CORE] = { "f17h_core", 0xB0 }, 68 [SMCA_IF] = { "insn_fetch", "Instruction Fetch Unit" },
69 [SMCA_DF] = { "data_fabric", 0x2E }, 69 [SMCA_L2_CACHE] = { "l2_cache", "L2 Cache" },
70 [SMCA_UMC] = { "umc", 0x96 }, 70 [SMCA_DE] = { "decode_unit", "Decode Unit" },
71 [SMCA_PB] = { "param_block", 0x5 }, 71 [SMCA_EX] = { "execution_unit", "Execution Unit" },
72 [SMCA_PSP] = { "psp", 0xFF }, 72 [SMCA_FP] = { "floating_point", "Floating Point Unit" },
73 [SMCA_SMU] = { "smu", 0x1 }, 73 [SMCA_L3_CACHE] = { "l3_cache", "L3 Cache" },
74 [SMCA_CS] = { "coherent_slave", "Coherent Slave" },
75 [SMCA_PIE] = { "pie", "Power, Interrupts, etc." },
76 [SMCA_UMC] = { "umc", "Unified Memory Controller" },
77 [SMCA_PB] = { "param_block", "Parameter Block" },
78 [SMCA_PSP] = { "psp", "Platform Security Processor" },
79 [SMCA_SMU] = { "smu", "System Management Unit" },
74}; 80};
75EXPORT_SYMBOL_GPL(amd_hwids); 81EXPORT_SYMBOL_GPL(smca_bank_names);
76 82
77const char * const amd_core_mcablock_names[] = { 83static struct smca_hwid_mcatype smca_hwid_mcatypes[] = {
78 [SMCA_LS] = "load_store", 84 /* { bank_type, hwid_mcatype, xec_bitmap } */
79 [SMCA_IF] = "insn_fetch", 85
80 [SMCA_L2_CACHE] = "l2_cache", 86 /* ZN Core (HWID=0xB0) MCA types */
81 [SMCA_DE] = "decode_unit", 87 { SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFEF },
82 [RES] = "", 88 { SMCA_IF, HWID_MCATYPE(0xB0, 0x1), 0x3FFF },
83 [SMCA_EX] = "execution_unit", 89 { SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2), 0xF },
84 [SMCA_FP] = "floating_point", 90 { SMCA_DE, HWID_MCATYPE(0xB0, 0x3), 0x1FF },
85 [SMCA_L3_CACHE] = "l3_cache", 91 /* HWID 0xB0 MCATYPE 0x4 is Reserved */
86}; 92 { SMCA_EX, HWID_MCATYPE(0xB0, 0x5), 0x7FF },
87EXPORT_SYMBOL_GPL(amd_core_mcablock_names); 93 { SMCA_FP, HWID_MCATYPE(0xB0, 0x6), 0x7F },
94 { SMCA_L3_CACHE, HWID_MCATYPE(0xB0, 0x7), 0xFF },
95
96 /* Data Fabric MCA types */
97 { SMCA_CS, HWID_MCATYPE(0x2E, 0x0), 0x1FF },
98 { SMCA_PIE, HWID_MCATYPE(0x2E, 0x1), 0xF },
99
100 /* Unified Memory Controller MCA type */
101 { SMCA_UMC, HWID_MCATYPE(0x96, 0x0), 0x3F },
102
103 /* Parameter Block MCA type */
104 { SMCA_PB, HWID_MCATYPE(0x05, 0x0), 0x1 },
88 105
89const char * const amd_df_mcablock_names[] = { 106 /* Platform Security Processor MCA type */
90 [SMCA_CS] = "coherent_slave", 107 { SMCA_PSP, HWID_MCATYPE(0xFF, 0x0), 0x1 },
91 [SMCA_PIE] = "pie", 108
109 /* System Management Unit MCA type */
110 { SMCA_SMU, HWID_MCATYPE(0x01, 0x0), 0x1 },
92}; 111};
93EXPORT_SYMBOL_GPL(amd_df_mcablock_names); 112
113struct smca_bank_info smca_banks[MAX_NR_BANKS];
114EXPORT_SYMBOL_GPL(smca_banks);
94 115
95static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks); 116static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
96static DEFINE_PER_CPU(unsigned int, bank_map); /* see which banks are on */ 117static DEFINE_PER_CPU(unsigned int, bank_map); /* see which banks are on */
@@ -108,6 +129,36 @@ void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt;
  * CPU Initialization
  */
 
+static void get_smca_bank_info(unsigned int bank)
+{
+	unsigned int i, hwid_mcatype, cpu = smp_processor_id();
+	struct smca_hwid_mcatype *type;
+	u32 high, instanceId;
+	u16 hwid, mcatype;
+
+	/* Collect bank_info using CPU 0 for now. */
+	if (cpu)
+		return;
+
+	if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_IPID(bank), &instanceId, &high)) {
+		pr_warn("Failed to read MCA_IPID for bank %d\n", bank);
+		return;
+	}
+
+	hwid = high & MCI_IPID_HWID;
+	mcatype = (high & MCI_IPID_MCATYPE) >> 16;
+	hwid_mcatype = HWID_MCATYPE(hwid, mcatype);
+
+	for (i = 0; i < ARRAY_SIZE(smca_hwid_mcatypes); i++) {
+		type = &smca_hwid_mcatypes[i];
+		if (hwid_mcatype == type->hwid_mcatype) {
+			smca_banks[bank].type = type;
+			smca_banks[bank].type_instance = instanceId;
+			break;
+		}
+	}
+}
+
 struct thresh_restart {
 	struct threshold_block	*b;
 	int			reset;
@@ -425,6 +476,9 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
 	int offset = -1;
 
 	for (bank = 0; bank < mca_cfg.banks; ++bank) {
+		if (mce_flags.smca)
+			get_smca_bank_info(bank);
+
 		for (block = 0; block < NR_BLOCKS; ++block) {
 			address = get_block_address(cpu, address, low, high, bank, block);
 			if (!address)
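
For reference, get_smca_bank_info() above splits the high half of MCA_IPID into its HWID and MCATYPE fields before packing them with HWID_MCATYPE() and scanning the table. A small user-space sketch of that split follows; the mask values are illustrative stand-ins for the kernel's MCI_IPID_HWID and MCI_IPID_MCATYPE constants, which are defined outside this patch.

#include <stdint.h>
#include <stdio.h>

#define HWID_MCATYPE(hwid, mcatype) (((uint32_t)(hwid) << 16) | (mcatype))

/* Assumed field split of the high 32 bits of MCA_IPID, mirroring the
 * "& mask" and ">> 16" in get_smca_bank_info(); not the kernel's macros. */
#define IPID_HI_HWID_MASK	0x0000ffffu
#define IPID_HI_MCATYPE_MASK	0xffff0000u

int main(void)
{
	uint32_t high = 0x00000096;	/* example: MCATYPE = 0x0, HWID = 0x96 */
	uint16_t hwid = high & IPID_HI_HWID_MASK;
	uint16_t mcatype = (high & IPID_HI_MCATYPE_MASK) >> 16;

	/* Same packing the patch uses to match against smca_hwid_mcatypes[]. */
	printf("hwid=0x%x mcatype=0x%x key=0x%08x\n",
	       hwid, mcatype, HWID_MCATYPE(hwid, mcatype));
	return 0;
}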
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index ea549a94361b..99b3bf3f4182 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -283,6 +283,27 @@ static const char * const smca_smu_mce_desc[] = {
283 "SMU RAM ECC or parity error", 283 "SMU RAM ECC or parity error",
284}; 284};
285 285
286struct smca_mce_desc {
287 const char * const *descs;
288 unsigned int num_descs;
289};
290
291static struct smca_mce_desc smca_mce_descs[] = {
292 [SMCA_LS] = { smca_ls_mce_desc, ARRAY_SIZE(smca_ls_mce_desc) },
293 [SMCA_IF] = { smca_if_mce_desc, ARRAY_SIZE(smca_if_mce_desc) },
294 [SMCA_L2_CACHE] = { smca_l2_mce_desc, ARRAY_SIZE(smca_l2_mce_desc) },
295 [SMCA_DE] = { smca_de_mce_desc, ARRAY_SIZE(smca_de_mce_desc) },
296 [SMCA_EX] = { smca_ex_mce_desc, ARRAY_SIZE(smca_ex_mce_desc) },
297 [SMCA_FP] = { smca_fp_mce_desc, ARRAY_SIZE(smca_fp_mce_desc) },
298 [SMCA_L3_CACHE] = { smca_l3_mce_desc, ARRAY_SIZE(smca_l3_mce_desc) },
299 [SMCA_CS] = { smca_cs_mce_desc, ARRAY_SIZE(smca_cs_mce_desc) },
300 [SMCA_PIE] = { smca_pie_mce_desc, ARRAY_SIZE(smca_pie_mce_desc) },
301 [SMCA_UMC] = { smca_umc_mce_desc, ARRAY_SIZE(smca_umc_mce_desc) },
302 [SMCA_PB] = { smca_pb_mce_desc, ARRAY_SIZE(smca_pb_mce_desc) },
303 [SMCA_PSP] = { smca_psp_mce_desc, ARRAY_SIZE(smca_psp_mce_desc) },
304 [SMCA_SMU] = { smca_smu_mce_desc, ARRAY_SIZE(smca_smu_mce_desc) },
305};
306
286static bool f12h_mc0_mce(u16 ec, u8 xec) 307static bool f12h_mc0_mce(u16 ec, u8 xec)
287{ 308{
288 bool ret = false; 309 bool ret = false;
@@ -827,175 +848,32 @@ static void decode_mc6_mce(struct mce *m)
 	pr_emerg(HW_ERR "Corrupted MC6 MCE info?\n");
 }
 
-static void decode_f17h_core_errors(const char *ip_name, u8 xec,
-				    unsigned int mca_type)
-{
-	const char * const *error_desc_array;
-	size_t len;
-
-	pr_emerg(HW_ERR "%s Error: ", ip_name);
-
-	switch (mca_type) {
-	case SMCA_LS:
-		error_desc_array = smca_ls_mce_desc;
-		len = ARRAY_SIZE(smca_ls_mce_desc) - 1;
-
-		if (xec == 0x4) {
-			pr_cont("Unrecognized LS MCA error code.\n");
-			return;
-		}
-		break;
-
-	case SMCA_IF:
-		error_desc_array = smca_if_mce_desc;
-		len = ARRAY_SIZE(smca_if_mce_desc) - 1;
-		break;
-
-	case SMCA_L2_CACHE:
-		error_desc_array = smca_l2_mce_desc;
-		len = ARRAY_SIZE(smca_l2_mce_desc) - 1;
-		break;
-
-	case SMCA_DE:
-		error_desc_array = smca_de_mce_desc;
-		len = ARRAY_SIZE(smca_de_mce_desc) - 1;
-		break;
-
-	case SMCA_EX:
-		error_desc_array = smca_ex_mce_desc;
-		len = ARRAY_SIZE(smca_ex_mce_desc) - 1;
-		break;
-
-	case SMCA_FP:
-		error_desc_array = smca_fp_mce_desc;
-		len = ARRAY_SIZE(smca_fp_mce_desc) - 1;
-		break;
-
-	case SMCA_L3_CACHE:
-		error_desc_array = smca_l3_mce_desc;
-		len = ARRAY_SIZE(smca_l3_mce_desc) - 1;
-		break;
-
-	default:
-		pr_cont("Corrupted MCA core error info.\n");
-		return;
-	}
-
-	if (xec > len) {
-		pr_cont("Unrecognized %s MCA bank error code.\n",
-			amd_core_mcablock_names[mca_type]);
-		return;
-	}
-
-	pr_cont("%s.\n", error_desc_array[xec]);
-}
-
-static void decode_df_errors(u8 xec, unsigned int mca_type)
-{
-	const char * const *error_desc_array;
-	size_t len;
-
-	pr_emerg(HW_ERR "Data Fabric Error: ");
-
-	switch (mca_type) {
-	case SMCA_CS:
-		error_desc_array = smca_cs_mce_desc;
-		len = ARRAY_SIZE(smca_cs_mce_desc) - 1;
-		break;
-
-	case SMCA_PIE:
-		error_desc_array = smca_pie_mce_desc;
-		len = ARRAY_SIZE(smca_pie_mce_desc) - 1;
-		break;
-
-	default:
-		pr_cont("Corrupted MCA Data Fabric info.\n");
-		return;
-	}
-
-	if (xec > len) {
-		pr_cont("Unrecognized %s MCA bank error code.\n",
-			amd_df_mcablock_names[mca_type]);
-		return;
-	}
-
-	pr_cont("%s.\n", error_desc_array[xec]);
-}
-
 /* Decode errors according to Scalable MCA specification */
 static void decode_smca_errors(struct mce *m)
 {
-	u32 addr = MSR_AMD64_SMCA_MCx_IPID(m->bank);
-	unsigned int hwid, mca_type, i;
-	u8 xec = XEC(m->status, xec_mask);
-	const char * const *error_desc_array;
+	struct smca_hwid_mcatype *type;
+	unsigned int bank_type;
 	const char *ip_name;
-	u32 low, high;
-	size_t len;
+	u8 xec = XEC(m->status, xec_mask);
 
-	if (rdmsr_safe(addr, &low, &high)) {
-		pr_emerg(HW_ERR "Invalid IP block specified.\n");
+	if (m->bank >= ARRAY_SIZE(smca_banks))
 		return;
-	}
-
-	hwid = high & MCI_IPID_HWID;
-	mca_type = (high & MCI_IPID_MCATYPE) >> 16;
 
-	pr_emerg(HW_ERR "MC%d IPID value: 0x%08x%08x\n", m->bank, high, low);
-
-	/*
-	 * Based on hwid and mca_type values, decode errors from respective IPs.
-	 * Note: mca_type values make sense only in the context of an hwid.
-	 */
-	for (i = 0; i < ARRAY_SIZE(amd_hwids); i++)
-		if (amd_hwids[i].hwid == hwid)
-			break;
-
-	switch (i) {
-	case SMCA_F17H_CORE:
-		ip_name = (mca_type == SMCA_L3_CACHE) ?
-			  "L3 Cache" : "F17h Core";
-		return decode_f17h_core_errors(ip_name, xec, mca_type);
-		break;
-
-	case SMCA_DF:
-		return decode_df_errors(xec, mca_type);
-		break;
-
-	case SMCA_UMC:
-		error_desc_array = smca_umc_mce_desc;
-		len = ARRAY_SIZE(smca_umc_mce_desc) - 1;
-		break;
-
-	case SMCA_PB:
-		error_desc_array = smca_pb_mce_desc;
-		len = ARRAY_SIZE(smca_pb_mce_desc) - 1;
-		break;
-
-	case SMCA_PSP:
-		error_desc_array = smca_psp_mce_desc;
-		len = ARRAY_SIZE(smca_psp_mce_desc) - 1;
-		break;
-
-	case SMCA_SMU:
-		error_desc_array = smca_smu_mce_desc;
-		len = ARRAY_SIZE(smca_smu_mce_desc) - 1;
-		break;
-
-	default:
-		pr_emerg(HW_ERR "HWID:%d does not match any existing IPs.\n", hwid);
+	type = smca_banks[m->bank].type;
+	if (!type)
 		return;
-	}
 
-	ip_name = amd_hwids[i].name;
-	pr_emerg(HW_ERR "%s Error: ", ip_name);
+	bank_type = type->bank_type;
+	ip_name = smca_bank_names[bank_type].long_name;
 
-	if (xec > len) {
-		pr_cont("Unrecognized %s MCA bank error code.\n", ip_name);
-		return;
-	}
+	pr_emerg(HW_ERR "%s Extended Error Code: %d\n", ip_name, xec);
 
-	pr_cont("%s.\n", error_desc_array[xec]);
+	/* Only print the decode of valid error codes */
+	if (xec < smca_mce_descs[bank_type].num_descs &&
+	    (type->xec_bitmap & BIT_ULL(xec))) {
+		pr_emerg(HW_ERR "%s Error: ", ip_name);
+		pr_cont("%s.\n", smca_mce_descs[bank_type].descs[xec]);
+	}
 }
 
 static inline void amd_decode_err_code(u16 ec)
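
The new decode path prints a description only when the extended error code is both within the per-type description array and flagged as valid in that type's xec_bitmap. A minimal sketch of that guard with made-up values follows; BIT_ULL is open-coded here and the description strings are placeholders, not the real smca_*_mce_desc arrays.

#include <stdint.h>
#include <stdio.h>

#define BIT_ULL(n) (1ULL << (n))

static const char * const demo_descs[] = {
	"No error", "Demo error A", "Demo error B",
};

int main(void)
{
	/* Pretend only extended error codes 0 and 2 are architecturally valid. */
	uint64_t xec_bitmap = BIT_ULL(0) | BIT_ULL(2);
	unsigned int num_descs = sizeof(demo_descs) / sizeof(demo_descs[0]);
	uint8_t xec = 2;

	/* Same check decode_smca_errors() applies before printing the decode. */
	if (xec < num_descs && (xec_bitmap & BIT_ULL(xec)))
		printf("decoded: %s\n", demo_descs[xec]);
	else
		printf("unrecognized extended error code %u\n", xec);
	return 0;
}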