about summary refs log tree commit diff stats
path: root/arch/x86
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2013-09-04 14:07:04 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2013-09-04 14:07:04 -0400
commit b20c99eb668f10b855a9fd87e0a2f5db3fb3637d (patch)
tree 87cb380f2006a1c5ee2c612fead142d261c64c4e /arch/x86
parent bb8c4701704d81ef98657dc51adb99aa5a0c5ac9 (diff)
parent ead6fa95b7e9d38b4526503403ba1c029b03dd72 (diff)
Merge branch 'x86-ras-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 RAS changes from Ingo Molnar:
 "[ The reason for drivers/ updates is that Boris asked for the
    drivers/edac/ changes to go via x86/ras in this cycle ]

  Main changes:

   - AMD CPUs:
      . Add ECC event decoding support for new F15h models
      . Various erratum fixes
      . Fix single-channel on dual-channel-controllers bug.

   - Intel CPUs:
      . UC uncorrectable memory error parsing fix
      . Add support for CMC (Corrected Machine Check) 'FF' (Firmware
        First) flag in the APEI HEST

   - Various cleanups and fixes"

* 'x86-ras-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  amd64_edac: Fix incorrect wraparounds
  amd64_edac: Correct erratum 505 range
  cpc925_edac: Use proper array termination
  x86/mce, acpi/apei: Only disable banks listed in HEST if mce is configured
  amd64_edac: Get rid of boot_cpu_data accesses
  amd64_edac: Add ECC decoding support for newer F15h models
  x86, amd_nb: Clarify F15h, model 30h GART and L3 support
  pci_ids: Add PCI device ID functions 3 and 4 for newer F15h models.
  x38_edac: Make a local function static
  i3200_edac: Make a local function static
  x86/mce: Pay no attention to 'F' bit in MCACOD when parsing 'UC' errors
  APEI/ERST: Fix error message formatting
  amd64_edac: Fix single-channel setups
  EDAC: Replace strict_strtol() with kstrtol()
  mce: acpi/apei: Soft-offline a page on firmware GHES notification
  mce: acpi/apei: Add a boot option to disable ff mode for corrected errors
  mce: acpi/apei: Honour Firmware First for MCA banks listed in APEI HEST CMC
Diffstat (limited to 'arch/x86')
-rw-r--r-- arch/x86/include/asm/acpi.h 2
-rw-r--r-- arch/x86/include/asm/mce.h 16
-rw-r--r-- arch/x86/kernel/acpi/boot.c 5
-rw-r--r-- arch/x86/kernel/amd_nb.c 13
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce-internal.h 3
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce.c 28
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce_intel.c 42
7 files changed, 95 insertions, 14 deletions
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 2dfac58f3b11..b1977bad5435 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -86,6 +86,7 @@ extern int acpi_pci_disabled;
86extern int acpi_skip_timer_override; 86extern int acpi_skip_timer_override;
87extern int acpi_use_timer_override; 87extern int acpi_use_timer_override;
88extern int acpi_fix_pin2_polarity; 88extern int acpi_fix_pin2_polarity;
89extern int acpi_disable_cmcff;
89 90
90extern u8 acpi_sci_flags; 91extern u8 acpi_sci_flags;
91extern int acpi_sci_override_gsi; 92extern int acpi_sci_override_gsi;
@@ -168,6 +169,7 @@ static inline void arch_acpi_set_pdc_bits(u32 *buf)
168 169
169#define acpi_lapic 0 170#define acpi_lapic 0
170#define acpi_ioapic 0 171#define acpi_ioapic 0
172#define acpi_disable_cmcff 0
171static inline void acpi_noirq_set(void) { } 173static inline void acpi_noirq_set(void) { }
172static inline void acpi_disable_pci(void) { } 174static inline void acpi_disable_pci(void) { }
173static inline void disable_acpi(void) { } 175static inline void disable_acpi(void) { }
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 29e3093bbd21..cbe6b9e404ce 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -32,11 +32,20 @@
32#define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */ 32#define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */
33#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */ 33#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */
34#define MCI_STATUS_AR (1ULL<<55) /* Action required */ 34#define MCI_STATUS_AR (1ULL<<55) /* Action required */
35#define MCACOD 0xffff /* MCA Error Code */ 35
36/*
37 * Note that the full MCACOD field of IA32_MCi_STATUS MSR is
38 * bits 15:0. But bit 12 is the 'F' bit, defined for corrected
39 * errors to indicate that errors are being filtered by hardware.
40 * We should mask out bit 12 when looking for specific signatures
41 * of uncorrected errors - so the F bit is deliberately skipped
42 * in this #define.
43 */
44#define MCACOD 0xefff /* MCA Error Code */
36 45
37/* Architecturally defined codes from SDM Vol. 3B Chapter 15 */ 46/* Architecturally defined codes from SDM Vol. 3B Chapter 15 */
38#define MCACOD_SCRUB 0x00C0 /* 0xC0-0xCF Memory Scrubbing */ 47#define MCACOD_SCRUB 0x00C0 /* 0xC0-0xCF Memory Scrubbing */
39#define MCACOD_SCRUBMSK 0xfff0 48#define MCACOD_SCRUBMSK 0xeff0 /* Skip bit 12 ('F' bit) */
40#define MCACOD_L3WB 0x017A /* L3 Explicit Writeback */ 49#define MCACOD_L3WB 0x017A /* L3 Explicit Writeback */
41#define MCACOD_DATA 0x0134 /* Data Load */ 50#define MCACOD_DATA 0x0134 /* Data Load */
42#define MCACOD_INSTR 0x0150 /* Instruction Fetch */ 51#define MCACOD_INSTR 0x0150 /* Instruction Fetch */
@@ -188,6 +197,9 @@ extern void register_mce_write_callback(ssize_t (*)(struct file *filp,
188 const char __user *ubuf, 197 const char __user *ubuf,
189 size_t usize, loff_t *off)); 198 size_t usize, loff_t *off));
190 199
200/* Disable CMCI/polling for MCA bank claimed by firmware */
201extern void mce_disable_bank(int bank);
202
191/* 203/*
192 * Exception handler 204 * Exception handler
193 */ 205 */
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 8e594a489d75..40c76604199f 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -67,6 +67,7 @@ EXPORT_SYMBOL(acpi_pci_disabled);
67int acpi_lapic; 67int acpi_lapic;
68int acpi_ioapic; 68int acpi_ioapic;
69int acpi_strict; 69int acpi_strict;
70int acpi_disable_cmcff;
70 71
71u8 acpi_sci_flags __initdata; 72u8 acpi_sci_flags __initdata;
72int acpi_sci_override_gsi __initdata; 73int acpi_sci_override_gsi __initdata;
@@ -1622,6 +1623,10 @@ static int __init parse_acpi(char *arg)
1622 /* "acpi=copy_dsdt" copys DSDT */ 1623 /* "acpi=copy_dsdt" copys DSDT */
1623 else if (strcmp(arg, "copy_dsdt") == 0) { 1624 else if (strcmp(arg, "copy_dsdt") == 0) {
1624 acpi_gbl_copy_dsdt_locally = 1; 1625 acpi_gbl_copy_dsdt_locally = 1;
1626 }
1627 /* "acpi=nocmcff" disables FF mode for corrected errors */
1628 else if (strcmp(arg, "nocmcff") == 0) {
1629 acpi_disable_cmcff = 1;
1625 } else { 1630 } else {
1626 /* Core will printk when we return error. */ 1631 /* Core will printk when we return error. */
1627 return -EINVAL; 1632 return -EINVAL;
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 3048ded1b598..59554dca96ec 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -20,6 +20,7 @@ const struct pci_device_id amd_nb_misc_ids[] = {
20 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, 20 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
21 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3) }, 21 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3) },
22 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M10H_F3) }, 22 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M10H_F3) },
23 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F3) },
23 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) }, 24 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) },
24 {} 25 {}
25}; 26};
@@ -27,6 +28,7 @@ EXPORT_SYMBOL(amd_nb_misc_ids);
27 28
28static const struct pci_device_id amd_nb_link_ids[] = { 29static const struct pci_device_id amd_nb_link_ids[] = {
29 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F4) }, 30 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F4) },
31 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F4) },
30 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) }, 32 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) },
31 {} 33 {}
32}; 34};
@@ -81,13 +83,20 @@ int amd_cache_northbridges(void)
81 next_northbridge(misc, amd_nb_misc_ids); 83 next_northbridge(misc, amd_nb_misc_ids);
82 node_to_amd_nb(i)->link = link = 84 node_to_amd_nb(i)->link = link =
83 next_northbridge(link, amd_nb_link_ids); 85 next_northbridge(link, amd_nb_link_ids);
84 } 86 }
85 87
88 /* GART present only on Fam15h upto model 0fh */
86 if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 || 89 if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 ||
87 boot_cpu_data.x86 == 0x15) 90 (boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model < 0x10))
88 amd_northbridges.flags |= AMD_NB_GART; 91 amd_northbridges.flags |= AMD_NB_GART;
89 92
90 /* 93 /*
94 * Check for L3 cache presence.
95 */
96 if (!cpuid_edx(0x80000006))
97 return 0;
98
99 /*
91 * Some CPU families support L3 Cache Index Disable. There are some 100 * Some CPU families support L3 Cache Index Disable. There are some
92 * limitations because of E382 and E388 on family 0x10. 101 * limitations because of E382 and E388 on family 0x10.
93 */ 102 */
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 5b7d4fa5d3b7..09edd0b65fef 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -25,15 +25,18 @@ int mce_severity(struct mce *a, int tolerant, char **msg);
25struct dentry *mce_get_debugfs_dir(void); 25struct dentry *mce_get_debugfs_dir(void);
26 26
27extern struct mce_bank *mce_banks; 27extern struct mce_bank *mce_banks;
28extern mce_banks_t mce_banks_ce_disabled;
28 29
29#ifdef CONFIG_X86_MCE_INTEL 30#ifdef CONFIG_X86_MCE_INTEL
30unsigned long mce_intel_adjust_timer(unsigned long interval); 31unsigned long mce_intel_adjust_timer(unsigned long interval);
31void mce_intel_cmci_poll(void); 32void mce_intel_cmci_poll(void);
32void mce_intel_hcpu_update(unsigned long cpu); 33void mce_intel_hcpu_update(unsigned long cpu);
34void cmci_disable_bank(int bank);
33#else 35#else
34# define mce_intel_adjust_timer mce_adjust_timer_default 36# define mce_intel_adjust_timer mce_adjust_timer_default
35static inline void mce_intel_cmci_poll(void) { } 37static inline void mce_intel_cmci_poll(void) { }
36static inline void mce_intel_hcpu_update(unsigned long cpu) { } 38static inline void mce_intel_hcpu_update(unsigned long cpu) { }
39static inline void cmci_disable_bank(int bank) { }
37#endif 40#endif
38 41
39void mce_timer_kick(unsigned long interval); 42void mce_timer_kick(unsigned long interval);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 87a65c939bcd..b3218cdee95f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -97,6 +97,15 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
97 [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL 97 [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
98}; 98};
99 99
100/*
101 * MCA banks controlled through firmware first for corrected errors.
102 * This is a global list of banks for which we won't enable CMCI and we
103 * won't poll. Firmware controls these banks and is responsible for
104 * reporting corrected errors through GHES. Uncorrected/recoverable
105 * errors are still notified through a machine check.
106 */
107mce_banks_t mce_banks_ce_disabled;
108
100static DEFINE_PER_CPU(struct work_struct, mce_work); 109static DEFINE_PER_CPU(struct work_struct, mce_work);
101 110
102static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs); 111static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
@@ -1935,6 +1944,25 @@ static struct miscdevice mce_chrdev_device = {
1935 &mce_chrdev_ops, 1944 &mce_chrdev_ops,
1936}; 1945};
1937 1946
1947static void __mce_disable_bank(void *arg)
1948{
1949 int bank = *((int *)arg);
1950 __clear_bit(bank, __get_cpu_var(mce_poll_banks));
1951 cmci_disable_bank(bank);
1952}
1953
1954void mce_disable_bank(int bank)
1955{
1956 if (bank >= mca_cfg.banks) {
1957 pr_warn(FW_BUG
1958 "Ignoring request to disable invalid MCA bank %d.\n",
1959 bank);
1960 return;
1961 }
1962 set_bit(bank, mce_banks_ce_disabled);
1963 on_each_cpu(__mce_disable_bank, &bank, 1);
1964}
1965
1938/* 1966/*
1939 * mce=off Disables machine check 1967 * mce=off Disables machine check
1940 * mce=no_cmci Disables CMCI 1968 * mce=no_cmci Disables CMCI
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index d56405309dc1..4cfe0458ca66 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -203,6 +203,10 @@ static void cmci_discover(int banks)
203 if (test_bit(i, owned)) 203 if (test_bit(i, owned))
204 continue; 204 continue;
205 205
206 /* Skip banks in firmware first mode */
207 if (test_bit(i, mce_banks_ce_disabled))
208 continue;
209
206 rdmsrl(MSR_IA32_MCx_CTL2(i), val); 210 rdmsrl(MSR_IA32_MCx_CTL2(i), val);
207 211
208 /* Already owned by someone else? */ 212 /* Already owned by someone else? */
@@ -271,6 +275,19 @@ void cmci_recheck(void)
271 local_irq_restore(flags); 275 local_irq_restore(flags);
272} 276}
273 277
278/* Caller must hold the lock on cmci_discover_lock */
279static void __cmci_disable_bank(int bank)
280{
281 u64 val;
282
283 if (!test_bit(bank, __get_cpu_var(mce_banks_owned)))
284 return;
285 rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
286 val &= ~MCI_CTL2_CMCI_EN;
287 wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
288 __clear_bit(bank, __get_cpu_var(mce_banks_owned));
289}
290
274/* 291/*
275 * Disable CMCI on this CPU for all banks it owns when it goes down. 292 * Disable CMCI on this CPU for all banks it owns when it goes down.
276 * This allows other CPUs to claim the banks on rediscovery. 293 * This allows other CPUs to claim the banks on rediscovery.
@@ -280,20 +297,12 @@ void cmci_clear(void)
280 unsigned long flags; 297 unsigned long flags;
281 int i; 298 int i;
282 int banks; 299 int banks;
283 u64 val;
284 300
285 if (!cmci_supported(&banks)) 301 if (!cmci_supported(&banks))
286 return; 302 return;
287 raw_spin_lock_irqsave(&cmci_discover_lock, flags); 303 raw_spin_lock_irqsave(&cmci_discover_lock, flags);
288 for (i = 0; i < banks; i++) { 304 for (i = 0; i < banks; i++)
289 if (!test_bit(i, __get_cpu_var(mce_banks_owned))) 305 __cmci_disable_bank(i);
290 continue;
291 /* Disable CMCI */
292 rdmsrl(MSR_IA32_MCx_CTL2(i), val);
293 val &= ~MCI_CTL2_CMCI_EN;
294 wrmsrl(MSR_IA32_MCx_CTL2(i), val);
295 __clear_bit(i, __get_cpu_var(mce_banks_owned));
296 }
297 raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); 306 raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
298} 307}
299 308
@@ -327,6 +336,19 @@ void cmci_reenable(void)
327 cmci_discover(banks); 336 cmci_discover(banks);
328} 337}
329 338
339void cmci_disable_bank(int bank)
340{
341 int banks;
342 unsigned long flags;
343
344 if (!cmci_supported(&banks))
345 return;
346
347 raw_spin_lock_irqsave(&cmci_discover_lock, flags);
348 __cmci_disable_bank(bank);
349 raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
350}
351
330static void intel_init_cmci(void) 352static void intel_init_cmci(void)
331{ 353{
332 int banks; 354 int banks;