diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-09-04 14:07:04 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-09-04 14:07:04 -0400 |
commit | b20c99eb668f10b855a9fd87e0a2f5db3fb3637d (patch) | |
tree | 87cb380f2006a1c5ee2c612fead142d261c64c4e /arch/x86 | |
parent | bb8c4701704d81ef98657dc51adb99aa5a0c5ac9 (diff) | |
parent | ead6fa95b7e9d38b4526503403ba1c029b03dd72 (diff) |
Merge branch 'x86-ras-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 RAS changes from Ingo Molnar:
"[ The reason for drivers/ updates is that Boris asked for the
drivers/edac/ changes to go via x86/ras in this cycle ]
Main changes:

 - AMD CPUs:
    . Add ECC event decoding support for new F15h models
    . Various erratum fixes
    . Fix single-channel on dual-channel-controllers bug.

 - Intel CPUs:
    . UC uncorrectable memory error parsing fix
    . Add support for CMC (Corrected Machine Check) 'FF' (Firmware
      First) flag in the APEI HEST

 - Various cleanups and fixes"
* 'x86-ras-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  amd64_edac: Fix incorrect wraparounds
  amd64_edac: Correct erratum 505 range
  cpc925_edac: Use proper array termination
  x86/mce, acpi/apei: Only disable banks listed in HEST if mce is configured
  amd64_edac: Get rid of boot_cpu_data accesses
  amd64_edac: Add ECC decoding support for newer F15h models
  x86, amd_nb: Clarify F15h, model 30h GART and L3 support
  pci_ids: Add PCI device ID functions 3 and 4 for newer F15h models.
  x38_edac: Make a local function static
  i3200_edac: Make a local function static
  x86/mce: Pay no attention to 'F' bit in MCACOD when parsing 'UC' errors
  APEI/ERST: Fix error message formatting
  amd64_edac: Fix single-channel setups
  EDAC: Replace strict_strtol() with kstrtol()
  mce: acpi/apei: Soft-offline a page on firmware GHES notification
  mce: acpi/apei: Add a boot option to disable ff mode for corrected errors
  mce: acpi/apei: Honour Firmware First for MCA banks listed in APEI HEST CMC
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/include/asm/acpi.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/mce.h | 16 | ||||
-rw-r--r-- | arch/x86/kernel/acpi/boot.c | 5 | ||||
-rw-r--r-- | arch/x86/kernel/amd_nb.c | 13 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce-internal.h | 3 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 28 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce_intel.c | 42 |
7 files changed, 95 insertions, 14 deletions
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 2dfac58f3b11..b1977bad5435 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h | |||
@@ -86,6 +86,7 @@ extern int acpi_pci_disabled; | |||
86 | extern int acpi_skip_timer_override; | 86 | extern int acpi_skip_timer_override; |
87 | extern int acpi_use_timer_override; | 87 | extern int acpi_use_timer_override; |
88 | extern int acpi_fix_pin2_polarity; | 88 | extern int acpi_fix_pin2_polarity; |
89 | extern int acpi_disable_cmcff; | ||
89 | 90 | ||
90 | extern u8 acpi_sci_flags; | 91 | extern u8 acpi_sci_flags; |
91 | extern int acpi_sci_override_gsi; | 92 | extern int acpi_sci_override_gsi; |
@@ -168,6 +169,7 @@ static inline void arch_acpi_set_pdc_bits(u32 *buf) | |||
168 | 169 | ||
169 | #define acpi_lapic 0 | 170 | #define acpi_lapic 0 |
170 | #define acpi_ioapic 0 | 171 | #define acpi_ioapic 0 |
172 | #define acpi_disable_cmcff 0 | ||
171 | static inline void acpi_noirq_set(void) { } | 173 | static inline void acpi_noirq_set(void) { } |
172 | static inline void acpi_disable_pci(void) { } | 174 | static inline void acpi_disable_pci(void) { } |
173 | static inline void disable_acpi(void) { } | 175 | static inline void disable_acpi(void) { } |
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 29e3093bbd21..cbe6b9e404ce 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h | |||
@@ -32,11 +32,20 @@ | |||
32 | #define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */ | 32 | #define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */ |
33 | #define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */ | 33 | #define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */ |
34 | #define MCI_STATUS_AR (1ULL<<55) /* Action required */ | 34 | #define MCI_STATUS_AR (1ULL<<55) /* Action required */ |
35 | #define MCACOD 0xffff /* MCA Error Code */ | 35 | |
36 | /* | ||
37 | * Note that the full MCACOD field of IA32_MCi_STATUS MSR is | ||
38 | * bits 15:0. But bit 12 is the 'F' bit, defined for corrected | ||
39 | * errors to indicate that errors are being filtered by hardware. | ||
40 | * We should mask out bit 12 when looking for specific signatures | ||
41 | * of uncorrected errors - so the F bit is deliberately skipped | ||
42 | * in this #define. | ||
43 | */ | ||
44 | #define MCACOD 0xefff /* MCA Error Code */ | ||
36 | 45 | ||
37 | /* Architecturally defined codes from SDM Vol. 3B Chapter 15 */ | 46 | /* Architecturally defined codes from SDM Vol. 3B Chapter 15 */ |
38 | #define MCACOD_SCRUB 0x00C0 /* 0xC0-0xCF Memory Scrubbing */ | 47 | #define MCACOD_SCRUB 0x00C0 /* 0xC0-0xCF Memory Scrubbing */ |
39 | #define MCACOD_SCRUBMSK 0xfff0 | 48 | #define MCACOD_SCRUBMSK 0xeff0 /* Skip bit 12 ('F' bit) */ |
40 | #define MCACOD_L3WB 0x017A /* L3 Explicit Writeback */ | 49 | #define MCACOD_L3WB 0x017A /* L3 Explicit Writeback */ |
41 | #define MCACOD_DATA 0x0134 /* Data Load */ | 50 | #define MCACOD_DATA 0x0134 /* Data Load */ |
42 | #define MCACOD_INSTR 0x0150 /* Instruction Fetch */ | 51 | #define MCACOD_INSTR 0x0150 /* Instruction Fetch */ |
@@ -188,6 +197,9 @@ extern void register_mce_write_callback(ssize_t (*)(struct file *filp, | |||
188 | const char __user *ubuf, | 197 | const char __user *ubuf, |
189 | size_t usize, loff_t *off)); | 198 | size_t usize, loff_t *off)); |
190 | 199 | ||
200 | /* Disable CMCI/polling for MCA bank claimed by firmware */ | ||
201 | extern void mce_disable_bank(int bank); | ||
202 | |||
191 | /* | 203 | /* |
192 | * Exception handler | 204 | * Exception handler |
193 | */ | 205 | */ |
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 8e594a489d75..40c76604199f 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c | |||
@@ -67,6 +67,7 @@ EXPORT_SYMBOL(acpi_pci_disabled); | |||
67 | int acpi_lapic; | 67 | int acpi_lapic; |
68 | int acpi_ioapic; | 68 | int acpi_ioapic; |
69 | int acpi_strict; | 69 | int acpi_strict; |
70 | int acpi_disable_cmcff; | ||
70 | 71 | ||
71 | u8 acpi_sci_flags __initdata; | 72 | u8 acpi_sci_flags __initdata; |
72 | int acpi_sci_override_gsi __initdata; | 73 | int acpi_sci_override_gsi __initdata; |
@@ -1622,6 +1623,10 @@ static int __init parse_acpi(char *arg) | |||
1622 | /* "acpi=copy_dsdt" copys DSDT */ | 1623 | /* "acpi=copy_dsdt" copys DSDT */ |
1623 | else if (strcmp(arg, "copy_dsdt") == 0) { | 1624 | else if (strcmp(arg, "copy_dsdt") == 0) { |
1624 | acpi_gbl_copy_dsdt_locally = 1; | 1625 | acpi_gbl_copy_dsdt_locally = 1; |
1626 | } | ||
1627 | /* "acpi=nocmcff" disables FF mode for corrected errors */ | ||
1628 | else if (strcmp(arg, "nocmcff") == 0) { | ||
1629 | acpi_disable_cmcff = 1; | ||
1625 | } else { | 1630 | } else { |
1626 | /* Core will printk when we return error. */ | 1631 | /* Core will printk when we return error. */ |
1627 | return -EINVAL; | 1632 | return -EINVAL; |
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 3048ded1b598..59554dca96ec 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c | |||
@@ -20,6 +20,7 @@ const struct pci_device_id amd_nb_misc_ids[] = { | |||
20 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, | 20 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, |
21 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3) }, | 21 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3) }, |
22 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M10H_F3) }, | 22 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M10H_F3) }, |
23 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F3) }, | ||
23 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) }, | 24 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) }, |
24 | {} | 25 | {} |
25 | }; | 26 | }; |
@@ -27,6 +28,7 @@ EXPORT_SYMBOL(amd_nb_misc_ids); | |||
27 | 28 | ||
28 | static const struct pci_device_id amd_nb_link_ids[] = { | 29 | static const struct pci_device_id amd_nb_link_ids[] = { |
29 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F4) }, | 30 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F4) }, |
31 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F4) }, | ||
30 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) }, | 32 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) }, |
31 | {} | 33 | {} |
32 | }; | 34 | }; |
@@ -81,13 +83,20 @@ int amd_cache_northbridges(void) | |||
81 | next_northbridge(misc, amd_nb_misc_ids); | 83 | next_northbridge(misc, amd_nb_misc_ids); |
82 | node_to_amd_nb(i)->link = link = | 84 | node_to_amd_nb(i)->link = link = |
83 | next_northbridge(link, amd_nb_link_ids); | 85 | next_northbridge(link, amd_nb_link_ids); |
84 | } | 86 | } |
85 | 87 | ||
88 | /* GART present only on Fam15h upto model 0fh */ | ||
86 | if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 || | 89 | if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 || |
87 | boot_cpu_data.x86 == 0x15) | 90 | (boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model < 0x10)) |
88 | amd_northbridges.flags |= AMD_NB_GART; | 91 | amd_northbridges.flags |= AMD_NB_GART; |
89 | 92 | ||
90 | /* | 93 | /* |
94 | * Check for L3 cache presence. | ||
95 | */ | ||
96 | if (!cpuid_edx(0x80000006)) | ||
97 | return 0; | ||
98 | |||
99 | /* | ||
91 | * Some CPU families support L3 Cache Index Disable. There are some | 100 | * Some CPU families support L3 Cache Index Disable. There are some |
92 | * limitations because of E382 and E388 on family 0x10. | 101 | * limitations because of E382 and E388 on family 0x10. |
93 | */ | 102 | */ |
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index 5b7d4fa5d3b7..09edd0b65fef 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h | |||
@@ -25,15 +25,18 @@ int mce_severity(struct mce *a, int tolerant, char **msg); | |||
25 | struct dentry *mce_get_debugfs_dir(void); | 25 | struct dentry *mce_get_debugfs_dir(void); |
26 | 26 | ||
27 | extern struct mce_bank *mce_banks; | 27 | extern struct mce_bank *mce_banks; |
28 | extern mce_banks_t mce_banks_ce_disabled; | ||
28 | 29 | ||
29 | #ifdef CONFIG_X86_MCE_INTEL | 30 | #ifdef CONFIG_X86_MCE_INTEL |
30 | unsigned long mce_intel_adjust_timer(unsigned long interval); | 31 | unsigned long mce_intel_adjust_timer(unsigned long interval); |
31 | void mce_intel_cmci_poll(void); | 32 | void mce_intel_cmci_poll(void); |
32 | void mce_intel_hcpu_update(unsigned long cpu); | 33 | void mce_intel_hcpu_update(unsigned long cpu); |
34 | void cmci_disable_bank(int bank); | ||
33 | #else | 35 | #else |
34 | # define mce_intel_adjust_timer mce_adjust_timer_default | 36 | # define mce_intel_adjust_timer mce_adjust_timer_default |
35 | static inline void mce_intel_cmci_poll(void) { } | 37 | static inline void mce_intel_cmci_poll(void) { } |
36 | static inline void mce_intel_hcpu_update(unsigned long cpu) { } | 38 | static inline void mce_intel_hcpu_update(unsigned long cpu) { } |
39 | static inline void cmci_disable_bank(int bank) { } | ||
37 | #endif | 40 | #endif |
38 | 41 | ||
39 | void mce_timer_kick(unsigned long interval); | 42 | void mce_timer_kick(unsigned long interval); |
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 87a65c939bcd..b3218cdee95f 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -97,6 +97,15 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { | |||
97 | [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL | 97 | [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL |
98 | }; | 98 | }; |
99 | 99 | ||
100 | /* | ||
101 | * MCA banks controlled through firmware first for corrected errors. | ||
102 | * This is a global list of banks for which we won't enable CMCI and we | ||
103 | * won't poll. Firmware controls these banks and is responsible for | ||
104 | * reporting corrected errors through GHES. Uncorrected/recoverable | ||
105 | * errors are still notified through a machine check. | ||
106 | */ | ||
107 | mce_banks_t mce_banks_ce_disabled; | ||
108 | |||
100 | static DEFINE_PER_CPU(struct work_struct, mce_work); | 109 | static DEFINE_PER_CPU(struct work_struct, mce_work); |
101 | 110 | ||
102 | static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs); | 111 | static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs); |
@@ -1935,6 +1944,25 @@ static struct miscdevice mce_chrdev_device = { | |||
1935 | &mce_chrdev_ops, | 1944 | &mce_chrdev_ops, |
1936 | }; | 1945 | }; |
1937 | 1946 | ||
1947 | static void __mce_disable_bank(void *arg) | ||
1948 | { | ||
1949 | int bank = *((int *)arg); | ||
1950 | __clear_bit(bank, __get_cpu_var(mce_poll_banks)); | ||
1951 | cmci_disable_bank(bank); | ||
1952 | } | ||
1953 | |||
1954 | void mce_disable_bank(int bank) | ||
1955 | { | ||
1956 | if (bank >= mca_cfg.banks) { | ||
1957 | pr_warn(FW_BUG | ||
1958 | "Ignoring request to disable invalid MCA bank %d.\n", | ||
1959 | bank); | ||
1960 | return; | ||
1961 | } | ||
1962 | set_bit(bank, mce_banks_ce_disabled); | ||
1963 | on_each_cpu(__mce_disable_bank, &bank, 1); | ||
1964 | } | ||
1965 | |||
1938 | /* | 1966 | /* |
1939 | * mce=off Disables machine check | 1967 | * mce=off Disables machine check |
1940 | * mce=no_cmci Disables CMCI | 1968 | * mce=no_cmci Disables CMCI |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index d56405309dc1..4cfe0458ca66 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c | |||
@@ -203,6 +203,10 @@ static void cmci_discover(int banks) | |||
203 | if (test_bit(i, owned)) | 203 | if (test_bit(i, owned)) |
204 | continue; | 204 | continue; |
205 | 205 | ||
206 | /* Skip banks in firmware first mode */ | ||
207 | if (test_bit(i, mce_banks_ce_disabled)) | ||
208 | continue; | ||
209 | |||
206 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); | 210 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); |
207 | 211 | ||
208 | /* Already owned by someone else? */ | 212 | /* Already owned by someone else? */ |
@@ -271,6 +275,19 @@ void cmci_recheck(void) | |||
271 | local_irq_restore(flags); | 275 | local_irq_restore(flags); |
272 | } | 276 | } |
273 | 277 | ||
278 | /* Caller must hold the lock on cmci_discover_lock */ | ||
279 | static void __cmci_disable_bank(int bank) | ||
280 | { | ||
281 | u64 val; | ||
282 | |||
283 | if (!test_bit(bank, __get_cpu_var(mce_banks_owned))) | ||
284 | return; | ||
285 | rdmsrl(MSR_IA32_MCx_CTL2(bank), val); | ||
286 | val &= ~MCI_CTL2_CMCI_EN; | ||
287 | wrmsrl(MSR_IA32_MCx_CTL2(bank), val); | ||
288 | __clear_bit(bank, __get_cpu_var(mce_banks_owned)); | ||
289 | } | ||
290 | |||
274 | /* | 291 | /* |
275 | * Disable CMCI on this CPU for all banks it owns when it goes down. | 292 | * Disable CMCI on this CPU for all banks it owns when it goes down. |
276 | * This allows other CPUs to claim the banks on rediscovery. | 293 | * This allows other CPUs to claim the banks on rediscovery. |
@@ -280,20 +297,12 @@ void cmci_clear(void) | |||
280 | unsigned long flags; | 297 | unsigned long flags; |
281 | int i; | 298 | int i; |
282 | int banks; | 299 | int banks; |
283 | u64 val; | ||
284 | 300 | ||
285 | if (!cmci_supported(&banks)) | 301 | if (!cmci_supported(&banks)) |
286 | return; | 302 | return; |
287 | raw_spin_lock_irqsave(&cmci_discover_lock, flags); | 303 | raw_spin_lock_irqsave(&cmci_discover_lock, flags); |
288 | for (i = 0; i < banks; i++) { | 304 | for (i = 0; i < banks; i++) |
289 | if (!test_bit(i, __get_cpu_var(mce_banks_owned))) | 305 | __cmci_disable_bank(i); |
290 | continue; | ||
291 | /* Disable CMCI */ | ||
292 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); | ||
293 | val &= ~MCI_CTL2_CMCI_EN; | ||
294 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); | ||
295 | __clear_bit(i, __get_cpu_var(mce_banks_owned)); | ||
296 | } | ||
297 | raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); | 306 | raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); |
298 | } | 307 | } |
299 | 308 | ||
@@ -327,6 +336,19 @@ void cmci_reenable(void) | |||
327 | cmci_discover(banks); | 336 | cmci_discover(banks); |
328 | } | 337 | } |
329 | 338 | ||
339 | void cmci_disable_bank(int bank) | ||
340 | { | ||
341 | int banks; | ||
342 | unsigned long flags; | ||
343 | |||
344 | if (!cmci_supported(&banks)) | ||
345 | return; | ||
346 | |||
347 | raw_spin_lock_irqsave(&cmci_discover_lock, flags); | ||
348 | __cmci_disable_bank(bank); | ||
349 | raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); | ||
350 | } | ||
351 | |||
330 | static void intel_init_cmci(void) | 352 | static void intel_init_cmci(void) |
331 | { | 353 | { |
332 | int banks; | 354 | int banks; |