 arch/x86/include/asm/mce.h       |   1 +
 arch/x86/kernel/cpu/mcheck/mce.c | 133 ++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/signal.c         |   2 +-
 3 files changed, 135 insertions(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 713926b62cbb..82978ad12072 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -160,6 +160,7 @@ enum mcp_flags {
 void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
 
 int mce_notify_irq(void);
+void mce_notify_process(void);
 
 DECLARE_PER_CPU(struct mce, injectm);
 extern struct file_operations mce_chrdev_ops;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 13e1b7ffe73a..d4e7b5947a0e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -33,6 +33,7 @@
 #include <linux/cpu.h>
 #include <linux/smp.h>
 #include <linux/fs.h>
+#include <linux/mm.h>
 
 #include <asm/processor.h>
 #include <asm/hw_irq.h>
@@ -105,6 +106,8 @@ static inline int skip_bank_init(int i)
        return i < BITS_PER_LONG && test_bit(i, &dont_init_banks);
 }
 
+static DEFINE_PER_CPU(struct work_struct, mce_work);
+
 /* Do initial initialization of a struct mce */
 void mce_setup(struct mce *m)
 {
@@ -312,6 +315,61 @@ static void mce_wrmsrl(u32 msr, u64 v)
        wrmsrl(msr, v);
 }
 
+/*
+ * Simple lockless ring to communicate PFNs from the exception handler to the
+ * process context work function. This is vastly simplified because there's
+ * only a single reader and a single writer.
+ */
+#define MCE_RING_SIZE 16       /* we use one entry less */
+
+struct mce_ring {
+       unsigned short start;
+       unsigned short end;
+       unsigned long ring[MCE_RING_SIZE];
+};
+static DEFINE_PER_CPU(struct mce_ring, mce_ring);
+
+/* Runs with CPU affinity in workqueue */
+static int mce_ring_empty(void)
+{
+       struct mce_ring *r = &__get_cpu_var(mce_ring);
+
+       return r->start == r->end;
+}
+
+static int mce_ring_get(unsigned long *pfn)
+{
+       struct mce_ring *r;
+       int ret = 0;
+
+       *pfn = 0;
+       get_cpu();
+       r = &__get_cpu_var(mce_ring);
+       if (r->start == r->end)
+               goto out;
+       *pfn = r->ring[r->start];
+       r->start = (r->start + 1) % MCE_RING_SIZE;
+       ret = 1;
+out:
+       put_cpu();
+       return ret;
+}
+
+/* Always runs in MCE context with preempt off */
+static int mce_ring_add(unsigned long pfn)
+{
+       struct mce_ring *r = &__get_cpu_var(mce_ring);
+       unsigned next;
+
+       next = (r->end + 1) % MCE_RING_SIZE;
+       if (next == r->start)
+               return -1;
+       r->ring[r->end] = pfn;
+       wmb();
+       r->end = next;
+       return 0;
+}
+
 int mce_available(struct cpuinfo_x86 *c)
 {
        if (mce_disabled)
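
[Review note] The ring needs no locks only because each per-CPU instance has exactly one writer (the #MC handler, running with preemption off) and one reader (the work function, which the workqueue keeps on the same CPU). One slot is deliberately left unused so that start == end always means "empty" and (end + 1) % MCE_RING_SIZE == start always means "full"; that is what the "we use one entry less" comment refers to. A minimal user-space sketch of the same single-producer/single-consumer scheme (hypothetical names, not part of the patch):

/*
 * Minimal user-space sketch of the single-producer/single-consumer ring
 * (hypothetical names; the kernel versions above are per-CPU). One slot
 * is sacrificed so start == end means "empty" and never "full".
 */
#include <stdio.h>

#define RING_SIZE 16                   /* holds at most RING_SIZE - 1 entries */

struct ring {
        unsigned short start;          /* consumer index */
        unsigned short end;            /* producer index */
        unsigned long slot[RING_SIZE];
};

static int ring_add(struct ring *r, unsigned long v)   /* producer side */
{
        unsigned next = (r->end + 1) % RING_SIZE;

        if (next == r->start)
                return -1;             /* full: drop, as the MCE code does */
        r->slot[r->end] = v;
        /* the kernel issues wmb() here: data visible before index */
        r->end = next;
        return 0;
}

static int ring_get(struct ring *r, unsigned long *v)  /* consumer side */
{
        if (r->start == r->end)
                return 0;              /* empty */
        *v = r->slot[r->start];
        r->start = (r->start + 1) % RING_SIZE;
        return 1;
}

int main(void)
{
        struct ring r = { 0, 0, { 0 } };
        unsigned long v;

        ring_add(&r, 0x1234);
        while (ring_get(&r, &v))
                printf("pfn %#lx\n", v);
        return 0;
}

The wmb() in the kernel version orders the PFN store before the index store, so the reader can never observe the advanced end index while the slot still holds stale data; the single-threaded sketch only notes this in a comment.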
@@ -319,6 +377,15 @@ int mce_available(struct cpuinfo_x86 *c)
        return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
 }
 
+static void mce_schedule_work(void)
+{
+       if (!mce_ring_empty()) {
+               struct work_struct *work = &__get_cpu_var(mce_work);
+               if (!work_pending(work))
+                       schedule_work(work);
+       }
+}
+
 /*
  * Get the address of the instruction at the time of the machine check
  * error.
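
[Review note] schedule_work() queues onto the calling CPU's events workqueue, which is what gives mce_process_work() the CPU affinity that mce_ring_empty()'s per-CPU access assumes. The work_pending() test just skips a redundant enqueue when a previously scheduled instance has not run yet.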
@@ -349,6 +416,7 @@ asmlinkage void smp_mce_self_interrupt(struct pt_regs *regs)
        exit_idle();
        irq_enter();
        mce_notify_irq();
+       mce_schedule_work();
        irq_exit();
 }
 #endif
@@ -357,6 +425,13 @@ static void mce_report_event(struct pt_regs *regs)
 {
        if (regs->flags & (X86_VM_MASK|X86_EFLAGS_IF)) {
                mce_notify_irq();
+               /*
+                * Triggering the work queue here is just an insurance
+                * policy in case the syscall exit notify handler
+                * doesn't run soon enough or ends up running on the
+                * wrong CPU (can happen when audit sleeps)
+                */
+               mce_schedule_work();
                return;
        }
 
@@ -731,6 +806,23 @@ reset:
        return ret;
 }
 
+/*
+ * Check if the address reported by the CPU is in a format we can parse.
+ * It would be possible to add code for most other cases, but all would
+ * be somewhat complicated (e.g. segment offset would require an instruction
+ * parser). So only support physical addresses up to page granularity for now.
+ */
+static int mce_usable_address(struct mce *m)
+{
+       if (!(m->status & MCI_STATUS_MISCV) || !(m->status & MCI_STATUS_ADDRV))
+               return 0;
+       if ((m->misc & 0x3f) > PAGE_SHIFT)
+               return 0;
+       if (((m->misc >> 6) & 7) != MCM_ADDR_PHYS)
+               return 0;
+       return 1;
+}
+
 static void mce_clear_state(unsigned long *toclear)
 {
        int i;
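
[Review note] The decode relies on the architectural IA32_MCi_MISC layout: bits 5:0 hold the position of the least significant valid bit of the address recorded in MCi_ADDR (the recoverable granularity), and bits 8:6 hold the address mode. A stand-alone sketch of the same check (constant values assumed from that layout, not taken from this patch):

/*
 * Stand-alone sketch of the IA32_MCi_MISC decode used by
 * mce_usable_address(): bits 5:0 = least significant valid address bit
 * (granularity), bits 8:6 = address mode. Constants are assumptions.
 */
#include <stdio.h>

#define PAGE_SHIFT      12
#define MCM_ADDR_PHYS   2               /* physical address mode */

static int misc_usable(unsigned long long misc)
{
        unsigned lsb  = misc & 0x3f;    /* granularity of MCi_ADDR in bits */
        unsigned mode = (misc >> 6) & 7;/* how to interpret MCi_ADDR */

        /* usable only if physical and valid to at least page granularity */
        return lsb <= PAGE_SHIFT && mode == MCM_ADDR_PHYS;
}

int main(void)
{
        /* physical mode, valid down to bit 12: page-granular, usable */
        printf("%d\n", misc_usable((MCM_ADDR_PHYS << 6) | 12));
        /* physical mode, only valid down to bit 20 (1 MB): not usable */
        printf("%d\n", misc_usable((MCM_ADDR_PHYS << 6) | 20));
        return 0;
}

So an error is only queued when the CPU reports a physical address valid to at least page granularity; anything coarser could not be mapped back to a single page for recovery.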
@@ -865,6 +957,16 @@ void do_machine_check(struct pt_regs *regs, long error_code)
                if (m.status & MCI_STATUS_ADDRV)
                        m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4);
 
+               /*
+                * Action optional error. Queue address for later processing.
+                * When the ring overflows we just ignore the AO error.
+                * RED-PEN add some logging mechanism when
+                * mce_usable_address or mce_ring_add fails.
+                * RED-PEN don't ignore overflow for tolerant == 0
+                */
+               if (severity == MCE_AO_SEVERITY && mce_usable_address(&m))
+                       mce_ring_add(m.addr >> PAGE_SHIFT);
+
                mce_get_rip(&m, regs);
                mce_log(&m);
 
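
[Review note] Only the page frame number crosses the exception-to-process-context boundary; once recovery works at page granularity the in-page offset carries no information. A trivial illustration (made-up address):

/* Illustration only: the ring stores page frame numbers, not raw
 * addresses. The address value below is made up. */
#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
        unsigned long long addr = 0x12345678ULL;        /* MCi_ADDR, physical */
        unsigned long pfn = addr >> PAGE_SHIFT;

        printf("addr %#llx -> pfn %#lx (page base %#llx)\n",
               addr, pfn, (unsigned long long)pfn << PAGE_SHIFT);
        return 0;
}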
@@ -916,6 +1018,36 @@ out:
 }
 EXPORT_SYMBOL_GPL(do_machine_check);
 
+/* Dummy to break the dependency; the actual code is in mm/memory-failure.c */
+void __attribute__((weak)) memory_failure(unsigned long pfn, int vector)
+{
+       printk(KERN_ERR "Action optional memory failure at %lx ignored\n", pfn);
+}
+
+/*
+ * Called after mce notification in process context. This code
+ * is allowed to sleep. Call the high level VM handler to process
+ * any corrupted pages.
+ * Assume that the work queue code only calls this one at a time
+ * per CPU.
+ * Note we don't disable preemption, so this code might run on the wrong
+ * CPU. In this case the event is picked up by the scheduled work queue.
+ * This is merely a fast path to expedite processing in some common
+ * cases.
+ */
+void mce_notify_process(void)
+{
+       unsigned long pfn;
+       mce_notify_irq();
+       while (mce_ring_get(&pfn))
+               memory_failure(pfn, MCE_VECTOR);
+}
+
+static void mce_process_work(struct work_struct *dummy)
+{
+       mce_notify_process();
+}
+
 #ifdef CONFIG_X86_MCE_INTEL
 /***
  * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog
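
[Review note] Declaring memory_failure() __attribute__((weak)) breaks the build dependency on the VM: the printk stub above is used only when no strong definition (the real handler in mm/memory-failure.c) is present in the image. A small stand-alone illustration of that linker behavior (hypothetical names):

/*
 * Stand-alone illustration of the weak-symbol pattern used for
 * memory_failure() above: a weak definition is a fallback that any
 * strong definition elsewhere in the image overrides at link time.
 */
#include <stdio.h>

/* fallback, used only when no strong handle_event() is linked in */
void __attribute__((weak)) handle_event(unsigned long id)
{
        printf("event %lu ignored (no real handler linked)\n", id);
}

int main(void)
{
        handle_event(42);
        return 0;
}

If another object file defines a non-weak handle_event(), the linker resolves the call to it and the stub goes unused; the caller needs no #ifdef or header change, which is exactly why do_machine_check() can call memory_failure() in configurations that don't build the real handler.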
@@ -1204,6 +1336,7 @@ void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
        mce_init();
        mce_cpu_features(c);
        mce_init_timer();
+       INIT_WORK(&__get_cpu_var(mce_work), mce_process_work);
 }
 
 /*
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index d5dc15bce005..4976888094f0 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -860,7 +860,7 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 #ifdef CONFIG_X86_NEW_MCE
        /* notify userspace of pending MCEs */
        if (thread_info_flags & _TIF_MCE_NOTIFY)
-               mce_notify_irq();
+               mce_notify_process();
 #endif /* CONFIG_X86_64 && CONFIG_X86_MCE */
 
        /* deal with pending signal delivery */
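
[Review note] After this patch an action-optional (AO) machine check can reach memory_failure() by three routes, all draining the same per-CPU ring: the MCE self-interrupt (smp_mce_self_interrupt() -> mce_schedule_work()), the insurance schedule_work() in mce_report_event(), and this _TIF_MCE_NOTIFY hook, which now calls mce_notify_process() on return to user space so the ring is normally drained in the faulting task's context without waiting for the workqueue.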
