diff options
| -rw-r--r-- | arch/x86/include/asm/mce.h | 1 | ||||
| -rw-r--r-- | arch/x86/include/asm/thread_info.h | 4 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 109 | ||||
| -rw-r--r-- | arch/x86/kernel/signal.c | 6 |
4 files changed, 26 insertions, 94 deletions
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 51b26e895933..9b3de99dc004 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h | |||
| @@ -190,7 +190,6 @@ enum mcp_flags { | |||
| 190 | void machine_check_poll(enum mcp_flags flags, mce_banks_t *b); | 190 | void machine_check_poll(enum mcp_flags flags, mce_banks_t *b); |
| 191 | 191 | ||
| 192 | int mce_notify_irq(void); | 192 | int mce_notify_irq(void); |
| 193 | void mce_notify_process(void); | ||
| 194 | 193 | ||
| 195 | DECLARE_PER_CPU(struct mce, injectm); | 194 | DECLARE_PER_CPU(struct mce, injectm); |
| 196 | 195 | ||
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 8b13b0fbda8e..e82e95abc92b 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h | |||
| @@ -75,7 +75,6 @@ struct thread_info { | |||
| 75 | #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ | 75 | #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ |
| 76 | #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ | 76 | #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ |
| 77 | #define TIF_SECCOMP 8 /* secure computing */ | 77 | #define TIF_SECCOMP 8 /* secure computing */ |
| 78 | #define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ | ||
| 79 | #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ | 78 | #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ |
| 80 | #define TIF_UPROBE 12 /* breakpointed or singlestepping */ | 79 | #define TIF_UPROBE 12 /* breakpointed or singlestepping */ |
| 81 | #define TIF_NOTSC 16 /* TSC is not accessible in userland */ | 80 | #define TIF_NOTSC 16 /* TSC is not accessible in userland */ |
| @@ -100,7 +99,6 @@ struct thread_info { | |||
| 100 | #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) | 99 | #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) |
| 101 | #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) | 100 | #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) |
| 102 | #define _TIF_SECCOMP (1 << TIF_SECCOMP) | 101 | #define _TIF_SECCOMP (1 << TIF_SECCOMP) |
| 103 | #define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY) | ||
| 104 | #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) | 102 | #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) |
| 105 | #define _TIF_UPROBE (1 << TIF_UPROBE) | 103 | #define _TIF_UPROBE (1 << TIF_UPROBE) |
| 106 | #define _TIF_NOTSC (1 << TIF_NOTSC) | 104 | #define _TIF_NOTSC (1 << TIF_NOTSC) |
| @@ -140,7 +138,7 @@ struct thread_info { | |||
| 140 | 138 | ||
| 141 | /* Only used for 64 bit */ | 139 | /* Only used for 64 bit */ |
| 142 | #define _TIF_DO_NOTIFY_MASK \ | 140 | #define _TIF_DO_NOTIFY_MASK \ |
| 143 | (_TIF_SIGPENDING | _TIF_MCE_NOTIFY | _TIF_NOTIFY_RESUME | \ | 141 | (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | \ |
| 144 | _TIF_USER_RETURN_NOTIFY | _TIF_UPROBE) | 142 | _TIF_USER_RETURN_NOTIFY | _TIF_UPROBE) |
| 145 | 143 | ||
| 146 | /* flags to check in __switch_to() */ | 144 | /* flags to check in __switch_to() */ |
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 800d423f1e92..d23179900755 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
| @@ -1004,51 +1004,6 @@ static void mce_clear_state(unsigned long *toclear) | |||
| 1004 | } | 1004 | } |
| 1005 | 1005 | ||
| 1006 | /* | 1006 | /* |
| 1007 | * Need to save faulting physical address associated with a process | ||
| 1008 | * in the machine check handler some place where we can grab it back | ||
| 1009 | * later in mce_notify_process() | ||
| 1010 | */ | ||
| 1011 | #define MCE_INFO_MAX 16 | ||
| 1012 | |||
| 1013 | struct mce_info { | ||
| 1014 | atomic_t inuse; | ||
| 1015 | struct task_struct *t; | ||
| 1016 | __u64 paddr; | ||
| 1017 | int restartable; | ||
| 1018 | } mce_info[MCE_INFO_MAX]; | ||
| 1019 | |||
| 1020 | static void mce_save_info(__u64 addr, int c) | ||
| 1021 | { | ||
| 1022 | struct mce_info *mi; | ||
| 1023 | |||
| 1024 | for (mi = mce_info; mi < &mce_info[MCE_INFO_MAX]; mi++) { | ||
| 1025 | if (atomic_cmpxchg(&mi->inuse, 0, 1) == 0) { | ||
| 1026 | mi->t = current; | ||
| 1027 | mi->paddr = addr; | ||
| 1028 | mi->restartable = c; | ||
| 1029 | return; | ||
| 1030 | } | ||
| 1031 | } | ||
| 1032 | |||
| 1033 | mce_panic("Too many concurrent recoverable errors", NULL, NULL); | ||
| 1034 | } | ||
| 1035 | |||
| 1036 | static struct mce_info *mce_find_info(void) | ||
| 1037 | { | ||
| 1038 | struct mce_info *mi; | ||
| 1039 | |||
| 1040 | for (mi = mce_info; mi < &mce_info[MCE_INFO_MAX]; mi++) | ||
| 1041 | if (atomic_read(&mi->inuse) && mi->t == current) | ||
| 1042 | return mi; | ||
| 1043 | return NULL; | ||
| 1044 | } | ||
| 1045 | |||
| 1046 | static void mce_clear_info(struct mce_info *mi) | ||
| 1047 | { | ||
| 1048 | atomic_set(&mi->inuse, 0); | ||
| 1049 | } | ||
| 1050 | |||
| 1051 | /* | ||
| 1052 | * The actual machine check handler. This only handles real | 1007 | * The actual machine check handler. This only handles real |
| 1053 | * exceptions when something got corrupted coming in through int 18. | 1008 | * exceptions when something got corrupted coming in through int 18. |
| 1054 | * | 1009 | * |
| @@ -1086,6 +1041,8 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
| 1086 | DECLARE_BITMAP(toclear, MAX_NR_BANKS); | 1041 | DECLARE_BITMAP(toclear, MAX_NR_BANKS); |
| 1087 | DECLARE_BITMAP(valid_banks, MAX_NR_BANKS); | 1042 | DECLARE_BITMAP(valid_banks, MAX_NR_BANKS); |
| 1088 | char *msg = "Unknown"; | 1043 | char *msg = "Unknown"; |
| 1044 | u64 recover_paddr = ~0ull; | ||
| 1045 | int flags = MF_ACTION_REQUIRED; | ||
| 1089 | 1046 | ||
| 1090 | prev_state = ist_enter(regs); | 1047 | prev_state = ist_enter(regs); |
| 1091 | 1048 | ||
| @@ -1207,9 +1164,9 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
| 1207 | if (no_way_out) | 1164 | if (no_way_out) |
| 1208 | mce_panic("Fatal machine check on current CPU", &m, msg); | 1165 | mce_panic("Fatal machine check on current CPU", &m, msg); |
| 1209 | if (worst == MCE_AR_SEVERITY) { | 1166 | if (worst == MCE_AR_SEVERITY) { |
| 1210 | /* schedule action before return to userland */ | 1167 | recover_paddr = m.addr; |
| 1211 | mce_save_info(m.addr, m.mcgstatus & MCG_STATUS_RIPV); | 1168 | if (!(m.mcgstatus & MCG_STATUS_RIPV)) |
| 1212 | set_thread_flag(TIF_MCE_NOTIFY); | 1169 | flags |= MF_MUST_KILL; |
| 1213 | } else if (kill_it) { | 1170 | } else if (kill_it) { |
| 1214 | force_sig(SIGBUS, current); | 1171 | force_sig(SIGBUS, current); |
| 1215 | } | 1172 | } |
| @@ -1220,6 +1177,26 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
| 1220 | mce_wrmsrl(MSR_IA32_MCG_STATUS, 0); | 1177 | mce_wrmsrl(MSR_IA32_MCG_STATUS, 0); |
| 1221 | out: | 1178 | out: |
| 1222 | sync_core(); | 1179 | sync_core(); |
| 1180 | |||
| 1181 | if (recover_paddr == ~0ull) | ||
| 1182 | goto done; | ||
| 1183 | |||
| 1184 | pr_err("Uncorrected hardware memory error in user-access at %llx", | ||
| 1185 | recover_paddr); | ||
| 1186 | /* | ||
| 1187 | * We must call memory_failure() here even if the current process is | ||
| 1188 | * doomed. We still need to mark the page as poisoned and alert any | ||
| 1189 | * other users of the page. | ||
| 1190 | */ | ||
| 1191 | ist_begin_non_atomic(regs); | ||
| 1192 | local_irq_enable(); | ||
| 1193 | if (memory_failure(recover_paddr >> PAGE_SHIFT, MCE_VECTOR, flags) < 0) { | ||
| 1194 | pr_err("Memory error not recovered"); | ||
| 1195 | force_sig(SIGBUS, current); | ||
| 1196 | } | ||
| 1197 | local_irq_disable(); | ||
| 1198 | ist_end_non_atomic(); | ||
| 1199 | done: | ||
| 1223 | ist_exit(regs, prev_state); | 1200 | ist_exit(regs, prev_state); |
| 1224 | } | 1201 | } |
| 1225 | EXPORT_SYMBOL_GPL(do_machine_check); | 1202 | EXPORT_SYMBOL_GPL(do_machine_check); |
| @@ -1238,42 +1215,6 @@ int memory_failure(unsigned long pfn, int vector, int flags) | |||
| 1238 | #endif | 1215 | #endif |
| 1239 | 1216 | ||
| 1240 | /* | 1217 | /* |
| 1241 | * Called in process context that interrupted by MCE and marked with | ||
| 1242 | * TIF_MCE_NOTIFY, just before returning to erroneous userland. | ||
| 1243 | * This code is allowed to sleep. | ||
| 1244 | * Attempt possible recovery such as calling the high level VM handler to | ||
| 1245 | * process any corrupted pages, and kill/signal current process if required. | ||
| 1246 | * Action required errors are handled here. | ||
| 1247 | */ | ||
| 1248 | void mce_notify_process(void) | ||
| 1249 | { | ||
| 1250 | unsigned long pfn; | ||
| 1251 | struct mce_info *mi = mce_find_info(); | ||
| 1252 | int flags = MF_ACTION_REQUIRED; | ||
| 1253 | |||
| 1254 | if (!mi) | ||
| 1255 | mce_panic("Lost physical address for unconsumed uncorrectable error", NULL, NULL); | ||
| 1256 | pfn = mi->paddr >> PAGE_SHIFT; | ||
| 1257 | |||
| 1258 | clear_thread_flag(TIF_MCE_NOTIFY); | ||
| 1259 | |||
| 1260 | pr_err("Uncorrected hardware memory error in user-access at %llx", | ||
| 1261 | mi->paddr); | ||
| 1262 | /* | ||
| 1263 | * We must call memory_failure() here even if the current process is | ||
| 1264 | * doomed. We still need to mark the page as poisoned and alert any | ||
| 1265 | * other users of the page. | ||
| 1266 | */ | ||
| 1267 | if (!mi->restartable) | ||
| 1268 | flags |= MF_MUST_KILL; | ||
| 1269 | if (memory_failure(pfn, MCE_VECTOR, flags) < 0) { | ||
| 1270 | pr_err("Memory error not recovered"); | ||
| 1271 | force_sig(SIGBUS, current); | ||
| 1272 | } | ||
| 1273 | mce_clear_info(mi); | ||
| 1274 | } | ||
| 1275 | |||
| 1276 | /* | ||
| 1277 | * Action optional processing happens here (picking up | 1218 | * Action optional processing happens here (picking up |
| 1278 | * from the list of faulting pages that do_machine_check() | 1219 | * from the list of faulting pages that do_machine_check() |
| 1279 | * placed into the "ring"). | 1220 | * placed into the "ring"). |
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index ed37a768d0fc..2a33c8f68319 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
| @@ -740,12 +740,6 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) | |||
| 740 | { | 740 | { |
| 741 | user_exit(); | 741 | user_exit(); |
| 742 | 742 | ||
| 743 | #ifdef CONFIG_X86_MCE | ||
| 744 | /* notify userspace of pending MCEs */ | ||
| 745 | if (thread_info_flags & _TIF_MCE_NOTIFY) | ||
| 746 | mce_notify_process(); | ||
| 747 | #endif /* CONFIG_X86_64 && CONFIG_X86_MCE */ | ||
| 748 | |||
| 749 | if (thread_info_flags & _TIF_UPROBE) | 743 | if (thread_info_flags & _TIF_UPROBE) |
| 750 | uprobe_notify_resume(regs); | 744 | uprobe_notify_resume(regs); |
| 751 | 745 | ||
