diff options
author | Luck, Tony <tony.luck@intel.com> | 2015-01-05 19:44:42 -0500 |
---|---|---|
committer | Andy Lutomirski <luto@amacapital.net> | 2015-01-07 10:47:42 -0500 |
commit | d4812e169de44f4ab53ff671c6193c67de24da62 (patch) | |
tree | e923316c76c2af0d2858f63dc275c1aa516ab020 /arch | |
parent | bced35b65aefe53a6f77a9ed0ce1aea86e9d65a2 (diff) |
x86, mce: Get rid of TIF_MCE_NOTIFY and associated mce tricks
We now switch to the kernel stack when a machine check interrupts
during user mode. This means that we can perform recovery actions
in the tail of do_machine_check().
Acked-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/x86/include/asm/mce.h | 1 | ||||
-rw-r--r-- | arch/x86/include/asm/thread_info.h | 4 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 109 | ||||
-rw-r--r-- | arch/x86/kernel/signal.c | 6 |
4 files changed, 26 insertions, 94 deletions
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 51b26e895933..9b3de99dc004 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h | |||
@@ -190,7 +190,6 @@ enum mcp_flags { | |||
190 | void machine_check_poll(enum mcp_flags flags, mce_banks_t *b); | 190 | void machine_check_poll(enum mcp_flags flags, mce_banks_t *b); |
191 | 191 | ||
192 | int mce_notify_irq(void); | 192 | int mce_notify_irq(void); |
193 | void mce_notify_process(void); | ||
194 | 193 | ||
195 | DECLARE_PER_CPU(struct mce, injectm); | 194 | DECLARE_PER_CPU(struct mce, injectm); |
196 | 195 | ||
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 8b13b0fbda8e..e82e95abc92b 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h | |||
@@ -75,7 +75,6 @@ struct thread_info { | |||
75 | #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ | 75 | #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ |
76 | #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ | 76 | #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ |
77 | #define TIF_SECCOMP 8 /* secure computing */ | 77 | #define TIF_SECCOMP 8 /* secure computing */ |
78 | #define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ | ||
79 | #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ | 78 | #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ |
80 | #define TIF_UPROBE 12 /* breakpointed or singlestepping */ | 79 | #define TIF_UPROBE 12 /* breakpointed or singlestepping */ |
81 | #define TIF_NOTSC 16 /* TSC is not accessible in userland */ | 80 | #define TIF_NOTSC 16 /* TSC is not accessible in userland */ |
@@ -100,7 +99,6 @@ struct thread_info { | |||
100 | #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) | 99 | #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) |
101 | #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) | 100 | #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) |
102 | #define _TIF_SECCOMP (1 << TIF_SECCOMP) | 101 | #define _TIF_SECCOMP (1 << TIF_SECCOMP) |
103 | #define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY) | ||
104 | #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) | 102 | #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) |
105 | #define _TIF_UPROBE (1 << TIF_UPROBE) | 103 | #define _TIF_UPROBE (1 << TIF_UPROBE) |
106 | #define _TIF_NOTSC (1 << TIF_NOTSC) | 104 | #define _TIF_NOTSC (1 << TIF_NOTSC) |
@@ -140,7 +138,7 @@ struct thread_info { | |||
140 | 138 | ||
141 | /* Only used for 64 bit */ | 139 | /* Only used for 64 bit */ |
142 | #define _TIF_DO_NOTIFY_MASK \ | 140 | #define _TIF_DO_NOTIFY_MASK \ |
143 | (_TIF_SIGPENDING | _TIF_MCE_NOTIFY | _TIF_NOTIFY_RESUME | \ | 141 | (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | \ |
144 | _TIF_USER_RETURN_NOTIFY | _TIF_UPROBE) | 142 | _TIF_USER_RETURN_NOTIFY | _TIF_UPROBE) |
145 | 143 | ||
146 | /* flags to check in __switch_to() */ | 144 | /* flags to check in __switch_to() */ |
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 800d423f1e92..d23179900755 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -1004,51 +1004,6 @@ static void mce_clear_state(unsigned long *toclear) | |||
1004 | } | 1004 | } |
1005 | 1005 | ||
1006 | /* | 1006 | /* |
1007 | * Need to save faulting physical address associated with a process | ||
1008 | * in the machine check handler some place where we can grab it back | ||
1009 | * later in mce_notify_process() | ||
1010 | */ | ||
1011 | #define MCE_INFO_MAX 16 | ||
1012 | |||
1013 | struct mce_info { | ||
1014 | atomic_t inuse; | ||
1015 | struct task_struct *t; | ||
1016 | __u64 paddr; | ||
1017 | int restartable; | ||
1018 | } mce_info[MCE_INFO_MAX]; | ||
1019 | |||
1020 | static void mce_save_info(__u64 addr, int c) | ||
1021 | { | ||
1022 | struct mce_info *mi; | ||
1023 | |||
1024 | for (mi = mce_info; mi < &mce_info[MCE_INFO_MAX]; mi++) { | ||
1025 | if (atomic_cmpxchg(&mi->inuse, 0, 1) == 0) { | ||
1026 | mi->t = current; | ||
1027 | mi->paddr = addr; | ||
1028 | mi->restartable = c; | ||
1029 | return; | ||
1030 | } | ||
1031 | } | ||
1032 | |||
1033 | mce_panic("Too many concurrent recoverable errors", NULL, NULL); | ||
1034 | } | ||
1035 | |||
1036 | static struct mce_info *mce_find_info(void) | ||
1037 | { | ||
1038 | struct mce_info *mi; | ||
1039 | |||
1040 | for (mi = mce_info; mi < &mce_info[MCE_INFO_MAX]; mi++) | ||
1041 | if (atomic_read(&mi->inuse) && mi->t == current) | ||
1042 | return mi; | ||
1043 | return NULL; | ||
1044 | } | ||
1045 | |||
1046 | static void mce_clear_info(struct mce_info *mi) | ||
1047 | { | ||
1048 | atomic_set(&mi->inuse, 0); | ||
1049 | } | ||
1050 | |||
1051 | /* | ||
1052 | * The actual machine check handler. This only handles real | 1007 | * The actual machine check handler. This only handles real |
1053 | * exceptions when something got corrupted coming in through int 18. | 1008 | * exceptions when something got corrupted coming in through int 18. |
1054 | * | 1009 | * |
@@ -1086,6 +1041,8 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
1086 | DECLARE_BITMAP(toclear, MAX_NR_BANKS); | 1041 | DECLARE_BITMAP(toclear, MAX_NR_BANKS); |
1087 | DECLARE_BITMAP(valid_banks, MAX_NR_BANKS); | 1042 | DECLARE_BITMAP(valid_banks, MAX_NR_BANKS); |
1088 | char *msg = "Unknown"; | 1043 | char *msg = "Unknown"; |
1044 | u64 recover_paddr = ~0ull; | ||
1045 | int flags = MF_ACTION_REQUIRED; | ||
1089 | 1046 | ||
1090 | prev_state = ist_enter(regs); | 1047 | prev_state = ist_enter(regs); |
1091 | 1048 | ||
@@ -1207,9 +1164,9 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
1207 | if (no_way_out) | 1164 | if (no_way_out) |
1208 | mce_panic("Fatal machine check on current CPU", &m, msg); | 1165 | mce_panic("Fatal machine check on current CPU", &m, msg); |
1209 | if (worst == MCE_AR_SEVERITY) { | 1166 | if (worst == MCE_AR_SEVERITY) { |
1210 | /* schedule action before return to userland */ | 1167 | recover_paddr = m.addr; |
1211 | mce_save_info(m.addr, m.mcgstatus & MCG_STATUS_RIPV); | 1168 | if (!(m.mcgstatus & MCG_STATUS_RIPV)) |
1212 | set_thread_flag(TIF_MCE_NOTIFY); | 1169 | flags |= MF_MUST_KILL; |
1213 | } else if (kill_it) { | 1170 | } else if (kill_it) { |
1214 | force_sig(SIGBUS, current); | 1171 | force_sig(SIGBUS, current); |
1215 | } | 1172 | } |
@@ -1220,6 +1177,26 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
1220 | mce_wrmsrl(MSR_IA32_MCG_STATUS, 0); | 1177 | mce_wrmsrl(MSR_IA32_MCG_STATUS, 0); |
1221 | out: | 1178 | out: |
1222 | sync_core(); | 1179 | sync_core(); |
1180 | |||
1181 | if (recover_paddr == ~0ull) | ||
1182 | goto done; | ||
1183 | |||
1184 | pr_err("Uncorrected hardware memory error in user-access at %llx", | ||
1185 | recover_paddr); | ||
1186 | /* | ||
1187 | * We must call memory_failure() here even if the current process is | ||
1188 | * doomed. We still need to mark the page as poisoned and alert any | ||
1189 | * other users of the page. | ||
1190 | */ | ||
1191 | ist_begin_non_atomic(regs); | ||
1192 | local_irq_enable(); | ||
1193 | if (memory_failure(recover_paddr >> PAGE_SHIFT, MCE_VECTOR, flags) < 0) { | ||
1194 | pr_err("Memory error not recovered"); | ||
1195 | force_sig(SIGBUS, current); | ||
1196 | } | ||
1197 | local_irq_disable(); | ||
1198 | ist_end_non_atomic(); | ||
1199 | done: | ||
1223 | ist_exit(regs, prev_state); | 1200 | ist_exit(regs, prev_state); |
1224 | } | 1201 | } |
1225 | EXPORT_SYMBOL_GPL(do_machine_check); | 1202 | EXPORT_SYMBOL_GPL(do_machine_check); |
@@ -1238,42 +1215,6 @@ int memory_failure(unsigned long pfn, int vector, int flags) | |||
1238 | #endif | 1215 | #endif |
1239 | 1216 | ||
1240 | /* | 1217 | /* |
1241 | * Called in process context that interrupted by MCE and marked with | ||
1242 | * TIF_MCE_NOTIFY, just before returning to erroneous userland. | ||
1243 | * This code is allowed to sleep. | ||
1244 | * Attempt possible recovery such as calling the high level VM handler to | ||
1245 | * process any corrupted pages, and kill/signal current process if required. | ||
1246 | * Action required errors are handled here. | ||
1247 | */ | ||
1248 | void mce_notify_process(void) | ||
1249 | { | ||
1250 | unsigned long pfn; | ||
1251 | struct mce_info *mi = mce_find_info(); | ||
1252 | int flags = MF_ACTION_REQUIRED; | ||
1253 | |||
1254 | if (!mi) | ||
1255 | mce_panic("Lost physical address for unconsumed uncorrectable error", NULL, NULL); | ||
1256 | pfn = mi->paddr >> PAGE_SHIFT; | ||
1257 | |||
1258 | clear_thread_flag(TIF_MCE_NOTIFY); | ||
1259 | |||
1260 | pr_err("Uncorrected hardware memory error in user-access at %llx", | ||
1261 | mi->paddr); | ||
1262 | /* | ||
1263 | * We must call memory_failure() here even if the current process is | ||
1264 | * doomed. We still need to mark the page as poisoned and alert any | ||
1265 | * other users of the page. | ||
1266 | */ | ||
1267 | if (!mi->restartable) | ||
1268 | flags |= MF_MUST_KILL; | ||
1269 | if (memory_failure(pfn, MCE_VECTOR, flags) < 0) { | ||
1270 | pr_err("Memory error not recovered"); | ||
1271 | force_sig(SIGBUS, current); | ||
1272 | } | ||
1273 | mce_clear_info(mi); | ||
1274 | } | ||
1275 | |||
1276 | /* | ||
1277 | * Action optional processing happens here (picking up | 1218 | * Action optional processing happens here (picking up |
1278 | * from the list of faulting pages that do_machine_check() | 1219 | * from the list of faulting pages that do_machine_check() |
1279 | * placed into the "ring"). | 1220 | * placed into the "ring"). |
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index ed37a768d0fc..2a33c8f68319 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
@@ -740,12 +740,6 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) | |||
740 | { | 740 | { |
741 | user_exit(); | 741 | user_exit(); |
742 | 742 | ||
743 | #ifdef CONFIG_X86_MCE | ||
744 | /* notify userspace of pending MCEs */ | ||
745 | if (thread_info_flags & _TIF_MCE_NOTIFY) | ||
746 | mce_notify_process(); | ||
747 | #endif /* CONFIG_X86_64 && CONFIG_X86_MCE */ | ||
748 | |||
749 | if (thread_info_flags & _TIF_UPROBE) | 743 | if (thread_info_flags & _TIF_UPROBE) |
750 | uprobe_notify_resume(regs); | 744 | uprobe_notify_resume(regs); |
751 | 745 | ||