diff options
author | Tony Luck <tony.luck@intel.com> | 2011-12-13 12:27:58 -0500 |
---|---|---|
committer | Tony Luck <tony.luck@intel.com> | 2012-01-03 15:06:38 -0500 |
commit | 7329bbeb92740f35d64a8860ae7837ff4db27fe0 (patch) | |
tree | df4decab54463fd2dee4979f1aa38615f1ef2f3c /mm/memory-failure.c | |
parent | cd42f4a3b2b1c4cbd997363dc57821953d73fd87 (diff) |
HWPOISON: Add code to handle "action required" errors.
Add a new flag bit "MF_ACTION_REQUIRED" to be used by machine check
code to force a signal with si_code = BUS_MCEERR_AR in the case
where the error occurs in processor execution context. Pass the
flags argument along the call chain:
memory_failure()
  -> hwpoison_user_mappings()
    -> kill_procs()
      -> kill_proc()
Drop the "_ao" suffix from kill_procs_ao() and kill_proc_ao() since
they can now handle "action required" as well as "action optional" errors.
Acked-by: Borislav Petkov <bp@amd64.org>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'mm/memory-failure.c')
-rw-r--r-- | mm/memory-failure.c | 50 |
1 files changed, 29 insertions, 21 deletions
diff --git a/mm/memory-failure.c b/mm/memory-failure.c index ab259bb0adc5..95fd307ebb30 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c | |||
@@ -187,33 +187,40 @@ int hwpoison_filter(struct page *p) | |||
187 | EXPORT_SYMBOL_GPL(hwpoison_filter); | 187 | EXPORT_SYMBOL_GPL(hwpoison_filter); |
188 | 188 | ||
189 | /* | 189 | /* |
190 | * Send all the processes who have the page mapped an ``action optional'' | 190 | * Send all the processes who have the page mapped a signal. |
191 | * signal. | 191 | * ``action optional'' if they are not immediately affected by the error |
192 | * ``action required'' if error happened in current execution context | ||
192 | */ | 193 | */ |
193 | static int kill_proc_ao(struct task_struct *t, unsigned long addr, int trapno, | 194 | static int kill_proc(struct task_struct *t, unsigned long addr, int trapno, |
194 | unsigned long pfn, struct page *page) | 195 | unsigned long pfn, struct page *page, int flags) |
195 | { | 196 | { |
196 | struct siginfo si; | 197 | struct siginfo si; |
197 | int ret; | 198 | int ret; |
198 | 199 | ||
199 | printk(KERN_ERR | 200 | printk(KERN_ERR |
200 | "MCE %#lx: Killing %s:%d early due to hardware memory corruption\n", | 201 | "MCE %#lx: Killing %s:%d due to hardware memory corruption\n", |
201 | pfn, t->comm, t->pid); | 202 | pfn, t->comm, t->pid); |
202 | si.si_signo = SIGBUS; | 203 | si.si_signo = SIGBUS; |
203 | si.si_errno = 0; | 204 | si.si_errno = 0; |
204 | si.si_code = BUS_MCEERR_AO; | ||
205 | si.si_addr = (void *)addr; | 205 | si.si_addr = (void *)addr; |
206 | #ifdef __ARCH_SI_TRAPNO | 206 | #ifdef __ARCH_SI_TRAPNO |
207 | si.si_trapno = trapno; | 207 | si.si_trapno = trapno; |
208 | #endif | 208 | #endif |
209 | si.si_addr_lsb = compound_trans_order(compound_head(page)) + PAGE_SHIFT; | 209 | si.si_addr_lsb = compound_trans_order(compound_head(page)) + PAGE_SHIFT; |
210 | /* | 210 | |
211 | * Don't use force here, it's convenient if the signal | 211 | if ((flags & MF_ACTION_REQUIRED) && t == current) { |
212 | * can be temporarily blocked. | 212 | si.si_code = BUS_MCEERR_AR; |
213 | * This could cause a loop when the user sets SIGBUS | 213 | ret = force_sig_info(SIGBUS, &si, t); |
214 | * to SIG_IGN, but hopefully no one will do that? | 214 | } else { |
215 | */ | 215 | /* |
216 | ret = send_sig_info(SIGBUS, &si, t); /* synchronous? */ | 216 | * Don't use force here, it's convenient if the signal |
217 | * can be temporarily blocked. | ||
218 | * This could cause a loop when the user sets SIGBUS | ||
219 | * to SIG_IGN, but hopefully no one will do that? | ||
220 | */ | ||
221 | si.si_code = BUS_MCEERR_AO; | ||
222 | ret = send_sig_info(SIGBUS, &si, t); /* synchronous? */ | ||
223 | } | ||
217 | if (ret < 0) | 224 | if (ret < 0) |
218 | printk(KERN_INFO "MCE: Error sending signal to %s:%d: %d\n", | 225 | printk(KERN_INFO "MCE: Error sending signal to %s:%d: %d\n", |
219 | t->comm, t->pid, ret); | 226 | t->comm, t->pid, ret); |
@@ -338,8 +345,9 @@ static void add_to_kill(struct task_struct *tsk, struct page *p, | |||
338 | * Also when FAIL is set do a force kill because something went | 345 | * Also when FAIL is set do a force kill because something went |
339 | * wrong earlier. | 346 | * wrong earlier. |
340 | */ | 347 | */ |
341 | static void kill_procs_ao(struct list_head *to_kill, int doit, int trapno, | 348 | static void kill_procs(struct list_head *to_kill, int doit, int trapno, |
342 | int fail, struct page *page, unsigned long pfn) | 349 | int fail, struct page *page, unsigned long pfn, |
350 | int flags) | ||
343 | { | 351 | { |
344 | struct to_kill *tk, *next; | 352 | struct to_kill *tk, *next; |
345 | 353 | ||
@@ -363,8 +371,8 @@ static void kill_procs_ao(struct list_head *to_kill, int doit, int trapno, | |||
363 | * check for that, but we need to tell the | 371 | * check for that, but we need to tell the |
364 | * process anyways. | 372 | * process anyways. |
365 | */ | 373 | */ |
366 | else if (kill_proc_ao(tk->tsk, tk->addr, trapno, | 374 | else if (kill_proc(tk->tsk, tk->addr, trapno, |
367 | pfn, page) < 0) | 375 | pfn, page, flags) < 0) |
368 | printk(KERN_ERR | 376 | printk(KERN_ERR |
369 | "MCE %#lx: Cannot send advisory machine check signal to %s:%d\n", | 377 | "MCE %#lx: Cannot send advisory machine check signal to %s:%d\n", |
370 | pfn, tk->tsk->comm, tk->tsk->pid); | 378 | pfn, tk->tsk->comm, tk->tsk->pid); |
@@ -844,7 +852,7 @@ static int page_action(struct page_state *ps, struct page *p, | |||
844 | * the pages and send SIGBUS to the processes if the data was dirty. | 852 | * the pages and send SIGBUS to the processes if the data was dirty. |
845 | */ | 853 | */ |
846 | static int hwpoison_user_mappings(struct page *p, unsigned long pfn, | 854 | static int hwpoison_user_mappings(struct page *p, unsigned long pfn, |
847 | int trapno) | 855 | int trapno, int flags) |
848 | { | 856 | { |
849 | enum ttu_flags ttu = TTU_UNMAP | TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS; | 857 | enum ttu_flags ttu = TTU_UNMAP | TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS; |
850 | struct address_space *mapping; | 858 | struct address_space *mapping; |
@@ -962,8 +970,8 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn, | |||
962 | * use a more force-full uncatchable kill to prevent | 970 | * use a more force-full uncatchable kill to prevent |
963 | * any accesses to the poisoned memory. | 971 | * any accesses to the poisoned memory. |
964 | */ | 972 | */ |
965 | kill_procs_ao(&tokill, !!PageDirty(ppage), trapno, | 973 | kill_procs(&tokill, !!PageDirty(ppage), trapno, |
966 | ret != SWAP_SUCCESS, p, pfn); | 974 | ret != SWAP_SUCCESS, p, pfn, flags); |
967 | 975 | ||
968 | return ret; | 976 | return ret; |
969 | } | 977 | } |
@@ -1148,7 +1156,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags) | |||
1148 | * Now take care of user space mappings. | 1156 | * Now take care of user space mappings. |
1149 | * Abort on fail: __delete_from_page_cache() assumes unmapped page. | 1157 | * Abort on fail: __delete_from_page_cache() assumes unmapped page. |
1150 | */ | 1158 | */ |
1151 | if (hwpoison_user_mappings(p, pfn, trapno) != SWAP_SUCCESS) { | 1159 | if (hwpoison_user_mappings(p, pfn, trapno, flags) != SWAP_SUCCESS) { |
1152 | printk(KERN_ERR "MCE %#lx: cannot unmap page, give up\n", pfn); | 1160 | printk(KERN_ERR "MCE %#lx: cannot unmap page, give up\n", pfn); |
1153 | res = -EBUSY; | 1161 | res = -EBUSY; |
1154 | goto out; | 1162 | goto out; |