diff options
author | Ingo Molnar <mingo@elte.hu> | 2012-01-26 05:40:13 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2012-01-26 05:40:13 -0500 |
commit | 4e9f44ba29f20484615a461244bfd3a419391490 (patch) | |
tree | 490dd38bb8d14765327cee0be2f9731254e9c402 /mm | |
parent | 87f71ae2dd7471c1b4c94100be1f218e91dc64c3 (diff) | |
parent | 5f7b88d51e89771f64c15903b96b5933dd0bc6d8 (diff) |
Merge tag 'mce-recovery-for-tip' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras into x86/mce
Implement MCE recovery for the data load error path and assorted cleanups.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/hwpoison-inject.c | 4 | ||||
-rw-r--r-- | mm/madvise.c | 2 | ||||
-rw-r--r-- | mm/memory-failure.c | 96 |
3 files changed, 53 insertions, 49 deletions
diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c index c7fc7fd00e32..cc448bb983ba 100644 --- a/mm/hwpoison-inject.c +++ b/mm/hwpoison-inject.c | |||
@@ -45,7 +45,7 @@ static int hwpoison_inject(void *data, u64 val) | |||
45 | * do a racy check with elevated page count, to make sure PG_hwpoison | 45 | * do a racy check with elevated page count, to make sure PG_hwpoison |
46 | * will only be set for the targeted owner (or on a free page). | 46 | * will only be set for the targeted owner (or on a free page). |
47 | * We temporarily take page lock for try_get_mem_cgroup_from_page(). | 47 | * We temporarily take page lock for try_get_mem_cgroup_from_page(). |
48 | * __memory_failure() will redo the check reliably inside page lock. | 48 | * memory_failure() will redo the check reliably inside page lock. |
49 | */ | 49 | */ |
50 | lock_page(hpage); | 50 | lock_page(hpage); |
51 | err = hwpoison_filter(hpage); | 51 | err = hwpoison_filter(hpage); |
@@ -55,7 +55,7 @@ static int hwpoison_inject(void *data, u64 val) | |||
55 | 55 | ||
56 | inject: | 56 | inject: |
57 | printk(KERN_INFO "Injecting memory failure at pfn %lx\n", pfn); | 57 | printk(KERN_INFO "Injecting memory failure at pfn %lx\n", pfn); |
58 | return __memory_failure(pfn, 18, MF_COUNT_INCREASED); | 58 | return memory_failure(pfn, 18, MF_COUNT_INCREASED); |
59 | } | 59 | } |
60 | 60 | ||
61 | static int hwpoison_unpoison(void *data, u64 val) | 61 | static int hwpoison_unpoison(void *data, u64 val) |
diff --git a/mm/madvise.c b/mm/madvise.c index 74bf193eff04..f5ab745672b7 100644 --- a/mm/madvise.c +++ b/mm/madvise.c | |||
@@ -251,7 +251,7 @@ static int madvise_hwpoison(int bhv, unsigned long start, unsigned long end) | |||
251 | printk(KERN_INFO "Injecting memory failure for page %lx at %lx\n", | 251 | printk(KERN_INFO "Injecting memory failure for page %lx at %lx\n", |
252 | page_to_pfn(p), start); | 252 | page_to_pfn(p), start); |
253 | /* Ignore return value for now */ | 253 | /* Ignore return value for now */ |
254 | __memory_failure(page_to_pfn(p), 0, MF_COUNT_INCREASED); | 254 | memory_failure(page_to_pfn(p), 0, MF_COUNT_INCREASED); |
255 | } | 255 | } |
256 | return ret; | 256 | return ret; |
257 | } | 257 | } |
diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 56080ea36140..0f6033b01ffc 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c | |||
@@ -187,33 +187,40 @@ int hwpoison_filter(struct page *p) | |||
187 | EXPORT_SYMBOL_GPL(hwpoison_filter); | 187 | EXPORT_SYMBOL_GPL(hwpoison_filter); |
188 | 188 | ||
189 | /* | 189 | /* |
190 | * Send all the processes who have the page mapped an ``action optional'' | 190 | * Send all the processes who have the page mapped a signal. |
191 | * signal. | 191 | * ``action optional'' if they are not immediately affected by the error |
192 | * ``action required'' if error happened in current execution context | ||
192 | */ | 193 | */ |
193 | static int kill_proc_ao(struct task_struct *t, unsigned long addr, int trapno, | 194 | static int kill_proc(struct task_struct *t, unsigned long addr, int trapno, |
194 | unsigned long pfn, struct page *page) | 195 | unsigned long pfn, struct page *page, int flags) |
195 | { | 196 | { |
196 | struct siginfo si; | 197 | struct siginfo si; |
197 | int ret; | 198 | int ret; |
198 | 199 | ||
199 | printk(KERN_ERR | 200 | printk(KERN_ERR |
200 | "MCE %#lx: Killing %s:%d early due to hardware memory corruption\n", | 201 | "MCE %#lx: Killing %s:%d due to hardware memory corruption\n", |
201 | pfn, t->comm, t->pid); | 202 | pfn, t->comm, t->pid); |
202 | si.si_signo = SIGBUS; | 203 | si.si_signo = SIGBUS; |
203 | si.si_errno = 0; | 204 | si.si_errno = 0; |
204 | si.si_code = BUS_MCEERR_AO; | ||
205 | si.si_addr = (void *)addr; | 205 | si.si_addr = (void *)addr; |
206 | #ifdef __ARCH_SI_TRAPNO | 206 | #ifdef __ARCH_SI_TRAPNO |
207 | si.si_trapno = trapno; | 207 | si.si_trapno = trapno; |
208 | #endif | 208 | #endif |
209 | si.si_addr_lsb = compound_trans_order(compound_head(page)) + PAGE_SHIFT; | 209 | si.si_addr_lsb = compound_trans_order(compound_head(page)) + PAGE_SHIFT; |
210 | /* | 210 | |
211 | * Don't use force here, it's convenient if the signal | 211 | if ((flags & MF_ACTION_REQUIRED) && t == current) { |
212 | * can be temporarily blocked. | 212 | si.si_code = BUS_MCEERR_AR; |
213 | * This could cause a loop when the user sets SIGBUS | 213 | ret = force_sig_info(SIGBUS, &si, t); |
214 | * to SIG_IGN, but hopefully no one will do that? | 214 | } else { |
215 | */ | 215 | /* |
216 | ret = send_sig_info(SIGBUS, &si, t); /* synchronous? */ | 216 | * Don't use force here, it's convenient if the signal |
217 | * can be temporarily blocked. | ||
218 | * This could cause a loop when the user sets SIGBUS | ||
219 | * to SIG_IGN, but hopefully no one will do that? | ||
220 | */ | ||
221 | si.si_code = BUS_MCEERR_AO; | ||
222 | ret = send_sig_info(SIGBUS, &si, t); /* synchronous? */ | ||
223 | } | ||
217 | if (ret < 0) | 224 | if (ret < 0) |
218 | printk(KERN_INFO "MCE: Error sending signal to %s:%d: %d\n", | 225 | printk(KERN_INFO "MCE: Error sending signal to %s:%d: %d\n", |
219 | t->comm, t->pid, ret); | 226 | t->comm, t->pid, ret); |
@@ -338,8 +345,9 @@ static void add_to_kill(struct task_struct *tsk, struct page *p, | |||
338 | * Also when FAIL is set do a force kill because something went | 345 | * Also when FAIL is set do a force kill because something went |
339 | * wrong earlier. | 346 | * wrong earlier. |
340 | */ | 347 | */ |
341 | static void kill_procs_ao(struct list_head *to_kill, int doit, int trapno, | 348 | static void kill_procs(struct list_head *to_kill, int doit, int trapno, |
342 | int fail, struct page *page, unsigned long pfn) | 349 | int fail, struct page *page, unsigned long pfn, |
350 | int flags) | ||
343 | { | 351 | { |
344 | struct to_kill *tk, *next; | 352 | struct to_kill *tk, *next; |
345 | 353 | ||
@@ -363,8 +371,8 @@ static void kill_procs_ao(struct list_head *to_kill, int doit, int trapno, | |||
363 | * check for that, but we need to tell the | 371 | * check for that, but we need to tell the |
364 | * process anyways. | 372 | * process anyways. |
365 | */ | 373 | */ |
366 | else if (kill_proc_ao(tk->tsk, tk->addr, trapno, | 374 | else if (kill_proc(tk->tsk, tk->addr, trapno, |
367 | pfn, page) < 0) | 375 | pfn, page, flags) < 0) |
368 | printk(KERN_ERR | 376 | printk(KERN_ERR |
369 | "MCE %#lx: Cannot send advisory machine check signal to %s:%d\n", | 377 | "MCE %#lx: Cannot send advisory machine check signal to %s:%d\n", |
370 | pfn, tk->tsk->comm, tk->tsk->pid); | 378 | pfn, tk->tsk->comm, tk->tsk->pid); |
@@ -844,7 +852,7 @@ static int page_action(struct page_state *ps, struct page *p, | |||
844 | * the pages and send SIGBUS to the processes if the data was dirty. | 852 | * the pages and send SIGBUS to the processes if the data was dirty. |
845 | */ | 853 | */ |
846 | static int hwpoison_user_mappings(struct page *p, unsigned long pfn, | 854 | static int hwpoison_user_mappings(struct page *p, unsigned long pfn, |
847 | int trapno) | 855 | int trapno, int flags) |
848 | { | 856 | { |
849 | enum ttu_flags ttu = TTU_UNMAP | TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS; | 857 | enum ttu_flags ttu = TTU_UNMAP | TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS; |
850 | struct address_space *mapping; | 858 | struct address_space *mapping; |
@@ -962,8 +970,8 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn, | |||
962 | * use a more force-full uncatchable kill to prevent | 970 | * use a more force-full uncatchable kill to prevent |
963 | * any accesses to the poisoned memory. | 971 | * any accesses to the poisoned memory. |
964 | */ | 972 | */ |
965 | kill_procs_ao(&tokill, !!PageDirty(ppage), trapno, | 973 | kill_procs(&tokill, !!PageDirty(ppage), trapno, |
966 | ret != SWAP_SUCCESS, p, pfn); | 974 | ret != SWAP_SUCCESS, p, pfn, flags); |
967 | 975 | ||
968 | return ret; | 976 | return ret; |
969 | } | 977 | } |
@@ -984,7 +992,25 @@ static void clear_page_hwpoison_huge_page(struct page *hpage) | |||
984 | ClearPageHWPoison(hpage + i); | 992 | ClearPageHWPoison(hpage + i); |
985 | } | 993 | } |
986 | 994 | ||
987 | int __memory_failure(unsigned long pfn, int trapno, int flags) | 995 | /** |
996 | * memory_failure - Handle memory failure of a page. | ||
997 | * @pfn: Page Number of the corrupted page | ||
998 | * @trapno: Trap number reported in the signal to user space. | ||
999 | * @flags: fine tune action taken | ||
1000 | * | ||
1001 | * This function is called by the low level machine check code | ||
1002 | * of an architecture when it detects hardware memory corruption | ||
1003 | * of a page. It tries its best to recover, which includes | ||
1004 | * dropping pages, killing processes etc. | ||
1005 | * | ||
1006 | * The function is primarily of use for corruptions that | ||
1007 | * happen outside the current execution context (e.g. when | ||
1008 | * detected by a background scrubber) | ||
1009 | * | ||
1010 | * Must run in process context (e.g. a work queue) with interrupts | ||
1011 | * enabled and no spinlocks hold. | ||
1012 | */ | ||
1013 | int memory_failure(unsigned long pfn, int trapno, int flags) | ||
988 | { | 1014 | { |
989 | struct page_state *ps; | 1015 | struct page_state *ps; |
990 | struct page *p; | 1016 | struct page *p; |
@@ -1130,7 +1156,7 @@ int __memory_failure(unsigned long pfn, int trapno, int flags) | |||
1130 | * Now take care of user space mappings. | 1156 | * Now take care of user space mappings. |
1131 | * Abort on fail: __delete_from_page_cache() assumes unmapped page. | 1157 | * Abort on fail: __delete_from_page_cache() assumes unmapped page. |
1132 | */ | 1158 | */ |
1133 | if (hwpoison_user_mappings(p, pfn, trapno) != SWAP_SUCCESS) { | 1159 | if (hwpoison_user_mappings(p, pfn, trapno, flags) != SWAP_SUCCESS) { |
1134 | printk(KERN_ERR "MCE %#lx: cannot unmap page, give up\n", pfn); | 1160 | printk(KERN_ERR "MCE %#lx: cannot unmap page, give up\n", pfn); |
1135 | res = -EBUSY; | 1161 | res = -EBUSY; |
1136 | goto out; | 1162 | goto out; |
@@ -1156,29 +1182,7 @@ out: | |||
1156 | unlock_page(hpage); | 1182 | unlock_page(hpage); |
1157 | return res; | 1183 | return res; |
1158 | } | 1184 | } |
1159 | EXPORT_SYMBOL_GPL(__memory_failure); | 1185 | EXPORT_SYMBOL_GPL(memory_failure); |
1160 | |||
1161 | /** | ||
1162 | * memory_failure - Handle memory failure of a page. | ||
1163 | * @pfn: Page Number of the corrupted page | ||
1164 | * @trapno: Trap number reported in the signal to user space. | ||
1165 | * | ||
1166 | * This function is called by the low level machine check code | ||
1167 | * of an architecture when it detects hardware memory corruption | ||
1168 | * of a page. It tries its best to recover, which includes | ||
1169 | * dropping pages, killing processes etc. | ||
1170 | * | ||
1171 | * The function is primarily of use for corruptions that | ||
1172 | * happen outside the current execution context (e.g. when | ||
1173 | * detected by a background scrubber) | ||
1174 | * | ||
1175 | * Must run in process context (e.g. a work queue) with interrupts | ||
1176 | * enabled and no spinlocks hold. | ||
1177 | */ | ||
1178 | void memory_failure(unsigned long pfn, int trapno) | ||
1179 | { | ||
1180 | __memory_failure(pfn, trapno, 0); | ||
1181 | } | ||
1182 | 1186 | ||
1183 | #define MEMORY_FAILURE_FIFO_ORDER 4 | 1187 | #define MEMORY_FAILURE_FIFO_ORDER 4 |
1184 | #define MEMORY_FAILURE_FIFO_SIZE (1 << MEMORY_FAILURE_FIFO_ORDER) | 1188 | #define MEMORY_FAILURE_FIFO_SIZE (1 << MEMORY_FAILURE_FIFO_ORDER) |
@@ -1251,7 +1255,7 @@ static void memory_failure_work_func(struct work_struct *work) | |||
1251 | spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); | 1255 | spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); |
1252 | if (!gotten) | 1256 | if (!gotten) |
1253 | break; | 1257 | break; |
1254 | __memory_failure(entry.pfn, entry.trapno, entry.flags); | 1258 | memory_failure(entry.pfn, entry.trapno, entry.flags); |
1255 | } | 1259 | } |
1256 | } | 1260 | } |
1257 | 1261 | ||