aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2012-01-26 05:40:13 -0500
committerIngo Molnar <mingo@elte.hu>2012-01-26 05:40:13 -0500
commit4e9f44ba29f20484615a461244bfd3a419391490 (patch)
tree490dd38bb8d14765327cee0be2f9731254e9c402 /mm
parent87f71ae2dd7471c1b4c94100be1f218e91dc64c3 (diff)
parent5f7b88d51e89771f64c15903b96b5933dd0bc6d8 (diff)
Merge tag 'mce-recovery-for-tip' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras into x86/mce
Implement MCE recovery for the data load error path and assorted cleanups. Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'mm')
-rw-r--r--mm/hwpoison-inject.c4
-rw-r--r--mm/madvise.c2
-rw-r--r--mm/memory-failure.c96
3 files changed, 53 insertions, 49 deletions
diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c
index c7fc7fd00e32..cc448bb983ba 100644
--- a/mm/hwpoison-inject.c
+++ b/mm/hwpoison-inject.c
@@ -45,7 +45,7 @@ static int hwpoison_inject(void *data, u64 val)
45 * do a racy check with elevated page count, to make sure PG_hwpoison 45 * do a racy check with elevated page count, to make sure PG_hwpoison
46 * will only be set for the targeted owner (or on a free page). 46 * will only be set for the targeted owner (or on a free page).
47 * We temporarily take page lock for try_get_mem_cgroup_from_page(). 47 * We temporarily take page lock for try_get_mem_cgroup_from_page().
48 * __memory_failure() will redo the check reliably inside page lock. 48 * memory_failure() will redo the check reliably inside page lock.
49 */ 49 */
50 lock_page(hpage); 50 lock_page(hpage);
51 err = hwpoison_filter(hpage); 51 err = hwpoison_filter(hpage);
@@ -55,7 +55,7 @@ static int hwpoison_inject(void *data, u64 val)
55 55
56inject: 56inject:
57 printk(KERN_INFO "Injecting memory failure at pfn %lx\n", pfn); 57 printk(KERN_INFO "Injecting memory failure at pfn %lx\n", pfn);
58 return __memory_failure(pfn, 18, MF_COUNT_INCREASED); 58 return memory_failure(pfn, 18, MF_COUNT_INCREASED);
59} 59}
60 60
61static int hwpoison_unpoison(void *data, u64 val) 61static int hwpoison_unpoison(void *data, u64 val)
diff --git a/mm/madvise.c b/mm/madvise.c
index 74bf193eff04..f5ab745672b7 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -251,7 +251,7 @@ static int madvise_hwpoison(int bhv, unsigned long start, unsigned long end)
251 printk(KERN_INFO "Injecting memory failure for page %lx at %lx\n", 251 printk(KERN_INFO "Injecting memory failure for page %lx at %lx\n",
252 page_to_pfn(p), start); 252 page_to_pfn(p), start);
253 /* Ignore return value for now */ 253 /* Ignore return value for now */
254 __memory_failure(page_to_pfn(p), 0, MF_COUNT_INCREASED); 254 memory_failure(page_to_pfn(p), 0, MF_COUNT_INCREASED);
255 } 255 }
256 return ret; 256 return ret;
257} 257}
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 56080ea36140..0f6033b01ffc 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -187,33 +187,40 @@ int hwpoison_filter(struct page *p)
187EXPORT_SYMBOL_GPL(hwpoison_filter); 187EXPORT_SYMBOL_GPL(hwpoison_filter);
188 188
189/* 189/*
190 * Send all the processes who have the page mapped an ``action optional'' 190 * Send all the processes who have the page mapped a signal.
191 * signal. 191 * ``action optional'' if they are not immediately affected by the error
192 * ``action required'' if error happened in current execution context
192 */ 193 */
193static int kill_proc_ao(struct task_struct *t, unsigned long addr, int trapno, 194static int kill_proc(struct task_struct *t, unsigned long addr, int trapno,
194 unsigned long pfn, struct page *page) 195 unsigned long pfn, struct page *page, int flags)
195{ 196{
196 struct siginfo si; 197 struct siginfo si;
197 int ret; 198 int ret;
198 199
199 printk(KERN_ERR 200 printk(KERN_ERR
200 "MCE %#lx: Killing %s:%d early due to hardware memory corruption\n", 201 "MCE %#lx: Killing %s:%d due to hardware memory corruption\n",
201 pfn, t->comm, t->pid); 202 pfn, t->comm, t->pid);
202 si.si_signo = SIGBUS; 203 si.si_signo = SIGBUS;
203 si.si_errno = 0; 204 si.si_errno = 0;
204 si.si_code = BUS_MCEERR_AO;
205 si.si_addr = (void *)addr; 205 si.si_addr = (void *)addr;
206#ifdef __ARCH_SI_TRAPNO 206#ifdef __ARCH_SI_TRAPNO
207 si.si_trapno = trapno; 207 si.si_trapno = trapno;
208#endif 208#endif
209 si.si_addr_lsb = compound_trans_order(compound_head(page)) + PAGE_SHIFT; 209 si.si_addr_lsb = compound_trans_order(compound_head(page)) + PAGE_SHIFT;
210 /* 210
211 * Don't use force here, it's convenient if the signal 211 if ((flags & MF_ACTION_REQUIRED) && t == current) {
212 * can be temporarily blocked. 212 si.si_code = BUS_MCEERR_AR;
213 * This could cause a loop when the user sets SIGBUS 213 ret = force_sig_info(SIGBUS, &si, t);
214 * to SIG_IGN, but hopefully no one will do that? 214 } else {
215 */ 215 /*
216 ret = send_sig_info(SIGBUS, &si, t); /* synchronous? */ 216 * Don't use force here, it's convenient if the signal
217 * can be temporarily blocked.
218 * This could cause a loop when the user sets SIGBUS
219 * to SIG_IGN, but hopefully no one will do that?
220 */
221 si.si_code = BUS_MCEERR_AO;
222 ret = send_sig_info(SIGBUS, &si, t); /* synchronous? */
223 }
217 if (ret < 0) 224 if (ret < 0)
218 printk(KERN_INFO "MCE: Error sending signal to %s:%d: %d\n", 225 printk(KERN_INFO "MCE: Error sending signal to %s:%d: %d\n",
219 t->comm, t->pid, ret); 226 t->comm, t->pid, ret);
@@ -338,8 +345,9 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,
338 * Also when FAIL is set do a force kill because something went 345 * Also when FAIL is set do a force kill because something went
339 * wrong earlier. 346 * wrong earlier.
340 */ 347 */
341static void kill_procs_ao(struct list_head *to_kill, int doit, int trapno, 348static void kill_procs(struct list_head *to_kill, int doit, int trapno,
342 int fail, struct page *page, unsigned long pfn) 349 int fail, struct page *page, unsigned long pfn,
350 int flags)
343{ 351{
344 struct to_kill *tk, *next; 352 struct to_kill *tk, *next;
345 353
@@ -363,8 +371,8 @@ static void kill_procs_ao(struct list_head *to_kill, int doit, int trapno,
363 * check for that, but we need to tell the 371 * check for that, but we need to tell the
364 * process anyways. 372 * process anyways.
365 */ 373 */
366 else if (kill_proc_ao(tk->tsk, tk->addr, trapno, 374 else if (kill_proc(tk->tsk, tk->addr, trapno,
367 pfn, page) < 0) 375 pfn, page, flags) < 0)
368 printk(KERN_ERR 376 printk(KERN_ERR
369 "MCE %#lx: Cannot send advisory machine check signal to %s:%d\n", 377 "MCE %#lx: Cannot send advisory machine check signal to %s:%d\n",
370 pfn, tk->tsk->comm, tk->tsk->pid); 378 pfn, tk->tsk->comm, tk->tsk->pid);
@@ -844,7 +852,7 @@ static int page_action(struct page_state *ps, struct page *p,
844 * the pages and send SIGBUS to the processes if the data was dirty. 852 * the pages and send SIGBUS to the processes if the data was dirty.
845 */ 853 */
846static int hwpoison_user_mappings(struct page *p, unsigned long pfn, 854static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
847 int trapno) 855 int trapno, int flags)
848{ 856{
849 enum ttu_flags ttu = TTU_UNMAP | TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS; 857 enum ttu_flags ttu = TTU_UNMAP | TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS;
850 struct address_space *mapping; 858 struct address_space *mapping;
@@ -962,8 +970,8 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
962 * use a more force-full uncatchable kill to prevent 970 * use a more force-full uncatchable kill to prevent
963 * any accesses to the poisoned memory. 971 * any accesses to the poisoned memory.
964 */ 972 */
965 kill_procs_ao(&tokill, !!PageDirty(ppage), trapno, 973 kill_procs(&tokill, !!PageDirty(ppage), trapno,
966 ret != SWAP_SUCCESS, p, pfn); 974 ret != SWAP_SUCCESS, p, pfn, flags);
967 975
968 return ret; 976 return ret;
969} 977}
@@ -984,7 +992,25 @@ static void clear_page_hwpoison_huge_page(struct page *hpage)
984 ClearPageHWPoison(hpage + i); 992 ClearPageHWPoison(hpage + i);
985} 993}
986 994
987int __memory_failure(unsigned long pfn, int trapno, int flags) 995/**
996 * memory_failure - Handle memory failure of a page.
997 * @pfn: Page Number of the corrupted page
998 * @trapno: Trap number reported in the signal to user space.
999 * @flags: fine tune action taken
1000 *
1001 * This function is called by the low level machine check code
1002 * of an architecture when it detects hardware memory corruption
1003 * of a page. It tries its best to recover, which includes
1004 * dropping pages, killing processes etc.
1005 *
1006 * The function is primarily of use for corruptions that
1007 * happen outside the current execution context (e.g. when
1008 * detected by a background scrubber)
1009 *
1010 * Must run in process context (e.g. a work queue) with interrupts
1011 * enabled and no spinlocks hold.
1012 */
1013int memory_failure(unsigned long pfn, int trapno, int flags)
988{ 1014{
989 struct page_state *ps; 1015 struct page_state *ps;
990 struct page *p; 1016 struct page *p;
@@ -1130,7 +1156,7 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
1130 * Now take care of user space mappings. 1156 * Now take care of user space mappings.
1131 * Abort on fail: __delete_from_page_cache() assumes unmapped page. 1157 * Abort on fail: __delete_from_page_cache() assumes unmapped page.
1132 */ 1158 */
1133 if (hwpoison_user_mappings(p, pfn, trapno) != SWAP_SUCCESS) { 1159 if (hwpoison_user_mappings(p, pfn, trapno, flags) != SWAP_SUCCESS) {
1134 printk(KERN_ERR "MCE %#lx: cannot unmap page, give up\n", pfn); 1160 printk(KERN_ERR "MCE %#lx: cannot unmap page, give up\n", pfn);
1135 res = -EBUSY; 1161 res = -EBUSY;
1136 goto out; 1162 goto out;
@@ -1156,29 +1182,7 @@ out:
1156 unlock_page(hpage); 1182 unlock_page(hpage);
1157 return res; 1183 return res;
1158} 1184}
1159EXPORT_SYMBOL_GPL(__memory_failure); 1185EXPORT_SYMBOL_GPL(memory_failure);
1160
1161/**
1162 * memory_failure - Handle memory failure of a page.
1163 * @pfn: Page Number of the corrupted page
1164 * @trapno: Trap number reported in the signal to user space.
1165 *
1166 * This function is called by the low level machine check code
1167 * of an architecture when it detects hardware memory corruption
1168 * of a page. It tries its best to recover, which includes
1169 * dropping pages, killing processes etc.
1170 *
1171 * The function is primarily of use for corruptions that
1172 * happen outside the current execution context (e.g. when
1173 * detected by a background scrubber)
1174 *
1175 * Must run in process context (e.g. a work queue) with interrupts
1176 * enabled and no spinlocks hold.
1177 */
1178void memory_failure(unsigned long pfn, int trapno)
1179{
1180 __memory_failure(pfn, trapno, 0);
1181}
1182 1186
1183#define MEMORY_FAILURE_FIFO_ORDER 4 1187#define MEMORY_FAILURE_FIFO_ORDER 4
1184#define MEMORY_FAILURE_FIFO_SIZE (1 << MEMORY_FAILURE_FIFO_ORDER) 1188#define MEMORY_FAILURE_FIFO_SIZE (1 << MEMORY_FAILURE_FIFO_ORDER)
@@ -1251,7 +1255,7 @@ static void memory_failure_work_func(struct work_struct *work)
1251 spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); 1255 spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
1252 if (!gotten) 1256 if (!gotten)
1253 break; 1257 break;
1254 __memory_failure(entry.pfn, entry.trapno, entry.flags); 1258 memory_failure(entry.pfn, entry.trapno, entry.flags);
1255 } 1259 }
1256} 1260}
1257 1261