aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Chen Yucong <slaoub@gmail.com> 2016-05-20 19:57:32 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2016-05-20 20:58:30 -0400
commit 495367c051fb200a42636bdc63be78ca1713a85a (patch)
tree 9391eaae7ab1e0b21510d001d9aa37f2d2376d6d
parent 340a43bed674a70308f196f2a61ec0b01f8a14d9 (diff)
mm/memory-failure.c: replace "MCE" with "Memory failure"
HWPoison was originally specific to certain x86 platforms, and it is often seen as a high-level machine check handler. For that reason, 'MCE' has been used as the prefix of its printk() messages. However, 'PowerNV' now also uses HWPoison for handling memory errors[1], so 'MCE' is no longer a suitable prefix for memory-failure.c. Additionally, 'MCE' and 'Memory failure' belong to different contexts: the former belongs to exception context and the latter belongs to process context. Furthermore, HWPoison can also be used for off-lining sub-healthy pages that do not trigger any machine check exception. This patch replaces 'MCE' with a more appropriate prefix. [1] commit 75eb3d9b60c2 ("powerpc/powernv: Get FSP memory errors and plumb into memory poison infrastructure.") Signed-off-by: Chen Yucong <slaoub@gmail.com> Acked-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- mm/memory-failure.c 72
1 files changed, 40 insertions, 32 deletions
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index ca5acee53b7a..2fcca6b0e005 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -184,8 +184,8 @@ static int kill_proc(struct task_struct *t, unsigned long addr, int trapno,
184 struct siginfo si; 184 struct siginfo si;
185 int ret; 185 int ret;
186 186
187 pr_err("MCE %#lx: Killing %s:%d due to hardware memory corruption\n", 187 pr_err("Memory failure: %#lx: Killing %s:%d due to hardware memory corruption\n",
188 pfn, t->comm, t->pid); 188 pfn, t->comm, t->pid);
189 si.si_signo = SIGBUS; 189 si.si_signo = SIGBUS;
190 si.si_errno = 0; 190 si.si_errno = 0;
191 si.si_addr = (void *)addr; 191 si.si_addr = (void *)addr;
@@ -208,7 +208,7 @@ static int kill_proc(struct task_struct *t, unsigned long addr, int trapno,
208 ret = send_sig_info(SIGBUS, &si, t); /* synchronous? */ 208 ret = send_sig_info(SIGBUS, &si, t); /* synchronous? */
209 } 209 }
210 if (ret < 0) 210 if (ret < 0)
211 pr_info("MCE: Error sending signal to %s:%d: %d\n", 211 pr_info("Memory failure: Error sending signal to %s:%d: %d\n",
212 t->comm, t->pid, ret); 212 t->comm, t->pid, ret);
213 return ret; 213 return ret;
214} 214}
@@ -289,7 +289,7 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,
289 } else { 289 } else {
290 tk = kmalloc(sizeof(struct to_kill), GFP_ATOMIC); 290 tk = kmalloc(sizeof(struct to_kill), GFP_ATOMIC);
291 if (!tk) { 291 if (!tk) {
292 pr_err("MCE: Out of memory while machine check handling\n"); 292 pr_err("Memory failure: Out of memory while machine check handling\n");
293 return; 293 return;
294 } 294 }
295 } 295 }
@@ -303,7 +303,7 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,
303 * a SIGKILL because the error is not contained anymore. 303 * a SIGKILL because the error is not contained anymore.
304 */ 304 */
305 if (tk->addr == -EFAULT) { 305 if (tk->addr == -EFAULT) {
306 pr_info("MCE: Unable to find user space address %lx in %s\n", 306 pr_info("Memory failure: Unable to find user space address %lx in %s\n",
307 page_to_pfn(p), tsk->comm); 307 page_to_pfn(p), tsk->comm);
308 tk->addr_valid = 0; 308 tk->addr_valid = 0;
309 } 309 }
@@ -334,7 +334,7 @@ static void kill_procs(struct list_head *to_kill, int forcekill, int trapno,
334 * signal and then access the memory. Just kill it. 334 * signal and then access the memory. Just kill it.
335 */ 335 */
336 if (fail || tk->addr_valid == 0) { 336 if (fail || tk->addr_valid == 0) {
337 pr_err("MCE %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n", 337 pr_err("Memory failure: %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n",
338 pfn, tk->tsk->comm, tk->tsk->pid); 338 pfn, tk->tsk->comm, tk->tsk->pid);
339 force_sig(SIGKILL, tk->tsk); 339 force_sig(SIGKILL, tk->tsk);
340 } 340 }
@@ -347,7 +347,7 @@ static void kill_procs(struct list_head *to_kill, int forcekill, int trapno,
347 */ 347 */
348 else if (kill_proc(tk->tsk, tk->addr, trapno, 348 else if (kill_proc(tk->tsk, tk->addr, trapno,
349 pfn, page, flags) < 0) 349 pfn, page, flags) < 0)
350 pr_err("MCE %#lx: Cannot send advisory machine check signal to %s:%d\n", 350 pr_err("Memory failure: %#lx: Cannot send advisory machine check signal to %s:%d\n",
351 pfn, tk->tsk->comm, tk->tsk->pid); 351 pfn, tk->tsk->comm, tk->tsk->pid);
352 } 352 }
353 put_task_struct(tk->tsk); 353 put_task_struct(tk->tsk);
@@ -559,7 +559,7 @@ static int me_kernel(struct page *p, unsigned long pfn)
559 */ 559 */
560static int me_unknown(struct page *p, unsigned long pfn) 560static int me_unknown(struct page *p, unsigned long pfn)
561{ 561{
562 pr_err("MCE %#lx: Unknown page state\n", pfn); 562 pr_err("Memory failure: %#lx: Unknown page state\n", pfn);
563 return MF_FAILED; 563 return MF_FAILED;
564} 564}
565 565
@@ -604,11 +604,12 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn)
604 if (mapping->a_ops->error_remove_page) { 604 if (mapping->a_ops->error_remove_page) {
605 err = mapping->a_ops->error_remove_page(mapping, p); 605 err = mapping->a_ops->error_remove_page(mapping, p);
606 if (err != 0) { 606 if (err != 0) {
607 pr_info("MCE %#lx: Failed to punch page: %d\n", 607 pr_info("Memory failure: %#lx: Failed to punch page: %d\n",
608 pfn, err); 608 pfn, err);
609 } else if (page_has_private(p) && 609 } else if (page_has_private(p) &&
610 !try_to_release_page(p, GFP_NOIO)) { 610 !try_to_release_page(p, GFP_NOIO)) {
611 pr_info("MCE %#lx: failed to release buffers\n", pfn); 611 pr_info("Memory failure: %#lx: failed to release buffers\n",
612 pfn);
612 } else { 613 } else {
613 ret = MF_RECOVERED; 614 ret = MF_RECOVERED;
614 } 615 }
@@ -620,7 +621,8 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn)
620 if (invalidate_inode_page(p)) 621 if (invalidate_inode_page(p))
621 ret = MF_RECOVERED; 622 ret = MF_RECOVERED;
622 else 623 else
623 pr_info("MCE %#lx: Failed to invalidate\n", pfn); 624 pr_info("Memory failure: %#lx: Failed to invalidate\n",
625 pfn);
624 } 626 }
625 return ret; 627 return ret;
626} 628}
@@ -833,7 +835,7 @@ static void action_result(unsigned long pfn, enum mf_action_page_type type,
833{ 835{
834 trace_memory_failure_event(pfn, type, result); 836 trace_memory_failure_event(pfn, type, result);
835 837
836 pr_err("MCE %#lx: recovery action for %s: %s\n", 838 pr_err("Memory failure: %#lx: recovery action for %s: %s\n",
837 pfn, action_page_types[type], action_name[result]); 839 pfn, action_page_types[type], action_name[result]);
838} 840}
839 841
@@ -849,7 +851,7 @@ static int page_action(struct page_state *ps, struct page *p,
849 if (ps->action == me_swapcache_dirty && result == MF_DELAYED) 851 if (ps->action == me_swapcache_dirty && result == MF_DELAYED)
850 count--; 852 count--;
851 if (count != 0) { 853 if (count != 0) {
852 pr_err("MCE %#lx: %s still referenced by %d users\n", 854 pr_err("Memory failure: %#lx: %s still referenced by %d users\n",
853 pfn, action_page_types[ps->type], count); 855 pfn, action_page_types[ps->type], count);
854 result = MF_FAILED; 856 result = MF_FAILED;
855 } 857 }
@@ -882,7 +884,7 @@ int get_hwpoison_page(struct page *page)
882 * tries to touch the "partially handled" page. 884 * tries to touch the "partially handled" page.
883 */ 885 */
884 if (!PageAnon(head)) { 886 if (!PageAnon(head)) {
885 pr_err("MCE: %#lx: non anonymous thp\n", 887 pr_err("Memory failure: %#lx: non anonymous thp\n",
886 page_to_pfn(page)); 888 page_to_pfn(page));
887 return 0; 889 return 0;
888 } 890 }
@@ -892,7 +894,8 @@ int get_hwpoison_page(struct page *page)
892 if (head == compound_head(page)) 894 if (head == compound_head(page))
893 return 1; 895 return 1;
894 896
895 pr_info("MCE: %#lx cannot catch tail\n", page_to_pfn(page)); 897 pr_info("Memory failure: %#lx cannot catch tail\n",
898 page_to_pfn(page));
896 put_page(head); 899 put_page(head);
897 } 900 }
898 901
@@ -931,12 +934,13 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
931 return SWAP_SUCCESS; 934 return SWAP_SUCCESS;
932 935
933 if (PageKsm(p)) { 936 if (PageKsm(p)) {
934 pr_err("MCE %#lx: can't handle KSM pages.\n", pfn); 937 pr_err("Memory failure: %#lx: can't handle KSM pages.\n", pfn);
935 return SWAP_FAIL; 938 return SWAP_FAIL;
936 } 939 }
937 940
938 if (PageSwapCache(p)) { 941 if (PageSwapCache(p)) {
939 pr_err("MCE %#lx: keeping poisoned page in swap cache\n", pfn); 942 pr_err("Memory failure: %#lx: keeping poisoned page in swap cache\n",
943 pfn);
940 ttu |= TTU_IGNORE_HWPOISON; 944 ttu |= TTU_IGNORE_HWPOISON;
941 } 945 }
942 946
@@ -954,7 +958,7 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
954 } else { 958 } else {
955 kill = 0; 959 kill = 0;
956 ttu |= TTU_IGNORE_HWPOISON; 960 ttu |= TTU_IGNORE_HWPOISON;
957 pr_info("MCE %#lx: corrupted page was clean: dropped without side effects\n", 961 pr_info("Memory failure: %#lx: corrupted page was clean: dropped without side effects\n",
958 pfn); 962 pfn);
959 } 963 }
960 } 964 }
@@ -972,7 +976,7 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
972 976
973 ret = try_to_unmap(hpage, ttu); 977 ret = try_to_unmap(hpage, ttu);
974 if (ret != SWAP_SUCCESS) 978 if (ret != SWAP_SUCCESS)
975 pr_err("MCE %#lx: failed to unmap page (mapcount=%d)\n", 979 pr_err("Memory failure: %#lx: failed to unmap page (mapcount=%d)\n",
976 pfn, page_mapcount(hpage)); 980 pfn, page_mapcount(hpage));
977 981
978 /* 982 /*
@@ -1040,14 +1044,16 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
1040 panic("Memory failure from trap %d on page %lx", trapno, pfn); 1044 panic("Memory failure from trap %d on page %lx", trapno, pfn);
1041 1045
1042 if (!pfn_valid(pfn)) { 1046 if (!pfn_valid(pfn)) {
1043 pr_err("MCE %#lx: memory outside kernel control\n", pfn); 1047 pr_err("Memory failure: %#lx: memory outside kernel control\n",
1048 pfn);
1044 return -ENXIO; 1049 return -ENXIO;
1045 } 1050 }
1046 1051
1047 p = pfn_to_page(pfn); 1052 p = pfn_to_page(pfn);
1048 orig_head = hpage = compound_head(p); 1053 orig_head = hpage = compound_head(p);
1049 if (TestSetPageHWPoison(p)) { 1054 if (TestSetPageHWPoison(p)) {
1050 pr_err("MCE %#lx: already hardware poisoned\n", pfn); 1055 pr_err("Memory failure: %#lx: already hardware poisoned\n",
1056 pfn);
1051 return 0; 1057 return 0;
1052 } 1058 }
1053 1059
@@ -1112,9 +1118,11 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
1112 if (!PageAnon(hpage) || unlikely(split_huge_page(hpage))) { 1118 if (!PageAnon(hpage) || unlikely(split_huge_page(hpage))) {
1113 unlock_page(hpage); 1119 unlock_page(hpage);
1114 if (!PageAnon(hpage)) 1120 if (!PageAnon(hpage))
1115 pr_err("MCE: %#lx: non anonymous thp\n", pfn); 1121 pr_err("Memory failure: %#lx: non anonymous thp\n",
1122 pfn);
1116 else 1123 else
1117 pr_err("MCE: %#lx: thp split failed\n", pfn); 1124 pr_err("Memory failure: %#lx: thp split failed\n",
1125 pfn);
1118 if (TestClearPageHWPoison(p)) 1126 if (TestClearPageHWPoison(p))
1119 num_poisoned_pages_sub(nr_pages); 1127 num_poisoned_pages_sub(nr_pages);
1120 put_hwpoison_page(p); 1128 put_hwpoison_page(p);
@@ -1178,7 +1186,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
1178 * unpoison always clear PG_hwpoison inside page lock 1186 * unpoison always clear PG_hwpoison inside page lock
1179 */ 1187 */
1180 if (!PageHWPoison(p)) { 1188 if (!PageHWPoison(p)) {
1181 pr_err("MCE %#lx: just unpoisoned\n", pfn); 1189 pr_err("Memory failure: %#lx: just unpoisoned\n", pfn);
1182 num_poisoned_pages_sub(nr_pages); 1190 num_poisoned_pages_sub(nr_pages);
1183 unlock_page(hpage); 1191 unlock_page(hpage);
1184 put_hwpoison_page(hpage); 1192 put_hwpoison_page(hpage);
@@ -1395,25 +1403,25 @@ int unpoison_memory(unsigned long pfn)
1395 page = compound_head(p); 1403 page = compound_head(p);
1396 1404
1397 if (!PageHWPoison(p)) { 1405 if (!PageHWPoison(p)) {
1398 unpoison_pr_info("MCE: Page was already unpoisoned %#lx\n", 1406 unpoison_pr_info("Unpoison: Page was already unpoisoned %#lx\n",
1399 pfn, &unpoison_rs); 1407 pfn, &unpoison_rs);
1400 return 0; 1408 return 0;
1401 } 1409 }
1402 1410
1403 if (page_count(page) > 1) { 1411 if (page_count(page) > 1) {
1404 unpoison_pr_info("MCE: Someone grabs the hwpoison page %#lx\n", 1412 unpoison_pr_info("Unpoison: Someone grabs the hwpoison page %#lx\n",
1405 pfn, &unpoison_rs); 1413 pfn, &unpoison_rs);
1406 return 0; 1414 return 0;
1407 } 1415 }
1408 1416
1409 if (page_mapped(page)) { 1417 if (page_mapped(page)) {
1410 unpoison_pr_info("MCE: Someone maps the hwpoison page %#lx\n", 1418 unpoison_pr_info("Unpoison: Someone maps the hwpoison page %#lx\n",
1411 pfn, &unpoison_rs); 1419 pfn, &unpoison_rs);
1412 return 0; 1420 return 0;
1413 } 1421 }
1414 1422
1415 if (page_mapping(page)) { 1423 if (page_mapping(page)) {
1416 unpoison_pr_info("MCE: the hwpoison page has non-NULL mapping %#lx\n", 1424 unpoison_pr_info("Unpoison: the hwpoison page has non-NULL mapping %#lx\n",
1417 pfn, &unpoison_rs); 1425 pfn, &unpoison_rs);
1418 return 0; 1426 return 0;
1419 } 1427 }
@@ -1424,7 +1432,7 @@ int unpoison_memory(unsigned long pfn)
1424 * In such case, we yield to memory_failure() and make unpoison fail. 1432 * In such case, we yield to memory_failure() and make unpoison fail.
1425 */ 1433 */
1426 if (!PageHuge(page) && PageTransHuge(page)) { 1434 if (!PageHuge(page) && PageTransHuge(page)) {
1427 unpoison_pr_info("MCE: Memory failure is now running on %#lx\n", 1435 unpoison_pr_info("Unpoison: Memory failure is now running on %#lx\n",
1428 pfn, &unpoison_rs); 1436 pfn, &unpoison_rs);
1429 return 0; 1437 return 0;
1430 } 1438 }
@@ -1439,13 +1447,13 @@ int unpoison_memory(unsigned long pfn)
1439 * to the end. 1447 * to the end.
1440 */ 1448 */
1441 if (PageHuge(page)) { 1449 if (PageHuge(page)) {
1442 unpoison_pr_info("MCE: Memory failure is now running on free hugepage %#lx\n", 1450 unpoison_pr_info("Unpoison: Memory failure is now running on free hugepage %#lx\n",
1443 pfn, &unpoison_rs); 1451 pfn, &unpoison_rs);
1444 return 0; 1452 return 0;
1445 } 1453 }
1446 if (TestClearPageHWPoison(p)) 1454 if (TestClearPageHWPoison(p))
1447 num_poisoned_pages_dec(); 1455 num_poisoned_pages_dec();
1448 unpoison_pr_info("MCE: Software-unpoisoned free page %#lx\n", 1456 unpoison_pr_info("Unpoison: Software-unpoisoned free page %#lx\n",
1449 pfn, &unpoison_rs); 1457 pfn, &unpoison_rs);
1450 return 0; 1458 return 0;
1451 } 1459 }
@@ -1458,7 +1466,7 @@ int unpoison_memory(unsigned long pfn)
1458 * the free buddy page pool. 1466 * the free buddy page pool.
1459 */ 1467 */
1460 if (TestClearPageHWPoison(page)) { 1468 if (TestClearPageHWPoison(page)) {
1461 unpoison_pr_info("MCE: Software-unpoisoned page %#lx\n", 1469 unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n",
1462 pfn, &unpoison_rs); 1470 pfn, &unpoison_rs);
1463 num_poisoned_pages_sub(nr_pages); 1471 num_poisoned_pages_sub(nr_pages);
1464 freeit = 1; 1472 freeit = 1;