aboutsummaryrefslogtreecommitdiffstats
path: root/mm/memory-failure.c
diff options
context:
space:
mode:
authorNaoya Horiguchi <n-horiguchi@ah.jp.nec.com>2015-06-24 19:56:48 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2015-06-24 20:49:42 -0400
commitead07f6a867b5b1b41cf703735e8b39094987a7d (patch)
treeb97cbaa8a66f26ba26b3fb0b69af26ea13bec639 /mm/memory-failure.c
parent415c64c1453aa2bbcc7e30a38f8894d0894cb8ab (diff)
mm/memory-failure: introduce get_hwpoison_page() for consistent refcount handling
memory_failure() can run in 2 different modes (specified by MF_COUNT_INCREASED) from the page refcount perspective. When MF_COUNT_INCREASED is set, memory_failure() assumes that the caller has taken a refcount on the target page. If it is cleared, memory_failure() takes the refcount on its own. In the current code, however, refcounting is done differently in each caller. For example, madvise_hwpoison() uses get_user_pages_fast() and hwpoison_inject() uses get_page_unless_zero(). This inconsistent refcounting causes refcount failures, especially for thp tail pages. Typical user-visible effects are a memory leak or VM_BUG_ON_PAGE(!page_count(page)) in isolate_lru_page(). To fix this refcounting issue, this patch introduces get_hwpoison_page() to handle thp tail pages in the same manner for each caller of hwpoison code. memory_failure() might fail to split a thp, and in such a case it returns without completing page isolation. This is not good because PageHWPoison on the thp is still set and there's no easy way to unpoison such thps. So this patch tries to roll back any action on the thp in the "non anonymous thp" case and the "thp split failed" case, expecting that an MCE(SRAR) generated by a later access will properly free such thps. [akpm@linux-foundation.org: fix CONFIG_HWPOISON_INJECT=m] Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com> Cc: Andi Kleen <andi@firstfloor.org> Cc: Tony Luck <tony.luck@intel.com> Cc: "Kirill A. Shutemov" <kirill@shutemov.name> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/memory-failure.c')
-rw-r--r--mm/memory-failure.c50
1 files changed, 45 insertions, 5 deletions
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 17a8e3bc3b01..a810ab1519f0 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -915,6 +915,39 @@ static int page_action(struct page_state *ps, struct page *p,
915 return (result == RECOVERED || result == DELAYED) ? 0 : -EBUSY; 915 return (result == RECOVERED || result == DELAYED) ? 0 : -EBUSY;
916} 916}
917 917
918/**
919 * get_hwpoison_page() - Get refcount for memory error handling:
920 * @page: raw error page (hit by memory error)
921 *
922 * Return: return 0 if failed to grab the refcount, otherwise true (some
923 * non-zero value.)
924 */
925int get_hwpoison_page(struct page *page)
926{
927 struct page *head = compound_head(page);
928
929 if (PageHuge(head))
930 return get_page_unless_zero(head);
931
932 /*
933 * Thp tail page has special refcounting rule (refcount of tail pages
934 * is stored in ->_mapcount,) so we can't call get_page_unless_zero()
935 * directly for tail pages.
936 */
937 if (PageTransHuge(head)) {
938 if (get_page_unless_zero(head)) {
939 if (PageTail(page))
940 get_page(page);
941 return 1;
942 } else {
943 return 0;
944 }
945 }
946
947 return get_page_unless_zero(page);
948}
949EXPORT_SYMBOL_GPL(get_hwpoison_page);
950
918/* 951/*
919 * Do all that is necessary to remove user space mappings. Unmap 952 * Do all that is necessary to remove user space mappings. Unmap
920 * the pages and send SIGBUS to the processes if the data was dirty. 953 * the pages and send SIGBUS to the processes if the data was dirty.
@@ -1097,8 +1130,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
1097 * In fact it's dangerous to directly bump up page count from 0, 1130 * In fact it's dangerous to directly bump up page count from 0,
1098 * that may make page_freeze_refs()/page_unfreeze_refs() mismatch. 1131 * that may make page_freeze_refs()/page_unfreeze_refs() mismatch.
1099 */ 1132 */
1100 if (!(flags & MF_COUNT_INCREASED) && 1133 if (!(flags & MF_COUNT_INCREASED) && !get_hwpoison_page(p)) {
1101 !get_page_unless_zero(hpage)) {
1102 if (is_free_buddy_page(p)) { 1134 if (is_free_buddy_page(p)) {
1103 action_result(pfn, MSG_BUDDY, DELAYED); 1135 action_result(pfn, MSG_BUDDY, DELAYED);
1104 return 0; 1136 return 0;
@@ -1130,12 +1162,20 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
1130 if (!PageHuge(p) && PageTransHuge(hpage)) { 1162 if (!PageHuge(p) && PageTransHuge(hpage)) {
1131 if (!PageAnon(hpage)) { 1163 if (!PageAnon(hpage)) {
1132 pr_err("MCE: %#lx: non anonymous thp\n", pfn); 1164 pr_err("MCE: %#lx: non anonymous thp\n", pfn);
1165 if (TestClearPageHWPoison(p))
1166 atomic_long_sub(nr_pages, &num_poisoned_pages);
1133 put_page(p); 1167 put_page(p);
1168 if (p != hpage)
1169 put_page(hpage);
1134 return -EBUSY; 1170 return -EBUSY;
1135 } 1171 }
1136 if (unlikely(split_huge_page(hpage))) { 1172 if (unlikely(split_huge_page(hpage))) {
1137 pr_err("MCE: %#lx: thp split failed\n", pfn); 1173 pr_err("MCE: %#lx: thp split failed\n", pfn);
1174 if (TestClearPageHWPoison(p))
1175 atomic_long_sub(nr_pages, &num_poisoned_pages);
1138 put_page(p); 1176 put_page(p);
1177 if (p != hpage)
1178 put_page(hpage);
1139 return -EBUSY; 1179 return -EBUSY;
1140 } 1180 }
1141 VM_BUG_ON_PAGE(!page_count(p), p); 1181 VM_BUG_ON_PAGE(!page_count(p), p);
@@ -1413,12 +1453,12 @@ int unpoison_memory(unsigned long pfn)
1413 */ 1453 */
1414 if (!PageHuge(page) && PageTransHuge(page)) { 1454 if (!PageHuge(page) && PageTransHuge(page)) {
1415 pr_info("MCE: Memory failure is now running on %#lx\n", pfn); 1455 pr_info("MCE: Memory failure is now running on %#lx\n", pfn);
1416 return 0; 1456 return 0;
1417 } 1457 }
1418 1458
1419 nr_pages = 1 << compound_order(page); 1459 nr_pages = 1 << compound_order(page);
1420 1460
1421 if (!get_page_unless_zero(page)) { 1461 if (!get_hwpoison_page(p)) {
1422 /* 1462 /*
1423 * Since HWPoisoned hugepage should have non-zero refcount, 1463 * Since HWPoisoned hugepage should have non-zero refcount,
1424 * race between memory failure and unpoison seems to happen. 1464 * race between memory failure and unpoison seems to happen.
@@ -1486,7 +1526,7 @@ static int __get_any_page(struct page *p, unsigned long pfn, int flags)
1486 * When the target page is a free hugepage, just remove it 1526 * When the target page is a free hugepage, just remove it
1487 * from free hugepage list. 1527 * from free hugepage list.
1488 */ 1528 */
1489 if (!get_page_unless_zero(compound_head(p))) { 1529 if (!get_hwpoison_page(p)) {
1490 if (PageHuge(p)) { 1530 if (PageHuge(p)) {
1491 pr_info("%s: %#lx free huge page\n", __func__, pfn); 1531 pr_info("%s: %#lx free huge page\n", __func__, pfn);
1492 ret = 0; 1532 ret = 0;