about | summary | refs | log | tree | commit | diff | stats
path: root/mm/memory-failure.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/memory-failure.c')
-rw-r--r-- mm/memory-failure.c 102
1 files changed, 93 insertions, 9 deletions
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 2044fe8920c2..44a8cefeae6e 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -697,11 +697,10 @@ static int me_swapcache_clean(struct page *p, unsigned long pfn)
697 * Issues: 697 * Issues:
698 * - Error on hugepage is contained in hugepage unit (not in raw page unit.) 698 * - Error on hugepage is contained in hugepage unit (not in raw page unit.)
699 * To narrow down kill region to one page, we need to break up pmd. 699 * To narrow down kill region to one page, we need to break up pmd.
700 * - To support soft-offlining for hugepage, we need to support hugepage
701 * migration.
702 */ 700 */
703static int me_huge_page(struct page *p, unsigned long pfn) 701static int me_huge_page(struct page *p, unsigned long pfn)
704{ 702{
703 int res = 0;
705 struct page *hpage = compound_head(p); 704 struct page *hpage = compound_head(p);
706 /* 705 /*
707 * We can safely recover from error on free or reserved (i.e. 706 * We can safely recover from error on free or reserved (i.e.
@@ -714,8 +713,9 @@ static int me_huge_page(struct page *p, unsigned long pfn)
714 * so there is no race between isolation and mapping/unmapping. 713 * so there is no race between isolation and mapping/unmapping.
715 */ 714 */
716 if (!(page_mapping(hpage) || PageAnon(hpage))) { 715 if (!(page_mapping(hpage) || PageAnon(hpage))) {
717 __isolate_hwpoisoned_huge_page(hpage); 716 res = dequeue_hwpoisoned_huge_page(hpage);
718 return RECOVERED; 717 if (!res)
718 return RECOVERED;
719 } 719 }
720 return DELAYED; 720 return DELAYED;
721} 721}
@@ -972,7 +972,10 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
972 * We need/can do nothing about count=0 pages. 972 * We need/can do nothing about count=0 pages.
973 * 1) it's a free page, and therefore in safe hand: 973 * 1) it's a free page, and therefore in safe hand:
974 * prep_new_page() will be the gate keeper. 974 * prep_new_page() will be the gate keeper.
975 * 2) it's part of a non-compound high order page. 975 * 2) it's a free hugepage, which is also safe:
976 * an affected hugepage will be dequeued from hugepage freelist,
977 * so there's no concern about reusing it ever after.
978 * 3) it's part of a non-compound high order page.
976 * Implies some kernel user: cannot stop them from 979 * Implies some kernel user: cannot stop them from
977 * R/W the page; let's pray that the page has been 980 * R/W the page; let's pray that the page has been
978 * used and will be freed some time later. 981 * used and will be freed some time later.
@@ -984,6 +987,24 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
984 if (is_free_buddy_page(p)) { 987 if (is_free_buddy_page(p)) {
985 action_result(pfn, "free buddy", DELAYED); 988 action_result(pfn, "free buddy", DELAYED);
986 return 0; 989 return 0;
990 } else if (PageHuge(hpage)) {
991 /*
992 * Check "just unpoisoned", "filter hit", and
993 * "race with other subpage."
994 */
995 lock_page_nosync(hpage);
996 if (!PageHWPoison(hpage)
997 || (hwpoison_filter(p) && TestClearPageHWPoison(p))
998 || (p != hpage && TestSetPageHWPoison(hpage))) {
999 atomic_long_sub(nr_pages, &mce_bad_pages);
1000 return 0;
1001 }
1002 set_page_hwpoison_huge_page(hpage);
1003 res = dequeue_hwpoisoned_huge_page(hpage);
1004 action_result(pfn, "free huge",
1005 res ? IGNORED : DELAYED);
1006 unlock_page(hpage);
1007 return res;
987 } else { 1008 } else {
988 action_result(pfn, "high order kernel", IGNORED); 1009 action_result(pfn, "high order kernel", IGNORED);
989 return -EBUSY; 1010 return -EBUSY;
@@ -1145,6 +1166,16 @@ int unpoison_memory(unsigned long pfn)
1145 nr_pages = 1 << compound_order(page); 1166 nr_pages = 1 << compound_order(page);
1146 1167
1147 if (!get_page_unless_zero(page)) { 1168 if (!get_page_unless_zero(page)) {
1169 /*
1170 * A HWPoisoned hugepage should have a non-zero refcount, so a
1171 * zero refcount here means memory failure is racing with unpoison.
1172 * In that case unpoison fails and memory failure runs
1173 * to the end.
1174 */
1175 if (PageHuge(page)) {
1176 pr_debug("MCE: Memory failure is now running on free hugepage %#lx\n", pfn);
1177 return 0;
1178 }
1148 if (TestClearPageHWPoison(p)) 1179 if (TestClearPageHWPoison(p))
1149 atomic_long_sub(nr_pages, &mce_bad_pages); 1180 atomic_long_sub(nr_pages, &mce_bad_pages);
1150 pr_info("MCE: Software-unpoisoned free page %#lx\n", pfn); 1181 pr_info("MCE: Software-unpoisoned free page %#lx\n", pfn);
@@ -1162,9 +1193,9 @@ int unpoison_memory(unsigned long pfn)
1162 pr_info("MCE: Software-unpoisoned page %#lx\n", pfn); 1193 pr_info("MCE: Software-unpoisoned page %#lx\n", pfn);
1163 atomic_long_sub(nr_pages, &mce_bad_pages); 1194 atomic_long_sub(nr_pages, &mce_bad_pages);
1164 freeit = 1; 1195 freeit = 1;
1196 if (PageHuge(page))
1197 clear_page_hwpoison_huge_page(page);
1165 } 1198 }
1166 if (PageHuge(p))
1167 clear_page_hwpoison_huge_page(page);
1168 unlock_page(page); 1199 unlock_page(page);
1169 1200
1170 put_page(page); 1201 put_page(page);
@@ -1178,7 +1209,11 @@ EXPORT_SYMBOL(unpoison_memory);
1178static struct page *new_page(struct page *p, unsigned long private, int **x) 1209static struct page *new_page(struct page *p, unsigned long private, int **x)
1179{ 1210{
1180 int nid = page_to_nid(p); 1211 int nid = page_to_nid(p);
1181 return alloc_pages_exact_node(nid, GFP_HIGHUSER_MOVABLE, 0); 1212 if (PageHuge(p))
1213 return alloc_huge_page_node(page_hstate(compound_head(p)),
1214 nid);
1215 else
1216 return alloc_pages_exact_node(nid, GFP_HIGHUSER_MOVABLE, 0);
1182} 1217}
1183 1218
1184/* 1219/*
@@ -1206,8 +1241,15 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags)
1206 * was free. 1241 * was free.
1207 */ 1242 */
1208 set_migratetype_isolate(p); 1243 set_migratetype_isolate(p);
1244 /*
1245 * When the target page is a free hugepage, just remove it
1246 * from free hugepage list.
1247 */
1209 if (!get_page_unless_zero(compound_head(p))) { 1248 if (!get_page_unless_zero(compound_head(p))) {
1210 if (is_free_buddy_page(p)) { 1249 if (PageHuge(p)) {
1250 pr_info("get_any_page: %#lx free huge page\n", pfn);
1251 ret = dequeue_hwpoisoned_huge_page(compound_head(p));
1252 } else if (is_free_buddy_page(p)) {
1211 pr_info("get_any_page: %#lx free buddy page\n", pfn); 1253 pr_info("get_any_page: %#lx free buddy page\n", pfn);
1212 /* Set hwpoison bit while page is still isolated */ 1254 /* Set hwpoison bit while page is still isolated */
1213 SetPageHWPoison(p); 1255 SetPageHWPoison(p);
@@ -1226,6 +1268,45 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags)
1226 return ret; 1268 return ret;
1227} 1269}
1228 1270
/*
 * soft_offline_huge_page - soft-offline the hugepage that contains @page.
 * @page:  page to offline; its compound head (hpage) is what gets queued
 *         for migration and marked poisoned
 * @flags: passed through unchanged to get_any_page()
 *
 * Calls get_any_page() first. A negative result is returned as-is and a
 * zero result skips migration and jumps to the poisoning step. Otherwise,
 * if the head page is already HWPoison, the function drops a page
 * reference and returns -EBUSY. If not, the head page is put on a private
 * list and handed to migrate_huge_pages(); a non-zero migration result is
 * returned to the caller, with positive values converted to -EIO.
 *
 * On the success path the hugepage is accounted in mce_bad_pages (only if
 * the head was not already HWPoison), marked with
 * set_page_hwpoison_huge_page() and passed to
 * dequeue_hwpoisoned_huge_page(), whose return value is not checked.
 *
 * Returns 0 on success or a negative errno on failure.
 */
1271static int soft_offline_huge_page(struct page *page, int flags)
1272{
1273 int ret;
1274 unsigned long pfn = page_to_pfn(page);
1275 struct page *hpage = compound_head(page);
1276 LIST_HEAD(pagelist);
1277
1278 ret = get_any_page(page, pfn, flags);
1279 if (ret < 0)
1280 return ret;
/* A zero result bypasses the already-poisoned check and the migration. */
1281 if (ret == 0)
1282 goto done;
1283
1284 if (PageHWPoison(hpage)) {
/* NOTE(review): presumably drops the reference taken in get_any_page() - confirm. */
1285 put_page(hpage);
1286 pr_debug("soft offline: %#lx hugepage already poisoned\n", pfn);
1287 return -EBUSY;
1288 }
1289
1290 /* Keep page count to indicate a given hugepage is isolated. */
1291
1292 list_add(&hpage->lru, &pagelist);
1293 ret = migrate_huge_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, 0);
1294 if (ret) {
1295 pr_debug("soft offline: %#lx: migration failed %d, type %lx\n",
1296 pfn, ret, page->flags);
/* A positive migration result is reported to the caller as -EIO. */
1297 if (ret > 0)
1298 ret = -EIO;
1299 return ret;
1300 }
1301done:
/* Account every base page of the hugepage, but only once per hugepage. */
1302 if (!PageHWPoison(hpage))
1303 atomic_long_add(1 << compound_order(hpage), &mce_bad_pages);
1304 set_page_hwpoison_huge_page(hpage);
1305 dequeue_hwpoisoned_huge_page(hpage);
1306 /* keep elevated page count for bad page */
1307 return ret;
1308}
1309
1229/** 1310/**
1230 * soft_offline_page - Soft offline a page. 1311 * soft_offline_page - Soft offline a page.
1231 * @page: page to offline 1312 * @page: page to offline
@@ -1253,6 +1334,9 @@ int soft_offline_page(struct page *page, int flags)
1253 int ret; 1334 int ret;
1254 unsigned long pfn = page_to_pfn(page); 1335 unsigned long pfn = page_to_pfn(page);
1255 1336
1337 if (PageHuge(page))
1338 return soft_offline_huge_page(page, flags);
1339
1256 ret = get_any_page(page, pfn, flags); 1340 ret = get_any_page(page, pfn, flags);
1257 if (ret < 0) 1341 if (ret < 0)
1258 return ret; 1342 return ret;