about summary refs log tree commit diff stats
path: root/mm/memory-failure.c
diff options
context:
space:
mode:
author	Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>	2010-09-07 21:19:39 -0400
committer	Andi Kleen <ak@linux.intel.com>	2010-10-08 03:32:45 -0400
commit	d950b95882f3dc47e86f1496cd3f7fef540d6d6b (patch)
tree	7eea4cc7dca413c29bbb2d935197bd2da352a505 /mm/memory-failure.c
parent	8c6c2ecb44667f7204e9d2b89c4c1f42edc5a196 (diff)
HWPOISON, hugetlb: soft offlining for hugepage
This patch extends soft offlining framework to support hugepage. When memory corrected errors occur repeatedly on a hugepage, we can choose to stop using it by migrating data onto another hugepage and disabling the original (maybe half-broken) one.

ChangeLog since v4:
- branch soft_offline_page() for hugepage

ChangeLog since v3:
- remove comment about "ToDo: hugepage soft-offline"

ChangeLog since v2:
- move refcount handling into isolate_lru_page()

ChangeLog since v1:
- add double check in isolating hwpoisoned hugepage
- define free/non-free checker for hugepage
- postpone calling put_page() for hugepage in soft_offline_page()

Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Diffstat (limited to 'mm/memory-failure.c')
-rw-r--r--	mm/memory-failure.c	59
1 file changed, 55 insertions, 4 deletions
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 333f87da1845..74eb425010af 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -693,8 +693,6 @@ static int me_swapcache_clean(struct page *p, unsigned long pfn)
  * Issues:
  * - Error on hugepage is contained in hugepage unit (not in raw page unit.)
  *   To narrow down kill region to one page, we need to break up pmd.
- * - To support soft-offlining for hugepage, we need to support hugepage
- *   migration.
  */
 static int me_huge_page(struct page *p, unsigned long pfn)
 {
@@ -1220,7 +1218,11 @@ EXPORT_SYMBOL(unpoison_memory);
 static struct page *new_page(struct page *p, unsigned long private, int **x)
 {
 	int nid = page_to_nid(p);
-	return alloc_pages_exact_node(nid, GFP_HIGHUSER_MOVABLE, 0);
+	if (PageHuge(p))
+		return alloc_huge_page_node(page_hstate(compound_head(p)),
+						   nid);
+	else
+		return alloc_pages_exact_node(nid, GFP_HIGHUSER_MOVABLE, 0);
 }
 
 /*
@@ -1248,8 +1250,15 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags)
 	 * was free.
 	 */
 	set_migratetype_isolate(p);
+	/*
+	 * When the target page is a free hugepage, just remove it
+	 * from free hugepage list.
+	 */
 	if (!get_page_unless_zero(compound_head(p))) {
-		if (is_free_buddy_page(p)) {
+		if (PageHuge(p)) {
+			pr_debug("get_any_page: %#lx free huge page\n", pfn);
+			ret = dequeue_hwpoisoned_huge_page(compound_head(p));
+		} else if (is_free_buddy_page(p)) {
 			pr_debug("get_any_page: %#lx free buddy page\n", pfn);
 			/* Set hwpoison bit while page is still isolated */
 			SetPageHWPoison(p);
@@ -1268,6 +1277,45 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags)
 	return ret;
 }
 
+static int soft_offline_huge_page(struct page *page, int flags)
+{
+	int ret;
+	unsigned long pfn = page_to_pfn(page);
+	struct page *hpage = compound_head(page);
+	LIST_HEAD(pagelist);
+
+	ret = get_any_page(page, pfn, flags);
+	if (ret < 0)
+		return ret;
+	if (ret == 0)
+		goto done;
+
+	if (PageHWPoison(hpage)) {
+		put_page(hpage);
+		pr_debug("soft offline: %#lx hugepage already poisoned\n", pfn);
+		return -EBUSY;
+	}
+
+	/* Keep page count to indicate a given hugepage is isolated. */
+
+	list_add(&hpage->lru, &pagelist);
+	ret = migrate_huge_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, 0);
+	if (ret) {
+		pr_debug("soft offline: %#lx: migration failed %d, type %lx\n",
+			 pfn, ret, page->flags);
+		if (ret > 0)
+			ret = -EIO;
+		return ret;
+	}
+done:
+	if (!PageHWPoison(hpage))
+		atomic_long_add(1 << compound_order(hpage), &mce_bad_pages);
+	set_page_hwpoison_huge_page(hpage);
+	dequeue_hwpoisoned_huge_page(hpage);
+	/* keep elevated page count for bad page */
+	return ret;
+}
+
 /**
  * soft_offline_page - Soft offline a page.
  * @page:	page to offline
@@ -1295,6 +1343,9 @@ int soft_offline_page(struct page *page, int flags)
 	int ret;
 	unsigned long pfn = page_to_pfn(page);
 
+	if (PageHuge(page))
+		return soft_offline_huge_page(page, flags);
+
 	ret = get_any_page(page, pfn, flags);
 	if (ret < 0)
 		return ret;