diff options
author | Naoya Horiguchi <n-horiguchi@ah.jp.nec.com> | 2010-09-07 21:19:39 -0400 |
---|---|---|
committer | Andi Kleen <ak@linux.intel.com> | 2010-10-08 03:32:45 -0400 |
commit | d950b95882f3dc47e86f1496cd3f7fef540d6d6b (patch) | |
tree | 7eea4cc7dca413c29bbb2d935197bd2da352a505 /mm/memory-failure.c | |
parent | 8c6c2ecb44667f7204e9d2b89c4c1f42edc5a196 (diff) |
HWPOISON, hugetlb: soft offlining for hugepage
This patch extends the soft offlining framework to support hugepages.
When corrected memory errors occur repeatedly on a hugepage,
we can choose to stop using it by migrating its data onto another hugepage
and disabling the original (possibly half-broken) one.
ChangeLog since v4:
- branch soft_offline_page() for hugepage
ChangeLog since v3:
- remove comment about "ToDo: hugepage soft-offline"
ChangeLog since v2:
- move refcount handling into isolate_lru_page()
ChangeLog since v1:
- add double check in isolating hwpoisoned hugepage
- define free/non-free checker for hugepage
- postpone calling put_page() for hugepage in soft_offline_page()
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Diffstat (limited to 'mm/memory-failure.c')
-rw-r--r-- | mm/memory-failure.c | 59 |
1 files changed, 55 insertions, 4 deletions
diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 333f87da1845..74eb425010af 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c | |||
@@ -693,8 +693,6 @@ static int me_swapcache_clean(struct page *p, unsigned long pfn) | |||
693 | * Issues: | 693 | * Issues: |
694 | * - Error on hugepage is contained in hugepage unit (not in raw page unit.) | 694 | * - Error on hugepage is contained in hugepage unit (not in raw page unit.) |
695 | * To narrow down kill region to one page, we need to break up pmd. | 695 | * To narrow down kill region to one page, we need to break up pmd. |
696 | * - To support soft-offlining for hugepage, we need to support hugepage | ||
697 | * migration. | ||
698 | */ | 696 | */ |
699 | static int me_huge_page(struct page *p, unsigned long pfn) | 697 | static int me_huge_page(struct page *p, unsigned long pfn) |
700 | { | 698 | { |
@@ -1220,7 +1218,11 @@ EXPORT_SYMBOL(unpoison_memory); | |||
1220 | static struct page *new_page(struct page *p, unsigned long private, int **x) | 1218 | static struct page *new_page(struct page *p, unsigned long private, int **x) |
1221 | { | 1219 | { |
1222 | int nid = page_to_nid(p); | 1220 | int nid = page_to_nid(p); |
1223 | return alloc_pages_exact_node(nid, GFP_HIGHUSER_MOVABLE, 0); | 1221 | if (PageHuge(p)) |
1222 | return alloc_huge_page_node(page_hstate(compound_head(p)), | ||
1223 | nid); | ||
1224 | else | ||
1225 | return alloc_pages_exact_node(nid, GFP_HIGHUSER_MOVABLE, 0); | ||
1224 | } | 1226 | } |
1225 | 1227 | ||
1226 | /* | 1228 | /* |
@@ -1248,8 +1250,15 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags) | |||
1248 | * was free. | 1250 | * was free. |
1249 | */ | 1251 | */ |
1250 | set_migratetype_isolate(p); | 1252 | set_migratetype_isolate(p); |
1253 | /* | ||
1254 | * When the target page is a free hugepage, just remove it | ||
1255 | * from free hugepage list. | ||
1256 | */ | ||
1251 | if (!get_page_unless_zero(compound_head(p))) { | 1257 | if (!get_page_unless_zero(compound_head(p))) { |
1252 | if (is_free_buddy_page(p)) { | 1258 | if (PageHuge(p)) { |
1259 | pr_debug("get_any_page: %#lx free huge page\n", pfn); | ||
1260 | ret = dequeue_hwpoisoned_huge_page(compound_head(p)); | ||
1261 | } else if (is_free_buddy_page(p)) { | ||
1253 | pr_debug("get_any_page: %#lx free buddy page\n", pfn); | 1262 | pr_debug("get_any_page: %#lx free buddy page\n", pfn); |
1254 | /* Set hwpoison bit while page is still isolated */ | 1263 | /* Set hwpoison bit while page is still isolated */ |
1255 | SetPageHWPoison(p); | 1264 | SetPageHWPoison(p); |
@@ -1268,6 +1277,45 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags) | |||
1268 | return ret; | 1277 | return ret; |
1269 | } | 1278 | } |
1270 | 1279 | ||
1280 | static int soft_offline_huge_page(struct page *page, int flags) | ||
1281 | { | ||
1282 | int ret; | ||
1283 | unsigned long pfn = page_to_pfn(page); | ||
1284 | struct page *hpage = compound_head(page); | ||
1285 | LIST_HEAD(pagelist); | ||
1286 | |||
1287 | ret = get_any_page(page, pfn, flags); | ||
1288 | if (ret < 0) | ||
1289 | return ret; | ||
1290 | if (ret == 0) | ||
1291 | goto done; | ||
1292 | |||
1293 | if (PageHWPoison(hpage)) { | ||
1294 | put_page(hpage); | ||
1295 | pr_debug("soft offline: %#lx hugepage already poisoned\n", pfn); | ||
1296 | return -EBUSY; | ||
1297 | } | ||
1298 | |||
1299 | /* Keep page count to indicate a given hugepage is isolated. */ | ||
1300 | |||
1301 | list_add(&hpage->lru, &pagelist); | ||
1302 | ret = migrate_huge_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, 0); | ||
1303 | if (ret) { | ||
1304 | pr_debug("soft offline: %#lx: migration failed %d, type %lx\n", | ||
1305 | pfn, ret, page->flags); | ||
1306 | if (ret > 0) | ||
1307 | ret = -EIO; | ||
1308 | return ret; | ||
1309 | } | ||
1310 | done: | ||
1311 | if (!PageHWPoison(hpage)) | ||
1312 | atomic_long_add(1 << compound_order(hpage), &mce_bad_pages); | ||
1313 | set_page_hwpoison_huge_page(hpage); | ||
1314 | dequeue_hwpoisoned_huge_page(hpage); | ||
1315 | /* keep elevated page count for bad page */ | ||
1316 | return ret; | ||
1317 | } | ||
1318 | |||
1271 | /** | 1319 | /** |
1272 | * soft_offline_page - Soft offline a page. | 1320 | * soft_offline_page - Soft offline a page. |
1273 | * @page: page to offline | 1321 | * @page: page to offline |
@@ -1295,6 +1343,9 @@ int soft_offline_page(struct page *page, int flags) | |||
1295 | int ret; | 1343 | int ret; |
1296 | unsigned long pfn = page_to_pfn(page); | 1344 | unsigned long pfn = page_to_pfn(page); |
1297 | 1345 | ||
1346 | if (PageHuge(page)) | ||
1347 | return soft_offline_huge_page(page, flags); | ||
1348 | |||
1298 | ret = get_any_page(page, pfn, flags); | 1349 | ret = get_any_page(page, pfn, flags); |
1299 | if (ret < 0) | 1350 | if (ret < 0) |
1300 | return ret; | 1351 | return ret; |