author     Xishi Qiu <qiuxishi@huawei.com>                      2013-02-22 19:33:59 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>       2013-02-23 20:50:15 -0500
commit     0ebff32c3637e0ed551c017eb9599ac108ab36aa (patch)
tree       c6c85f235dac0a0f9f00b7f54f271ddb0a666f4b
parent     194159fbcc0d6ac1351837d3cd7a27a4af0219a6 (diff)
memory-failure: fix an error of mce_bad_pages statistics
When doing

  $ echo paddr > /sys/devices/system/memory/soft_offline_page

to offline a *free* page, mce_bad_pages is incremented and the HWPoison
flag is set on the page, but the page is still managed by the page buddy
allocator.

  $ cat /proc/meminfo | grep HardwareCorrupted

shows the value.

If we offline the same page again, mce_bad_pages is incremented *again*,
so the value is now incorrect.  Assume the page is still free during this
short window:

soft_offline_page()
  get_any_page()
    "else if (is_free_buddy_page(p))" branch returns 0
      "goto done";
        "atomic_long_add(1, &mce_bad_pages);"

This patch moves the poisoned page check to the beginning of the function
in order to fix the error.

Signed-off-by: Xishi Qiu <qiuxishi@huawei.com>
Signed-off-by: Jiang Liu <jiang.liu@huawei.com>
Tested-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Wanpeng Li <liwanp@linux.vnet.ibm.com>
Cc: Andi Kleen <andi@firstfloor.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
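As an illustration only (not part of the patch), a minimal userspace repro
along the lines of the commands in the changelog might look like the sketch
below. It assumes the physical address of a genuinely free page is already
known and passed as argv[1] (the same "paddr" placeholder used above), and
that the kernel was built with soft-offline support; run it as root.

/* Hypothetical repro sketch for the mce_bad_pages double count. */
#include <stdio.h>
#include <string.h>

static void soft_offline(const char *paddr)
{
	FILE *f = fopen("/sys/devices/system/memory/soft_offline_page", "w");

	if (!f) {
		perror("soft_offline_page");
		return;
	}
	fprintf(f, "%s\n", paddr);
	fclose(f);
}

static void show_hardware_corrupted(void)
{
	char line[256];
	FILE *f = fopen("/proc/meminfo", "r");

	if (!f)
		return;
	while (fgets(line, sizeof(line), f))
		if (strstr(line, "HardwareCorrupted"))
			fputs(line, stdout);
	fclose(f);
}

int main(int argc, char **argv)
{
	if (argc < 2) {
		fprintf(stderr, "usage: %s <paddr-of-a-free-page>\n", argv[0]);
		return 1;
	}

	show_hardware_corrupted();	/* baseline */
	soft_offline(argv[1]);		/* first offline: counter goes up */
	show_hardware_corrupted();
	soft_offline(argv[1]);		/* same page again: before this patch,
					 * the counter goes up a second time */
	show_hardware_corrupted();
	return 0;
}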
-rw-r--r--  mm/memory-failure.c   38
1 file changed, 17 insertions(+), 21 deletions(-)
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index c6e4dd3e1c08..1a9242c53315 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1419,18 +1419,17 @@ static int soft_offline_huge_page(struct page *page, int flags)
 	unsigned long pfn = page_to_pfn(page);
 	struct page *hpage = compound_head(page);
 
+	if (PageHWPoison(hpage)) {
+		pr_info("soft offline: %#lx hugepage already poisoned\n", pfn);
+		return -EBUSY;
+	}
+
 	ret = get_any_page(page, pfn, flags);
 	if (ret < 0)
 		return ret;
 	if (ret == 0)
 		goto done;
 
-	if (PageHWPoison(hpage)) {
-		put_page(hpage);
-		pr_info("soft offline: %#lx hugepage already poisoned\n", pfn);
-		return -EBUSY;
-	}
-
 	/* Keep page count to indicate a given hugepage is isolated. */
 	ret = migrate_huge_page(hpage, new_page, MPOL_MF_MOVE_ALL, false,
 				MIGRATE_SYNC);
@@ -1441,12 +1440,11 @@ static int soft_offline_huge_page(struct page *page, int flags)
 		return ret;
 	}
 done:
-	if (!PageHWPoison(hpage))
-		atomic_long_add(1 << compound_trans_order(hpage),
-				&mce_bad_pages);
+	/* keep elevated page count for bad page */
+	atomic_long_add(1 << compound_trans_order(hpage), &mce_bad_pages);
 	set_page_hwpoison_huge_page(hpage);
 	dequeue_hwpoisoned_huge_page(hpage);
-	/* keep elevated page count for bad page */
+
 	return ret;
 }
 
@@ -1488,6 +1486,11 @@ int soft_offline_page(struct page *page, int flags)
 		}
 	}
 
+	if (PageHWPoison(page)) {
+		pr_info("soft offline: %#lx page already poisoned\n", pfn);
+		return -EBUSY;
+	}
+
 	ret = get_any_page(page, pfn, flags);
 	if (ret < 0)
 		return ret;
@@ -1519,19 +1522,11 @@ int soft_offline_page(struct page *page, int flags)
 		return -EIO;
 	}
 
-	lock_page(page);
-	wait_on_page_writeback(page);
-
 	/*
 	 * Synchronized using the page lock with memory_failure()
 	 */
-	if (PageHWPoison(page)) {
-		unlock_page(page);
-		put_page(page);
-		pr_info("soft offline: %#lx page already poisoned\n", pfn);
-		return -EBUSY;
-	}
-
+	lock_page(page);
+	wait_on_page_writeback(page);
 	/*
 	 * Try to invalidate first. This should work for
 	 * non dirty unmapped page cache pages.
@@ -1583,8 +1578,9 @@ int soft_offline_page(struct page *page, int flags)
 	return ret;
 
 done:
+	/* keep elevated page count for bad page */
 	atomic_long_add(1, &mce_bad_pages);
 	SetPageHWPoison(page);
-	/* keep elevated page count for bad page */
+
 	return ret;
 }
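Also for illustration only: a self-contained userspace model of the
reordering. The names bad_pages, poisoned and offline_free_page_* are
hypothetical stand-ins invented for this sketch, not kernel symbols. It
shows that once the poison check runs before the page lookup, a second
soft-offline of the same free page no longer bumps the counter.

/* Toy model of the soft_offline_page() ordering fix. */
#include <stdbool.h>
#include <stdio.h>

static long bad_pages;		/* models mce_bad_pages */
static bool poisoned;		/* models the page's HWPoison flag */

/* Old ordering: the free-buddy path jumps straight to "done" before any
 * HWPoison check, so every call counts the page again. */
static int offline_free_page_old(void)
{
	/* get_any_page(): free buddy page -> return 0 -> goto done */
	bad_pages++;
	poisoned = true;
	return 0;
}

/* New ordering: check the poison flag first, as the patch does. */
static int offline_free_page_new(void)
{
	if (poisoned) {
		printf("soft offline: page already poisoned\n");
		return -16;	/* -EBUSY */
	}
	bad_pages++;
	poisoned = true;
	return 0;
}

int main(void)
{
	bad_pages = 0; poisoned = false;
	offline_free_page_old();
	offline_free_page_old();
	printf("old ordering: bad_pages = %ld (double counted)\n", bad_pages);

	bad_pages = 0; poisoned = false;
	offline_free_page_new();
	offline_free_page_new();
	printf("new ordering: bad_pages = %ld\n", bad_pages);
	return 0;
}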