diff options
author | Xishi Qiu <qiuxishi@huawei.com> | 2013-02-22 19:33:59 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-02-23 20:50:15 -0500 |
commit | 0ebff32c3637e0ed551c017eb9599ac108ab36aa (patch) | |
tree | c6c85f235dac0a0f9f00b7f54f271ddb0a666f4b | |
parent | 194159fbcc0d6ac1351837d3cd7a27a4af0219a6 (diff) |
memory-failure: fix an error of mce_bad_pages statistics
When doing
$ echo paddr > /sys/devices/system/memory/soft_offline_page
to offline a *free* page, the value of mce_bad_pages will be increased, and
the page gets the HWPoison flag set, but it is still managed by the page
buddy allocator.
$ cat /proc/meminfo | grep HardwareCorrupted
shows the value.
If we offline the same page again, the value of mce_bad_pages will be
increased *again*, which means the counter is now incorrect. Assume the
page remains free during this short window.
soft_offline_page()
get_any_page()
"else if (is_free_buddy_page(p))" branch return 0
"goto done";
"atomic_long_add(1, &mce_bad_pages);"
This patch:
Move poisoned page check at the beginning of the function in order to
fix the error.
Signed-off-by: Xishi Qiu <qiuxishi@huawei.com>
Signed-off-by: Jiang Liu <jiang.liu@huawei.com>
Tested-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Wanpeng Li <liwanp@linux.vnet.ibm.com>
Cc: Andi Kleen <andi@firstfloor.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | mm/memory-failure.c | 38 |
1 files changed, 17 insertions, 21 deletions
diff --git a/mm/memory-failure.c b/mm/memory-failure.c index c6e4dd3e1c08..1a9242c53315 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c | |||
@@ -1419,18 +1419,17 @@ static int soft_offline_huge_page(struct page *page, int flags) | |||
1419 | unsigned long pfn = page_to_pfn(page); | 1419 | unsigned long pfn = page_to_pfn(page); |
1420 | struct page *hpage = compound_head(page); | 1420 | struct page *hpage = compound_head(page); |
1421 | 1421 | ||
1422 | if (PageHWPoison(hpage)) { | ||
1423 | pr_info("soft offline: %#lx hugepage already poisoned\n", pfn); | ||
1424 | return -EBUSY; | ||
1425 | } | ||
1426 | |||
1422 | ret = get_any_page(page, pfn, flags); | 1427 | ret = get_any_page(page, pfn, flags); |
1423 | if (ret < 0) | 1428 | if (ret < 0) |
1424 | return ret; | 1429 | return ret; |
1425 | if (ret == 0) | 1430 | if (ret == 0) |
1426 | goto done; | 1431 | goto done; |
1427 | 1432 | ||
1428 | if (PageHWPoison(hpage)) { | ||
1429 | put_page(hpage); | ||
1430 | pr_info("soft offline: %#lx hugepage already poisoned\n", pfn); | ||
1431 | return -EBUSY; | ||
1432 | } | ||
1433 | |||
1434 | /* Keep page count to indicate a given hugepage is isolated. */ | 1433 | /* Keep page count to indicate a given hugepage is isolated. */ |
1435 | ret = migrate_huge_page(hpage, new_page, MPOL_MF_MOVE_ALL, false, | 1434 | ret = migrate_huge_page(hpage, new_page, MPOL_MF_MOVE_ALL, false, |
1436 | MIGRATE_SYNC); | 1435 | MIGRATE_SYNC); |
@@ -1441,12 +1440,11 @@ static int soft_offline_huge_page(struct page *page, int flags) | |||
1441 | return ret; | 1440 | return ret; |
1442 | } | 1441 | } |
1443 | done: | 1442 | done: |
1444 | if (!PageHWPoison(hpage)) | 1443 | /* keep elevated page count for bad page */ |
1445 | atomic_long_add(1 << compound_trans_order(hpage), | 1444 | atomic_long_add(1 << compound_trans_order(hpage), &mce_bad_pages); |
1446 | &mce_bad_pages); | ||
1447 | set_page_hwpoison_huge_page(hpage); | 1445 | set_page_hwpoison_huge_page(hpage); |
1448 | dequeue_hwpoisoned_huge_page(hpage); | 1446 | dequeue_hwpoisoned_huge_page(hpage); |
1449 | /* keep elevated page count for bad page */ | 1447 | |
1450 | return ret; | 1448 | return ret; |
1451 | } | 1449 | } |
1452 | 1450 | ||
@@ -1488,6 +1486,11 @@ int soft_offline_page(struct page *page, int flags) | |||
1488 | } | 1486 | } |
1489 | } | 1487 | } |
1490 | 1488 | ||
1489 | if (PageHWPoison(page)) { | ||
1490 | pr_info("soft offline: %#lx page already poisoned\n", pfn); | ||
1491 | return -EBUSY; | ||
1492 | } | ||
1493 | |||
1491 | ret = get_any_page(page, pfn, flags); | 1494 | ret = get_any_page(page, pfn, flags); |
1492 | if (ret < 0) | 1495 | if (ret < 0) |
1493 | return ret; | 1496 | return ret; |
@@ -1519,19 +1522,11 @@ int soft_offline_page(struct page *page, int flags) | |||
1519 | return -EIO; | 1522 | return -EIO; |
1520 | } | 1523 | } |
1521 | 1524 | ||
1522 | lock_page(page); | ||
1523 | wait_on_page_writeback(page); | ||
1524 | |||
1525 | /* | 1525 | /* |
1526 | * Synchronized using the page lock with memory_failure() | 1526 | * Synchronized using the page lock with memory_failure() |
1527 | */ | 1527 | */ |
1528 | if (PageHWPoison(page)) { | 1528 | lock_page(page); |
1529 | unlock_page(page); | 1529 | wait_on_page_writeback(page); |
1530 | put_page(page); | ||
1531 | pr_info("soft offline: %#lx page already poisoned\n", pfn); | ||
1532 | return -EBUSY; | ||
1533 | } | ||
1534 | |||
1535 | /* | 1530 | /* |
1536 | * Try to invalidate first. This should work for | 1531 | * Try to invalidate first. This should work for |
1537 | * non dirty unmapped page cache pages. | 1532 | * non dirty unmapped page cache pages. |
@@ -1583,8 +1578,9 @@ int soft_offline_page(struct page *page, int flags) | |||
1583 | return ret; | 1578 | return ret; |
1584 | 1579 | ||
1585 | done: | 1580 | done: |
1581 | /* keep elevated page count for bad page */ | ||
1586 | atomic_long_add(1, &mce_bad_pages); | 1582 | atomic_long_add(1, &mce_bad_pages); |
1587 | SetPageHWPoison(page); | 1583 | SetPageHWPoison(page); |
1588 | /* keep elevated page count for bad page */ | 1584 | |
1589 | return ret; | 1585 | return ret; |
1590 | } | 1586 | } |