aboutsummaryrefslogtreecommitdiffstats
path: root/mm/memory-failure.c
diff options
context:
space:
mode:
author: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com> 2013-02-22 19:34:03 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2013-02-23 20:50:15 -0500
commit: af8fae7c08862bb85c5cf445bf9b36314b82111f (patch)
tree: d4a2a244e57a04861f1db701831fe579e0aa1b76 /mm/memory-failure.c
parent: 293c07e31ab5a0b8df8c19b2a9e5c6fa30308849 (diff)
mm/memory-failure.c: clean up soft_offline_page()
Currently soft_offline_page() is hard to maintain because it has many return points and goto statements. All of this mess comes from get_any_page(). This function should only get the page refcount, as the name implies, but it also performs some page-isolating actions such as SetPageHWPoison() and dequeuing a hugepage. This patch corrects that and introduces some internal subroutines to make the soft offlining code more readable and maintainable. Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com> Reviewed-by: Andi Kleen <andi@firstfloor.org> Cc: Tony Luck <tony.luck@intel.com> Cc: Wu Fengguang <fengguang.wu@intel.com> Cc: Xishi Qiu <qiuxishi@huawei.com> Cc: Jiang Liu <jiang.liu@huawei.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/memory-failure.c')
-rw-r--r--mm/memory-failure.c156
1 files changed, 87 insertions, 69 deletions
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index c95e19af510b..9cab165fd668 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1368,7 +1368,7 @@ static struct page *new_page(struct page *p, unsigned long private, int **x)
1368 * that is not free, and 1 for any other page type. 1368 * that is not free, and 1 for any other page type.
1369 * For 1 the page is returned with increased page count, otherwise not. 1369 * For 1 the page is returned with increased page count, otherwise not.
1370 */ 1370 */
1371static int get_any_page(struct page *p, unsigned long pfn, int flags) 1371static int __get_any_page(struct page *p, unsigned long pfn, int flags)
1372{ 1372{
1373 int ret; 1373 int ret;
1374 1374
@@ -1393,11 +1393,9 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags)
1393 if (!get_page_unless_zero(compound_head(p))) { 1393 if (!get_page_unless_zero(compound_head(p))) {
1394 if (PageHuge(p)) { 1394 if (PageHuge(p)) {
1395 pr_info("%s: %#lx free huge page\n", __func__, pfn); 1395 pr_info("%s: %#lx free huge page\n", __func__, pfn);
1396 ret = dequeue_hwpoisoned_huge_page(compound_head(p)); 1396 ret = 0;
1397 } else if (is_free_buddy_page(p)) { 1397 } else if (is_free_buddy_page(p)) {
1398 pr_info("%s: %#lx free buddy page\n", __func__, pfn); 1398 pr_info("%s: %#lx free buddy page\n", __func__, pfn);
1399 /* Set hwpoison bit while page is still isolated */
1400 SetPageHWPoison(p);
1401 ret = 0; 1399 ret = 0;
1402 } else { 1400 } else {
1403 pr_info("%s: %#lx: unknown zero refcount page type %lx\n", 1401 pr_info("%s: %#lx: unknown zero refcount page type %lx\n",
@@ -1413,23 +1411,48 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags)
1413 return ret; 1411 return ret;
1414} 1412}
1415 1413
1414static int get_any_page(struct page *page, unsigned long pfn, int flags)
1415{
1416 int ret = __get_any_page(page, pfn, flags);
1417
1418 if (ret == 1 && !PageHuge(page) && !PageLRU(page)) {
1419 /*
1420 * Try to free it.
1421 */
1422 put_page(page);
1423 shake_page(page, 1);
1424
1425 /*
1426 * Did it turn free?
1427 */
1428 ret = __get_any_page(page, pfn, 0);
1429 if (!PageLRU(page)) {
1430 pr_info("soft_offline: %#lx: unknown non LRU page type %lx\n",
1431 pfn, page->flags);
1432 return -EIO;
1433 }
1434 }
1435 return ret;
1436}
1437
1416static int soft_offline_huge_page(struct page *page, int flags) 1438static int soft_offline_huge_page(struct page *page, int flags)
1417{ 1439{
1418 int ret; 1440 int ret;
1419 unsigned long pfn = page_to_pfn(page); 1441 unsigned long pfn = page_to_pfn(page);
1420 struct page *hpage = compound_head(page); 1442 struct page *hpage = compound_head(page);
1421 1443
1444 /*
1445 * This double-check of PageHWPoison is to avoid the race with
1446 * memory_failure(). See also comment in __soft_offline_page().
1447 */
1448 lock_page(hpage);
1422 if (PageHWPoison(hpage)) { 1449 if (PageHWPoison(hpage)) {
1450 unlock_page(hpage);
1451 put_page(hpage);
1423 pr_info("soft offline: %#lx hugepage already poisoned\n", pfn); 1452 pr_info("soft offline: %#lx hugepage already poisoned\n", pfn);
1424 ret = -EBUSY; 1453 return -EBUSY;
1425 goto out;
1426 } 1454 }
1427 1455 unlock_page(hpage);
1428 ret = get_any_page(page, pfn, flags);
1429 if (ret < 0)
1430 goto out;
1431 if (ret == 0)
1432 goto done;
1433 1456
1434 /* Keep page count to indicate a given hugepage is isolated. */ 1457 /* Keep page count to indicate a given hugepage is isolated. */
1435 ret = migrate_huge_page(hpage, new_page, MPOL_MF_MOVE_ALL, false, 1458 ret = migrate_huge_page(hpage, new_page, MPOL_MF_MOVE_ALL, false,
@@ -1438,17 +1461,18 @@ static int soft_offline_huge_page(struct page *page, int flags)
1438 if (ret) { 1461 if (ret) {
1439 pr_info("soft offline: %#lx: migration failed %d, type %lx\n", 1462 pr_info("soft offline: %#lx: migration failed %d, type %lx\n",
1440 pfn, ret, page->flags); 1463 pfn, ret, page->flags);
1441 goto out; 1464 } else {
1465 set_page_hwpoison_huge_page(hpage);
1466 dequeue_hwpoisoned_huge_page(hpage);
1467 atomic_long_add(1 << compound_trans_order(hpage),
1468 &num_poisoned_pages);
1442 } 1469 }
1443done:
1444 /* keep elevated page count for bad page */ 1470 /* keep elevated page count for bad page */
1445 atomic_long_add(1 << compound_trans_order(hpage), &num_poisoned_pages);
1446 set_page_hwpoison_huge_page(hpage);
1447 dequeue_hwpoisoned_huge_page(hpage);
1448out:
1449 return ret; 1471 return ret;
1450} 1472}
1451 1473
1474static int __soft_offline_page(struct page *page, int flags);
1475
1452/** 1476/**
1453 * soft_offline_page - Soft offline a page. 1477 * soft_offline_page - Soft offline a page.
1454 * @page: page to offline 1478 * @page: page to offline
@@ -1477,62 +1501,60 @@ int soft_offline_page(struct page *page, int flags)
1477 unsigned long pfn = page_to_pfn(page); 1501 unsigned long pfn = page_to_pfn(page);
1478 struct page *hpage = compound_trans_head(page); 1502 struct page *hpage = compound_trans_head(page);
1479 1503
1480 if (PageHuge(page)) { 1504 if (PageHWPoison(page)) {
1481 ret = soft_offline_huge_page(page, flags); 1505 pr_info("soft offline: %#lx page already poisoned\n", pfn);
1482 goto out; 1506 return -EBUSY;
1483 } 1507 }
1484 if (PageTransHuge(hpage)) { 1508 if (!PageHuge(page) && PageTransHuge(hpage)) {
1485 if (PageAnon(hpage) && unlikely(split_huge_page(hpage))) { 1509 if (PageAnon(hpage) && unlikely(split_huge_page(hpage))) {
1486 pr_info("soft offline: %#lx: failed to split THP\n", 1510 pr_info("soft offline: %#lx: failed to split THP\n",
1487 pfn); 1511 pfn);
1488 ret = -EBUSY; 1512 return -EBUSY;
1489 goto out;
1490 } 1513 }
1491 } 1514 }
1492 1515
1493 if (PageHWPoison(page)) {
1494 pr_info("soft offline: %#lx page already poisoned\n", pfn);
1495 ret = -EBUSY;
1496 goto out;
1497 }
1498
1499 ret = get_any_page(page, pfn, flags); 1516 ret = get_any_page(page, pfn, flags);
1500 if (ret < 0) 1517 if (ret < 0)
1501 goto out; 1518 return ret;
1502 if (ret == 0) 1519 if (ret) { /* for in-use pages */
1503 goto done; 1520 if (PageHuge(page))
1504 1521 ret = soft_offline_huge_page(page, flags);
1505 /* 1522 else
1506 * Page cache page we can handle? 1523 ret = __soft_offline_page(page, flags);
1507 */ 1524 } else { /* for free pages */
1508 if (!PageLRU(page)) { 1525 if (PageHuge(page)) {
1509 /* 1526 set_page_hwpoison_huge_page(hpage);
1510 * Try to free it. 1527 dequeue_hwpoisoned_huge_page(hpage);
1511 */ 1528 atomic_long_add(1 << compound_trans_order(hpage),
1512 put_page(page); 1529 &num_poisoned_pages);
1513 shake_page(page, 1); 1530 } else {
1514 1531 SetPageHWPoison(page);
1515 /* 1532 atomic_long_inc(&num_poisoned_pages);
1516 * Did it turn free? 1533 }
1517 */
1518 ret = get_any_page(page, pfn, 0);
1519 if (ret < 0)
1520 goto out;
1521 if (ret == 0)
1522 goto done;
1523 }
1524 if (!PageLRU(page)) {
1525 pr_info("soft_offline: %#lx: unknown non LRU page type %lx\n",
1526 pfn, page->flags);
1527 ret = -EIO;
1528 goto out;
1529 } 1534 }
1535 /* keep elevated page count for bad page */
1536 return ret;
1537}
1538
1539static int __soft_offline_page(struct page *page, int flags)
1540{
1541 int ret;
1542 unsigned long pfn = page_to_pfn(page);
1530 1543
1531 /* 1544 /*
1532 * Synchronized using the page lock with memory_failure() 1545 * Check PageHWPoison again inside page lock because PageHWPoison
1546 * is set by memory_failure() outside page lock. Note that
1547 * memory_failure() also double-checks PageHWPoison inside page lock,
1548 * so there's no race between soft_offline_page() and memory_failure().
1533 */ 1549 */
1534 lock_page(page); 1550 lock_page(page);
1535 wait_on_page_writeback(page); 1551 wait_on_page_writeback(page);
1552 if (PageHWPoison(page)) {
1553 unlock_page(page);
1554 put_page(page);
1555 pr_info("soft offline: %#lx page already poisoned\n", pfn);
1556 return -EBUSY;
1557 }
1536 /* 1558 /*
1537 * Try to invalidate first. This should work for 1559 * Try to invalidate first. This should work for
1538 * non dirty unmapped page cache pages. 1560 * non dirty unmapped page cache pages.
@@ -1545,9 +1567,10 @@ int soft_offline_page(struct page *page, int flags)
1545 */ 1567 */
1546 if (ret == 1) { 1568 if (ret == 1) {
1547 put_page(page); 1569 put_page(page);
1548 ret = 0;
1549 pr_info("soft_offline: %#lx: invalidated\n", pfn); 1570 pr_info("soft_offline: %#lx: invalidated\n", pfn);
1550 goto done; 1571 SetPageHWPoison(page);
1572 atomic_long_inc(&num_poisoned_pages);
1573 return 0;
1551 } 1574 }
1552 1575
1553 /* 1576 /*
@@ -1575,18 +1598,13 @@ int soft_offline_page(struct page *page, int flags)
1575 pfn, ret, page->flags); 1598 pfn, ret, page->flags);
1576 if (ret > 0) 1599 if (ret > 0)
1577 ret = -EIO; 1600 ret = -EIO;
1601 } else {
1602 SetPageHWPoison(page);
1603 atomic_long_inc(&num_poisoned_pages);
1578 } 1604 }
1579 } else { 1605 } else {
1580 pr_info("soft offline: %#lx: isolation failed: %d, page count %d, type %lx\n", 1606 pr_info("soft offline: %#lx: isolation failed: %d, page count %d, type %lx\n",
1581 pfn, ret, page_count(page), page->flags); 1607 pfn, ret, page_count(page), page->flags);
1582 } 1608 }
1583 if (ret)
1584 goto out;
1585
1586done:
1587 /* keep elevated page count for bad page */
1588 atomic_long_inc(&num_poisoned_pages);
1589 SetPageHWPoison(page);
1590out:
1591 return ret; 1609 return ret;
1592} 1610}