about summary refs log tree commit diff stats
path: root/mm/memory-failure.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/memory-failure.c')
-rw-r--r-- mm/memory-failure.c 202
1 files changed, 124 insertions, 78 deletions
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index c6e4dd3e1c08..df0694c6adef 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -61,7 +61,7 @@ int sysctl_memory_failure_early_kill __read_mostly = 0;
61 61
62int sysctl_memory_failure_recovery __read_mostly = 1; 62int sysctl_memory_failure_recovery __read_mostly = 1;
63 63
64atomic_long_t mce_bad_pages __read_mostly = ATOMIC_LONG_INIT(0); 64atomic_long_t num_poisoned_pages __read_mostly = ATOMIC_LONG_INIT(0);
65 65
66#if defined(CONFIG_HWPOISON_INJECT) || defined(CONFIG_HWPOISON_INJECT_MODULE) 66#if defined(CONFIG_HWPOISON_INJECT) || defined(CONFIG_HWPOISON_INJECT_MODULE)
67 67
@@ -784,12 +784,12 @@ static struct page_state {
784 { sc|dirty, sc|dirty, "dirty swapcache", me_swapcache_dirty }, 784 { sc|dirty, sc|dirty, "dirty swapcache", me_swapcache_dirty },
785 { sc|dirty, sc, "clean swapcache", me_swapcache_clean }, 785 { sc|dirty, sc, "clean swapcache", me_swapcache_clean },
786 786
787 { unevict|dirty, unevict|dirty, "dirty unevictable LRU", me_pagecache_dirty },
788 { unevict, unevict, "clean unevictable LRU", me_pagecache_clean },
789
790 { mlock|dirty, mlock|dirty, "dirty mlocked LRU", me_pagecache_dirty }, 787 { mlock|dirty, mlock|dirty, "dirty mlocked LRU", me_pagecache_dirty },
791 { mlock, mlock, "clean mlocked LRU", me_pagecache_clean }, 788 { mlock, mlock, "clean mlocked LRU", me_pagecache_clean },
792 789
790 { unevict|dirty, unevict|dirty, "dirty unevictable LRU", me_pagecache_dirty },
791 { unevict, unevict, "clean unevictable LRU", me_pagecache_clean },
792
793 { lru|dirty, lru|dirty, "dirty LRU", me_pagecache_dirty }, 793 { lru|dirty, lru|dirty, "dirty LRU", me_pagecache_dirty },
794 { lru|dirty, lru, "clean LRU", me_pagecache_clean }, 794 { lru|dirty, lru, "clean LRU", me_pagecache_clean },
795 795
@@ -1021,6 +1021,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
1021 struct page *hpage; 1021 struct page *hpage;
1022 int res; 1022 int res;
1023 unsigned int nr_pages; 1023 unsigned int nr_pages;
1024 unsigned long page_flags;
1024 1025
1025 if (!sysctl_memory_failure_recovery) 1026 if (!sysctl_memory_failure_recovery)
1026 panic("Memory failure from trap %d on page %lx", trapno, pfn); 1027 panic("Memory failure from trap %d on page %lx", trapno, pfn);
@@ -1039,8 +1040,18 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
1039 return 0; 1040 return 0;
1040 } 1041 }
1041 1042
1042 nr_pages = 1 << compound_trans_order(hpage); 1043 /*
1043 atomic_long_add(nr_pages, &mce_bad_pages); 1044 * Currently errors on hugetlbfs pages are measured in hugepage units,
1045 * so nr_pages should be 1 << compound_order. OTOH when errors are on
1046 * transparent hugepages, they are supposed to be split and error
1047 * measurement is done in normal page units. So nr_pages should be one
1048 * in this case.
1049 */
1050 if (PageHuge(p))
1051 nr_pages = 1 << compound_order(hpage);
1052 else /* normal page or thp */
1053 nr_pages = 1;
1054 atomic_long_add(nr_pages, &num_poisoned_pages);
1044 1055
1045 /* 1056 /*
1046 * We need/can do nothing about count=0 pages. 1057 * We need/can do nothing about count=0 pages.
@@ -1070,7 +1081,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
1070 if (!PageHWPoison(hpage) 1081 if (!PageHWPoison(hpage)
1071 || (hwpoison_filter(p) && TestClearPageHWPoison(p)) 1082 || (hwpoison_filter(p) && TestClearPageHWPoison(p))
1072 || (p != hpage && TestSetPageHWPoison(hpage))) { 1083 || (p != hpage && TestSetPageHWPoison(hpage))) {
1073 atomic_long_sub(nr_pages, &mce_bad_pages); 1084 atomic_long_sub(nr_pages, &num_poisoned_pages);
1074 return 0; 1085 return 0;
1075 } 1086 }
1076 set_page_hwpoison_huge_page(hpage); 1087 set_page_hwpoison_huge_page(hpage);
@@ -1119,6 +1130,15 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
1119 lock_page(hpage); 1130 lock_page(hpage);
1120 1131
1121 /* 1132 /*
1133 * We use page flags to determine what action should be taken, but
1134 * the flags can be modified by the error containment action. One
1135 * example is an mlocked page, where PG_mlocked is cleared by
1136 * page_remove_rmap() in try_to_unmap_one(). So to determine page status
1137 * correctly, we save a copy of the page flags at this time.
1138 */
1139 page_flags = p->flags;
1140
1141 /*
1122 * unpoison always clear PG_hwpoison inside page lock 1142 * unpoison always clear PG_hwpoison inside page lock
1123 */ 1143 */
1124 if (!PageHWPoison(p)) { 1144 if (!PageHWPoison(p)) {
@@ -1128,7 +1148,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
1128 } 1148 }
1129 if (hwpoison_filter(p)) { 1149 if (hwpoison_filter(p)) {
1130 if (TestClearPageHWPoison(p)) 1150 if (TestClearPageHWPoison(p))
1131 atomic_long_sub(nr_pages, &mce_bad_pages); 1151 atomic_long_sub(nr_pages, &num_poisoned_pages);
1132 unlock_page(hpage); 1152 unlock_page(hpage);
1133 put_page(hpage); 1153 put_page(hpage);
1134 return 0; 1154 return 0;
@@ -1176,12 +1196,19 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
1176 } 1196 }
1177 1197
1178 res = -EBUSY; 1198 res = -EBUSY;
1179 for (ps = error_states;; ps++) { 1199 /*
1180 if ((p->flags & ps->mask) == ps->res) { 1200 * The first check uses the current page flags which may not have any
1181 res = page_action(ps, p, pfn); 1201 * relevant information. The second check with the saved page flags is
1202 * carried out only if the first check can't determine the page status.
1203 */
1204 for (ps = error_states;; ps++)
1205 if ((p->flags & ps->mask) == ps->res)
1182 break; 1206 break;
1183 } 1207 if (!ps->mask)
1184 } 1208 for (ps = error_states;; ps++)
1209 if ((page_flags & ps->mask) == ps->res)
1210 break;
1211 res = page_action(ps, p, pfn);
1185out: 1212out:
1186 unlock_page(hpage); 1213 unlock_page(hpage);
1187 return res; 1214 return res;
@@ -1323,7 +1350,7 @@ int unpoison_memory(unsigned long pfn)
1323 return 0; 1350 return 0;
1324 } 1351 }
1325 if (TestClearPageHWPoison(p)) 1352 if (TestClearPageHWPoison(p))
1326 atomic_long_sub(nr_pages, &mce_bad_pages); 1353 atomic_long_sub(nr_pages, &num_poisoned_pages);
1327 pr_info("MCE: Software-unpoisoned free page %#lx\n", pfn); 1354 pr_info("MCE: Software-unpoisoned free page %#lx\n", pfn);
1328 return 0; 1355 return 0;
1329 } 1356 }
@@ -1337,7 +1364,7 @@ int unpoison_memory(unsigned long pfn)
1337 */ 1364 */
1338 if (TestClearPageHWPoison(page)) { 1365 if (TestClearPageHWPoison(page)) {
1339 pr_info("MCE: Software-unpoisoned page %#lx\n", pfn); 1366 pr_info("MCE: Software-unpoisoned page %#lx\n", pfn);
1340 atomic_long_sub(nr_pages, &mce_bad_pages); 1367 atomic_long_sub(nr_pages, &num_poisoned_pages);
1341 freeit = 1; 1368 freeit = 1;
1342 if (PageHuge(page)) 1369 if (PageHuge(page))
1343 clear_page_hwpoison_huge_page(page); 1370 clear_page_hwpoison_huge_page(page);
@@ -1368,7 +1395,7 @@ static struct page *new_page(struct page *p, unsigned long private, int **x)
1368 * that is not free, and 1 for any other page type. 1395 * that is not free, and 1 for any other page type.
1369 * For 1 the page is returned with increased page count, otherwise not. 1396 * For 1 the page is returned with increased page count, otherwise not.
1370 */ 1397 */
1371static int get_any_page(struct page *p, unsigned long pfn, int flags) 1398static int __get_any_page(struct page *p, unsigned long pfn, int flags)
1372{ 1399{
1373 int ret; 1400 int ret;
1374 1401
@@ -1393,11 +1420,9 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags)
1393 if (!get_page_unless_zero(compound_head(p))) { 1420 if (!get_page_unless_zero(compound_head(p))) {
1394 if (PageHuge(p)) { 1421 if (PageHuge(p)) {
1395 pr_info("%s: %#lx free huge page\n", __func__, pfn); 1422 pr_info("%s: %#lx free huge page\n", __func__, pfn);
1396 ret = dequeue_hwpoisoned_huge_page(compound_head(p)); 1423 ret = 0;
1397 } else if (is_free_buddy_page(p)) { 1424 } else if (is_free_buddy_page(p)) {
1398 pr_info("%s: %#lx free buddy page\n", __func__, pfn); 1425 pr_info("%s: %#lx free buddy page\n", __func__, pfn);
1399 /* Set hwpoison bit while page is still isolated */
1400 SetPageHWPoison(p);
1401 ret = 0; 1426 ret = 0;
1402 } else { 1427 } else {
1403 pr_info("%s: %#lx: unknown zero refcount page type %lx\n", 1428 pr_info("%s: %#lx: unknown zero refcount page type %lx\n",
@@ -1413,43 +1438,68 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags)
1413 return ret; 1438 return ret;
1414} 1439}
1415 1440
1441static int get_any_page(struct page *page, unsigned long pfn, int flags)
1442{
1443 int ret = __get_any_page(page, pfn, flags);
1444
1445 if (ret == 1 && !PageHuge(page) && !PageLRU(page)) {
1446 /*
1447 * Try to free it.
1448 */
1449 put_page(page);
1450 shake_page(page, 1);
1451
1452 /*
1453 * Did it turn free?
1454 */
1455 ret = __get_any_page(page, pfn, 0);
1456 if (!PageLRU(page)) {
1457 pr_info("soft_offline: %#lx: unknown non LRU page type %lx\n",
1458 pfn, page->flags);
1459 return -EIO;
1460 }
1461 }
1462 return ret;
1463}
1464
1416static int soft_offline_huge_page(struct page *page, int flags) 1465static int soft_offline_huge_page(struct page *page, int flags)
1417{ 1466{
1418 int ret; 1467 int ret;
1419 unsigned long pfn = page_to_pfn(page); 1468 unsigned long pfn = page_to_pfn(page);
1420 struct page *hpage = compound_head(page); 1469 struct page *hpage = compound_head(page);
1421 1470
1422 ret = get_any_page(page, pfn, flags); 1471 /*
1423 if (ret < 0) 1472 * This double-check of PageHWPoison is to avoid the race with
1424 return ret; 1473 * memory_failure(). See also comment in __soft_offline_page().
1425 if (ret == 0) 1474 */
1426 goto done; 1475 lock_page(hpage);
1427
1428 if (PageHWPoison(hpage)) { 1476 if (PageHWPoison(hpage)) {
1477 unlock_page(hpage);
1429 put_page(hpage); 1478 put_page(hpage);
1430 pr_info("soft offline: %#lx hugepage already poisoned\n", pfn); 1479 pr_info("soft offline: %#lx hugepage already poisoned\n", pfn);
1431 return -EBUSY; 1480 return -EBUSY;
1432 } 1481 }
1482 unlock_page(hpage);
1433 1483
1434 /* Keep page count to indicate a given hugepage is isolated. */ 1484 /* Keep page count to indicate a given hugepage is isolated. */
1435 ret = migrate_huge_page(hpage, new_page, MPOL_MF_MOVE_ALL, false, 1485 ret = migrate_huge_page(hpage, new_page, MPOL_MF_MOVE_ALL,
1436 MIGRATE_SYNC); 1486 MIGRATE_SYNC);
1437 put_page(hpage); 1487 put_page(hpage);
1438 if (ret) { 1488 if (ret) {
1439 pr_info("soft offline: %#lx: migration failed %d, type %lx\n", 1489 pr_info("soft offline: %#lx: migration failed %d, type %lx\n",
1440 pfn, ret, page->flags); 1490 pfn, ret, page->flags);
1441 return ret; 1491 } else {
1442 } 1492 set_page_hwpoison_huge_page(hpage);
1443done: 1493 dequeue_hwpoisoned_huge_page(hpage);
1444 if (!PageHWPoison(hpage))
1445 atomic_long_add(1 << compound_trans_order(hpage), 1494 atomic_long_add(1 << compound_trans_order(hpage),
1446 &mce_bad_pages); 1495 &num_poisoned_pages);
1447 set_page_hwpoison_huge_page(hpage); 1496 }
1448 dequeue_hwpoisoned_huge_page(hpage);
1449 /* keep elevated page count for bad page */ 1497 /* keep elevated page count for bad page */
1450 return ret; 1498 return ret;
1451} 1499}
1452 1500
1501static int __soft_offline_page(struct page *page, int flags);
1502
1453/** 1503/**
1454 * soft_offline_page - Soft offline a page. 1504 * soft_offline_page - Soft offline a page.
1455 * @page: page to offline 1505 * @page: page to offline
@@ -1478,9 +1528,11 @@ int soft_offline_page(struct page *page, int flags)
1478 unsigned long pfn = page_to_pfn(page); 1528 unsigned long pfn = page_to_pfn(page);
1479 struct page *hpage = compound_trans_head(page); 1529 struct page *hpage = compound_trans_head(page);
1480 1530
1481 if (PageHuge(page)) 1531 if (PageHWPoison(page)) {
1482 return soft_offline_huge_page(page, flags); 1532 pr_info("soft offline: %#lx page already poisoned\n", pfn);
1483 if (PageTransHuge(hpage)) { 1533 return -EBUSY;
1534 }
1535 if (!PageHuge(page) && PageTransHuge(hpage)) {
1484 if (PageAnon(hpage) && unlikely(split_huge_page(hpage))) { 1536 if (PageAnon(hpage) && unlikely(split_huge_page(hpage))) {
1485 pr_info("soft offline: %#lx: failed to split THP\n", 1537 pr_info("soft offline: %#lx: failed to split THP\n",
1486 pfn); 1538 pfn);
@@ -1491,47 +1543,45 @@ int soft_offline_page(struct page *page, int flags)
1491 ret = get_any_page(page, pfn, flags); 1543 ret = get_any_page(page, pfn, flags);
1492 if (ret < 0) 1544 if (ret < 0)
1493 return ret; 1545 return ret;
1494 if (ret == 0) 1546 if (ret) { /* for in-use pages */
1495 goto done; 1547 if (PageHuge(page))
1496 1548 ret = soft_offline_huge_page(page, flags);
1497 /* 1549 else
1498 * Page cache page we can handle? 1550 ret = __soft_offline_page(page, flags);
1499 */ 1551 } else { /* for free pages */
1500 if (!PageLRU(page)) { 1552 if (PageHuge(page)) {
1501 /* 1553 set_page_hwpoison_huge_page(hpage);
1502 * Try to free it. 1554 dequeue_hwpoisoned_huge_page(hpage);
1503 */ 1555 atomic_long_add(1 << compound_trans_order(hpage),
1504 put_page(page); 1556 &num_poisoned_pages);
1505 shake_page(page, 1); 1557 } else {
1506 1558 SetPageHWPoison(page);
1507 /* 1559 atomic_long_inc(&num_poisoned_pages);
1508 * Did it turn free? 1560 }
1509 */
1510 ret = get_any_page(page, pfn, 0);
1511 if (ret < 0)
1512 return ret;
1513 if (ret == 0)
1514 goto done;
1515 }
1516 if (!PageLRU(page)) {
1517 pr_info("soft_offline: %#lx: unknown non LRU page type %lx\n",
1518 pfn, page->flags);
1519 return -EIO;
1520 } 1561 }
1562 /* keep elevated page count for bad page */
1563 return ret;
1564}
1521 1565
1522 lock_page(page); 1566static int __soft_offline_page(struct page *page, int flags)
1523 wait_on_page_writeback(page); 1567{
1568 int ret;
1569 unsigned long pfn = page_to_pfn(page);
1524 1570
1525 /* 1571 /*
1526 * Synchronized using the page lock with memory_failure() 1572 * Check PageHWPoison again inside page lock because PageHWPoison
1573 * is set by memory_failure() outside page lock. Note that
1574 * memory_failure() also double-checks PageHWPoison inside page lock,
1575 * so there's no race between soft_offline_page() and memory_failure().
1527 */ 1576 */
1577 lock_page(page);
1578 wait_on_page_writeback(page);
1528 if (PageHWPoison(page)) { 1579 if (PageHWPoison(page)) {
1529 unlock_page(page); 1580 unlock_page(page);
1530 put_page(page); 1581 put_page(page);
1531 pr_info("soft offline: %#lx page already poisoned\n", pfn); 1582 pr_info("soft offline: %#lx page already poisoned\n", pfn);
1532 return -EBUSY; 1583 return -EBUSY;
1533 } 1584 }
1534
1535 /* 1585 /*
1536 * Try to invalidate first. This should work for 1586 * Try to invalidate first. This should work for
1537 * non dirty unmapped page cache pages. 1587 * non dirty unmapped page cache pages.
@@ -1544,9 +1594,10 @@ int soft_offline_page(struct page *page, int flags)
1544 */ 1594 */
1545 if (ret == 1) { 1595 if (ret == 1) {
1546 put_page(page); 1596 put_page(page);
1547 ret = 0;
1548 pr_info("soft_offline: %#lx: invalidated\n", pfn); 1597 pr_info("soft_offline: %#lx: invalidated\n", pfn);
1549 goto done; 1598 SetPageHWPoison(page);
1599 atomic_long_inc(&num_poisoned_pages);
1600 return 0;
1550 } 1601 }
1551 1602
1552 /* 1603 /*
@@ -1563,28 +1614,23 @@ int soft_offline_page(struct page *page, int flags)
1563 if (!ret) { 1614 if (!ret) {
1564 LIST_HEAD(pagelist); 1615 LIST_HEAD(pagelist);
1565 inc_zone_page_state(page, NR_ISOLATED_ANON + 1616 inc_zone_page_state(page, NR_ISOLATED_ANON +
1566 page_is_file_cache(page)); 1617 page_is_file_cache(page));
1567 list_add(&page->lru, &pagelist); 1618 list_add(&page->lru, &pagelist);
1568 ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, 1619 ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL,
1569 false, MIGRATE_SYNC, 1620 MIGRATE_SYNC, MR_MEMORY_FAILURE);
1570 MR_MEMORY_FAILURE);
1571 if (ret) { 1621 if (ret) {
1572 putback_lru_pages(&pagelist); 1622 putback_lru_pages(&pagelist);
1573 pr_info("soft offline: %#lx: migration failed %d, type %lx\n", 1623 pr_info("soft offline: %#lx: migration failed %d, type %lx\n",
1574 pfn, ret, page->flags); 1624 pfn, ret, page->flags);
1575 if (ret > 0) 1625 if (ret > 0)
1576 ret = -EIO; 1626 ret = -EIO;
1627 } else {
1628 SetPageHWPoison(page);
1629 atomic_long_inc(&num_poisoned_pages);
1577 } 1630 }
1578 } else { 1631 } else {
1579 pr_info("soft offline: %#lx: isolation failed: %d, page count %d, type %lx\n", 1632 pr_info("soft offline: %#lx: isolation failed: %d, page count %d, type %lx\n",
1580 pfn, ret, page_count(page), page->flags); 1633 pfn, ret, page_count(page), page->flags);
1581 } 1634 }
1582 if (ret)
1583 return ret;
1584
1585done:
1586 atomic_long_add(1, &mce_bad_pages);
1587 SetPageHWPoison(page);
1588 /* keep elevated page count for bad page */
1589 return ret; 1635 return ret;
1590} 1636}