diff options
Diffstat (limited to 'mm/memory-failure.c')
-rw-r--r-- | mm/memory-failure.c | 202 |
1 file changed, 124 insertions, 78 deletions
diff --git a/mm/memory-failure.c b/mm/memory-failure.c index c6e4dd3e1c08..df0694c6adef 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c | |||
@@ -61,7 +61,7 @@ int sysctl_memory_failure_early_kill __read_mostly = 0; | |||
61 | 61 | ||
62 | int sysctl_memory_failure_recovery __read_mostly = 1; | 62 | int sysctl_memory_failure_recovery __read_mostly = 1; |
63 | 63 | ||
64 | atomic_long_t mce_bad_pages __read_mostly = ATOMIC_LONG_INIT(0); | 64 | atomic_long_t num_poisoned_pages __read_mostly = ATOMIC_LONG_INIT(0); |
65 | 65 | ||
66 | #if defined(CONFIG_HWPOISON_INJECT) || defined(CONFIG_HWPOISON_INJECT_MODULE) | 66 | #if defined(CONFIG_HWPOISON_INJECT) || defined(CONFIG_HWPOISON_INJECT_MODULE) |
67 | 67 | ||
@@ -784,12 +784,12 @@ static struct page_state { | |||
784 | { sc|dirty, sc|dirty, "dirty swapcache", me_swapcache_dirty }, | 784 | { sc|dirty, sc|dirty, "dirty swapcache", me_swapcache_dirty }, |
785 | { sc|dirty, sc, "clean swapcache", me_swapcache_clean }, | 785 | { sc|dirty, sc, "clean swapcache", me_swapcache_clean }, |
786 | 786 | ||
787 | { unevict|dirty, unevict|dirty, "dirty unevictable LRU", me_pagecache_dirty }, | ||
788 | { unevict, unevict, "clean unevictable LRU", me_pagecache_clean }, | ||
789 | |||
790 | { mlock|dirty, mlock|dirty, "dirty mlocked LRU", me_pagecache_dirty }, | 787 | { mlock|dirty, mlock|dirty, "dirty mlocked LRU", me_pagecache_dirty }, |
791 | { mlock, mlock, "clean mlocked LRU", me_pagecache_clean }, | 788 | { mlock, mlock, "clean mlocked LRU", me_pagecache_clean }, |
792 | 789 | ||
790 | { unevict|dirty, unevict|dirty, "dirty unevictable LRU", me_pagecache_dirty }, | ||
791 | { unevict, unevict, "clean unevictable LRU", me_pagecache_clean }, | ||
792 | |||
793 | { lru|dirty, lru|dirty, "dirty LRU", me_pagecache_dirty }, | 793 | { lru|dirty, lru|dirty, "dirty LRU", me_pagecache_dirty }, |
794 | { lru|dirty, lru, "clean LRU", me_pagecache_clean }, | 794 | { lru|dirty, lru, "clean LRU", me_pagecache_clean }, |
795 | 795 | ||
@@ -1021,6 +1021,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags) | |||
1021 | struct page *hpage; | 1021 | struct page *hpage; |
1022 | int res; | 1022 | int res; |
1023 | unsigned int nr_pages; | 1023 | unsigned int nr_pages; |
1024 | unsigned long page_flags; | ||
1024 | 1025 | ||
1025 | if (!sysctl_memory_failure_recovery) | 1026 | if (!sysctl_memory_failure_recovery) |
1026 | panic("Memory failure from trap %d on page %lx", trapno, pfn); | 1027 | panic("Memory failure from trap %d on page %lx", trapno, pfn); |
@@ -1039,8 +1040,18 @@ int memory_failure(unsigned long pfn, int trapno, int flags) | |||
1039 | return 0; | 1040 | return 0; |
1040 | } | 1041 | } |
1041 | 1042 | ||
1042 | nr_pages = 1 << compound_trans_order(hpage); | 1043 | /* |
1043 | atomic_long_add(nr_pages, &mce_bad_pages); | 1044 | * Currently errors on hugetlbfs pages are measured in hugepage units, |
1045 | * so nr_pages should be 1 << compound_order. OTOH when errors are on | ||
1046 | * transparent hugepages, they are supposed to be split and error | ||
1047 | * measurement is done in normal page units. So nr_pages should be one | ||
1048 | * in this case. | ||
1049 | */ | ||
1050 | if (PageHuge(p)) | ||
1051 | nr_pages = 1 << compound_order(hpage); | ||
1052 | else /* normal page or thp */ | ||
1053 | nr_pages = 1; | ||
1054 | atomic_long_add(nr_pages, &num_poisoned_pages); | ||
1044 | 1055 | ||
1045 | /* | 1056 | /* |
1046 | * We need/can do nothing about count=0 pages. | 1057 | * We need/can do nothing about count=0 pages. |
@@ -1070,7 +1081,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags) | |||
1070 | if (!PageHWPoison(hpage) | 1081 | if (!PageHWPoison(hpage) |
1071 | || (hwpoison_filter(p) && TestClearPageHWPoison(p)) | 1082 | || (hwpoison_filter(p) && TestClearPageHWPoison(p)) |
1072 | || (p != hpage && TestSetPageHWPoison(hpage))) { | 1083 | || (p != hpage && TestSetPageHWPoison(hpage))) { |
1073 | atomic_long_sub(nr_pages, &mce_bad_pages); | 1084 | atomic_long_sub(nr_pages, &num_poisoned_pages); |
1074 | return 0; | 1085 | return 0; |
1075 | } | 1086 | } |
1076 | set_page_hwpoison_huge_page(hpage); | 1087 | set_page_hwpoison_huge_page(hpage); |
@@ -1119,6 +1130,15 @@ int memory_failure(unsigned long pfn, int trapno, int flags) | |||
1119 | lock_page(hpage); | 1130 | lock_page(hpage); |
1120 | 1131 | ||
1121 | /* | 1132 | /* |
1133 | * We use page flags to determine what action should be taken, but | ||
1134 | * the flags can be modified by the error containment action. One | ||
1135 | * example is an mlocked page, where PG_mlocked is cleared by | ||
1136 | * page_remove_rmap() in try_to_unmap_one(). So to determine page status | ||
1137 | * correctly, we save a copy of the page flags at this time. | ||
1138 | */ | ||
1139 | page_flags = p->flags; | ||
1140 | |||
1141 | /* | ||
1122 | * unpoison always clear PG_hwpoison inside page lock | 1142 | * unpoison always clear PG_hwpoison inside page lock |
1123 | */ | 1143 | */ |
1124 | if (!PageHWPoison(p)) { | 1144 | if (!PageHWPoison(p)) { |
@@ -1128,7 +1148,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags) | |||
1128 | } | 1148 | } |
1129 | if (hwpoison_filter(p)) { | 1149 | if (hwpoison_filter(p)) { |
1130 | if (TestClearPageHWPoison(p)) | 1150 | if (TestClearPageHWPoison(p)) |
1131 | atomic_long_sub(nr_pages, &mce_bad_pages); | 1151 | atomic_long_sub(nr_pages, &num_poisoned_pages); |
1132 | unlock_page(hpage); | 1152 | unlock_page(hpage); |
1133 | put_page(hpage); | 1153 | put_page(hpage); |
1134 | return 0; | 1154 | return 0; |
@@ -1176,12 +1196,19 @@ int memory_failure(unsigned long pfn, int trapno, int flags) | |||
1176 | } | 1196 | } |
1177 | 1197 | ||
1178 | res = -EBUSY; | 1198 | res = -EBUSY; |
1179 | for (ps = error_states;; ps++) { | 1199 | /* |
1180 | if ((p->flags & ps->mask) == ps->res) { | 1200 | * The first check uses the current page flags which may not have any |
1181 | res = page_action(ps, p, pfn); | 1201 | * relevant information. The second check with the saved page flags is |
1202 | * carried out only if the first check can't determine the page status. | ||
1203 | */ | ||
1204 | for (ps = error_states;; ps++) | ||
1205 | if ((p->flags & ps->mask) == ps->res) | ||
1182 | break; | 1206 | break; |
1183 | } | 1207 | if (!ps->mask) |
1184 | } | 1208 | for (ps = error_states;; ps++) |
1209 | if ((page_flags & ps->mask) == ps->res) | ||
1210 | break; | ||
1211 | res = page_action(ps, p, pfn); | ||
1185 | out: | 1212 | out: |
1186 | unlock_page(hpage); | 1213 | unlock_page(hpage); |
1187 | return res; | 1214 | return res; |
@@ -1323,7 +1350,7 @@ int unpoison_memory(unsigned long pfn) | |||
1323 | return 0; | 1350 | return 0; |
1324 | } | 1351 | } |
1325 | if (TestClearPageHWPoison(p)) | 1352 | if (TestClearPageHWPoison(p)) |
1326 | atomic_long_sub(nr_pages, &mce_bad_pages); | 1353 | atomic_long_sub(nr_pages, &num_poisoned_pages); |
1327 | pr_info("MCE: Software-unpoisoned free page %#lx\n", pfn); | 1354 | pr_info("MCE: Software-unpoisoned free page %#lx\n", pfn); |
1328 | return 0; | 1355 | return 0; |
1329 | } | 1356 | } |
@@ -1337,7 +1364,7 @@ int unpoison_memory(unsigned long pfn) | |||
1337 | */ | 1364 | */ |
1338 | if (TestClearPageHWPoison(page)) { | 1365 | if (TestClearPageHWPoison(page)) { |
1339 | pr_info("MCE: Software-unpoisoned page %#lx\n", pfn); | 1366 | pr_info("MCE: Software-unpoisoned page %#lx\n", pfn); |
1340 | atomic_long_sub(nr_pages, &mce_bad_pages); | 1367 | atomic_long_sub(nr_pages, &num_poisoned_pages); |
1341 | freeit = 1; | 1368 | freeit = 1; |
1342 | if (PageHuge(page)) | 1369 | if (PageHuge(page)) |
1343 | clear_page_hwpoison_huge_page(page); | 1370 | clear_page_hwpoison_huge_page(page); |
@@ -1368,7 +1395,7 @@ static struct page *new_page(struct page *p, unsigned long private, int **x) | |||
1368 | * that is not free, and 1 for any other page type. | 1395 | * that is not free, and 1 for any other page type. |
1369 | * For 1 the page is returned with increased page count, otherwise not. | 1396 | * For 1 the page is returned with increased page count, otherwise not. |
1370 | */ | 1397 | */ |
1371 | static int get_any_page(struct page *p, unsigned long pfn, int flags) | 1398 | static int __get_any_page(struct page *p, unsigned long pfn, int flags) |
1372 | { | 1399 | { |
1373 | int ret; | 1400 | int ret; |
1374 | 1401 | ||
@@ -1393,11 +1420,9 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags) | |||
1393 | if (!get_page_unless_zero(compound_head(p))) { | 1420 | if (!get_page_unless_zero(compound_head(p))) { |
1394 | if (PageHuge(p)) { | 1421 | if (PageHuge(p)) { |
1395 | pr_info("%s: %#lx free huge page\n", __func__, pfn); | 1422 | pr_info("%s: %#lx free huge page\n", __func__, pfn); |
1396 | ret = dequeue_hwpoisoned_huge_page(compound_head(p)); | 1423 | ret = 0; |
1397 | } else if (is_free_buddy_page(p)) { | 1424 | } else if (is_free_buddy_page(p)) { |
1398 | pr_info("%s: %#lx free buddy page\n", __func__, pfn); | 1425 | pr_info("%s: %#lx free buddy page\n", __func__, pfn); |
1399 | /* Set hwpoison bit while page is still isolated */ | ||
1400 | SetPageHWPoison(p); | ||
1401 | ret = 0; | 1426 | ret = 0; |
1402 | } else { | 1427 | } else { |
1403 | pr_info("%s: %#lx: unknown zero refcount page type %lx\n", | 1428 | pr_info("%s: %#lx: unknown zero refcount page type %lx\n", |
@@ -1413,43 +1438,68 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags) | |||
1413 | return ret; | 1438 | return ret; |
1414 | } | 1439 | } |
1415 | 1440 | ||
1441 | static int get_any_page(struct page *page, unsigned long pfn, int flags) | ||
1442 | { | ||
1443 | int ret = __get_any_page(page, pfn, flags); | ||
1444 | |||
1445 | if (ret == 1 && !PageHuge(page) && !PageLRU(page)) { | ||
1446 | /* | ||
1447 | * Try to free it. | ||
1448 | */ | ||
1449 | put_page(page); | ||
1450 | shake_page(page, 1); | ||
1451 | |||
1452 | /* | ||
1453 | * Did it turn free? | ||
1454 | */ | ||
1455 | ret = __get_any_page(page, pfn, 0); | ||
1456 | if (!PageLRU(page)) { | ||
1457 | pr_info("soft_offline: %#lx: unknown non LRU page type %lx\n", | ||
1458 | pfn, page->flags); | ||
1459 | return -EIO; | ||
1460 | } | ||
1461 | } | ||
1462 | return ret; | ||
1463 | } | ||
1464 | |||
1416 | static int soft_offline_huge_page(struct page *page, int flags) | 1465 | static int soft_offline_huge_page(struct page *page, int flags) |
1417 | { | 1466 | { |
1418 | int ret; | 1467 | int ret; |
1419 | unsigned long pfn = page_to_pfn(page); | 1468 | unsigned long pfn = page_to_pfn(page); |
1420 | struct page *hpage = compound_head(page); | 1469 | struct page *hpage = compound_head(page); |
1421 | 1470 | ||
1422 | ret = get_any_page(page, pfn, flags); | 1471 | /* |
1423 | if (ret < 0) | 1472 | * This double-check of PageHWPoison is to avoid the race with |
1424 | return ret; | 1473 | * memory_failure(). See also comment in __soft_offline_page(). |
1425 | if (ret == 0) | 1474 | */ |
1426 | goto done; | 1475 | lock_page(hpage); |
1427 | |||
1428 | if (PageHWPoison(hpage)) { | 1476 | if (PageHWPoison(hpage)) { |
1477 | unlock_page(hpage); | ||
1429 | put_page(hpage); | 1478 | put_page(hpage); |
1430 | pr_info("soft offline: %#lx hugepage already poisoned\n", pfn); | 1479 | pr_info("soft offline: %#lx hugepage already poisoned\n", pfn); |
1431 | return -EBUSY; | 1480 | return -EBUSY; |
1432 | } | 1481 | } |
1482 | unlock_page(hpage); | ||
1433 | 1483 | ||
1434 | /* Keep page count to indicate a given hugepage is isolated. */ | 1484 | /* Keep page count to indicate a given hugepage is isolated. */ |
1435 | ret = migrate_huge_page(hpage, new_page, MPOL_MF_MOVE_ALL, false, | 1485 | ret = migrate_huge_page(hpage, new_page, MPOL_MF_MOVE_ALL, |
1436 | MIGRATE_SYNC); | 1486 | MIGRATE_SYNC); |
1437 | put_page(hpage); | 1487 | put_page(hpage); |
1438 | if (ret) { | 1488 | if (ret) { |
1439 | pr_info("soft offline: %#lx: migration failed %d, type %lx\n", | 1489 | pr_info("soft offline: %#lx: migration failed %d, type %lx\n", |
1440 | pfn, ret, page->flags); | 1490 | pfn, ret, page->flags); |
1441 | return ret; | 1491 | } else { |
1442 | } | 1492 | set_page_hwpoison_huge_page(hpage); |
1443 | done: | 1493 | dequeue_hwpoisoned_huge_page(hpage); |
1444 | if (!PageHWPoison(hpage)) | ||
1445 | atomic_long_add(1 << compound_trans_order(hpage), | 1494 | atomic_long_add(1 << compound_trans_order(hpage), |
1446 | &mce_bad_pages); | 1495 | &num_poisoned_pages); |
1447 | set_page_hwpoison_huge_page(hpage); | 1496 | } |
1448 | dequeue_hwpoisoned_huge_page(hpage); | ||
1449 | /* keep elevated page count for bad page */ | 1497 | /* keep elevated page count for bad page */ |
1450 | return ret; | 1498 | return ret; |
1451 | } | 1499 | } |
1452 | 1500 | ||
1501 | static int __soft_offline_page(struct page *page, int flags); | ||
1502 | |||
1453 | /** | 1503 | /** |
1454 | * soft_offline_page - Soft offline a page. | 1504 | * soft_offline_page - Soft offline a page. |
1455 | * @page: page to offline | 1505 | * @page: page to offline |
@@ -1478,9 +1528,11 @@ int soft_offline_page(struct page *page, int flags) | |||
1478 | unsigned long pfn = page_to_pfn(page); | 1528 | unsigned long pfn = page_to_pfn(page); |
1479 | struct page *hpage = compound_trans_head(page); | 1529 | struct page *hpage = compound_trans_head(page); |
1480 | 1530 | ||
1481 | if (PageHuge(page)) | 1531 | if (PageHWPoison(page)) { |
1482 | return soft_offline_huge_page(page, flags); | 1532 | pr_info("soft offline: %#lx page already poisoned\n", pfn); |
1483 | if (PageTransHuge(hpage)) { | 1533 | return -EBUSY; |
1534 | } | ||
1535 | if (!PageHuge(page) && PageTransHuge(hpage)) { | ||
1484 | if (PageAnon(hpage) && unlikely(split_huge_page(hpage))) { | 1536 | if (PageAnon(hpage) && unlikely(split_huge_page(hpage))) { |
1485 | pr_info("soft offline: %#lx: failed to split THP\n", | 1537 | pr_info("soft offline: %#lx: failed to split THP\n", |
1486 | pfn); | 1538 | pfn); |
@@ -1491,47 +1543,45 @@ int soft_offline_page(struct page *page, int flags) | |||
1491 | ret = get_any_page(page, pfn, flags); | 1543 | ret = get_any_page(page, pfn, flags); |
1492 | if (ret < 0) | 1544 | if (ret < 0) |
1493 | return ret; | 1545 | return ret; |
1494 | if (ret == 0) | 1546 | if (ret) { /* for in-use pages */ |
1495 | goto done; | 1547 | if (PageHuge(page)) |
1496 | 1548 | ret = soft_offline_huge_page(page, flags); | |
1497 | /* | 1549 | else |
1498 | * Page cache page we can handle? | 1550 | ret = __soft_offline_page(page, flags); |
1499 | */ | 1551 | } else { /* for free pages */ |
1500 | if (!PageLRU(page)) { | 1552 | if (PageHuge(page)) { |
1501 | /* | 1553 | set_page_hwpoison_huge_page(hpage); |
1502 | * Try to free it. | 1554 | dequeue_hwpoisoned_huge_page(hpage); |
1503 | */ | 1555 | atomic_long_add(1 << compound_trans_order(hpage), |
1504 | put_page(page); | 1556 | &num_poisoned_pages); |
1505 | shake_page(page, 1); | 1557 | } else { |
1506 | 1558 | SetPageHWPoison(page); | |
1507 | /* | 1559 | atomic_long_inc(&num_poisoned_pages); |
1508 | * Did it turn free? | 1560 | } |
1509 | */ | ||
1510 | ret = get_any_page(page, pfn, 0); | ||
1511 | if (ret < 0) | ||
1512 | return ret; | ||
1513 | if (ret == 0) | ||
1514 | goto done; | ||
1515 | } | ||
1516 | if (!PageLRU(page)) { | ||
1517 | pr_info("soft_offline: %#lx: unknown non LRU page type %lx\n", | ||
1518 | pfn, page->flags); | ||
1519 | return -EIO; | ||
1520 | } | 1561 | } |
1562 | /* keep elevated page count for bad page */ | ||
1563 | return ret; | ||
1564 | } | ||
1521 | 1565 | ||
1522 | lock_page(page); | 1566 | static int __soft_offline_page(struct page *page, int flags) |
1523 | wait_on_page_writeback(page); | 1567 | { |
1568 | int ret; | ||
1569 | unsigned long pfn = page_to_pfn(page); | ||
1524 | 1570 | ||
1525 | /* | 1571 | /* |
1526 | * Synchronized using the page lock with memory_failure() | 1572 | * Check PageHWPoison again inside page lock because PageHWPoison |
1573 | * is set by memory_failure() outside page lock. Note that | ||
1574 | * memory_failure() also double-checks PageHWPoison inside page lock, | ||
1575 | * so there's no race between soft_offline_page() and memory_failure(). | ||
1527 | */ | 1576 | */ |
1577 | lock_page(page); | ||
1578 | wait_on_page_writeback(page); | ||
1528 | if (PageHWPoison(page)) { | 1579 | if (PageHWPoison(page)) { |
1529 | unlock_page(page); | 1580 | unlock_page(page); |
1530 | put_page(page); | 1581 | put_page(page); |
1531 | pr_info("soft offline: %#lx page already poisoned\n", pfn); | 1582 | pr_info("soft offline: %#lx page already poisoned\n", pfn); |
1532 | return -EBUSY; | 1583 | return -EBUSY; |
1533 | } | 1584 | } |
1534 | |||
1535 | /* | 1585 | /* |
1536 | * Try to invalidate first. This should work for | 1586 | * Try to invalidate first. This should work for |
1537 | * non dirty unmapped page cache pages. | 1587 | * non dirty unmapped page cache pages. |
@@ -1544,9 +1594,10 @@ int soft_offline_page(struct page *page, int flags) | |||
1544 | */ | 1594 | */ |
1545 | if (ret == 1) { | 1595 | if (ret == 1) { |
1546 | put_page(page); | 1596 | put_page(page); |
1547 | ret = 0; | ||
1548 | pr_info("soft_offline: %#lx: invalidated\n", pfn); | 1597 | pr_info("soft_offline: %#lx: invalidated\n", pfn); |
1549 | goto done; | 1598 | SetPageHWPoison(page); |
1599 | atomic_long_inc(&num_poisoned_pages); | ||
1600 | return 0; | ||
1550 | } | 1601 | } |
1551 | 1602 | ||
1552 | /* | 1603 | /* |
@@ -1563,28 +1614,23 @@ int soft_offline_page(struct page *page, int flags) | |||
1563 | if (!ret) { | 1614 | if (!ret) { |
1564 | LIST_HEAD(pagelist); | 1615 | LIST_HEAD(pagelist); |
1565 | inc_zone_page_state(page, NR_ISOLATED_ANON + | 1616 | inc_zone_page_state(page, NR_ISOLATED_ANON + |
1566 | page_is_file_cache(page)); | 1617 | page_is_file_cache(page)); |
1567 | list_add(&page->lru, &pagelist); | 1618 | list_add(&page->lru, &pagelist); |
1568 | ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, | 1619 | ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, |
1569 | false, MIGRATE_SYNC, | 1620 | MIGRATE_SYNC, MR_MEMORY_FAILURE); |
1570 | MR_MEMORY_FAILURE); | ||
1571 | if (ret) { | 1621 | if (ret) { |
1572 | putback_lru_pages(&pagelist); | 1622 | putback_lru_pages(&pagelist); |
1573 | pr_info("soft offline: %#lx: migration failed %d, type %lx\n", | 1623 | pr_info("soft offline: %#lx: migration failed %d, type %lx\n", |
1574 | pfn, ret, page->flags); | 1624 | pfn, ret, page->flags); |
1575 | if (ret > 0) | 1625 | if (ret > 0) |
1576 | ret = -EIO; | 1626 | ret = -EIO; |
1627 | } else { | ||
1628 | SetPageHWPoison(page); | ||
1629 | atomic_long_inc(&num_poisoned_pages); | ||
1577 | } | 1630 | } |
1578 | } else { | 1631 | } else { |
1579 | pr_info("soft offline: %#lx: isolation failed: %d, page count %d, type %lx\n", | 1632 | pr_info("soft offline: %#lx: isolation failed: %d, page count %d, type %lx\n", |
1580 | pfn, ret, page_count(page), page->flags); | 1633 | pfn, ret, page_count(page), page->flags); |
1581 | } | 1634 | } |
1582 | if (ret) | ||
1583 | return ret; | ||
1584 | |||
1585 | done: | ||
1586 | atomic_long_add(1, &mce_bad_pages); | ||
1587 | SetPageHWPoison(page); | ||
1588 | /* keep elevated page count for bad page */ | ||
1589 | return ret; | 1635 | return ret; |
1590 | } | 1636 | } |