diff options
| author | Naoya Horiguchi <n-horiguchi@ah.jp.nec.com> | 2018-08-23 20:00:42 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-08-23 21:48:43 -0400 |
| commit | d4ae9916ea2947341180d2b538f48875ff393a86 (patch) | |
| tree | e478e5e6e84de25fade8f2995c9ee49cacd5a488 /mm | |
| parent | 6bc9b56433b76e40d11099338d27fbc5cd2935ca (diff) | |
mm: soft-offline: close the race against page allocation
A process can be killed with SIGBUS(BUS_MCEERR_AR) when it tries to
allocate a page that was just freed in the course of soft-offline. This is
undesirable because soft-offline (which handles corrected errors) is
less aggressive than hard-offline (which handles uncorrected errors),
and we can make soft-offline fail and keep using the page for a good
reason such as "system is busy."
The two main changes in this patch are:
- setting the migrate type of the target page to MIGRATE_ISOLATE. As done
in free_unref_page_commit(), this makes the kernel bypass the pcplist when
freeing the page, so we can assume that the page is in the freelist just
after put_page() returns,
- setting PG_hwpoison on the free page under zone->lock, which protects
the freelists, so that we avoid setting PG_hwpoison on a page
that is about to be allocated.
[akpm@linux-foundation.org: tweak set_hwpoison_free_buddy_page() comment]
Link: http://lkml.kernel.org/r/1531452366-11661-3-git-send-email-n-horiguchi@ah.jp.nec.com
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Reported-by: Xishi Qiu <xishi.qiuxishi@alibaba-inc.com>
Tested-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: <zy.zhengyi@alibaba-inc.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
| -rw-r--r-- | mm/memory-failure.c | 26 | ||||
| -rw-r--r-- | mm/migrate.c | 2 | ||||
| -rw-r--r-- | mm/page_alloc.c | 30 |
3 files changed, 52 insertions, 6 deletions
diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 49dc32c61137..192d0bbfc9ea 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c | |||
| @@ -57,6 +57,7 @@ | |||
| 57 | #include <linux/mm_inline.h> | 57 | #include <linux/mm_inline.h> |
| 58 | #include <linux/kfifo.h> | 58 | #include <linux/kfifo.h> |
| 59 | #include <linux/ratelimit.h> | 59 | #include <linux/ratelimit.h> |
| 60 | #include <linux/page-isolation.h> | ||
| 60 | #include "internal.h" | 61 | #include "internal.h" |
| 61 | #include "ras/ras_event.h" | 62 | #include "ras/ras_event.h" |
| 62 | 63 | ||
| @@ -1697,6 +1698,7 @@ static int __soft_offline_page(struct page *page, int flags) | |||
| 1697 | static int soft_offline_in_use_page(struct page *page, int flags) | 1698 | static int soft_offline_in_use_page(struct page *page, int flags) |
| 1698 | { | 1699 | { |
| 1699 | int ret; | 1700 | int ret; |
| 1701 | int mt; | ||
| 1700 | struct page *hpage = compound_head(page); | 1702 | struct page *hpage = compound_head(page); |
| 1701 | 1703 | ||
| 1702 | if (!PageHuge(page) && PageTransHuge(hpage)) { | 1704 | if (!PageHuge(page) && PageTransHuge(hpage)) { |
| @@ -1715,23 +1717,37 @@ static int soft_offline_in_use_page(struct page *page, int flags) | |||
| 1715 | put_hwpoison_page(hpage); | 1717 | put_hwpoison_page(hpage); |
| 1716 | } | 1718 | } |
| 1717 | 1719 | ||
| 1720 | /* | ||
| 1721 | * Setting MIGRATE_ISOLATE here ensures that the page will be linked | ||
| 1722 | * to free list immediately (not via pcplist) when released after | ||
| 1723 | * successful page migration. Otherwise we can't guarantee that the | ||
| 1724 | * page is really free after put_page() returns, so | ||
| 1725 | * set_hwpoison_free_buddy_page() highly likely fails. | ||
| 1726 | */ | ||
| 1727 | mt = get_pageblock_migratetype(page); | ||
| 1728 | set_pageblock_migratetype(page, MIGRATE_ISOLATE); | ||
| 1718 | if (PageHuge(page)) | 1729 | if (PageHuge(page)) |
| 1719 | ret = soft_offline_huge_page(page, flags); | 1730 | ret = soft_offline_huge_page(page, flags); |
| 1720 | else | 1731 | else |
| 1721 | ret = __soft_offline_page(page, flags); | 1732 | ret = __soft_offline_page(page, flags); |
| 1722 | 1733 | set_pageblock_migratetype(page, mt); | |
| 1723 | return ret; | 1734 | return ret; |
| 1724 | } | 1735 | } |
| 1725 | 1736 | ||
| 1726 | static void soft_offline_free_page(struct page *page) | 1737 | static int soft_offline_free_page(struct page *page) |
| 1727 | { | 1738 | { |
| 1728 | int rc = 0; | 1739 | int rc = 0; |
| 1729 | struct page *head = compound_head(page); | 1740 | struct page *head = compound_head(page); |
| 1730 | 1741 | ||
| 1731 | if (PageHuge(head)) | 1742 | if (PageHuge(head)) |
| 1732 | rc = dissolve_free_huge_page(page); | 1743 | rc = dissolve_free_huge_page(page); |
| 1733 | if (!rc && !TestSetPageHWPoison(page)) | 1744 | if (!rc) { |
| 1734 | num_poisoned_pages_inc(); | 1745 | if (set_hwpoison_free_buddy_page(page)) |
| 1746 | num_poisoned_pages_inc(); | ||
| 1747 | else | ||
| 1748 | rc = -EBUSY; | ||
| 1749 | } | ||
| 1750 | return rc; | ||
| 1735 | } | 1751 | } |
| 1736 | 1752 | ||
| 1737 | /** | 1753 | /** |
| @@ -1775,7 +1791,7 @@ int soft_offline_page(struct page *page, int flags) | |||
| 1775 | if (ret > 0) | 1791 | if (ret > 0) |
| 1776 | ret = soft_offline_in_use_page(page, flags); | 1792 | ret = soft_offline_in_use_page(page, flags); |
| 1777 | else if (ret == 0) | 1793 | else if (ret == 0) |
| 1778 | soft_offline_free_page(page); | 1794 | ret = soft_offline_free_page(page); |
| 1779 | 1795 | ||
| 1780 | return ret; | 1796 | return ret; |
| 1781 | } | 1797 | } |
diff --git a/mm/migrate.c b/mm/migrate.c index 91a99457127c..d6a2e89b086a 100644 --- a/mm/migrate.c +++ b/mm/migrate.c | |||
| @@ -1212,7 +1212,7 @@ out: | |||
| 1212 | * intentionally. Although it's rather weird, | 1212 | * intentionally. Although it's rather weird, |
| 1213 | * it's how HWPoison flag works at the moment. | 1213 | * it's how HWPoison flag works at the moment. |
| 1214 | */ | 1214 | */ |
| 1215 | if (!test_set_page_hwpoison(page)) | 1215 | if (set_hwpoison_free_buddy_page(page)) |
| 1216 | num_poisoned_pages_inc(); | 1216 | num_poisoned_pages_inc(); |
| 1217 | } | 1217 | } |
| 1218 | } else { | 1218 | } else { |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c677c1506d73..e75865d58ba7 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
| @@ -8096,3 +8096,33 @@ bool is_free_buddy_page(struct page *page) | |||
| 8096 | 8096 | ||
| 8097 | return order < MAX_ORDER; | 8097 | return order < MAX_ORDER; |
| 8098 | } | 8098 | } |
| 8099 | |||
| 8100 | #ifdef CONFIG_MEMORY_FAILURE | ||
| 8101 | /* | ||
| 8102 | * Set PG_hwpoison flag if a given page is confirmed to be a free page. This | ||
| 8103 | * test is performed under the zone lock to prevent a race against page | ||
| 8104 | * allocation. | ||
| 8105 | */ | ||
| 8106 | bool set_hwpoison_free_buddy_page(struct page *page) | ||
| 8107 | { | ||
| 8108 | struct zone *zone = page_zone(page); | ||
| 8109 | unsigned long pfn = page_to_pfn(page); | ||
| 8110 | unsigned long flags; | ||
| 8111 | unsigned int order; | ||
| 8112 | bool hwpoisoned = false; | ||
| 8113 | |||
| 8114 | spin_lock_irqsave(&zone->lock, flags); | ||
| 8115 | for (order = 0; order < MAX_ORDER; order++) { | ||
| 8116 | struct page *page_head = page - (pfn & ((1 << order) - 1)); | ||
| 8117 | |||
| 8118 | if (PageBuddy(page_head) && page_order(page_head) >= order) { | ||
| 8119 | if (!TestSetPageHWPoison(page)) | ||
| 8120 | hwpoisoned = true; | ||
| 8121 | break; | ||
| 8122 | } | ||
| 8123 | } | ||
| 8124 | spin_unlock_irqrestore(&zone->lock, flags); | ||
| 8125 | |||
| 8126 | return hwpoisoned; | ||
| 8127 | } | ||
| 8128 | #endif | ||
