about summary refs log tree commit diff stats
path: root/mm
diff options
context:
space:
mode:
authorNaoya Horiguchi <n-horiguchi@ah.jp.nec.com>2018-08-23 20:00:42 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2018-08-23 21:48:43 -0400
commitd4ae9916ea2947341180d2b538f48875ff393a86 (patch)
treee478e5e6e84de25fade8f2995c9ee49cacd5a488 /mm
parent6bc9b56433b76e40d11099338d27fbc5cd2935ca (diff)
mm: soft-offline: close the race against page allocation
A process can be killed with SIGBUS(BUS_MCEERR_AR) when it tries to allocate a page that was just freed on the way of soft-offline. This is undesirable because soft-offline (which is about corrected error) is less aggressive than hard-offline (which is about uncorrected error), and we can make soft-offline fail and keep using the page for good reason like "system is busy."

Two main changes of this patch are:

- setting migrate type of the target page to MIGRATE_ISOLATE. As done in free_unref_page_commit(), this makes kernel bypass pcplist when freeing the page. So we can assume that the page is in freelist just after put_page() returns,

- setting PG_hwpoison on free page under zone->lock which protects freelists, so this allows us to avoid setting PG_hwpoison on a page that is decided to be allocated soon.

[akpm@linux-foundation.org: tweak set_hwpoison_free_buddy_page() comment]
Link: http://lkml.kernel.org/r/1531452366-11661-3-git-send-email-n-horiguchi@ah.jp.nec.com
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Reported-by: Xishi Qiu <xishi.qiuxishi@alibaba-inc.com>
Tested-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: <zy.zhengyi@alibaba-inc.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/memory-failure.c26
-rw-r--r--mm/migrate.c2
-rw-r--r--mm/page_alloc.c30
3 files changed, 52 insertions(+), 6 deletions(-)
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 49dc32c61137..192d0bbfc9ea 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -57,6 +57,7 @@
57#include <linux/mm_inline.h> 57#include <linux/mm_inline.h>
58#include <linux/kfifo.h> 58#include <linux/kfifo.h>
59#include <linux/ratelimit.h> 59#include <linux/ratelimit.h>
60#include <linux/page-isolation.h>
60#include "internal.h" 61#include "internal.h"
61#include "ras/ras_event.h" 62#include "ras/ras_event.h"
62 63
@@ -1697,6 +1698,7 @@ static int __soft_offline_page(struct page *page, int flags)
1697static int soft_offline_in_use_page(struct page *page, int flags) 1698static int soft_offline_in_use_page(struct page *page, int flags)
1698{ 1699{
1699 int ret; 1700 int ret;
1701 int mt;
1700 struct page *hpage = compound_head(page); 1702 struct page *hpage = compound_head(page);
1701 1703
1702 if (!PageHuge(page) && PageTransHuge(hpage)) { 1704 if (!PageHuge(page) && PageTransHuge(hpage)) {
@@ -1715,23 +1717,37 @@ static int soft_offline_in_use_page(struct page *page, int flags)
1715 put_hwpoison_page(hpage); 1717 put_hwpoison_page(hpage);
1716 } 1718 }
1717 1719
1720 /*
1721 * Setting MIGRATE_ISOLATE here ensures that the page will be linked
1722 * to free list immediately (not via pcplist) when released after
1723 * successful page migration. Otherwise we can't guarantee that the
1724 * page is really free after put_page() returns, so
1725 * set_hwpoison_free_buddy_page() highly likely fails.
1726 */
1727 mt = get_pageblock_migratetype(page);
1728 set_pageblock_migratetype(page, MIGRATE_ISOLATE);
1718 if (PageHuge(page)) 1729 if (PageHuge(page))
1719 ret = soft_offline_huge_page(page, flags); 1730 ret = soft_offline_huge_page(page, flags);
1720 else 1731 else
1721 ret = __soft_offline_page(page, flags); 1732 ret = __soft_offline_page(page, flags);
1722 1733 set_pageblock_migratetype(page, mt);
1723 return ret; 1734 return ret;
1724} 1735}
1725 1736
1726static void soft_offline_free_page(struct page *page) 1737static int soft_offline_free_page(struct page *page)
1727{ 1738{
1728 int rc = 0; 1739 int rc = 0;
1729 struct page *head = compound_head(page); 1740 struct page *head = compound_head(page);
1730 1741
1731 if (PageHuge(head)) 1742 if (PageHuge(head))
1732 rc = dissolve_free_huge_page(page); 1743 rc = dissolve_free_huge_page(page);
1733 if (!rc && !TestSetPageHWPoison(page)) 1744 if (!rc) {
1734 num_poisoned_pages_inc(); 1745 if (set_hwpoison_free_buddy_page(page))
1746 num_poisoned_pages_inc();
1747 else
1748 rc = -EBUSY;
1749 }
1750 return rc;
1735} 1751}
1736 1752
1737/** 1753/**
@@ -1775,7 +1791,7 @@ int soft_offline_page(struct page *page, int flags)
1775 if (ret > 0) 1791 if (ret > 0)
1776 ret = soft_offline_in_use_page(page, flags); 1792 ret = soft_offline_in_use_page(page, flags);
1777 else if (ret == 0) 1793 else if (ret == 0)
1778 soft_offline_free_page(page); 1794 ret = soft_offline_free_page(page);
1779 1795
1780 return ret; 1796 return ret;
1781} 1797}
diff --git a/mm/migrate.c b/mm/migrate.c
index 91a99457127c..d6a2e89b086a 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1212,7 +1212,7 @@ out:
1212 * intentionally. Although it's rather weird, 1212 * intentionally. Although it's rather weird,
1213 * it's how HWPoison flag works at the moment. 1213 * it's how HWPoison flag works at the moment.
1214 */ 1214 */
1215 if (!test_set_page_hwpoison(page)) 1215 if (set_hwpoison_free_buddy_page(page))
1216 num_poisoned_pages_inc(); 1216 num_poisoned_pages_inc();
1217 } 1217 }
1218 } else { 1218 } else {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c677c1506d73..e75865d58ba7 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -8096,3 +8096,33 @@ bool is_free_buddy_page(struct page *page)
8096 8096
8097 return order < MAX_ORDER; 8097 return order < MAX_ORDER;
8098} 8098}
8099
8100#ifdef CONFIG_MEMORY_FAILURE
8101/*
8102 * Set PG_hwpoison flag if a given page is confirmed to be a free page. This
8103 * test is performed under the zone lock to prevent a race against page
8104 * allocation.
8105 */
8106bool set_hwpoison_free_buddy_page(struct page *page)
8107{
8108 struct zone *zone = page_zone(page);
8109 unsigned long pfn = page_to_pfn(page);
8110 unsigned long flags;
8111 unsigned int order;
8112 bool hwpoisoned = false;
8113
8114 spin_lock_irqsave(&zone->lock, flags);
8115 for (order = 0; order < MAX_ORDER; order++) {
8116 struct page *page_head = page - (pfn & ((1 << order) - 1));
8117
8118 if (PageBuddy(page_head) && page_order(page_head) >= order) {
8119 if (!TestSetPageHWPoison(page))
8120 hwpoisoned = true;
8121 break;
8122 }
8123 }
8124 spin_unlock_irqrestore(&zone->lock, flags);
8125
8126 return hwpoisoned;
8127}
8128#endif