diff options
author | Naoya Horiguchi <n-horiguchi@ah.jp.nec.com> | 2015-08-06 18:47:08 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-08-06 21:39:42 -0400 |
commit | f4c18e6f7b5bbb5b528b3334115806b0d76f50f9 (patch) | |
tree | 08e934ca7a74761a3d88ef6c494b93284c918b0c | |
parent | 98ed2b0052e68420f1bad6c81e3f2600d25023e7 (diff) |
mm: check __PG_HWPOISON separately from PAGE_FLAGS_CHECK_AT_*
The race condition addressed in commit add05cecef80 ("mm: soft-offline:
don't free target page in successful page migration") was not closed
completely, because the race can happen not only for soft-offline, but
also for hard-offline. Consider a slab page that is about to be freed
into the buddy pool: if an uncorrected memory error hits the page just
after entering __free_one_page(), then VM_BUG_ON_PAGE(page->flags &
PAGE_FLAGS_CHECK_AT_PREP) is triggered, even though this is not
necessary because the data on the affected page is not consumed.
To solve it, this patch drops __PG_HWPOISON from the page flag checks at
allocation/free time. I think it's justified because the __PG_HWPOISON
flag is defined to prevent the page from being reused, and setting it
outside the page's alloc-free cycle is designed behavior (not a bug).
In recent months, I have been annoyed by a BUG_ON triggered when a
soft-offlined page remains on the lru cache list for a while, which is
avoided by calling put_page() instead of putback_lru_page() in page
migration's success path. This means that this patch reverts a major
change from commit add05cecef80 regarding the new refcounting rule for
soft-offlined pages, so the "reuse window" is reopened. That window will
be closed by a subsequent patch.
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Dean Nelson <dnelson@redhat.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Hugh Dickins <hughd@google.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | include/linux/page-flags.h | 10 | ||||
-rw-r--r-- | mm/huge_memory.c | 7 | ||||
-rw-r--r-- | mm/migrate.c | 5 | ||||
-rw-r--r-- | mm/page_alloc.c | 4 |
4 files changed, 16 insertions, 10 deletions
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index f34e040b34e9..41c93844fb1d 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h | |||
@@ -631,15 +631,19 @@ static inline void ClearPageSlabPfmemalloc(struct page *page) | |||
631 | 1 << PG_private | 1 << PG_private_2 | \ | 631 | 1 << PG_private | 1 << PG_private_2 | \ |
632 | 1 << PG_writeback | 1 << PG_reserved | \ | 632 | 1 << PG_writeback | 1 << PG_reserved | \ |
633 | 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \ | 633 | 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \ |
634 | 1 << PG_unevictable | __PG_MLOCKED | __PG_HWPOISON | \ | 634 | 1 << PG_unevictable | __PG_MLOCKED | \ |
635 | __PG_COMPOUND_LOCK) | 635 | __PG_COMPOUND_LOCK) |
636 | 636 | ||
637 | /* | 637 | /* |
638 | * Flags checked when a page is prepped for return by the page allocator. | 638 | * Flags checked when a page is prepped for return by the page allocator. |
639 | * Pages being prepped should not have any flags set. It they are set, | 639 | * Pages being prepped should not have these flags set. It they are set, |
640 | * there has been a kernel bug or struct page corruption. | 640 | * there has been a kernel bug or struct page corruption. |
641 | * | ||
642 | * __PG_HWPOISON is exceptional because it needs to be kept beyond page's | ||
643 | * alloc-free cycle to prevent from reusing the page. | ||
641 | */ | 644 | */ |
642 | #define PAGE_FLAGS_CHECK_AT_PREP ((1 << NR_PAGEFLAGS) - 1) | 645 | #define PAGE_FLAGS_CHECK_AT_PREP \ |
646 | (((1 << NR_PAGEFLAGS) - 1) & ~__PG_HWPOISON) | ||
643 | 647 | ||
644 | #define PAGE_FLAGS_PRIVATE \ | 648 | #define PAGE_FLAGS_PRIVATE \ |
645 | (1 << PG_private | 1 << PG_private_2) | 649 | (1 << PG_private | 1 << PG_private_2) |
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index c107094f79ba..097c7a4bfbd9 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c | |||
@@ -1676,12 +1676,7 @@ static void __split_huge_page_refcount(struct page *page, | |||
1676 | /* after clearing PageTail the gup refcount can be released */ | 1676 | /* after clearing PageTail the gup refcount can be released */ |
1677 | smp_mb__after_atomic(); | 1677 | smp_mb__after_atomic(); |
1678 | 1678 | ||
1679 | /* | 1679 | page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; |
1680 | * retain hwpoison flag of the poisoned tail page: | ||
1681 | * fix for the unsuitable process killed on Guest Machine(KVM) | ||
1682 | * by the memory-failure. | ||
1683 | */ | ||
1684 | page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP | __PG_HWPOISON; | ||
1685 | page_tail->flags |= (page->flags & | 1680 | page_tail->flags |= (page->flags & |
1686 | ((1L << PG_referenced) | | 1681 | ((1L << PG_referenced) | |
1687 | (1L << PG_swapbacked) | | 1682 | (1L << PG_swapbacked) | |
diff --git a/mm/migrate.c b/mm/migrate.c index ee401e4e5ef1..f2415be7d93b 100644 --- a/mm/migrate.c +++ b/mm/migrate.c | |||
@@ -950,7 +950,10 @@ out: | |||
950 | list_del(&page->lru); | 950 | list_del(&page->lru); |
951 | dec_zone_page_state(page, NR_ISOLATED_ANON + | 951 | dec_zone_page_state(page, NR_ISOLATED_ANON + |
952 | page_is_file_cache(page)); | 952 | page_is_file_cache(page)); |
953 | if (reason != MR_MEMORY_FAILURE) | 953 | /* Soft-offlined page shouldn't go through lru cache list */ |
954 | if (reason == MR_MEMORY_FAILURE) | ||
955 | put_page(page); | ||
956 | else | ||
954 | putback_lru_page(page); | 957 | putback_lru_page(page); |
955 | } | 958 | } |
956 | 959 | ||
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index cb61f44eb3fc..beda41710802 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -1296,6 +1296,10 @@ static inline int check_new_page(struct page *page) | |||
1296 | bad_reason = "non-NULL mapping"; | 1296 | bad_reason = "non-NULL mapping"; |
1297 | if (unlikely(atomic_read(&page->_count) != 0)) | 1297 | if (unlikely(atomic_read(&page->_count) != 0)) |
1298 | bad_reason = "nonzero _count"; | 1298 | bad_reason = "nonzero _count"; |
1299 | if (unlikely(page->flags & __PG_HWPOISON)) { | ||
1300 | bad_reason = "HWPoisoned (hardware-corrupted)"; | ||
1301 | bad_flags = __PG_HWPOISON; | ||
1302 | } | ||
1299 | if (unlikely(page->flags & PAGE_FLAGS_CHECK_AT_PREP)) { | 1303 | if (unlikely(page->flags & PAGE_FLAGS_CHECK_AT_PREP)) { |
1300 | bad_reason = "PAGE_FLAGS_CHECK_AT_PREP flag set"; | 1304 | bad_reason = "PAGE_FLAGS_CHECK_AT_PREP flag set"; |
1301 | bad_flags = PAGE_FLAGS_CHECK_AT_PREP; | 1305 | bad_flags = PAGE_FLAGS_CHECK_AT_PREP; |