diff options
author | Mel Gorman <mgorman@suse.de> | 2014-10-02 14:47:42 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-10-02 14:57:18 -0400 |
commit | abc40bd2eeb77eb7c2effcaf63154aad929a1d5f (patch) | |
tree | ee3cddc97fad0654fdc36e3052c0598ae370e9fc /mm | |
parent | d3cb8bf6081b8b7a2dabb1264fe968fd870fa595 (diff) |
mm: numa: Do not mark PTEs pte_numa when splitting huge pages
This patch reverts 1ba6e0b50b ("mm: numa: split_huge_page: transfer the
NUMA type from the pmd to the pte"). If a huge page is being split due
to a protection change and the tail will be in a PROT_NONE vma then NUMA
hinting PTEs are temporarily created in the protected VMA.
 VM_RW|VM_PROTNONE
|-----------------|
      ^
      split here
In the specific case above, it should get fixed up by change_pte_range()
but there is a window of opportunity for weirdness to happen. Similarly,
if a huge page is shrunk and split during a protection update but before
pmd_numa is cleared then a pte_numa can be left behind.
Instead of adding complexity trying to deal with the case, this patch
will not mark PTEs NUMA when splitting a huge page. NUMA hinting faults
will not be triggered, but that loss is marginal in comparison to the
complexity of dealing with the corner cases during THP split.
Cc: stable@vger.kernel.org
Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/huge_memory.c | 7 |
1 files changed, 5 insertions, 2 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index d9a21d06b862..f8ffd9412ec5 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c | |||
@@ -1795,14 +1795,17 @@ static int __split_huge_page_map(struct page *page, | |||
1795 | for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) { | 1795 | for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) { |
1796 | pte_t *pte, entry; | 1796 | pte_t *pte, entry; |
1797 | BUG_ON(PageCompound(page+i)); | 1797 | BUG_ON(PageCompound(page+i)); |
1798 | /* | ||
1799 | * Note that pmd_numa is not transferred deliberately | ||
1800 | * to avoid any possibility that pte_numa leaks to | ||
1801 | * a PROT_NONE VMA by accident. | ||
1802 | */ | ||
1798 | entry = mk_pte(page + i, vma->vm_page_prot); | 1803 | entry = mk_pte(page + i, vma->vm_page_prot); |
1799 | entry = maybe_mkwrite(pte_mkdirty(entry), vma); | 1804 | entry = maybe_mkwrite(pte_mkdirty(entry), vma); |
1800 | if (!pmd_write(*pmd)) | 1805 | if (!pmd_write(*pmd)) |
1801 | entry = pte_wrprotect(entry); | 1806 | entry = pte_wrprotect(entry); |
1802 | if (!pmd_young(*pmd)) | 1807 | if (!pmd_young(*pmd)) |
1803 | entry = pte_mkold(entry); | 1808 | entry = pte_mkold(entry); |
1804 | if (pmd_numa(*pmd)) | ||
1805 | entry = pte_mknuma(entry); | ||
1806 | pte = pte_offset_map(&_pmd, haddr); | 1809 | pte = pte_offset_map(&_pmd, haddr); |
1807 | BUG_ON(!pte_none(*pte)); | 1810 | BUG_ON(!pte_none(*pte)); |
1808 | set_pte_at(mm, haddr, pte, entry); | 1811 | set_pte_at(mm, haddr, pte, entry); |