aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
authorAndrea Arcangeli <aarcange@redhat.com>2016-08-25 18:16:57 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2016-08-26 20:39:35 -0400
commit804dd150468cfd920d92d4b3cf00536fedef3902 (patch)
tree4c95f7d6c52df318a9c164b9b3541a8daca1ddc4 /mm
parente7d316a02f683864a12389f8808570e37fb90aa3 (diff)
soft_dirty: fix soft_dirty during THP split
While adding proper userfaultfd_wp support with bits in pagetable and swap entry to avoid false-positive WP userfaults through swap/fork/KSM/etc, I've been adding a framework that mostly mirrors soft dirty. So I noticed in one place I had to add uffd_wp support to the pagetables that wasn't covered by soft_dirty and I think it should have. Example: in the THP migration code migrate_misplaced_transhuge_page() pmd_mkdirty is called unconditionally after mk_huge_pmd. entry = mk_huge_pmd(new_page, vma->vm_page_prot); entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); That sets soft dirty too (it's a false positive for soft dirty, the soft dirty bit could be more fine-grained and transfer the bit like uffd_wp will do.. pmd/pte_uffd_wp() enforces the invariant that when it's set pmd/pte_write is not set). However in the THP split there's no unconditional pmd_mkdirty after mk_huge_pmd and pte_swp_mksoft_dirty isn't called after the migration entry is created. The code sets the dirty bit in the struct page instead of setting it in the pagetable (which is fully equivalent as far as the real dirty bit is concerned, as the whole point of pagetable bits is to be eventually flushed out to the page, but that is not equivalent for the soft-dirty bit that gets lost in translation). This was found by code review only and totally untested as I'm working to actually replace soft dirty and I don't have time to test potential soft dirty bugfixes as well :). Transfer the soft_dirty from pmd to pte during THP splits. This fix avoids losing the soft_dirty bit and avoids userland memory corruption in the checkpoint. Fixes: eef1b3ba053aa6 ("thp: implement split_huge_pmd()") Link: http://lkml.kernel.org/r/1471610515-30229-2-git-send-email-aarcange@redhat.com Signed-off-by: Andrea Arcangeli <aarcange@redhat.com> Acked-by: Pavel Emelyanov <xemul@virtuozzo.com> Cc: "Kirill A. Shutemov" <kirill@shutemov.name> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/huge_memory.c7
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 2373f0a7d340..2db2112aa31e 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1512,7 +1512,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 	struct page *page;
 	pgtable_t pgtable;
 	pmd_t _pmd;
-	bool young, write, dirty;
+	bool young, write, dirty, soft_dirty;
 	unsigned long addr;
 	int i;
 
@@ -1546,6 +1546,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 	write = pmd_write(*pmd);
 	young = pmd_young(*pmd);
 	dirty = pmd_dirty(*pmd);
+	soft_dirty = pmd_soft_dirty(*pmd);
 
 	pmdp_huge_split_prepare(vma, haddr, pmd);
 	pgtable = pgtable_trans_huge_withdraw(mm, pmd);
@@ -1562,6 +1563,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 			swp_entry_t swp_entry;
 			swp_entry = make_migration_entry(page + i, write);
 			entry = swp_entry_to_pte(swp_entry);
+			if (soft_dirty)
+				entry = pte_swp_mksoft_dirty(entry);
 		} else {
 			entry = mk_pte(page + i, vma->vm_page_prot);
 			entry = maybe_mkwrite(entry, vma);
@@ -1569,6 +1572,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 				entry = pte_wrprotect(entry);
 			if (!young)
 				entry = pte_mkold(entry);
+			if (soft_dirty)
+				entry = pte_mksoft_dirty(entry);
 		}
 		if (dirty)
 			SetPageDirty(page + i);