author    Kirill A. Shutemov <kirill.shutemov@linux.intel.com>  2017-11-26 22:21:26 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>        2017-11-27 15:26:29 -0500
commit    152e93af3cfe2d29d8136cc0a02a8612507136ee (patch)
tree      19bd28f0ea6af08ba14ae4bfd841b5256f888ee7
parent    a8f97366452ed491d13cf1e44241bc0b5740b1f0 (diff)
mm, thp: Do not make pmd/pud dirty without a reason
Currently we make page table entries dirty all the time, regardless of access type, and don't even consider whether the mapping is write-protected. The reasoning is that we don't really need dirty tracking on THP, and making the entry dirty upfront may save some time on the first write to the page.

Unfortunately, such an approach may result in a false-positive can_follow_write_pmd() for the huge zero page or a read-only shmem file.

Let's make the page dirty only if we are about to write to the page anyway (as we do for small pages).

I've restructured the code to set the dirty bit inside maybe_p[mu]d_mkwrite(), which now also takes into account whether the vma is write-protected.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
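For context, the false positive comes from the FOLL_FORCE/FOLL_COW check in the huge-page follow_page path, which treats a dirty entry as evidence that a COW has already been performed. A simplified sketch of that check (not part of this patch; the name follows the upstream helper can_follow_write_pmd(), details paraphrased):

	/*
	 * Sketch: GUP may follow a write-protected pmd only if the mapping is
	 * writable, or if this is a forced-COW lookup and the entry is dirty
	 * (taken as a sign that COW already happened).
	 */
	static inline bool can_follow_write_pmd(pmd_t pmd, unsigned int flags)
	{
		return pmd_write(pmd) ||
		       ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pmd_dirty(pmd));
	}

With entries dirtied upfront, pmd_dirty() can be true even for the huge zero page or a read-only shmem mapping, so the check may pass although no write ever happened. After this patch, callers pass dirty = true only on actual write paths (and the helper honours VM_WRITE), while khugepaged and migration pass false, keeping the dirty bit meaningful for this check.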
-rw-r--r--  mm/huge_memory.c  | 31
-rw-r--r--  mm/internal.h     |  3
-rw-r--r--  mm/khugepaged.c   |  2
-rw-r--r--  mm/memory.c       |  2
-rw-r--r--  mm/migrate.c      |  2
5 files changed, 24 insertions(+), 16 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 0e7ded98d114..f22401fd83b5 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -474,10 +474,13 @@ out:
 }
 __setup("transparent_hugepage=", setup_transparent_hugepage);
 
-pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
+pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma, bool dirty)
 {
-	if (likely(vma->vm_flags & VM_WRITE))
+	if (likely(vma->vm_flags & VM_WRITE)) {
 		pmd = pmd_mkwrite(pmd);
+		if (dirty)
+			pmd = pmd_mkdirty(pmd);
+	}
 	return pmd;
 }
 
@@ -599,7 +602,7 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
 	}
 
 	entry = mk_huge_pmd(page, vma->vm_page_prot);
-	entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
+	entry = maybe_pmd_mkwrite(entry, vma, true);
 	page_add_new_anon_rmap(page, vma, haddr, true);
 	mem_cgroup_commit_charge(page, memcg, false, true);
 	lru_cache_add_active_or_unevictable(page, vma);
@@ -741,8 +744,8 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
 	if (pfn_t_devmap(pfn))
 		entry = pmd_mkdevmap(entry);
 	if (write) {
-		entry = pmd_mkyoung(pmd_mkdirty(entry));
-		entry = maybe_pmd_mkwrite(entry, vma);
+		entry = pmd_mkyoung(entry);
+		entry = maybe_pmd_mkwrite(entry, vma, true);
 	}
 
 	if (pgtable) {
@@ -788,10 +791,14 @@ int vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
 EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd);
 
 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
-static pud_t maybe_pud_mkwrite(pud_t pud, struct vm_area_struct *vma)
+static pud_t maybe_pud_mkwrite(pud_t pud, struct vm_area_struct *vma,
+		bool dirty)
 {
-	if (likely(vma->vm_flags & VM_WRITE))
+	if (likely(vma->vm_flags & VM_WRITE)) {
 		pud = pud_mkwrite(pud);
+		if (dirty)
+			pud = pud_mkdirty(pud);
+	}
 	return pud;
 }
 
@@ -807,8 +814,8 @@ static void insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
 	if (pfn_t_devmap(pfn))
 		entry = pud_mkdevmap(entry);
 	if (write) {
-		entry = pud_mkyoung(pud_mkdirty(entry));
-		entry = maybe_pud_mkwrite(entry, vma);
+		entry = pud_mkyoung(entry);
+		entry = maybe_pud_mkwrite(entry, vma, true);
 	}
 	set_pud_at(mm, addr, pud, entry);
 	update_mmu_cache_pud(vma, addr, pud);
@@ -1279,7 +1286,7 @@ int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
 	if (reuse_swap_page(page, NULL)) {
 		pmd_t entry;
 		entry = pmd_mkyoung(orig_pmd);
-		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
+		entry = maybe_pmd_mkwrite(entry, vma, true);
 		if (pmdp_set_access_flags(vma, haddr, vmf->pmd, entry, 1))
 			update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
 		ret |= VM_FAULT_WRITE;
@@ -1349,7 +1356,7 @@ alloc:
 	} else {
 		pmd_t entry;
 		entry = mk_huge_pmd(new_page, vma->vm_page_prot);
-		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
+		entry = maybe_pmd_mkwrite(entry, vma, true);
 		pmdp_huge_clear_flush_notify(vma, haddr, vmf->pmd);
 		page_add_new_anon_rmap(new_page, vma, haddr, true);
 		mem_cgroup_commit_charge(new_page, memcg, false, true);
@@ -2928,7 +2935,7 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
 	if (pmd_swp_soft_dirty(*pvmw->pmd))
 		pmde = pmd_mksoft_dirty(pmde);
 	if (is_write_migration_entry(entry))
-		pmde = maybe_pmd_mkwrite(pmde, vma);
+		pmde = maybe_pmd_mkwrite(pmde, vma, false);
 
 	flush_cache_range(vma, mmun_start, mmun_start + HPAGE_PMD_SIZE);
 	page_add_anon_rmap(new, vma, mmun_start, true);
diff --git a/mm/internal.h b/mm/internal.h
index e6bd35182dae..b35cdebda0ce 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -328,7 +328,8 @@ static inline void mlock_migrate_page(struct page *newpage, struct page *page)
 	}
 }
 
-extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);
+extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma,
+		bool dirty);
 
 /*
  * At what user virtual address is page expected in @vma?
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index ea4ff259b671..db43dc8a8ae6 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1057,7 +1057,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 	pgtable = pmd_pgtable(_pmd);
 
 	_pmd = mk_huge_pmd(new_page, vma->vm_page_prot);
-	_pmd = maybe_pmd_mkwrite(pmd_mkdirty(_pmd), vma);
+	_pmd = maybe_pmd_mkwrite(_pmd, vma, false);
 
 	/*
 	 * spin_lock() below is not the equivalent of smp_wmb(), so
diff --git a/mm/memory.c b/mm/memory.c
index 85e7a87da79f..b10c1d26f675 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3335,7 +3335,7 @@ static int do_set_pmd(struct vm_fault *vmf, struct page *page)
 
 	entry = mk_huge_pmd(page, vma->vm_page_prot);
 	if (write)
-		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
+		entry = maybe_pmd_mkwrite(entry, vma, true);
 
 	add_mm_counter(vma->vm_mm, MM_FILEPAGES, HPAGE_PMD_NR);
 	page_add_file_rmap(page, true);
diff --git a/mm/migrate.c b/mm/migrate.c
index 4d0be47a322a..57865fc8cfe3 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2068,7 +2068,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	}
 
 	entry = mk_huge_pmd(new_page, vma->vm_page_prot);
-	entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
+	entry = maybe_pmd_mkwrite(entry, vma, false);
 
 	/*
 	 * Clear the old entry under pagetable lock and establish the new PTE.