author	Kirill A. Shutemov <kirill.shutemov@linux.intel.com>	2016-05-20 19:58:41 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-05-20 20:58:30 -0400
commit	5c0a85fad949212b3e059692deecdeed74ae7ec7 (patch)
tree	b6fa0d070d4cb85fcf0e6d578edbc074e999dbe9
parent	4b50bcc7eda4d3cc9e3f2a0aa60e590fedf728c5 (diff)
mm: make faultaround produce old ptes
Currently, faultaround code produces young ptes.  This can screw up
vmscan behaviour[1], as it makes vmscan think that these pages are hot
and not push them out on the first round.

During sparse file access, faultaround gets more pages mapped and all of
them are young.  Under memory pressure, this makes vmscan swap out anon
pages instead, or drop other page cache pages which would otherwise stay
resident.

Modify faultaround to produce old ptes, so they can easily be reclaimed
under memory pressure.

This can to some extent defeat the purpose of faultaround on machines
without a hardware accessed bit, as it will not help us with reducing
the number of minor page faults.  We may want to disable faultaround on
such machines altogether, but that's a subject for a separate patchset.

Minchan:

 "I tested 512M mmap sequential word read test on non-HW access bit
  system (i.e., ARM) and confirmed it doesn't increase minor fault any
  more.

  old: 4096 fault_around
  minor fault: 131291
  elapsed time: 6747645 usec

  new: 65536 fault_around
  minor fault: 131291
  elapsed time: 6709263 usec

  0.56% benefit"

[1] https://lkml.kernel.org/r/1460992636-711-1-git-send-email-vinmenon@codeaurora.org

Link: http://lkml.kernel.org/r/1463488366-47723-1-git-send-email-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Minchan Kim <minchan@kernel.org>
Tested-by: Minchan Kim <minchan@kernel.org>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Vinayak Menon <vinmenon@codeaurora.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
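[Editor's note: a minimal userspace sketch of the kind of test quoted
above; not Minchan's original program.  The file path, the assumption of
a preexisting ~512M test file, and the use of getrusage() to count minor
faults are all assumptions made for illustration.]

/*
 * Hypothetical 512M mmap sequential word read test: map a file,
 * read it one word at a time, and report the minor fault delta.
 */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	const char *path = argc > 1 ? argv[1] : "testfile"; /* assumed */
	struct stat st;
	struct rusage before, after;
	volatile long sum = 0;
	int fd = open(path, O_RDONLY);

	if (fd < 0 || fstat(fd, &st) < 0) {
		perror(path);
		return 1;
	}

	long *p = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	getrusage(RUSAGE_SELF, &before);
	for (size_t i = 0; i < st.st_size / sizeof(long); i++)
		sum += p[i];			/* sequential word read */
	getrusage(RUSAGE_SELF, &after);

	printf("minor faults: %ld\n", after.ru_minflt - before.ru_minflt);
	munmap(p, st.st_size);
	close(fd);
	return 0;
}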
 include/linux/mm.h |  2 +-
 mm/filemap.c       |  2 +-
 mm/memory.c        | 23 ++++++++++++++++------
 3 files changed, 20 insertions(+), 7 deletions(-)
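[Editor's note: the fault_around window quoted in the test above (4096
vs 65536 bytes) is tunable at runtime through the fault_around_bytes
debugfs knob, assuming debugfs is mounted at the conventional
/sys/kernel/debug.  A minimal sketch; 4096 (one page) effectively
disables faultaround, and 65536 is the default window.]

#include <stdio.h>

int main(void)
{
	/* Assumes debugfs is mounted at /sys/kernel/debug. */
	FILE *f = fopen("/sys/kernel/debug/fault_around_bytes", "w");

	if (!f) {
		perror("fault_around_bytes");
		return 1;
	}
	fprintf(f, "%d\n", 4096);	/* one page: disable faultaround */
	fclose(f);
	return 0;
}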
diff --git a/include/linux/mm.h b/include/linux/mm.h
index fbdb9d40847f..f223ac26b5d9 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -596,7 +596,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
 }
 
 void do_set_pte(struct vm_area_struct *vma, unsigned long address,
-		struct page *page, pte_t *pte, bool write, bool anon);
+		struct page *page, pte_t *pte, bool write, bool anon, bool old);
 #endif
 
 /*
diff --git a/mm/filemap.c b/mm/filemap.c
index 8f4859989f1b..b418405903bc 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2191,7 +2191,7 @@ repeat:
 		if (file->f_ra.mmap_miss > 0)
 			file->f_ra.mmap_miss--;
 		addr = address + (page->index - vmf->pgoff) * PAGE_SIZE;
-		do_set_pte(vma, addr, page, pte, false, false);
+		do_set_pte(vma, addr, page, pte, false, false, true);
 		unlock_page(page);
 		goto next;
 unlock:
diff --git a/mm/memory.c b/mm/memory.c
index 007c72ad03f6..f29e5ab0342d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2876,7 +2876,7 @@ static int __do_fault(struct vm_area_struct *vma, unsigned long address,
  * vm_ops->map_pages.
  */
 void do_set_pte(struct vm_area_struct *vma, unsigned long address,
-		struct page *page, pte_t *pte, bool write, bool anon)
+		struct page *page, pte_t *pte, bool write, bool anon, bool old)
 {
 	pte_t entry;
 
@@ -2884,6 +2884,8 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address,
 	entry = mk_pte(page, vma->vm_page_prot);
 	if (write)
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+	if (old)
+		entry = pte_mkold(entry);
 	if (anon) {
 		inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
 		page_add_new_anon_rmap(page, vma, address, false);
@@ -3021,9 +3023,20 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	 */
 	if (vma->vm_ops->map_pages && fault_around_bytes >> PAGE_SHIFT > 1) {
 		pte = pte_offset_map_lock(mm, pmd, address, &ptl);
-		do_fault_around(vma, address, pte, pgoff, flags);
 		if (!pte_same(*pte, orig_pte))
 			goto unlock_out;
+		do_fault_around(vma, address, pte, pgoff, flags);
+		/* Check if the fault was handled by faultaround */
+		if (!pte_same(*pte, orig_pte)) {
+			/*
+			 * Faultaround produces old ptes, but the pte we've
+			 * handled the fault for should be young.
+			 */
+			pte_t entry = pte_mkyoung(*pte);
+			if (ptep_set_access_flags(vma, address, pte, entry, 0))
+				update_mmu_cache(vma, address, pte);
+			goto unlock_out;
+		}
 		pte_unmap_unlock(pte, ptl);
 	}
 
@@ -3038,7 +3051,7 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		put_page(fault_page);
 		return ret;
 	}
-	do_set_pte(vma, address, fault_page, pte, false, false);
+	do_set_pte(vma, address, fault_page, pte, false, false, false);
 	unlock_page(fault_page);
 unlock_out:
 	pte_unmap_unlock(pte, ptl);
@@ -3090,7 +3103,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		}
 		goto uncharge_out;
 	}
-	do_set_pte(vma, address, new_page, pte, true, true);
+	do_set_pte(vma, address, new_page, pte, true, true, false);
 	mem_cgroup_commit_charge(new_page, memcg, false, false);
 	lru_cache_add_active_or_unevictable(new_page, vma);
 	pte_unmap_unlock(pte, ptl);
@@ -3147,7 +3160,7 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		put_page(fault_page);
 		return ret;
 	}
-	do_set_pte(vma, address, fault_page, pte, true, false);
+	do_set_pte(vma, address, fault_page, pte, true, false, false);
 	pte_unmap_unlock(pte, ptl);
 
 	if (set_page_dirty(fault_page))