author     Minchan Kim <minchan@kernel.org>                2017-11-15 20:33:07 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2017-11-15 21:21:02 -0500
commit     0bcac06f27d7528591c27ac2b093ccd71c5d0168 (patch)
tree       b8261d05b232e57a58a7fe90306cad5d545de900 /mm/memory.c
parent     539a6fea7fdcade532bd3e77be2862a683f8f0c9 (diff)
mm, swap: skip swapcache for swapin of synchronous device
With fast swap storage, platforms want to use swap more aggressively, and swap-in latency becomes crucial to application latency.

The rw_page()-based synchronous devices like zram, pmem and btt are such fast storage. When I profile swap-in performance with a zram lz4 decompression test, software overhead is more than 70%. It would likely be even bigger on nvdimm.

This patch aims to reduce swap-in latency by skipping the swapcache when the swap device is a synchronous device, i.e. an rw_page()-based device. It improves my swap-in test by 45% (5G sequential swap-in, no readahead: from 2.41sec to 1.64sec).
Link: http://lkml.kernel.org/r/1505886205-9671-5-git-send-email-minchan@kernel.org
Signed-off-by: Minchan Kim <minchan@kernel.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Ilya Dryomov <idryomov@gmail.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Huang Ying <ying.huang@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
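Before the diffstat and diff, here is a condensed sketch of the new swap-in fast path in do_swap_page(). It is assembled only from the hunks shown below, with locking, the later pte_same() backout, error handling and the VMA-readahead variant of the slow path omitted, so it illustrates the control flow rather than the literal patched code; all identifiers come from the diff itself, and the note on how SWP_SYNCHRONOUS_IO gets set is an assumption based on the parent commit.

	struct swap_info_struct *si = swp_swap_info(entry);

	if (!(si->flags & SWP_SYNCHRONOUS_IO)) {
		/* Slow path, as before: read via the swapcache with readahead. */
		page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE, vma,
					vmf->address);
		swapcache = page;	/* later checks compare against this */
	} else {
		/*
		 * Fast path: the device is synchronous (presumably marked so
		 * at swapon time for rw_page()-based devices such as zram,
		 * pmem and btt by the parent commit), so skip the swapcache
		 * and read straight into a newly allocated page.
		 */
		page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vmf->address);
		if (page) {
			__SetPageLocked(page);
			__SetPageSwapBacked(page);
			set_page_private(page, entry.val); /* remember the swap entry */
			lru_cache_add_anon(page);
			swap_readpage(page, true);	/* read synchronously */
		}
		/* swapcache stays NULL: no swapcache page to unlock/put later. */
	}

Because swapcache stays NULL on the fast path, the later page != swapcache comparisons in the function gain an && swapcache guard, so the KSM-copy handling and the unlock/put of a separate swapcache page only run when a swapcache page was actually used.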
Diffstat (limited to 'mm/memory.c')
-rw-r--r--  mm/memory.c  52
1 file changed, 36 insertions, 16 deletions
diff --git a/mm/memory.c b/mm/memory.c
index cae514e7dcfc..f75bff2cf662 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2842,7 +2842,7 @@ EXPORT_SYMBOL(unmap_mapping_range);
 int do_swap_page(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
-	struct page *page = NULL, *swapcache;
+	struct page *page = NULL, *swapcache = NULL;
 	struct mem_cgroup *memcg;
 	struct vma_swap_readahead swap_ra;
 	swp_entry_t entry;
@@ -2881,17 +2881,35 @@ int do_swap_page(struct vm_fault *vmf)
 		}
 		goto out;
 	}
+
+
 	delayacct_set_flag(DELAYACCT_PF_SWAPIN);
 	if (!page)
 		page = lookup_swap_cache(entry, vma_readahead ? vma : NULL,
 					 vmf->address);
 	if (!page) {
-		if (vma_readahead)
-			page = do_swap_page_readahead(entry,
-				GFP_HIGHUSER_MOVABLE, vmf, &swap_ra);
-		else
-			page = swapin_readahead(entry,
-				GFP_HIGHUSER_MOVABLE, vma, vmf->address);
+		struct swap_info_struct *si = swp_swap_info(entry);
+
+		if (!(si->flags & SWP_SYNCHRONOUS_IO)) {
+			if (vma_readahead)
+				page = do_swap_page_readahead(entry,
+					GFP_HIGHUSER_MOVABLE, vmf, &swap_ra);
+			else
+				page = swapin_readahead(entry,
+					GFP_HIGHUSER_MOVABLE, vma, vmf->address);
+			swapcache = page;
+		} else {
+			/* skip swapcache */
+			page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vmf->address);
+			if (page) {
+				__SetPageLocked(page);
+				__SetPageSwapBacked(page);
+				set_page_private(page, entry.val);
+				lru_cache_add_anon(page);
+				swap_readpage(page, true);
+			}
+		}
+
 		if (!page) {
 			/*
 			 * Back out if somebody else faulted in this pte
@@ -2920,7 +2938,6 @@ int do_swap_page(struct vm_fault *vmf)
 		goto out_release;
 	}
 
-	swapcache = page;
 	locked = lock_page_or_retry(page, vma->vm_mm, vmf->flags);
 
 	delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
@@ -2935,7 +2952,8 @@ int do_swap_page(struct vm_fault *vmf)
 	 * test below, are not enough to exclude that.  Even if it is still
 	 * swapcache, we need to check that the page's swap has not changed.
 	 */
-	if (unlikely(!PageSwapCache(page) || page_private(page) != entry.val))
+	if (unlikely((!PageSwapCache(page) ||
+			page_private(page) != entry.val)) && swapcache)
 		goto out_page;
 
 	page = ksm_might_need_to_copy(page, vma, vmf->address);
@@ -2988,14 +3006,16 @@ int do_swap_page(struct vm_fault *vmf)
 		pte = pte_mksoft_dirty(pte);
 	set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte);
 	vmf->orig_pte = pte;
-	if (page == swapcache) {
-		do_page_add_anon_rmap(page, vma, vmf->address, exclusive);
-		mem_cgroup_commit_charge(page, memcg, true, false);
-		activate_page(page);
-	} else { /* ksm created a completely new copy */
+
+	/* ksm created a completely new copy */
+	if (unlikely(page != swapcache && swapcache)) {
 		page_add_new_anon_rmap(page, vma, vmf->address, false);
 		mem_cgroup_commit_charge(page, memcg, false, false);
 		lru_cache_add_active_or_unevictable(page, vma);
+	} else {
+		do_page_add_anon_rmap(page, vma, vmf->address, exclusive);
+		mem_cgroup_commit_charge(page, memcg, true, false);
+		activate_page(page);
 	}
 
 	swap_free(entry);
@@ -3003,7 +3023,7 @@ int do_swap_page(struct vm_fault *vmf)
 	    (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
 		try_to_free_swap(page);
 	unlock_page(page);
-	if (page != swapcache) {
+	if (page != swapcache && swapcache) {
 		/*
 		 * Hold the lock to avoid the swap entry to be reused
 		 * until we take the PT lock for the pte_same() check
@@ -3036,7 +3056,7 @@ out_page:
 	unlock_page(page);
 out_release:
 	put_page(page);
-	if (page != swapcache) {
+	if (page != swapcache && swapcache) {
 		unlock_page(swapcache);
 		put_page(swapcache);
 	}