author		Huang Ying <ying.huang@intel.com>	2017-10-13 18:58:29 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2017-10-13 19:18:33 -0400
commit		61b639723be5a9fc4812d5d85cb769589afa5a38
tree		7a7b92cdaa08c84af347f9ce015f13c49b0d1971
parent		a7b100953aa33a5bbdc3e5e7f2241b9c0704606e
mm, swap: use page-cluster as max window of VMA based swap readahead
When the VMA based swap readahead was introduced, a new knob, /sys/kernel/mm/swap/vma_ra_max_order, was added as the max window of VMA based swap readahead.  This made it possible to use different max windows for VMA based readahead and the original physical readahead.

But Minchan Kim pointed out that this causes a regression: with the change, setting the page-cluster sysctl to zero can no longer disable swap readahead.

To fix the regression, the page-cluster sysctl is used as the max window of both the VMA based swap readahead and the original physical swap readahead.  If more fine grained control is needed in the future, more knobs can be added as subordinates of the page-cluster sysctl.

The vma_ra_max_order knob is deleted.  Because the knob was only introduced in v4.14-rc1, and this patch is targeted for merging before the v4.14 release, there should be no existing users of this newly added ABI.

Link: http://lkml.kernel.org/r/20171011070847.16003-1-ying.huang@intel.com
Fixes: ec560175c0b6fce ("mm, swap: VMA based swap readahead")
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Reported-by: Minchan Kim <minchan@kernel.org>
Acked-by: Minchan Kim <minchan@kernel.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Shaohua Li <shli@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Fengguang Wu <fengguang.wu@intel.com>
Cc: Tim Chen <tim.c.chen@intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
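For illustration only (not part of the patch), here is a minimal user-space sketch of how the readahead window follows the page-cluster value after this change.  The in-kernel code in the swap_readahead_detect() hunk below uses min_t()/READ_ONCE() and caps the order at SWAP_RA_ORDER_CEILING; the cap value of 5 used here is an assumption for the sketch, not taken from this patch.

	/*
	 * Sketch only: mirrors max_win = 1 << min(page_cluster, ceiling)
	 * from the swap_readahead_detect() change below.
	 */
	#include <stdio.h>

	#define SWAP_RA_ORDER_CEILING	5	/* assumed value of the in-kernel cap */

	static unsigned int swap_ra_max_win(unsigned int page_cluster)
	{
		unsigned int order = page_cluster < SWAP_RA_ORDER_CEILING ?
				     page_cluster : SWAP_RA_ORDER_CEILING;

		/* page-cluster == 0 gives a one-page window, i.e. no readahead */
		return 1U << order;
	}

	int main(void)
	{
		printf("page-cluster 0 -> max window %u page(s)\n", swap_ra_max_win(0));
		printf("page-cluster 3 -> max window %u page(s)\n", swap_ra_max_win(3));
		printf("page-cluster 8 -> max window %u page(s)\n", swap_ra_max_win(8));
		return 0;
	}

With this patch applied, "echo 0 > /proc/sys/vm/page-cluster" once again disables swap readahead entirely, for both the physical and the VMA based paths.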
-rw-r--r--	Documentation/ABI/testing/sysfs-kernel-mm-swap	10
-rw-r--r--	mm/swap_state.c	41
2 files changed, 7 insertions(+), 44 deletions(-)
diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-swap b/Documentation/ABI/testing/sysfs-kernel-mm-swap
index 587db52084c7..94672016c268 100644
--- a/Documentation/ABI/testing/sysfs-kernel-mm-swap
+++ b/Documentation/ABI/testing/sysfs-kernel-mm-swap
@@ -14,13 +14,3 @@ Description: Enable/disable VMA based swap readahead.
 		still used for tmpfs etc. other users.  If set to
 		false, the global swap readahead algorithm will be
 		used for all swappable pages.
-
-What:		/sys/kernel/mm/swap/vma_ra_max_order
-Date:		August 2017
-Contact:	Linux memory management mailing list <linux-mm@kvack.org>
-Description:	The max readahead size in order for VMA based swap readahead
-
-		VMA based swap readahead algorithm will readahead at
-		most 1 << max_order pages for each readahead.  The
-		real readahead size for each readahead will be scaled
-		according to the estimation algorithm.
diff --git a/mm/swap_state.c b/mm/swap_state.c
index ed91091d1e68..05b6803f0cce 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -39,10 +39,6 @@ struct address_space *swapper_spaces[MAX_SWAPFILES];
 static unsigned int nr_swapper_spaces[MAX_SWAPFILES];
 bool swap_vma_readahead = true;
 
-#define SWAP_RA_MAX_ORDER_DEFAULT	3
-
-static int swap_ra_max_order = SWAP_RA_MAX_ORDER_DEFAULT;
-
 #define SWAP_RA_WIN_SHIFT	(PAGE_SHIFT / 2)
 #define SWAP_RA_HITS_MASK	((1UL << SWAP_RA_WIN_SHIFT) - 1)
 #define SWAP_RA_HITS_MAX	SWAP_RA_HITS_MASK
@@ -664,6 +660,13 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
 	pte_t *tpte;
 #endif
 
+	max_win = 1 << min_t(unsigned int, READ_ONCE(page_cluster),
+			     SWAP_RA_ORDER_CEILING);
+	if (max_win == 1) {
+		swap_ra->win = 1;
+		return NULL;
+	}
+
 	faddr = vmf->address;
 	entry = pte_to_swp_entry(vmf->orig_pte);
 	if ((unlikely(non_swap_entry(entry))))
@@ -672,12 +675,6 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
 	if (page)
 		return page;
 
-	max_win = 1 << READ_ONCE(swap_ra_max_order);
-	if (max_win == 1) {
-		swap_ra->win = 1;
-		return NULL;
-	}
-
 	fpfn = PFN_DOWN(faddr);
 	swap_ra_info = GET_SWAP_RA_VAL(vma);
 	pfn = PFN_DOWN(SWAP_RA_ADDR(swap_ra_info));
@@ -786,32 +783,8 @@ static struct kobj_attribute vma_ra_enabled_attr =
 	__ATTR(vma_ra_enabled, 0644, vma_ra_enabled_show,
 	       vma_ra_enabled_store);
 
-static ssize_t vma_ra_max_order_show(struct kobject *kobj,
-				     struct kobj_attribute *attr, char *buf)
-{
-	return sprintf(buf, "%d\n", swap_ra_max_order);
-}
-static ssize_t vma_ra_max_order_store(struct kobject *kobj,
-				      struct kobj_attribute *attr,
-				      const char *buf, size_t count)
-{
-	int err, v;
-
-	err = kstrtoint(buf, 10, &v);
-	if (err || v > SWAP_RA_ORDER_CEILING || v <= 0)
-		return -EINVAL;
-
-	swap_ra_max_order = v;
-
-	return count;
-}
-static struct kobj_attribute vma_ra_max_order_attr =
-	__ATTR(vma_ra_max_order, 0644, vma_ra_max_order_show,
-	       vma_ra_max_order_store);
-
 static struct attribute *swap_attrs[] = {
 	&vma_ra_enabled_attr.attr,
-	&vma_ra_max_order_attr.attr,
 	NULL,
 };
 