author		Huang Ying <ying.huang@intel.com>	2017-10-13 18:58:29 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2017-10-13 19:18:33 -0400
commit		61b639723be5a9fc4812d5d85cb769589afa5a38
tree		7a7b92cdaa08c84af347f9ce015f13c49b0d1971
parent		a7b100953aa33a5bbdc3e5e7f2241b9c0704606e
mm, swap: use page-cluster as max window of VMA based swap readahead
When the VMA based swap readahead was introduced, a new knob, /sys/kernel/mm/swap/vma_ra_max_order, was added as the max window of VMA based swap readahead.  This made it possible to use different max windows for VMA based readahead and the original physical readahead.

But Minchan Kim pointed out that this causes a regression: with the change, setting the page-cluster sysctl to zero can no longer disable swap readahead.

To fix the regression, the page-cluster sysctl is used as the max window of both the VMA based swap readahead and the original physical swap readahead.  If more fine grained control is needed in the future, more knobs can be added as subordinates of the page-cluster sysctl.

The vma_ra_max_order knob is deleted.  Because the knob was only introduced in v4.14-rc1, and this patch is targeted for merging before the v4.14 release, there should be no existing users of this newly added ABI.

Link: http://lkml.kernel.org/r/20171011070847.16003-1-ying.huang@intel.com
Fixes: ec560175c0b6fce ("mm, swap: VMA based swap readahead")
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Reported-by: Minchan Kim <minchan@kernel.org>
Acked-by: Minchan Kim <minchan@kernel.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Shaohua Li <shli@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Fengguang Wu <fengguang.wu@intel.com>
Cc: Tim Chen <tim.c.chen@intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
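For illustration only (not part of the patch), here is a minimal user-space sketch of how the readahead window follows the page-cluster value after this change.  The in-kernel code in the swap_readahead_detect() hunk below uses min_t()/READ_ONCE() and caps the order at SWAP_RA_ORDER_CEILING; the cap value of 5 used here is an assumption for the sketch, not taken from this patch.

	/*
	 * Sketch only: mirrors max_win = 1 << min(page_cluster, ceiling)
	 * from the swap_readahead_detect() change below.
	 */
	#include <stdio.h>

	#define SWAP_RA_ORDER_CEILING	5	/* assumed value of the in-kernel cap */

	static unsigned int swap_ra_max_win(unsigned int page_cluster)
	{
		unsigned int order = page_cluster < SWAP_RA_ORDER_CEILING ?
				     page_cluster : SWAP_RA_ORDER_CEILING;

		/* page-cluster == 0 gives a one-page window, i.e. no readahead */
		return 1U << order;
	}

	int main(void)
	{
		printf("page-cluster 0 -> max window %u page(s)\n", swap_ra_max_win(0));
		printf("page-cluster 3 -> max window %u page(s)\n", swap_ra_max_win(3));
		printf("page-cluster 8 -> max window %u page(s)\n", swap_ra_max_win(8));
		return 0;
	}

With this patch applied, "echo 0 > /proc/sys/vm/page-cluster" once again disables swap readahead entirely, for both the physical and the VMA based paths.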
-rw-r--r--	Documentation/ABI/testing/sysfs-kernel-mm-swap	10
-rw-r--r--	mm/swap_state.c	41
2 files changed, 7 insertions(+), 44 deletions(-)
diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-swap b/Documentation/ABI/testing/sysfs-kernel-mm-swap
index 587db52084c7..94672016c268 100644
--- a/Documentation/ABI/testing/sysfs-kernel-mm-swap
+++ b/Documentation/ABI/testing/sysfs-kernel-mm-swap
@@ -14,13 +14,3 @@ Description: Enable/disable VMA based swap readahead.
 		still used for tmpfs etc. other users.  If set to
 		false, the global swap readahead algorithm will be
 		used for all swappable pages.
-
-What:		/sys/kernel/mm/swap/vma_ra_max_order
-Date:		August 2017
-Contact:	Linux memory management mailing list <linux-mm@kvack.org>
-Description:	The max readahead size in order for VMA based swap readahead
-
-		VMA based swap readahead algorithm will readahead at
-		most 1 << max_order pages for each readahead.  The
-		real readahead size for each readahead will be scaled
-		according to the estimation algorithm.
diff --git a/mm/swap_state.c b/mm/swap_state.c
index ed91091d1e68..05b6803f0cce 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -39,10 +39,6 @@ struct address_space *swapper_spaces[MAX_SWAPFILES];
 static unsigned int nr_swapper_spaces[MAX_SWAPFILES];
 bool swap_vma_readahead = true;
 
-#define SWAP_RA_MAX_ORDER_DEFAULT	3
-
-static int swap_ra_max_order = SWAP_RA_MAX_ORDER_DEFAULT;
-
 #define SWAP_RA_WIN_SHIFT	(PAGE_SHIFT / 2)
 #define SWAP_RA_HITS_MASK	((1UL << SWAP_RA_WIN_SHIFT) - 1)
 #define SWAP_RA_HITS_MAX	SWAP_RA_HITS_MASK
@@ -664,6 +660,13 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
 	pte_t *tpte;
 #endif
 
+	max_win = 1 << min_t(unsigned int, READ_ONCE(page_cluster),
+			     SWAP_RA_ORDER_CEILING);
+	if (max_win == 1) {
+		swap_ra->win = 1;
+		return NULL;
+	}
+
 	faddr = vmf->address;
 	entry = pte_to_swp_entry(vmf->orig_pte);
 	if ((unlikely(non_swap_entry(entry))))
@@ -672,12 +675,6 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
 	if (page)
 		return page;
 
-	max_win = 1 << READ_ONCE(swap_ra_max_order);
-	if (max_win == 1) {
-		swap_ra->win = 1;
-		return NULL;
-	}
-
 	fpfn = PFN_DOWN(faddr);
 	swap_ra_info = GET_SWAP_RA_VAL(vma);
 	pfn = PFN_DOWN(SWAP_RA_ADDR(swap_ra_info));
@@ -786,32 +783,8 @@ static struct kobj_attribute vma_ra_enabled_attr =
 	__ATTR(vma_ra_enabled, 0644, vma_ra_enabled_show,
 	       vma_ra_enabled_store);
 
-static ssize_t vma_ra_max_order_show(struct kobject *kobj,
-				     struct kobj_attribute *attr, char *buf)
-{
-	return sprintf(buf, "%d\n", swap_ra_max_order);
-}
-static ssize_t vma_ra_max_order_store(struct kobject *kobj,
-				      struct kobj_attribute *attr,
-				      const char *buf, size_t count)
-{
-	int err, v;
-
-	err = kstrtoint(buf, 10, &v);
-	if (err || v > SWAP_RA_ORDER_CEILING || v <= 0)
-		return -EINVAL;
-
-	swap_ra_max_order = v;
-
-	return count;
-}
-static struct kobj_attribute vma_ra_max_order_attr =
-	__ATTR(vma_ra_max_order, 0644, vma_ra_max_order_show,
-	       vma_ra_max_order_store);
-
 static struct attribute *swap_attrs[] = {
 	&vma_ra_enabled_attr.attr,
-	&vma_ra_max_order_attr.attr,
 	NULL,
 };
 