summaryrefslogtreecommitdiffstats
path: root/mm/swapfile.c
diff options
context:
space:
mode:
authorHuang Ying <ying.huang@intel.com>2017-05-08 18:57:40 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2017-05-08 20:15:13 -0400
commit54f180d3c181277457fb003dd9524c2aa1ef8160 (patch)
treefc763d5c91a4b0c0dfe5f4a6d594dd768866074f /mm/swapfile.c
parentbc4e54f6e966e9ca35064cd60f91b1478c07a1b2 (diff)
mm, swap: use kvzalloc to allocate some swap data structures
Now vzalloc() is used in swap code to allocate various data structures, such as swap cache, swap slots cache, cluster info, etc. Because the size may be too large on some systems, normal kzalloc() may fail. But using kzalloc() has some advantages, for example, less memory fragmentation, less TLB pressure, etc. So change the data structure allocation in swap code to use kvzalloc() which will try kzalloc() first, and fall back to vzalloc() if kzalloc() failed. In general, although kmalloc() will reduce the number of high-order pages in the short term, vmalloc() will cause more pain for memory fragmentation in the long term. And the swap data structure allocation that is changed in this patch is expected to be a long-term allocation. From Dave Hansen: "for example, we have a two-page data structure. vmalloc() takes two effectively random order-0 pages, probably from two different 2M pages and pins them. That "kills" two 2M pages. kmalloc(), allocating two *contiguous* pages, will not cross a 2M boundary. That means it will only "kill" the possibility of a single 2M page. More 2M pages == less fragmentation." The allocation in this patch occurs at swapon time, which is usually done during system boot, so usually we have a high opportunity to allocate the contiguous pages successfully. The allocation for swap_map[] in struct swap_info_struct is not changed, because that is usually quite large and vmalloc_to_page() is used for it. That makes it a little harder to change.
Link: http://lkml.kernel.org/r/20170407064911.25447-1-ying.huang@intel.com Signed-off-by: Huang Ying <ying.huang@intel.com> Acked-by: Tim Chen <tim.c.chen@intel.com> Acked-by: Michal Hocko <mhocko@suse.com> Acked-by: Rik van Riel <riel@redhat.com> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Hugh Dickins <hughd@google.com> Cc: Shaohua Li <shli@kernel.org> Cc: Minchan Kim <minchan@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/swapfile.c')
-rw-r--r--mm/swapfile.c10
1 files changed, 6 insertions, 4 deletions
diff --git a/mm/swapfile.c b/mm/swapfile.c
index b86b2aca3fb9..4f6cba1b6632 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2270,8 +2270,8 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
2270 free_percpu(p->percpu_cluster); 2270 free_percpu(p->percpu_cluster);
2271 p->percpu_cluster = NULL; 2271 p->percpu_cluster = NULL;
2272 vfree(swap_map); 2272 vfree(swap_map);
2273 vfree(cluster_info); 2273 kvfree(cluster_info);
2274 vfree(frontswap_map); 2274 kvfree(frontswap_map);
2275 /* Destroy swap account information */ 2275 /* Destroy swap account information */
2276 swap_cgroup_swapoff(p->type); 2276 swap_cgroup_swapoff(p->type);
2277 exit_swap_address_space(p->type); 2277 exit_swap_address_space(p->type);
@@ -2794,7 +2794,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
2794 p->cluster_next = 1 + (prandom_u32() % p->highest_bit); 2794 p->cluster_next = 1 + (prandom_u32() % p->highest_bit);
2795 nr_cluster = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER); 2795 nr_cluster = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER);
2796 2796
2797 cluster_info = vzalloc(nr_cluster * sizeof(*cluster_info)); 2797 cluster_info = kvzalloc(nr_cluster * sizeof(*cluster_info),
2798 GFP_KERNEL);
2798 if (!cluster_info) { 2799 if (!cluster_info) {
2799 error = -ENOMEM; 2800 error = -ENOMEM;
2800 goto bad_swap; 2801 goto bad_swap;
@@ -2827,7 +2828,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
2827 } 2828 }
2828 /* frontswap enabled? set up bit-per-page map for frontswap */ 2829 /* frontswap enabled? set up bit-per-page map for frontswap */
2829 if (IS_ENABLED(CONFIG_FRONTSWAP)) 2830 if (IS_ENABLED(CONFIG_FRONTSWAP))
2830 frontswap_map = vzalloc(BITS_TO_LONGS(maxpages) * sizeof(long)); 2831 frontswap_map = kvzalloc(BITS_TO_LONGS(maxpages) * sizeof(long),
2832 GFP_KERNEL);
2831 2833
2832 if (p->bdev &&(swap_flags & SWAP_FLAG_DISCARD) && swap_discardable(p)) { 2834 if (p->bdev &&(swap_flags & SWAP_FLAG_DISCARD) && swap_discardable(p)) {
2833 /* 2835 /*