author    Huang, Ying <ying.huang@intel.com>    2017-02-22 18:45:26 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>    2017-02-22 19:41:30 -0500
commit    4b3ef9daa4fc0bba742a79faecb17fdaaead083b (patch)
tree      52a387923455792179754189a685ba9c01f4b160 /mm/swapfile.c
parent    235b62176712b970c815923e36b9a9cc05d4d901 (diff)
mm/swap: split swap cache into 64MB trunks
This patch improves the scalability of swap out/in by using fine-grained locks for the swap cache. In the current kernel, one address space is used for each swap device, and in the common configuration the number of swap devices is very small (one is typical). This causes heavy lock contention on the radix tree of the address space when multiple tasks swap out/in concurrently. But in fact there is no dependency between pages in the swap cache, so the single shared address space per swap device can be split into several address spaces to reduce the lock contention.

In this patch, the shared address space is split into 64MB trunks. 64MB is chosen to balance memory usage against the effectiveness of the lock contention reduction. The size of struct address_space on x86_64 is 408 bytes, so the patch uses 6528 bytes of additional memory per 1GB of swap space on x86_64 (1GB / 64MB = 16 address spaces, and 16 * 408B = 6528B). One address space is still shared by all swap entries within the same 64MB trunk.

To avoid lock contention during the first round of swap space allocation, the order of the swap clusters in the initial free cluster list is changed so that the swap space distance between consecutive swap clusters in the list is at least 64MB. After the first round of allocation, the swap clusters are expected to be freed randomly, so the lock contention should be reduced effectively.

Link: http://lkml.kernel.org/r/735bab895e64c930581ffb0a05b661e01da82bc5.1484082593.git.tim.c.chen@linux.intel.com
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Aaron Lu <aaron.lu@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Shaohua Li <shli@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
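For context, the swap cache side of this split lives in mm/swap_state.c and is not part of this diff (which is limited to mm/swapfile.c); a minimal sketch of the per-trunk lookup the commit message implies, where the names swapper_spaces and SWAP_ADDRESS_SPACE_SHIFT and the 4KB page size are assumptions for illustration:

	/* Sketch, assuming 4KB pages: a 64MB trunk covers 2^14 pages. */
	#define SWAP_ADDRESS_SPACE_SHIFT	14
	#define SWAP_ADDRESS_SPACE_PAGES	(1 << SWAP_ADDRESS_SPACE_SHIFT)

	/* One array of address spaces per swap device (indexed by swap type). */
	struct address_space *swapper_spaces[MAX_SWAPFILES];

	/* Select the address space for a swap entry by its 64MB trunk. */
	#define swap_address_space(entry)				\
		(&swapper_spaces[swp_type(entry)][swp_offset(entry)	\
			>> SWAP_ADDRESS_SPACE_SHIFT])

Swap entries less than 64MB apart in swap space can still contend on one radix tree; entries in different trunks now take different locks.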
Diffstat (limited to 'mm/swapfile.c')
-rw-r--r--    mm/swapfile.c    16
1 file changed, 14 insertions(+), 2 deletions(-)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index eb71b5d9430b..66e95eb73040 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2084,6 +2084,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 	vfree(frontswap_map);
 	/* Destroy swap account information */
 	swap_cgroup_swapoff(p->type);
+	exit_swap_address_space(p->type);
 
 	inode = mapping->host;
 	if (S_ISBLK(inode->i_mode)) {
@@ -2407,8 +2408,12 @@ static unsigned long read_swap_header(struct swap_info_struct *p,
 	return maxpages;
 }
 
-#define SWAP_CLUSTER_COLS						\
+#define SWAP_CLUSTER_INFO_COLS						\
 	DIV_ROUND_UP(L1_CACHE_BYTES, sizeof(struct swap_cluster_info))
+#define SWAP_CLUSTER_SPACE_COLS						\
+	DIV_ROUND_UP(SWAP_ADDRESS_SPACE_PAGES, SWAPFILE_CLUSTER)
+#define SWAP_CLUSTER_COLS						\
+	max_t(unsigned int, SWAP_CLUSTER_INFO_COLS, SWAP_CLUSTER_SPACE_COLS)
 
 static int setup_swap_map_and_extents(struct swap_info_struct *p,
 					union swap_header *swap_header,
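The new SWAP_CLUSTER_SPACE_COLS term widens the column stride so it matches the 64MB address-space trunks. With illustrative x86_64 values (assumed here, not taken from the diff: L1_CACHE_BYTES = 64, an 8-byte struct swap_cluster_info after the parent patch added a per-cluster spinlock, SWAPFILE_CLUSTER = 256 pages, SWAP_ADDRESS_SPACE_PAGES = 16384), the macro arithmetic works out as in this standalone sketch:

	#include <stdio.h>

	/* Assumed values for illustration; the real ones are config-dependent. */
	#define L1_CACHE_BYTES            64
	#define CLUSTER_INFO_SIZE         8      /* sizeof(struct swap_cluster_info) */
	#define SWAPFILE_CLUSTER          256    /* pages per swap cluster */
	#define SWAP_ADDRESS_SPACE_PAGES  16384  /* 64MB of 4KB pages */

	#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

	int main(void)
	{
		unsigned int info_cols  = DIV_ROUND_UP(L1_CACHE_BYTES, CLUSTER_INFO_SIZE);
		unsigned int space_cols = DIV_ROUND_UP(SWAP_ADDRESS_SPACE_PAGES, SWAPFILE_CLUSTER);
		unsigned int cols = info_cols > space_cols ? info_cols : space_cols;

		/* info_cols = 8, space_cols = 64, so SWAP_CLUSTER_COLS = 64:
		 * consecutive clusters on the initial free list end up
		 * 64 clusters * 256 pages * 4KB = 64MB apart. */
		printf("SWAP_CLUSTER_COLS = %u\n", cols);
		return 0;
	}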
@@ -2471,7 +2476,10 @@ static int setup_swap_map_and_extents(struct swap_info_struct *p,
 		return nr_extents;
 
 
-	/* Reduce false cache line sharing between cluster_info */
+	/*
+	 * Reduce false cache line sharing between cluster_info and
+	 * sharing same address space.
+	 */
 	for (k = 0; k < SWAP_CLUSTER_COLS; k++) {
 		j = (k + col) % SWAP_CLUSTER_COLS;
 		for (i = 0; i < DIV_ROUND_UP(nr_clusters, SWAP_CLUSTER_COLS); i++) {
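The body of the inner loop is cut off in this hunk; assuming it links cluster idx = i * SWAP_CLUSTER_COLS + j onto the tail of the free cluster list, a small user-space model of the same traversal shows the resulting column-major order (toy sizes chosen so the output stays short):

	#include <stdio.h>

	#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

	/* Toy model: 8 columns instead of the real 64, 20 clusters. */
	#define SWAP_CLUSTER_COLS 8

	int main(void)
	{
		unsigned int nr_clusters = 20, i, j, k, idx;
		unsigned int col = 0; /* the kernel derives this from p->cluster_next */

		/* Walk column by column, so consecutive clusters on the free
		 * list are SWAP_CLUSTER_COLS apart in swap space (>= 64MB
		 * with the real column count). */
		for (k = 0; k < SWAP_CLUSTER_COLS; k++) {
			j = (k + col) % SWAP_CLUSTER_COLS;
			for (i = 0; i < DIV_ROUND_UP(nr_clusters, SWAP_CLUSTER_COLS); i++) {
				idx = i * SWAP_CLUSTER_COLS + j;
				if (idx >= nr_clusters)
					continue;
				printf("%u ", idx); /* order clusters join the list */
			}
		}
		printf("\n"); /* prints: 0 8 16 1 9 17 2 10 18 3 11 19 4 12 ... */
		return 0;
	}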
@@ -2661,6 +2669,10 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 		}
 	}
 
+	error = init_swap_address_space(p->type, maxpages);
+	if (error)
+		goto bad_swap;
+
 	mutex_lock(&swapon_mutex);
 	prio = -1;
 	if (swap_flags & SWAP_FLAG_PREFER)
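init_swap_address_space() and its swapoff-side counterpart exit_swap_address_space() are defined outside mm/swapfile.c, so their bodies do not appear in this diffstat. A minimal sketch of what the swapon-side call implies, with the vzalloc-based allocation and field initialization assumed rather than taken from this page:

	/* Sketch, not the kernel implementation: allocate one address_space
	 * per 64MB trunk of the device's maxpages. */
	int init_swap_address_space(unsigned int type, unsigned long nr_pages)
	{
		struct address_space *spaces;
		unsigned int i, nr;

		nr = DIV_ROUND_UP(nr_pages, SWAP_ADDRESS_SPACE_PAGES);
		spaces = vzalloc(sizeof(struct address_space) * nr);
		if (!spaces)
			return -ENOMEM;
		for (i = 0; i < nr; i++) {
			INIT_RADIX_TREE(&spaces[i].page_tree, GFP_ATOMIC | __GFP_NOWARN);
			spin_lock_init(&spaces[i].tree_lock);
		}
		nr_swapper_spaces[type] = nr;
		swapper_spaces[type] = spaces;
		return 0;
	}

exit_swap_address_space(), called from the swapoff hunk above, would then free swapper_spaces[type] once the device is fully swapped off.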