author	Huang, Ying <ying.huang@intel.com>	2017-02-22 18:45:26 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2017-02-22 19:41:30 -0500
commit	4b3ef9daa4fc0bba742a79faecb17fdaaead083b (patch)
tree	52a387923455792179754189a685ba9c01f4b160 /mm/swap_state.c
parent	235b62176712b970c815923e36b9a9cc05d4d901 (diff)
mm/swap: split swap cache into 64MB trunks
The patch improves the scalability of swap out/in by using fine-grained locks for the swap cache. In the current kernel, one address space is used for each swap device, and in the common configuration the number of swap devices is very small (one is typical). This causes heavy lock contention on the radix tree of that address space when multiple tasks swap out/in concurrently. But in fact there is no dependency between pages in the swap cache, so the single shared address space of each swap device can be split into several address spaces to reduce the lock contention.

In the patch, the shared address space is split into 64MB trunks. 64MB is chosen to balance memory usage against the effect of lock contention reduction. The size of struct address_space on the x86_64 architecture is 408B, so with the patch 6528B more memory is used for every 1GB of swap space on x86_64. One address space is still shared by all swap entries within the same 64MB trunk.

To avoid lock contention during the first round of swap space allocation, the order of the swap clusters in the initial free cluster list is changed so that the swap space distance between consecutive clusters in the list is at least 64MB. After the first round of allocation, the swap clusters are expected to be freed randomly, so the lock contention should be reduced effectively.

Link: http://lkml.kernel.org/r/735bab895e64c930581ffb0a05b661e01da82bc5.1484082593.git.tim.c.chen@linux.intel.com
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Aaron Lu <aaron.lu@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Shaohua Li <shli@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
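The memory overhead quoted above follows from the trunk size: 1GB / 64MB = 16 trunks, and 16 x 408B = 6528B per 1GB of swap. The per-trunk lookup itself is a simple shift of the swap offset; below is a sketch of the helper definitions that accompany this change in include/linux/swap.h (shown here for illustration only, since that file is outside this diffstat):

/* One address space covers 2^14 pages, i.e. 64MB of swap with 4KB pages. */
#define SWAP_ADDRESS_SPACE_SHIFT	14
#define SWAP_ADDRESS_SPACE_PAGES	(1 << SWAP_ADDRESS_SPACE_SHIFT)

/* Map a swap entry to the address space of its 64MB trunk. */
#define swap_address_space(entry)			    \
	(&swapper_spaces[swp_type(entry)][swp_offset(entry) \
		>> SWAP_ADDRESS_SPACE_SHIFT])

Because each device now owns an array of address spaces rather than a single one, total_swapcache_pages() in the diff below has to walk every trunk of every device, which is why it gains the inner loop over j.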
Diffstat (limited to 'mm/swap_state.c')
-rw-r--r--	mm/swap_state.c	68
1 file changed, 56 insertions, 12 deletions
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 35d7e0ee1c77..3863acd6189c 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -17,6 +17,7 @@
 #include <linux/blkdev.h>
 #include <linux/pagevec.h>
 #include <linux/migrate.h>
+#include <linux/vmalloc.h>
 
 #include <asm/pgtable.h>
 
@@ -32,15 +33,8 @@ static const struct address_space_operations swap_aops = {
 #endif
 };
 
-struct address_space swapper_spaces[MAX_SWAPFILES] = {
-	[0 ... MAX_SWAPFILES - 1] = {
-		.page_tree	= RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN),
-		.i_mmap_writable = ATOMIC_INIT(0),
-		.a_ops		= &swap_aops,
-		/* swap cache doesn't use writeback related tags */
-		.flags		= 1 << AS_NO_WRITEBACK_TAGS,
-	}
-};
+struct address_space *swapper_spaces[MAX_SWAPFILES];
+static unsigned int nr_swapper_spaces[MAX_SWAPFILES];
 
 #define INC_CACHE_INFO(x)	do { swap_cache_info.x++; } while (0)
 
@@ -53,11 +47,26 @@ static struct {
 
 unsigned long total_swapcache_pages(void)
 {
-	int i;
+	unsigned int i, j, nr;
 	unsigned long ret = 0;
+	struct address_space *spaces;
 
-	for (i = 0; i < MAX_SWAPFILES; i++)
-		ret += swapper_spaces[i].nrpages;
+	rcu_read_lock();
+	for (i = 0; i < MAX_SWAPFILES; i++) {
+		/*
+		 * The corresponding entries in nr_swapper_spaces and
+		 * swapper_spaces will be reused only after at least
+		 * one grace period.  So it is impossible for them
+		 * to belong to different usages.
+		 */
+		nr = nr_swapper_spaces[i];
+		spaces = rcu_dereference(swapper_spaces[i]);
+		if (!nr || !spaces)
+			continue;
+		for (j = 0; j < nr; j++)
+			ret += spaces[j].nrpages;
+	}
+	rcu_read_unlock();
 	return ret;
 }
 
@@ -505,3 +514,38 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
 skip:
 	return read_swap_cache_async(entry, gfp_mask, vma, addr);
 }
+
+int init_swap_address_space(unsigned int type, unsigned long nr_pages)
+{
+	struct address_space *spaces, *space;
+	unsigned int i, nr;
+
+	nr = DIV_ROUND_UP(nr_pages, SWAP_ADDRESS_SPACE_PAGES);
+	spaces = vzalloc(sizeof(struct address_space) * nr);
+	if (!spaces)
+		return -ENOMEM;
+	for (i = 0; i < nr; i++) {
+		space = spaces + i;
+		INIT_RADIX_TREE(&space->page_tree, GFP_ATOMIC|__GFP_NOWARN);
+		atomic_set(&space->i_mmap_writable, 0);
+		space->a_ops = &swap_aops;
+		/* swap cache doesn't use writeback related tags */
+		mapping_set_no_writeback_tags(space);
+		spin_lock_init(&space->tree_lock);
+	}
+	nr_swapper_spaces[type] = nr;
+	rcu_assign_pointer(swapper_spaces[type], spaces);
+
+	return 0;
+}
+
+void exit_swap_address_space(unsigned int type)
+{
+	struct address_space *spaces;
+
+	spaces = swapper_spaces[type];
+	nr_swapper_spaces[type] = 0;
+	rcu_assign_pointer(swapper_spaces[type], NULL);
+	synchronize_rcu();
+	kvfree(spaces);
+}
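The two new helpers bracket a swap device's lifetime: the array of per-trunk address spaces is allocated when the device is enabled and freed, after an RCU grace period, once no swap entry of that device can still be in use. The real call sites are the swapon()/swapoff() paths in mm/swapfile.c; the sketch below only illustrates the pairing, and the wrapper names are hypothetical:

/* Sketch only: hypothetical wrappers around the new helpers. */
static int sketch_enable_swap_device(struct swap_info_struct *p,
				     unsigned long maxpages)
{
	int error;

	/* Allocate one address space per SWAP_ADDRESS_SPACE_PAGES (64MB) trunk. */
	error = init_swap_address_space(p->type, maxpages);
	if (error)
		return error;
	/* ... remainder of the swapon setup ... */
	return 0;
}

static void sketch_disable_swap_device(unsigned int type)
{
	/*
	 * Safe only after all swap entries of this device have been freed;
	 * exit_swap_address_space() clears swapper_spaces[type] and waits
	 * for an RCU grace period before kvfree()ing the per-trunk array.
	 */
	exit_swap_address_space(type);
}

The synchronize_rcu() in exit_swap_address_space() pairs with the rcu_read_lock()/rcu_dereference() readers in total_swapcache_pages(), so a reader that still sees the old pointer can finish walking the trunks before they are freed.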