author		Shaohua Li <shli@kernel.org>	2013-02-22 19:34:37 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-02-23 20:50:17 -0500
commit		33806f06da654092182410d974b6d3c5396ea3eb (patch)
tree		7f7da99d94481a1d4c78ebf05b410fc8ba654a39
parent		9800339b5e0f0e24ab3dac349e0de80d2018832e (diff)
swap: make each swap partition have one address_space
When I use several fast SSDs for swap, swapper_space.tree_lock is heavily
contended.  This patch gives each swap partition its own address_space to
reduce the lock contention.  There is an array of address_space structures
for swap; the swap entry type is the index into the array.

In my test with 3 SSDs, this increases swapout throughput by 20%.

[akpm@linux-foundation.org: revert unneeded change to __add_to_swap_cache]
Signed-off-by: Shaohua Li <shli@fusionio.com>
Cc: Hugh Dickins <hughd@google.com>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
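The mechanism is easiest to see in the swap_address_space() helper introduced in include/linux/swap.h below: the type field of a swap entry identifies which swap device the entry lives on, and that type indexes an array of independent address_space structures, each with its own tree_lock.  The standalone C sketch that follows mirrors that pattern with hypothetical names (space, space_of, entry_type, and TYPE_SHIFT are illustrative, and the type field at bit 58 is an assumed layout, not the kernel's); it models the locking idea only and is not kernel code:

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_SWAPFILES	32	/* illustrative; the kernel derives its own value */
#define TYPE_SHIFT	58	/* assumed bit position of the swap "type" field */

/* Stand-in for the kernel's address_space: a page count behind a lock. */
struct space {
	pthread_mutex_t lock;	/* formerly one global swapper_space.tree_lock */
	unsigned long nrpages;
};

/* GCC range-designated initializer, the same idiom the patch itself uses. */
static struct space spaces[MAX_SWAPFILES] = {
	[0 ... MAX_SWAPFILES - 1] = { PTHREAD_MUTEX_INITIALIZER, 0 }
};

/* Analogue of swp_type(): recover the partition index from an entry. */
static unsigned int entry_type(uint64_t entry)
{
	return (unsigned int)(entry >> TYPE_SHIFT);
}

/* Analogue of swap_address_space(): pick the per-partition instance. */
static struct space *space_of(uint64_t entry)
{
	return &spaces[entry_type(entry)];
}

/* Inserting a page now takes only the owning partition's lock. */
static void add_page(uint64_t entry)
{
	struct space *s = space_of(entry);

	pthread_mutex_lock(&s->lock);
	s->nrpages++;
	pthread_mutex_unlock(&s->lock);
}

/* Analogue of total_swapcache_pages(): sum the per-partition counters. */
static unsigned long total_pages(void)
{
	unsigned long ret = 0;

	for (int i = 0; i < MAX_SWAPFILES; i++)
		ret += spaces[i].nrpages;
	return ret;
}

int main(void)
{
	add_page((0ULL << TYPE_SHIFT) | 42);	/* offset 42 on partition 0 */
	add_page((1ULL << TYPE_SHIFT) | 42);	/* same offset, partition 1 */
	printf("%lu pages in swap cache\n", total_pages());
	return 0;
}

Because each partition owns its lock, threads swapping to different devices never serialize on a single tree_lock, and summing the per-partition nrpages counters replaces the old global counter, exactly as the new total_swapcache_pages() does in the diff below.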
-rw-r--r--	fs/proc/meminfo.c	4
-rw-r--r--	include/linux/swap.h	9
-rw-r--r--	mm/memcontrol.c		4
-rw-r--r--	mm/mincore.c		5
-rw-r--r--	mm/swap.c		9
-rw-r--r--	mm/swap_state.c		55
-rw-r--r--	mm/swapfile.c		5
-rw-r--r--	mm/util.c		10
8 files changed, 67 insertions(+), 34 deletions(-)
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index c3dac611c3c0..1efaaa19c4f3 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -40,7 +40,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 		* sysctl_overcommit_ratio / 100) + total_swap_pages;
 
 	cached = global_page_state(NR_FILE_PAGES) -
-			total_swapcache_pages - i.bufferram;
+			total_swapcache_pages() - i.bufferram;
 	if (cached < 0)
 		cached = 0;
 
@@ -109,7 +109,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 		K(i.freeram),
 		K(i.bufferram),
 		K(cached),
-		K(total_swapcache_pages),
+		K(total_swapcache_pages()),
 		K(pages[LRU_ACTIVE_ANON] + pages[LRU_ACTIVE_FILE]),
 		K(pages[LRU_INACTIVE_ANON] + pages[LRU_INACTIVE_FILE]),
 		K(pages[LRU_ACTIVE_ANON]),
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 8c66486a8ca8..235c039892ee 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -8,7 +8,7 @@
 #include <linux/memcontrol.h>
 #include <linux/sched.h>
 #include <linux/node.h>
-
+#include <linux/fs.h>
 #include <linux/atomic.h>
 #include <asm/page.h>
 
@@ -330,8 +330,9 @@ int generic_swapfile_activate(struct swap_info_struct *, struct file *,
 		sector_t *);
 
 /* linux/mm/swap_state.c */
-extern struct address_space swapper_space;
-#define total_swapcache_pages swapper_space.nrpages
+extern struct address_space swapper_spaces[];
+#define swap_address_space(entry) (&swapper_spaces[swp_type(entry)])
+extern unsigned long total_swapcache_pages(void);
 extern void show_swap_cache_info(void);
 extern int add_to_swap(struct page *);
 extern int add_to_swap_cache(struct page *, swp_entry_t, gfp_t);
@@ -382,7 +383,7 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
 
 #define nr_swap_pages				0L
 #define total_swap_pages			0L
-#define total_swapcache_pages			0UL
+#define total_swapcache_pages()			0UL
 
 #define si_swapinfo(val) \
 	do { (val)->freeswap = (val)->totalswap = 0; } while (0)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c878b1c69510..f85861531f22 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -6307,7 +6307,7 @@ static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
 	 * Because lookup_swap_cache() updates some statistics counter,
 	 * we call find_get_page() with swapper_space directly.
 	 */
-	page = find_get_page(&swapper_space, ent.val);
+	page = find_get_page(swap_address_space(ent), ent.val);
 	if (do_swap_account)
 		entry->val = ent.val;
 
@@ -6348,7 +6348,7 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
 		swp_entry_t swap = radix_to_swp_entry(page);
 		if (do_swap_account)
 			*entry = swap;
-		page = find_get_page(&swapper_space, swap.val);
+		page = find_get_page(swap_address_space(swap), swap.val);
 	}
 #endif
 	return page;
diff --git a/mm/mincore.c b/mm/mincore.c
index 936b4cee8cb1..da2be56a7b8f 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -75,7 +75,7 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
 	/* shmem/tmpfs may return swap: account for swapcache page too. */
 	if (radix_tree_exceptional_entry(page)) {
 		swp_entry_t swap = radix_to_swp_entry(page);
-		page = find_get_page(&swapper_space, swap.val);
+		page = find_get_page(swap_address_space(swap), swap.val);
 	}
 #endif
 	if (page) {
@@ -135,7 +135,8 @@ static void mincore_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 		} else {
 #ifdef CONFIG_SWAP
 			pgoff = entry.val;
-			*vec = mincore_page(&swapper_space, pgoff);
+			*vec = mincore_page(swap_address_space(entry),
+				pgoff);
 #else
 			WARN_ON(1);
 			*vec = 1;
diff --git a/mm/swap.c b/mm/swap.c
index 6310dc2008ff..8a529a01e8fc 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -855,9 +855,14 @@ EXPORT_SYMBOL(pagevec_lookup_tag);
 void __init swap_setup(void)
 {
 	unsigned long megs = totalram_pages >> (20 - PAGE_SHIFT);
-
 #ifdef CONFIG_SWAP
-	bdi_init(swapper_space.backing_dev_info);
+	int i;
+
+	bdi_init(swapper_spaces[0].backing_dev_info);
+	for (i = 0; i < MAX_SWAPFILES; i++) {
+		spin_lock_init(&swapper_spaces[i].tree_lock);
+		INIT_LIST_HEAD(&swapper_spaces[i].i_mmap_nonlinear);
+	}
 #endif
 
 	/* Use a smaller cluster for small-memory machines */
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 0cb36fb1f61c..8d6644c5d0cc 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -36,12 +36,12 @@ static struct backing_dev_info swap_backing_dev_info = {
 	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED,
 };
 
-struct address_space swapper_space = {
-	.page_tree	= RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN),
-	.tree_lock	= __SPIN_LOCK_UNLOCKED(swapper_space.tree_lock),
-	.a_ops		= &swap_aops,
-	.i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear),
-	.backing_dev_info = &swap_backing_dev_info,
+struct address_space swapper_spaces[MAX_SWAPFILES] = {
+	[0 ... MAX_SWAPFILES - 1] = {
+		.page_tree	= RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN),
+		.a_ops		= &swap_aops,
+		.backing_dev_info = &swap_backing_dev_info,
+	}
 };
 
 #define INC_CACHE_INFO(x)	do { swap_cache_info.x++; } while (0)
@@ -53,9 +53,19 @@ static struct {
 	unsigned long find_total;
 } swap_cache_info;
 
+unsigned long total_swapcache_pages(void)
+{
+	int i;
+	unsigned long ret = 0;
+
+	for (i = 0; i < MAX_SWAPFILES; i++)
+		ret += swapper_spaces[i].nrpages;
+	return ret;
+}
+
 void show_swap_cache_info(void)
 {
-	printk("%lu pages in swap cache\n", total_swapcache_pages);
+	printk("%lu pages in swap cache\n", total_swapcache_pages());
 	printk("Swap cache stats: add %lu, delete %lu, find %lu/%lu\n",
 		swap_cache_info.add_total, swap_cache_info.del_total,
 		swap_cache_info.find_success, swap_cache_info.find_total);
@@ -70,6 +80,7 @@ void show_swap_cache_info(void)
 static int __add_to_swap_cache(struct page *page, swp_entry_t entry)
 {
 	int error;
+	struct address_space *address_space;
 
 	VM_BUG_ON(!PageLocked(page));
 	VM_BUG_ON(PageSwapCache(page));
@@ -79,14 +90,16 @@ static int __add_to_swap_cache(struct page *page, swp_entry_t entry)
 	SetPageSwapCache(page);
 	set_page_private(page, entry.val);
 
-	spin_lock_irq(&swapper_space.tree_lock);
-	error = radix_tree_insert(&swapper_space.page_tree, entry.val, page);
+	address_space = swap_address_space(entry);
+	spin_lock_irq(&address_space->tree_lock);
+	error = radix_tree_insert(&address_space->page_tree,
+					entry.val, page);
 	if (likely(!error)) {
-		total_swapcache_pages++;
+		address_space->nrpages++;
 		__inc_zone_page_state(page, NR_FILE_PAGES);
 		INC_CACHE_INFO(add_total);
 	}
-	spin_unlock_irq(&swapper_space.tree_lock);
+	spin_unlock_irq(&address_space->tree_lock);
 
 	if (unlikely(error)) {
 		/*
@@ -122,14 +135,19 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
  */
 void __delete_from_swap_cache(struct page *page)
 {
+	swp_entry_t entry;
+	struct address_space *address_space;
+
 	VM_BUG_ON(!PageLocked(page));
 	VM_BUG_ON(!PageSwapCache(page));
 	VM_BUG_ON(PageWriteback(page));
 
-	radix_tree_delete(&swapper_space.page_tree, page_private(page));
+	entry.val = page_private(page);
+	address_space = swap_address_space(entry);
+	radix_tree_delete(&address_space->page_tree, page_private(page));
 	set_page_private(page, 0);
 	ClearPageSwapCache(page);
-	total_swapcache_pages--;
+	address_space->nrpages--;
 	__dec_zone_page_state(page, NR_FILE_PAGES);
 	INC_CACHE_INFO(del_total);
 }
@@ -195,12 +213,14 @@ int add_to_swap(struct page *page)
 void delete_from_swap_cache(struct page *page)
 {
 	swp_entry_t entry;
+	struct address_space *address_space;
 
 	entry.val = page_private(page);
 
-	spin_lock_irq(&swapper_space.tree_lock);
+	address_space = swap_address_space(entry);
+	spin_lock_irq(&address_space->tree_lock);
 	__delete_from_swap_cache(page);
-	spin_unlock_irq(&swapper_space.tree_lock);
+	spin_unlock_irq(&address_space->tree_lock);
 
 	swapcache_free(entry, page);
 	page_cache_release(page);
@@ -263,7 +283,7 @@ struct page * lookup_swap_cache(swp_entry_t entry)
 {
 	struct page *page;
 
-	page = find_get_page(&swapper_space, entry.val);
+	page = find_get_page(swap_address_space(entry), entry.val);
 
 	if (page)
 		INC_CACHE_INFO(find_success);
@@ -290,7 +310,8 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 		 * called after lookup_swap_cache() failed, re-calling
 		 * that would confuse statistics.
 		 */
-		found_page = find_get_page(&swapper_space, entry.val);
+		found_page = find_get_page(swap_address_space(entry),
+					entry.val);
 		if (found_page)
 			break;
 
diff --git a/mm/swapfile.c b/mm/swapfile.c
index e97a0e5aea91..e51864e6fe8b 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -79,7 +79,7 @@ __try_to_reclaim_swap(struct swap_info_struct *si, unsigned long offset)
 	struct page *page;
 	int ret = 0;
 
-	page = find_get_page(&swapper_space, entry.val);
+	page = find_get_page(swap_address_space(entry), entry.val);
 	if (!page)
 		return 0;
 	/*
@@ -699,7 +699,8 @@ int free_swap_and_cache(swp_entry_t entry)
 	p = swap_info_get(entry);
 	if (p) {
 		if (swap_entry_free(p, entry, 1) == SWAP_HAS_CACHE) {
-			page = find_get_page(&swapper_space, entry.val);
+			page = find_get_page(swap_address_space(entry),
+						entry.val);
 			if (page && !trylock_page(page)) {
 				page_cache_release(page);
 				page = NULL;
diff --git a/mm/util.c b/mm/util.c
index 16a73195a37b..ab1424dbe2e6 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -6,6 +6,7 @@
 #include <linux/sched.h>
 #include <linux/security.h>
 #include <linux/swap.h>
+#include <linux/swapops.h>
 #include <asm/uaccess.h>
 
 #include "internal.h"
@@ -389,9 +390,12 @@ struct address_space *page_mapping(struct page *page)
 
 	VM_BUG_ON(PageSlab(page));
 #ifdef CONFIG_SWAP
-	if (unlikely(PageSwapCache(page)))
-		mapping = &swapper_space;
-	else
+	if (unlikely(PageSwapCache(page))) {
+		swp_entry_t entry;
+
+		entry.val = page_private(page);
+		mapping = swap_address_space(entry);
+	} else
 #endif
 	if ((unsigned long)mapping & PAGE_MAPPING_ANON)
 		mapping = NULL;