diff options
author | Shaohua Li <shli@kernel.org> | 2013-02-22 19:34:37 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-02-23 20:50:17 -0500 |
commit | 33806f06da654092182410d974b6d3c5396ea3eb (patch) | |
tree | 7f7da99d94481a1d4c78ebf05b410fc8ba654a39 | |
parent | 9800339b5e0f0e24ab3dac349e0de80d2018832e (diff) |
swap: make each swap partition have one address_space
When I use several fast SSDs to do swap, swapper_space.tree_lock is
heavily contended. This makes each swap partition have one
address_space to reduce the lock contention. There is an array of
address_space for swap. The swap entry type is the index to the array.
In my test with 3 SSDs, this increases the swapout throughput by 20%.
[akpm@linux-foundation.org: revert unneeded change to __add_to_swap_cache]
Signed-off-by: Shaohua Li <shli@fusionio.com>
Cc: Hugh Dickins <hughd@google.com>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | fs/proc/meminfo.c | 4 | ||||
-rw-r--r-- | include/linux/swap.h | 9 | ||||
-rw-r--r-- | mm/memcontrol.c | 4 | ||||
-rw-r--r-- | mm/mincore.c | 5 | ||||
-rw-r--r-- | mm/swap.c | 9 | ||||
-rw-r--r-- | mm/swap_state.c | 55 | ||||
-rw-r--r-- | mm/swapfile.c | 5 | ||||
-rw-r--r-- | mm/util.c | 10 |
8 files changed, 67 insertions, 34 deletions
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index c3dac611c3c0..1efaaa19c4f3 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c | |||
@@ -40,7 +40,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) | |||
40 | * sysctl_overcommit_ratio / 100) + total_swap_pages; | 40 | * sysctl_overcommit_ratio / 100) + total_swap_pages; |
41 | 41 | ||
42 | cached = global_page_state(NR_FILE_PAGES) - | 42 | cached = global_page_state(NR_FILE_PAGES) - |
43 | total_swapcache_pages - i.bufferram; | 43 | total_swapcache_pages() - i.bufferram; |
44 | if (cached < 0) | 44 | if (cached < 0) |
45 | cached = 0; | 45 | cached = 0; |
46 | 46 | ||
@@ -109,7 +109,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) | |||
109 | K(i.freeram), | 109 | K(i.freeram), |
110 | K(i.bufferram), | 110 | K(i.bufferram), |
111 | K(cached), | 111 | K(cached), |
112 | K(total_swapcache_pages), | 112 | K(total_swapcache_pages()), |
113 | K(pages[LRU_ACTIVE_ANON] + pages[LRU_ACTIVE_FILE]), | 113 | K(pages[LRU_ACTIVE_ANON] + pages[LRU_ACTIVE_FILE]), |
114 | K(pages[LRU_INACTIVE_ANON] + pages[LRU_INACTIVE_FILE]), | 114 | K(pages[LRU_INACTIVE_ANON] + pages[LRU_INACTIVE_FILE]), |
115 | K(pages[LRU_ACTIVE_ANON]), | 115 | K(pages[LRU_ACTIVE_ANON]), |
diff --git a/include/linux/swap.h b/include/linux/swap.h index 8c66486a8ca8..235c039892ee 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h | |||
@@ -8,7 +8,7 @@ | |||
8 | #include <linux/memcontrol.h> | 8 | #include <linux/memcontrol.h> |
9 | #include <linux/sched.h> | 9 | #include <linux/sched.h> |
10 | #include <linux/node.h> | 10 | #include <linux/node.h> |
11 | 11 | #include <linux/fs.h> | |
12 | #include <linux/atomic.h> | 12 | #include <linux/atomic.h> |
13 | #include <asm/page.h> | 13 | #include <asm/page.h> |
14 | 14 | ||
@@ -330,8 +330,9 @@ int generic_swapfile_activate(struct swap_info_struct *, struct file *, | |||
330 | sector_t *); | 330 | sector_t *); |
331 | 331 | ||
332 | /* linux/mm/swap_state.c */ | 332 | /* linux/mm/swap_state.c */ |
333 | extern struct address_space swapper_space; | 333 | extern struct address_space swapper_spaces[]; |
334 | #define total_swapcache_pages swapper_space.nrpages | 334 | #define swap_address_space(entry) (&swapper_spaces[swp_type(entry)]) |
335 | extern unsigned long total_swapcache_pages(void); | ||
335 | extern void show_swap_cache_info(void); | 336 | extern void show_swap_cache_info(void); |
336 | extern int add_to_swap(struct page *); | 337 | extern int add_to_swap(struct page *); |
337 | extern int add_to_swap_cache(struct page *, swp_entry_t, gfp_t); | 338 | extern int add_to_swap_cache(struct page *, swp_entry_t, gfp_t); |
@@ -382,7 +383,7 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout) | |||
382 | 383 | ||
383 | #define nr_swap_pages 0L | 384 | #define nr_swap_pages 0L |
384 | #define total_swap_pages 0L | 385 | #define total_swap_pages 0L |
385 | #define total_swapcache_pages 0UL | 386 | #define total_swapcache_pages() 0UL |
386 | 387 | ||
387 | #define si_swapinfo(val) \ | 388 | #define si_swapinfo(val) \ |
388 | do { (val)->freeswap = (val)->totalswap = 0; } while (0) | 389 | do { (val)->freeswap = (val)->totalswap = 0; } while (0) |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index c878b1c69510..f85861531f22 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -6307,7 +6307,7 @@ static struct page *mc_handle_swap_pte(struct vm_area_struct *vma, | |||
6307 | * Because lookup_swap_cache() updates some statistics counter, | 6307 | * Because lookup_swap_cache() updates some statistics counter, |
6308 | * we call find_get_page() with swapper_space directly. | 6308 | * we call find_get_page() with swapper_space directly. |
6309 | */ | 6309 | */ |
6310 | page = find_get_page(&swapper_space, ent.val); | 6310 | page = find_get_page(swap_address_space(ent), ent.val); |
6311 | if (do_swap_account) | 6311 | if (do_swap_account) |
6312 | entry->val = ent.val; | 6312 | entry->val = ent.val; |
6313 | 6313 | ||
@@ -6348,7 +6348,7 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma, | |||
6348 | swp_entry_t swap = radix_to_swp_entry(page); | 6348 | swp_entry_t swap = radix_to_swp_entry(page); |
6349 | if (do_swap_account) | 6349 | if (do_swap_account) |
6350 | *entry = swap; | 6350 | *entry = swap; |
6351 | page = find_get_page(&swapper_space, swap.val); | 6351 | page = find_get_page(swap_address_space(swap), swap.val); |
6352 | } | 6352 | } |
6353 | #endif | 6353 | #endif |
6354 | return page; | 6354 | return page; |
diff --git a/mm/mincore.c b/mm/mincore.c index 936b4cee8cb1..da2be56a7b8f 100644 --- a/mm/mincore.c +++ b/mm/mincore.c | |||
@@ -75,7 +75,7 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff) | |||
75 | /* shmem/tmpfs may return swap: account for swapcache page too. */ | 75 | /* shmem/tmpfs may return swap: account for swapcache page too. */ |
76 | if (radix_tree_exceptional_entry(page)) { | 76 | if (radix_tree_exceptional_entry(page)) { |
77 | swp_entry_t swap = radix_to_swp_entry(page); | 77 | swp_entry_t swap = radix_to_swp_entry(page); |
78 | page = find_get_page(&swapper_space, swap.val); | 78 | page = find_get_page(swap_address_space(swap), swap.val); |
79 | } | 79 | } |
80 | #endif | 80 | #endif |
81 | if (page) { | 81 | if (page) { |
@@ -135,7 +135,8 @@ static void mincore_pte_range(struct vm_area_struct *vma, pmd_t *pmd, | |||
135 | } else { | 135 | } else { |
136 | #ifdef CONFIG_SWAP | 136 | #ifdef CONFIG_SWAP |
137 | pgoff = entry.val; | 137 | pgoff = entry.val; |
138 | *vec = mincore_page(&swapper_space, pgoff); | 138 | *vec = mincore_page(swap_address_space(entry), |
139 | pgoff); | ||
139 | #else | 140 | #else |
140 | WARN_ON(1); | 141 | WARN_ON(1); |
141 | *vec = 1; | 142 | *vec = 1; |
@@ -855,9 +855,14 @@ EXPORT_SYMBOL(pagevec_lookup_tag); | |||
855 | void __init swap_setup(void) | 855 | void __init swap_setup(void) |
856 | { | 856 | { |
857 | unsigned long megs = totalram_pages >> (20 - PAGE_SHIFT); | 857 | unsigned long megs = totalram_pages >> (20 - PAGE_SHIFT); |
858 | |||
859 | #ifdef CONFIG_SWAP | 858 | #ifdef CONFIG_SWAP |
860 | bdi_init(swapper_space.backing_dev_info); | 859 | int i; |
860 | |||
861 | bdi_init(swapper_spaces[0].backing_dev_info); | ||
862 | for (i = 0; i < MAX_SWAPFILES; i++) { | ||
863 | spin_lock_init(&swapper_spaces[i].tree_lock); | ||
864 | INIT_LIST_HEAD(&swapper_spaces[i].i_mmap_nonlinear); | ||
865 | } | ||
861 | #endif | 866 | #endif |
862 | 867 | ||
863 | /* Use a smaller cluster for small-memory machines */ | 868 | /* Use a smaller cluster for small-memory machines */ |
diff --git a/mm/swap_state.c b/mm/swap_state.c index 0cb36fb1f61c..8d6644c5d0cc 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c | |||
@@ -36,12 +36,12 @@ static struct backing_dev_info swap_backing_dev_info = { | |||
36 | .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED, | 36 | .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED, |
37 | }; | 37 | }; |
38 | 38 | ||
39 | struct address_space swapper_space = { | 39 | struct address_space swapper_spaces[MAX_SWAPFILES] = { |
40 | .page_tree = RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN), | 40 | [0 ... MAX_SWAPFILES - 1] = { |
41 | .tree_lock = __SPIN_LOCK_UNLOCKED(swapper_space.tree_lock), | 41 | .page_tree = RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN), |
42 | .a_ops = &swap_aops, | 42 | .a_ops = &swap_aops, |
43 | .i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear), | 43 | .backing_dev_info = &swap_backing_dev_info, |
44 | .backing_dev_info = &swap_backing_dev_info, | 44 | } |
45 | }; | 45 | }; |
46 | 46 | ||
47 | #define INC_CACHE_INFO(x) do { swap_cache_info.x++; } while (0) | 47 | #define INC_CACHE_INFO(x) do { swap_cache_info.x++; } while (0) |
@@ -53,9 +53,19 @@ static struct { | |||
53 | unsigned long find_total; | 53 | unsigned long find_total; |
54 | } swap_cache_info; | 54 | } swap_cache_info; |
55 | 55 | ||
56 | unsigned long total_swapcache_pages(void) | ||
57 | { | ||
58 | int i; | ||
59 | unsigned long ret = 0; | ||
60 | |||
61 | for (i = 0; i < MAX_SWAPFILES; i++) | ||
62 | ret += swapper_spaces[i].nrpages; | ||
63 | return ret; | ||
64 | } | ||
65 | |||
56 | void show_swap_cache_info(void) | 66 | void show_swap_cache_info(void) |
57 | { | 67 | { |
58 | printk("%lu pages in swap cache\n", total_swapcache_pages); | 68 | printk("%lu pages in swap cache\n", total_swapcache_pages()); |
59 | printk("Swap cache stats: add %lu, delete %lu, find %lu/%lu\n", | 69 | printk("Swap cache stats: add %lu, delete %lu, find %lu/%lu\n", |
60 | swap_cache_info.add_total, swap_cache_info.del_total, | 70 | swap_cache_info.add_total, swap_cache_info.del_total, |
61 | swap_cache_info.find_success, swap_cache_info.find_total); | 71 | swap_cache_info.find_success, swap_cache_info.find_total); |
@@ -70,6 +80,7 @@ void show_swap_cache_info(void) | |||
70 | static int __add_to_swap_cache(struct page *page, swp_entry_t entry) | 80 | static int __add_to_swap_cache(struct page *page, swp_entry_t entry) |
71 | { | 81 | { |
72 | int error; | 82 | int error; |
83 | struct address_space *address_space; | ||
73 | 84 | ||
74 | VM_BUG_ON(!PageLocked(page)); | 85 | VM_BUG_ON(!PageLocked(page)); |
75 | VM_BUG_ON(PageSwapCache(page)); | 86 | VM_BUG_ON(PageSwapCache(page)); |
@@ -79,14 +90,16 @@ static int __add_to_swap_cache(struct page *page, swp_entry_t entry) | |||
79 | SetPageSwapCache(page); | 90 | SetPageSwapCache(page); |
80 | set_page_private(page, entry.val); | 91 | set_page_private(page, entry.val); |
81 | 92 | ||
82 | spin_lock_irq(&swapper_space.tree_lock); | 93 | address_space = swap_address_space(entry); |
83 | error = radix_tree_insert(&swapper_space.page_tree, entry.val, page); | 94 | spin_lock_irq(&address_space->tree_lock); |
95 | error = radix_tree_insert(&address_space->page_tree, | ||
96 | entry.val, page); | ||
84 | if (likely(!error)) { | 97 | if (likely(!error)) { |
85 | total_swapcache_pages++; | 98 | address_space->nrpages++; |
86 | __inc_zone_page_state(page, NR_FILE_PAGES); | 99 | __inc_zone_page_state(page, NR_FILE_PAGES); |
87 | INC_CACHE_INFO(add_total); | 100 | INC_CACHE_INFO(add_total); |
88 | } | 101 | } |
89 | spin_unlock_irq(&swapper_space.tree_lock); | 102 | spin_unlock_irq(&address_space->tree_lock); |
90 | 103 | ||
91 | if (unlikely(error)) { | 104 | if (unlikely(error)) { |
92 | /* | 105 | /* |
@@ -122,14 +135,19 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask) | |||
122 | */ | 135 | */ |
123 | void __delete_from_swap_cache(struct page *page) | 136 | void __delete_from_swap_cache(struct page *page) |
124 | { | 137 | { |
138 | swp_entry_t entry; | ||
139 | struct address_space *address_space; | ||
140 | |||
125 | VM_BUG_ON(!PageLocked(page)); | 141 | VM_BUG_ON(!PageLocked(page)); |
126 | VM_BUG_ON(!PageSwapCache(page)); | 142 | VM_BUG_ON(!PageSwapCache(page)); |
127 | VM_BUG_ON(PageWriteback(page)); | 143 | VM_BUG_ON(PageWriteback(page)); |
128 | 144 | ||
129 | radix_tree_delete(&swapper_space.page_tree, page_private(page)); | 145 | entry.val = page_private(page); |
146 | address_space = swap_address_space(entry); | ||
147 | radix_tree_delete(&address_space->page_tree, page_private(page)); | ||
130 | set_page_private(page, 0); | 148 | set_page_private(page, 0); |
131 | ClearPageSwapCache(page); | 149 | ClearPageSwapCache(page); |
132 | total_swapcache_pages--; | 150 | address_space->nrpages--; |
133 | __dec_zone_page_state(page, NR_FILE_PAGES); | 151 | __dec_zone_page_state(page, NR_FILE_PAGES); |
134 | INC_CACHE_INFO(del_total); | 152 | INC_CACHE_INFO(del_total); |
135 | } | 153 | } |
@@ -195,12 +213,14 @@ int add_to_swap(struct page *page) | |||
195 | void delete_from_swap_cache(struct page *page) | 213 | void delete_from_swap_cache(struct page *page) |
196 | { | 214 | { |
197 | swp_entry_t entry; | 215 | swp_entry_t entry; |
216 | struct address_space *address_space; | ||
198 | 217 | ||
199 | entry.val = page_private(page); | 218 | entry.val = page_private(page); |
200 | 219 | ||
201 | spin_lock_irq(&swapper_space.tree_lock); | 220 | address_space = swap_address_space(entry); |
221 | spin_lock_irq(&address_space->tree_lock); | ||
202 | __delete_from_swap_cache(page); | 222 | __delete_from_swap_cache(page); |
203 | spin_unlock_irq(&swapper_space.tree_lock); | 223 | spin_unlock_irq(&address_space->tree_lock); |
204 | 224 | ||
205 | swapcache_free(entry, page); | 225 | swapcache_free(entry, page); |
206 | page_cache_release(page); | 226 | page_cache_release(page); |
@@ -263,7 +283,7 @@ struct page * lookup_swap_cache(swp_entry_t entry) | |||
263 | { | 283 | { |
264 | struct page *page; | 284 | struct page *page; |
265 | 285 | ||
266 | page = find_get_page(&swapper_space, entry.val); | 286 | page = find_get_page(swap_address_space(entry), entry.val); |
267 | 287 | ||
268 | if (page) | 288 | if (page) |
269 | INC_CACHE_INFO(find_success); | 289 | INC_CACHE_INFO(find_success); |
@@ -290,7 +310,8 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, | |||
290 | * called after lookup_swap_cache() failed, re-calling | 310 | * called after lookup_swap_cache() failed, re-calling |
291 | * that would confuse statistics. | 311 | * that would confuse statistics. |
292 | */ | 312 | */ |
293 | found_page = find_get_page(&swapper_space, entry.val); | 313 | found_page = find_get_page(swap_address_space(entry), |
314 | entry.val); | ||
294 | if (found_page) | 315 | if (found_page) |
295 | break; | 316 | break; |
296 | 317 | ||
diff --git a/mm/swapfile.c b/mm/swapfile.c index e97a0e5aea91..e51864e6fe8b 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c | |||
@@ -79,7 +79,7 @@ __try_to_reclaim_swap(struct swap_info_struct *si, unsigned long offset) | |||
79 | struct page *page; | 79 | struct page *page; |
80 | int ret = 0; | 80 | int ret = 0; |
81 | 81 | ||
82 | page = find_get_page(&swapper_space, entry.val); | 82 | page = find_get_page(swap_address_space(entry), entry.val); |
83 | if (!page) | 83 | if (!page) |
84 | return 0; | 84 | return 0; |
85 | /* | 85 | /* |
@@ -699,7 +699,8 @@ int free_swap_and_cache(swp_entry_t entry) | |||
699 | p = swap_info_get(entry); | 699 | p = swap_info_get(entry); |
700 | if (p) { | 700 | if (p) { |
701 | if (swap_entry_free(p, entry, 1) == SWAP_HAS_CACHE) { | 701 | if (swap_entry_free(p, entry, 1) == SWAP_HAS_CACHE) { |
702 | page = find_get_page(&swapper_space, entry.val); | 702 | page = find_get_page(swap_address_space(entry), |
703 | entry.val); | ||
703 | if (page && !trylock_page(page)) { | 704 | if (page && !trylock_page(page)) { |
704 | page_cache_release(page); | 705 | page_cache_release(page); |
705 | page = NULL; | 706 | page = NULL; |
@@ -6,6 +6,7 @@ | |||
6 | #include <linux/sched.h> | 6 | #include <linux/sched.h> |
7 | #include <linux/security.h> | 7 | #include <linux/security.h> |
8 | #include <linux/swap.h> | 8 | #include <linux/swap.h> |
9 | #include <linux/swapops.h> | ||
9 | #include <asm/uaccess.h> | 10 | #include <asm/uaccess.h> |
10 | 11 | ||
11 | #include "internal.h" | 12 | #include "internal.h" |
@@ -389,9 +390,12 @@ struct address_space *page_mapping(struct page *page) | |||
389 | 390 | ||
390 | VM_BUG_ON(PageSlab(page)); | 391 | VM_BUG_ON(PageSlab(page)); |
391 | #ifdef CONFIG_SWAP | 392 | #ifdef CONFIG_SWAP |
392 | if (unlikely(PageSwapCache(page))) | 393 | if (unlikely(PageSwapCache(page))) { |
393 | mapping = &swapper_space; | 394 | swp_entry_t entry; |
394 | else | 395 | |
396 | entry.val = page_private(page); | ||
397 | mapping = swap_address_space(entry); | ||
398 | } else | ||
395 | #endif | 399 | #endif |
396 | if ((unsigned long)mapping & PAGE_MAPPING_ANON) | 400 | if ((unsigned long)mapping & PAGE_MAPPING_ANON) |
397 | mapping = NULL; | 401 | mapping = NULL; |