author		Shaohua Li <shli@kernel.org>	2013-02-22 19:34:38 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-02-23 20:50:17 -0500
commit		ec8acf20afb8534ed511f6613dd2226b9e301010
tree		a0d6779eeffa0f523a2799dbb619e0a34fd786d4	/include/linux/swap.h
parent		33806f06da654092182410d974b6d3c5396ea3eb
swap: add per-partition lock for swapfile
swap_lock is heavily contended when I test swap to 3 fast SSDs (even
slightly slower than swap to 2 such SSDs).  The main contention comes
from swap_info_get().  This patch closes that gap by adding a new
per-partition lock.
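For illustration, a minimal sketch of what the hot path looks like with
the per-partition lock (the validity checks and error labels of the real
mm/swapfile.c function are elided here):

	static struct swap_info_struct *swap_info_get(swp_entry_t entry)
	{
		struct swap_info_struct *p = swap_info[swp_type(entry)];

		/* type/offset/swap_map validity checks elided */
		spin_lock(&p->lock);	/* per-partition lock, not the global swap_lock */
		return p;
	}

The point is the spin_lock() line: lookups for entries on different
partitions no longer serialize on one global lock.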
Global data like nr_swapfiles, total_swap_pages, least_priority and
swap_list are still protected by swap_lock.
nr_swap_pages is now an atomic, so it can be changed without holding
swap_lock.  In theory it's possible that get_swap_page() finds no swap
pages even though free swap pages exist, but that doesn't sound like a
big problem.
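As a sketch, the allocation fast path can now bail out or claim a page
without taking swap_lock:

	if (atomic_long_read(&nr_swap_pages) <= 0)
		goto noswap;
	atomic_long_dec(&nr_swap_pages);

Since the read isn't serialized against concurrent frees, it can see a
stale value; that is the benign race described above.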
Accessing partition-specific data (like scan_swap_map() and so on) is
protected only by swap_info_struct.lock.
Changing swap_info_struct.flags requires holding both swap_lock and
swap_info_struct.lock, because scan_swap_map() checks it.  Reading the
flags is fine with either lock held.
If both swap_lock and swap_info_struct.lock must be held, we always take
the former first to avoid deadlock.
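In sketch form, the required nesting looks like this (using a flags
update as the example, since that is the case that needs both locks):

	spin_lock(&swap_lock);		/* global lock first... */
	spin_lock(&p->lock);		/* ...then the per-partition lock */
	p->flags |= SWP_WRITEOK;
	spin_unlock(&p->lock);
	spin_unlock(&swap_lock);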
swap_entry_free() can change swap_list.  To delete that code, we add a
new highest_priority_index.  Whenever get_swap_page() is called, we
check it and, if it's valid, use it, as sketched below.
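Roughly, get_swap_page() consumes the hint like this (a sketch; because
the hint is written without swap_lock, the priority and SWP_WRITEOK
re-checks guard against a stale index):

	hp_index = atomic_xchg(&highest_priority_index, -1);
	if (hp_index != -1 && hp_index != type &&
	    swap_info[type]->prio < swap_info[hp_index]->prio &&
	    (swap_info[hp_index]->flags & SWP_WRITEOK)) {
		type = hp_index;
		swap_list.next = type;
	}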
It's a pity that get_swap_page() still takes swap_lock.  But in practice
swap_lock isn't heavily contended in my tests with this patch (or, put
another way, there are much heavier bottlenecks, like TLB flush).  And
it looks like get_swap_page() doesn't really need the lock: we never
free swap_info[] and we check the SWP_WRITEOK flag.  The only risk
without the lock is that we could swap out to a low-priority swap
device, but we would recover quickly after several rounds of swapping,
so that doesn't sound like a big deal to me.  I'd still prefer to fix
this if it turns out to be a real problem.
"swap: make each swap partition have one address_space" improved the
swapout speed from 1.7G/s to 2G/s. This patch further improves the
speed to 2.3G/s, so around 15% improvement. It's a multi-process test,
so TLB flush isn't the biggest bottleneck before the patches.
[arnd@arndb.de: fix it for nommu]
[hughd@google.com: add missing unlock]
[minchan@kernel.org: get rid of lockdep whinge on sys_swapon]
Signed-off-by: Shaohua Li <shli@fusionio.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Seth Jennings <sjenning@linux.vnet.ibm.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Cc: Dan Magenheimer <dan.magenheimer@oracle.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include/linux/swap.h')
-rw-r--r--	include/linux/swap.h	32	+++++++++++++++++++++++++++-----
1 file changed, 27 insertions(+), 5 deletions(-)
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 235c039892ee..a3e22d357e91 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -202,6 +202,18 @@ struct swap_info_struct {
 	unsigned long *frontswap_map;	/* frontswap in-use, one bit per page */
 	atomic_t frontswap_pages;	/* frontswap pages in-use counter */
 #endif
+	spinlock_t lock;		/*
+					 * protect map scan related fields like
+					 * swap_map, lowest_bit, highest_bit,
+					 * inuse_pages, cluster_next,
+					 * cluster_nr, lowest_alloc and
+					 * highest_alloc. other fields are only
+					 * changed at swapon/swapoff, so are
+					 * protected by swap_lock. changing
+					 * flags need hold this lock and
+					 * swap_lock. If both locks need hold,
+					 * hold swap_lock first.
+					 */
 };
 
 struct swap_list_t {
@@ -209,9 +221,6 @@ struct swap_list_t {
 	int next;	/* swapfile to be used next */
 };
 
-/* Swap 50% full? Release swapcache more aggressively.. */
-#define vm_swap_full() (nr_swap_pages*2 < total_swap_pages)
-
 /* linux/mm/page_alloc.c */
 extern unsigned long totalram_pages;
 extern unsigned long totalreserve_pages;
@@ -347,8 +356,20 @@ extern struct page *swapin_readahead(swp_entry_t, gfp_t,
 			struct vm_area_struct *vma, unsigned long addr);
 
 /* linux/mm/swapfile.c */
-extern long nr_swap_pages;
+extern atomic_long_t nr_swap_pages;
 extern long total_swap_pages;
+
+/* Swap 50% full? Release swapcache more aggressively.. */
+static inline bool vm_swap_full(void)
+{
+	return atomic_long_read(&nr_swap_pages) * 2 < total_swap_pages;
+}
+
+static inline long get_nr_swap_pages(void)
+{
+	return atomic_long_read(&nr_swap_pages);
+}
+
 extern void si_swapinfo(struct sysinfo *);
 extern swp_entry_t get_swap_page(void);
 extern swp_entry_t get_swap_page_of_type(int);
@@ -381,9 +402,10 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
 
 #else /* CONFIG_SWAP */
 
-#define nr_swap_pages				0L
+#define get_nr_swap_pages()			0L
 #define total_swap_pages			0L
 #define total_swapcache_pages()			0UL
+#define vm_swap_full()				0
 
 #define si_swapinfo(val) \
 	do { (val)->freeswap = (val)->totalswap = 0; } while (0)