author     Shaohua Li <shli@kernel.org>                       2013-02-22 19:34:38 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>     2013-02-23 20:50:17 -0500
commit     ec8acf20afb8534ed511f6613dd2226b9e301010
tree       a0d6779eeffa0f523a2799dbb619e0a34fd786d4 /mm/vmscan.c
parent     33806f06da654092182410d974b6d3c5396ea3eb
swap: add per-partition lock for swapfile
swap_lock is heavily contended when I test swapping to 3 fast SSDs (it is even
slightly slower than swapping to 2 such SSDs). The main contention comes
from swap_info_get(). This patch tries to close the gap by adding a new
per-partition lock.
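As a rough sketch of the shape of the change (illustrative only, not a literal
hunk from the patch; unrelated fields of swap_info_struct are omitted):

/* include/linux/swap.h, sketch: each swap partition carries its own lock */
struct swap_info_struct {
	unsigned long	flags;		/* SWP_USED etc. */
	signed short	prio;		/* swap priority of this partition */
	/* ... other per-partition fields omitted ... */
	spinlock_t	lock;		/* protects this partition's data
					 * (swap map scanning, counters);
					 * global lists stay under swap_lock */
};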
Global data like nr_swapfiles, total_swap_pages, least_priority and
swap_list are still protected by swap_lock.
nr_swap_pages is now an atomic, so it can be changed without holding swap_lock.
In theory, get_swap_page() could then find no swap pages even though free swap
pages actually exist, but that does not sound like a big problem.
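The lockless read used throughout the diff below is just an atomic read of that
counter; roughly (a sketch, assuming the helper sits next to the counter in
include/linux/swap.h):

/* nr_swap_pages becomes an atomic_long_t, updated without swap_lock */
extern atomic_long_t nr_swap_pages;

static inline long get_nr_swap_pages(void)
{
	return atomic_long_read(&nr_swap_pages);
}

The callers in mm/vmscan.c only use the value to decide whether scanning anon
pages is worthwhile, so an occasionally stale read is acceptable there.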
Access to partition-specific data (scan_swap_map() and so on) is protected only
by swap_info_struct.lock.
Changing swap_info_struct.flags requires holding both swap_lock and
swap_info_struct.lock, because scan_swap_map() checks it. Reading the
flags is fine with either of the locks held.
If both swap_lock and swap_info_struct.lock must be held, we always take
the former first to avoid deadlock.
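As a sketch of that ordering rule (the helper below is hypothetical; only the
two locks and the SWP_WRITEOK flag come from the kernel):

/* hypothetical helper: illustrates the lock order, not actual patch code */
static void mark_swap_writeok(struct swap_info_struct *si)
{
	spin_lock(&swap_lock);		/* global lock first ... */
	spin_lock(&si->lock);		/* ... then the per-partition lock */
	si->flags |= SWP_WRITEOK;	/* flag changes need both locks held */
	spin_unlock(&si->lock);
	spin_unlock(&swap_lock);	/* release in reverse order */
}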
swap_entry_free() can change swap_list. To remove that code, we add a
new highest_priority_index. Whenever get_swap_page() is called, we
check it; if it is valid, we use it.
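Roughly, the idea looks like this (an illustrative sketch; only the
highest_priority_index name is taken from the patch, the helper and the exact
conditions are assumptions):

/* sketch: record a hint instead of reordering swap_list on every free */
static int highest_priority_index = -1;	/* -1: no hint recorded */

/* called from swap_entry_free() when partition 'type' frees a slot */
static void note_freed_slot(int type)	/* hypothetical helper */
{
	if (swap_list.next < 0 ||
	    swap_info[type]->prio >= swap_info[swap_list.next]->prio)
		highest_priority_index = type;
}

/* near the top of get_swap_page(): consume the hint, then scan as before */
	if (highest_priority_index >= 0) {
		swap_list.next = highest_priority_index;
		highest_priority_index = -1;
	}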
It's a pity that get_swap_page() still takes swap_lock. But in practice,
swap_lock isn't heavily contended in my test with this patch (or, put
differently, there are other much heavier bottlenecks, like TLB flush). And by
the way, it looks like get_swap_page() doesn't really need the lock: we never
free swap_info[], and we check the SWP_WRITEOK flag. The only risk without the
lock is that we could swap out to a low-priority swap device, but we would
quickly recover after several rounds of swapping, so that doesn't sound like a
big deal to me. I'd still prefer to fix this if it turns out to be a real problem.
"swap: make each swap partition have one address_space" improved the
swapout speed from 1.7G/s to 2G/s. This patch further improves the
speed to 2.3G/s, so around 15% improvement. It's a multi-process test,
so TLB flush isn't the biggest bottleneck before the patches.
[arnd@arndb.de: fix it for nommu]
[hughd@google.com: add missing unlock]
[minchan@kernel.org: get rid of lockdep whinge on sys_swapon]
Signed-off-by: Shaohua Li <shli@fusionio.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Seth Jennings <sjenning@linux.vnet.ibm.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Cc: Dan Magenheimer <dan.magenheimer@oracle.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/vmscan.c')
 mm/vmscan.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a68fa20269d9..b7d8015a6d54 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1684,7 +1684,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
 		force_scan = true;
 
 	/* If we have no swap space, do not bother scanning anon pages. */
-	if (!sc->may_swap || (nr_swap_pages <= 0)) {
+	if (!sc->may_swap || (get_nr_swap_pages() <= 0)) {
 		scan_balance = SCAN_FILE;
 		goto out;
 	}
@@ -1933,7 +1933,7 @@ static inline bool should_continue_reclaim(struct zone *zone,
 	 */
 	pages_for_compaction = (2UL << sc->order);
 	inactive_lru_pages = zone_page_state(zone, NR_INACTIVE_FILE);
-	if (nr_swap_pages > 0)
+	if (get_nr_swap_pages() > 0)
 		inactive_lru_pages += zone_page_state(zone, NR_INACTIVE_ANON);
 	if (sc->nr_reclaimed < pages_for_compaction &&
 			inactive_lru_pages > pages_for_compaction)
@@ -3085,7 +3085,7 @@ unsigned long global_reclaimable_pages(void)
 	nr = global_page_state(NR_ACTIVE_FILE) +
 	     global_page_state(NR_INACTIVE_FILE);
 
-	if (nr_swap_pages > 0)
+	if (get_nr_swap_pages() > 0)
 		nr += global_page_state(NR_ACTIVE_ANON) +
 		      global_page_state(NR_INACTIVE_ANON);
 
@@ -3099,7 +3099,7 @@ unsigned long zone_reclaimable_pages(struct zone *zone)
 	nr = zone_page_state(zone, NR_ACTIVE_FILE) +
 	    zone_page_state(zone, NR_INACTIVE_FILE);
 
-	if (nr_swap_pages > 0)
+	if (get_nr_swap_pages() > 0)
 		nr += zone_page_state(zone, NR_ACTIVE_ANON) +
 		      zone_page_state(zone, NR_INACTIVE_ANON);
 