author	Shaohua Li <shli@kernel.org>	2013-02-22 19:34:38 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-02-23 20:50:17 -0500
commit	ec8acf20afb8534ed511f6613dd2226b9e301010 (patch)
tree	a0d6779eeffa0f523a2799dbb619e0a34fd786d4 /mm/vmscan.c
parent	33806f06da654092182410d974b6d3c5396ea3eb (diff)
swap: add per-partition lock for swapfile
swap_lock is heavily contended when I test swap to 3 fast SSDs (it is even slightly slower than swap to 2 such SSDs). The main contention comes from swap_info_get(). This patch tries to fix the gap by adding a new per-partition lock.

Global data like nr_swapfiles, total_swap_pages, least_priority and swap_list are still protected by swap_lock.

nr_swap_pages is an atomic now; it can be changed without swap_lock. In theory it's possible that get_swap_page() finds no swap pages even though free swap pages actually exist, but that doesn't sound like a big problem.

Accessing partition-specific data (like scan_swap_map() and so on) is only protected by swap_info_struct.lock. Changing swap_info_struct.flags needs to hold both swap_lock and swap_info_struct.lock, because scan_swap_map() will check it. Reading the flags is OK with either lock held. If both swap_lock and swap_info_struct.lock must be held, we always take the former first to avoid deadlock.

swap_entry_free() can change swap_list. To delete that code, we add a new highest_priority_index. Whenever get_swap_page() is called, we check it; if it's valid, we use it.

It's a pity get_swap_page() still holds swap_lock. But in practice swap_lock isn't heavily contended in my test with this patch (or I can say there are other, much heavier bottlenecks, like TLB flush). And by the way, it looks like get_swap_page() doesn't really need the lock. We never free swap_info[] and we check the SWAP_WRITEOK flag. The only risk without the lock is that we could swap out to some low-priority swap, but we can quickly recover after several rounds of swap, so it doesn't sound like a big deal to me. But I'd prefer to fix this if it's a real problem.

"swap: make each swap partition have one address_space" improved the swapout speed from 1.7G/s to 2G/s. This patch further improves the speed to 2.3G/s, so around a 15% improvement. It's a multi-process test, so TLB flush isn't the biggest bottleneck before the patches.

[arnd@arndb.de: fix it for nommu]
[hughd@google.com: add missing unlock]
[minchan@kernel.org: get rid of lockdep whinge on sys_swapon]
Signed-off-by: Shaohua Li <shli@fusionio.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Seth Jennings <sjenning@linux.vnet.ibm.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Cc: Dan Magenheimer <dan.magenheimer@oracle.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
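The call sites changed below read the counter through a helper instead of touching nr_swap_pages directly. That helper is defined outside mm/vmscan.c, so it does not appear in this diffstat-limited view; the following is only a minimal sketch of the kind of wrapper those call sites imply, assuming the counter is an atomic_long_t as the message describes:

/*
 * Sketch only: the real definition lives outside mm/vmscan.c and is not
 * part of the diff shown on this page.  The vmscan.c callers only compare
 * the value against zero, so a plain lockless atomic read is enough.
 */
#include <linux/atomic.h>

extern atomic_long_t nr_swap_pages;	/* now atomic, updated without swap_lock */

static inline long get_nr_swap_pages(void)
{
	return atomic_long_read(&nr_swap_pages);
}

The lock-ordering rule stated above (flag changes need both locks, and swap_lock is always taken before the per-partition swap_info_struct.lock) could be pictured roughly as follows; the helper name here is made up purely for illustration:

/* Illustrative only: shows the documented lock nesting, not actual kernel code. */
static void example_update_flags(struct swap_info_struct *si, unsigned long flag)
{
	spin_lock(&swap_lock);		/* global lock always taken first */
	spin_lock(&si->lock);		/* then the per-partition lock */
	si->flags |= flag;		/* changing flags requires both locks held */
	spin_unlock(&si->lock);
	spin_unlock(&swap_lock);
}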
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--	mm/vmscan.c	8
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a68fa20269d9..b7d8015a6d54 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1684,7 +1684,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
 		force_scan = true;
 
 	/* If we have no swap space, do not bother scanning anon pages. */
-	if (!sc->may_swap || (nr_swap_pages <= 0)) {
+	if (!sc->may_swap || (get_nr_swap_pages() <= 0)) {
 		scan_balance = SCAN_FILE;
 		goto out;
 	}
@@ -1933,7 +1933,7 @@ static inline bool should_continue_reclaim(struct zone *zone,
 	 */
 	pages_for_compaction = (2UL << sc->order);
 	inactive_lru_pages = zone_page_state(zone, NR_INACTIVE_FILE);
-	if (nr_swap_pages > 0)
+	if (get_nr_swap_pages() > 0)
 		inactive_lru_pages += zone_page_state(zone, NR_INACTIVE_ANON);
 	if (sc->nr_reclaimed < pages_for_compaction &&
 			inactive_lru_pages > pages_for_compaction)
@@ -3085,7 +3085,7 @@ unsigned long global_reclaimable_pages(void)
 	nr = global_page_state(NR_ACTIVE_FILE) +
 	     global_page_state(NR_INACTIVE_FILE);
 
-	if (nr_swap_pages > 0)
+	if (get_nr_swap_pages() > 0)
 		nr += global_page_state(NR_ACTIVE_ANON) +
 		      global_page_state(NR_INACTIVE_ANON);
 
@@ -3099,7 +3099,7 @@ unsigned long zone_reclaimable_pages(struct zone *zone)
 	nr = zone_page_state(zone, NR_ACTIVE_FILE) +
 	     zone_page_state(zone, NR_INACTIVE_FILE);
 
-	if (nr_swap_pages > 0)
+	if (get_nr_swap_pages() > 0)
 		nr += zone_page_state(zone, NR_ACTIVE_ANON) +
 		      zone_page_state(zone, NR_INACTIVE_ANON);
 