author	Shaohua Li <shli@fb.com>	2017-07-10 18:47:11 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2017-07-10 19:32:30 -0400
commit	23955622ff8d231bcc9650b3d06583f117a6e3ba (patch)
tree	f0cea083054f298d7fd7eff3371ae07005e28506 /mm/swap_state.c
parent	9eb788800510ae1a6bc419636a66071ee4deafd5 (diff)
swap: add block io poll in swapin path
For fast flash disks, async IO can introduce overhead because of context
switches. block-mq now supports IO polling, which improves performance and
latency a lot. Swapin is a good place to use this technique, because the
task is waiting for the swapped-in page before it can continue execution.

In my virtual machine, directly reading 4k of data from an NVMe device with
iopoll is about 60% faster than without polling. With iopoll support in the
swapin path, my microbenchmark (a task doing random memory writes) is about
10%~25% faster. CPU utilization increases a lot though, 2x and even 3x; this
will depend on disk speed.

While iopoll in swapin isn't intended for all use cases, it's a win for
latency-sensitive workloads with a high speed swap disk. The block layer has
a knob to control polling at runtime; if polling isn't enabled in the block
layer, there should be no noticeable change in swapin.

I got a chance to run the same test on an NVMe device with DRAM as the
media. In a simple fio IO test, blkpoll boosts performance by 50% in the
single thread test and ~20% in the 8 threads test, so that is the baseline.
In the swap test above, blkpoll boosts performance by ~27% in the single
thread test, although it uses 2x the CPU time. If we enable hybrid polling,
the performance gain drops very slightly but the CPU time is only 50% worse
than without blkpoll. The hybrid poll parameter can also be tuned to reduce
the CPU time penalty further. In the 8 threads test, blkpoll doesn't help:
performance is similar to that without blkpoll, but so is CPU utilization.
There is lock contention in the swap path and the CPU time spent on blkpoll
isn't high, so overall blkpoll swapin isn't worse than without it.

Swapin readahead might read several pages in at the same time and form a big
IO request. Since that IO takes longer, it doesn't make sense to poll for
it, so the patch only does iopoll for single page swapin.

[akpm@linux-foundation.org: coding-style fixes]
Link: http://lkml.kernel.org/r/070c3c3e40b711e7b1390002c991e86a-b5408f0@7511894063d3764ff01ea8111f5a004d7dd700ed078797c204a24e620ddb965c
Signed-off-by: Shaohua Li <shli@fb.com>
Cc: Tim Chen <tim.c.chen@intel.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Jens Axboe <axboe@fb.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
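Note: the diff below only shows the do_poll plumbing in mm/swap_state.c; the
polling itself happens on the submission side, in swap_readpage() in
mm/page_io.c, which the same patch changes but which falls outside this
per-file view. The following is a rough sketch of that submission path,
assuming the blk_poll() interface of this kernel generation; error handling,
the non-block-device swap cases, and the exact bi_private bookkeeping are
approximated rather than copied from the patch.

/*
 * Sketch of the mm/page_io.c side of this patch (not covered by this
 * diffstat). Special cases and error paths are omitted.
 */
int swap_readpage(struct page *page, bool do_poll)
{
	struct bio *bio = get_swap_bio(GFP_KERNEL, page, end_swap_bio_read);
	struct block_device *bdev = bio->bi_bdev;
	blk_qc_t qc;

	bio->bi_private = current;	/* assumed to be cleared by end_swap_bio_read() */
	bio_set_op_attrs(bio, REQ_OP_READ, 0);
	count_vm_event(PSWPIN);
	bio_get(bio);
	qc = submit_bio(bio);

	/* Poll only for single-page, latency-sensitive swapin. */
	while (do_poll) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (!READ_ONCE(bio->bi_private))
			break;			/* IO already completed */
		if (!blk_poll(bdev_get_queue(bdev), qc))
			break;			/* polling not enabled/supported on this queue */
	}
	__set_current_state(TASK_RUNNING);
	bio_put(bio);
	return 0;
}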
Diffstat (limited to 'mm/swap_state.c')
-rw-r--r--	mm/swap_state.c	10
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 9c71b6b2562f..b68c93014f50 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -412,14 +412,14 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
  * the swap entry is no longer in use.
  */
 struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
-			struct vm_area_struct *vma, unsigned long addr)
+			struct vm_area_struct *vma, unsigned long addr, bool do_poll)
 {
 	bool page_was_allocated;
 	struct page *retpage = __read_swap_cache_async(entry, gfp_mask,
 			vma, addr, &page_was_allocated);
 
 	if (page_was_allocated)
-		swap_readpage(retpage);
+		swap_readpage(retpage, do_poll);
 
 	return retpage;
 }
@@ -496,11 +496,13 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
 	unsigned long start_offset, end_offset;
 	unsigned long mask;
 	struct blk_plug plug;
+	bool do_poll = true;
 
 	mask = swapin_nr_pages(offset) - 1;
 	if (!mask)
 		goto skip;
 
+	do_poll = false;
 	/* Read a page_cluster sized and aligned cluster around offset. */
 	start_offset = offset & ~mask;
 	end_offset = offset | mask;
@@ -511,7 +513,7 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
 	for (offset = start_offset; offset <= end_offset ; offset++) {
 		/* Ok, do the async read-ahead now */
 		page = read_swap_cache_async(swp_entry(swp_type(entry), offset),
-						gfp_mask, vma, addr);
+						gfp_mask, vma, addr, false);
 		if (!page)
 			continue;
 		if (offset != entry_offset && likely(!PageTransCompound(page)))
@@ -522,7 +524,7 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
 
 	lru_add_drain();	/* Push any new pages onto the LRU now */
 skip:
-	return read_swap_cache_async(entry, gfp_mask, vma, addr);
+	return read_swap_cache_async(entry, gfp_mask, vma, addr, do_poll);
 }
 
 int init_swap_address_space(unsigned int type, unsigned long nr_pages)
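Since read_swap_cache_async() gained a parameter, callers outside
mm/swap_state.c also have to pass it; those hunks are not shown in this
per-file view. As a hedged illustration only (the surrounding loop is
abbreviated and not copied from the patch), an out-of-file call site such as
the MADV_WILLNEED swapin walk would be updated roughly like this, passing
false because no task is blocked waiting on those pages:

	/* Illustrative sketch of an out-of-file caller after this change. */
	page = read_swap_cache_async(entry, GFP_HIGHUSER_MOVABLE,
				     vma, index, false);	/* readahead-style: no polling */
	if (page)
		put_page(page);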