path: root/mm/page_io.c
author     Shaohua Li <shli@fb.com>    2017-07-10 18:47:11 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>    2017-07-10 19:32:30 -0400
commit     23955622ff8d231bcc9650b3d06583f117a6e3ba (patch)
tree       f0cea083054f298d7fd7eff3371ae07005e28506 /mm/page_io.c
parent     9eb788800510ae1a6bc419636a66071ee4deafd5 (diff)
swap: add block io poll in swapin path
For fast flash disks, async IO can introduce overhead because of the context switch. block-mq now supports IO poll, which improves performance and latency considerably. swapin is a good place to use this technique, because the task is waiting for the swapped-in page before it can continue execution.

In my virtual machine, directly reading 4k of data from an NVMe device with iopoll is about 60% faster than without poll. With iopoll support in the swapin path, my microbenchmark (a task doing random memory writes) is about 10%~25% faster. CPU utilization increases a lot though, 2x and even 3x; this will depend on disk speed.

While iopoll in swapin isn't intended for all use cases, it's a win for latency-sensitive workloads with a high speed swap disk. The block layer has a knob to control polling at runtime; if poll isn't enabled in the block layer, there should be no noticeable change in swapin.

I got a chance to run the same test on an NVMe device with DRAM as the media. In a simple fio IO test, blkpoll boosts performance by 50% in the single thread test and ~20% in the 8 threads test, so that is the baseline. In the swap test above, blkpoll boosts performance by ~27% in the single thread test, though blkpoll uses 2x the CPU time. If we enable hybrid polling, the performance gain drops very slightly, but the CPU time is only 50% worse than without blkpoll. By adjusting the hybrid poll parameter, the CPU time penalty is reduced further. In the 8 threads test, blkpoll doesn't help: performance is similar to that without blkpoll, but CPU utilization is similar too, because there is lock contention in the swap path and the CPU time spent on blkpoll isn't high. So overall, blkpoll swapin isn't worse than swapin without it.

Swapin readahead might read several pages in at the same time and form a big IO request. Since that IO will take longer, it doesn't make sense to poll, so the patch only does iopoll for single-page swapin.

[akpm@linux-foundation.org: coding-style fixes]
Link: http://lkml.kernel.org/r/070c3c3e40b711e7b1390002c991e86a-b5408f0@7511894063d3764ff01ea8111f5a004d7dd700ed078797c204a24e620ddb965c
Signed-off-by: Shaohua Li <shli@fb.com>
Cc: Tim Chen <tim.c.chen@intel.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Jens Axboe <axboe@fb.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
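The caller-side plumbing for the new do_poll argument lives outside mm/page_io.c, so it does not appear in this diff. Below is a minimal sketch of how a swapin caller could wire the flag up; the swapin_readahead()/read_swap_cache_async() call chain and the swapin_nr_pages() helper are assumptions based on the description above, not part of this hunk:

/* Sketch only: disable polling whenever readahead grows past one page. */
struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
                              struct vm_area_struct *vma, unsigned long addr)
{
        bool do_poll = true;

        if (swapin_nr_pages(swp_offset(entry)) > 1)
                /*
                 * Multi-page readahead forms a bigger, slower IO request;
                 * spinning on its completion would only burn CPU.
                 */
                do_poll = false;

        /* read_swap_cache_async() ends up calling swap_readpage(page, do_poll) */
        return read_swap_cache_async(entry, gfp_mask, vma, addr, do_poll);
}

Only the single-page fault path keeps do_poll true, which is what the changelog means by "only does iopoll for single-page swapin".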
Diffstat (limited to 'mm/page_io.c')
-rw-r--r--  mm/page_io.c  23
1 file changed, 21 insertions, 2 deletions
diff --git a/mm/page_io.c b/mm/page_io.c
index 2da71e627812..b6c4ac388209 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -117,6 +117,7 @@ static void swap_slot_free_notify(struct page *page)
 static void end_swap_bio_read(struct bio *bio)
 {
         struct page *page = bio->bi_io_vec[0].bv_page;
+        struct task_struct *waiter = bio->bi_private;
 
         if (bio->bi_status) {
                 SetPageError(page);
@@ -132,7 +133,9 @@ static void end_swap_bio_read(struct bio *bio)
                 swap_slot_free_notify(page);
 out:
         unlock_page(page);
+        WRITE_ONCE(bio->bi_private, NULL);
         bio_put(bio);
+        wake_up_process(waiter);
 }
 
 int generic_swapfile_activate(struct swap_info_struct *sis,
@@ -329,11 +332,13 @@ out:
         return ret;
 }
 
-int swap_readpage(struct page *page)
+int swap_readpage(struct page *page, bool do_poll)
 {
         struct bio *bio;
         int ret = 0;
         struct swap_info_struct *sis = page_swap_info(page);
+        blk_qc_t qc;
+        struct block_device *bdev;
 
         VM_BUG_ON_PAGE(!PageSwapCache(page), page);
         VM_BUG_ON_PAGE(!PageLocked(page), page);
@@ -372,9 +377,23 @@ int swap_readpage(struct page *page)
                 ret = -ENOMEM;
                 goto out;
         }
+        bdev = bio->bi_bdev;
+        bio->bi_private = current;
         bio_set_op_attrs(bio, REQ_OP_READ, 0);
         count_vm_event(PSWPIN);
-        submit_bio(bio);
+        bio_get(bio);
+        qc = submit_bio(bio);
+        while (do_poll) {
+                set_current_state(TASK_UNINTERRUPTIBLE);
+                if (!READ_ONCE(bio->bi_private))
+                        break;
+
+                if (!blk_mq_poll(bdev_get_queue(bdev), qc))
+                        break;
+        }
+        __set_current_state(TASK_RUNNING);
+        bio_put(bio);
+
 out:
         return ret;
 }
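The synchronization in the hunks above is subtle, so here is the submit-side loop restated with annotations; the comments are one reading of the code, not changelog text. end_swap_bio_read() clears bi_private with WRITE_ONCE() before waking the submitter, so an empty bi_private is the "IO done" signal the poll loop tests for, and the extra bio_get() keeps the bio alive for that test even after the completion path drops its own reference with bio_put():

        bio->bi_private = current;        /* register this task as the waiter */
        bio_get(bio);                     /* pin the bio for the poll loop below */
        qc = submit_bio(bio);
        while (do_poll) {
                set_current_state(TASK_UNINTERRUPTIBLE);
                if (!READ_ONCE(bio->bi_private))
                        break;            /* end_swap_bio_read() cleared it: done */

                if (!blk_mq_poll(bdev_get_queue(bdev), qc))
                        break;            /* queue cannot poll, or polling is off */
        }
        __set_current_state(TASK_RUNNING);
        bio_put(bio);                     /* drop the reference taken above */

If polling is disabled in the block layer, blk_mq_poll() returns false on the first pass and the task simply falls back to the usual wait on the locked page, which is why the changelog expects no noticeable change in that configuration.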