author		Kent Overstreet <kmo@daterainc.com>	2013-09-24 02:17:31 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-09-24 17:41:43 -0400
commit		c2a4f3183a1248f615a695fbd8905da55ad11bba
tree		51233866301869506c0728c812fe3668ae1e94ce	/drivers/md/bcache/writeback.c
parent		61cbd250f867f98bb4738000afc6002d6f2b14bd
bcache: Fix a writeback performance regression
Background writeback works by scanning the btree for dirty data and adding
those keys into a fixed size buffer, then, for each dirty key in the keybuf,
writing it to the backing device.

When read_dirty() finishes and it's time to scan for more dirty data, we need
to wait for the outstanding writeback IO to finish - the in-flight writes
still take up slots in the keybuf (so that foreground writes can check for
them to avoid races). Without that wait, we'll continually rescan when we'd
only be able to add at most a key or two to the keybuf, and that takes locks
that starve foreground IO. Doh.

Signed-off-by: Kent Overstreet <kmo@daterainc.com>
Cc: linux-stable <stable@vger.kernel.org> # >= v3.10
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
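As an illustration of the throttling scheme the patch moves to - a counting
semaphore capping in-flight writeback IOs at 64, taken before each IO is
issued and released when it completes - here is a minimal userspace sketch.
POSIX semaphores and pthreads stand in for the kernel's sema_init()/down()/
up() and closures; issue_io() and complete_io() are illustrative names, not
functions from the bcache code:

#include <pthread.h>
#include <semaphore.h>
#include <stdio.h>
#include <unistd.h>

#define MAX_IN_FLIGHT 64

static sem_t in_flight;

/* Stands in for write_dirty_finish(): the IO completes and frees a slot. */
static void *complete_io(void *arg)
{
	(void)arg;
	usleep(1000);		/* pretend the backing device write took a while */
	sem_post(&in_flight);	/* kernel analogue: up(&dc->in_flight) */
	return NULL;
}

/* Stands in for read_dirty() dispatching one dirty key. */
static void issue_io(void)
{
	pthread_t t;

	sem_wait(&in_flight);	/* kernel analogue: down(&dc->in_flight) */
	pthread_create(&t, NULL, complete_io, NULL);
	pthread_detach(t);
}

int main(void)
{
	/* kernel analogue: sema_init(&dc->in_flight, 64) */
	sem_init(&in_flight, 0, MAX_IN_FLIGHT);

	for (int i = 0; i < 1000; i++)
		issue_io();

	printf("issued 1000 IOs, at most %d in flight at once\n",
	       MAX_IN_FLIGHT);
	return 0;
}

Because the scan loop blocks in sem_wait() when the pipeline is full, it
sleeps instead of spinning - the same reason the patch blocks read_dirty()
on the semaphore rather than repeatedly rescanning the keybuf.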
Diffstat (limited to 'drivers/md/bcache/writeback.c')
-rw-r--r--	drivers/md/bcache/writeback.c	43
1 file changed, 21 insertions, 22 deletions
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 22cbff551628..27ac51934822 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -94,11 +94,15 @@ static void update_writeback_rate(struct work_struct *work)
 
 static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors)
 {
+	uint64_t ret;
+
 	if (atomic_read(&dc->disk.detaching) ||
 	    !dc->writeback_percent)
 		return 0;
 
-	return bch_next_delay(&dc->writeback_rate, sectors * 10000000ULL);
+	ret = bch_next_delay(&dc->writeback_rate, sectors * 10000000ULL);
+
+	return min_t(uint64_t, ret, HZ);
 }
 
 /* Background writeback */
@@ -208,7 +212,7 @@ normal_refill:
 
 	up_write(&dc->writeback_lock);
 
-	ratelimit_reset(&dc->writeback_rate);
+	bch_ratelimit_reset(&dc->writeback_rate);
 
 	/* Punt to workqueue only so we don't recurse and blow the stack */
 	continue_at(cl, read_dirty, dirty_wq);
@@ -318,9 +322,7 @@ static void write_dirty_finish(struct closure *cl)
 	}
 
 	bch_keybuf_del(&dc->writeback_keys, w);
-	atomic_dec_bug(&dc->in_flight);
-
-	closure_wake_up(&dc->writeback_wait);
+	up(&dc->in_flight);
 
 	closure_return_with_destructor(cl, dirty_io_destructor);
 }
@@ -349,7 +351,7 @@ static void write_dirty(struct closure *cl)
 
 	closure_bio_submit(&io->bio, cl, &io->dc->disk);
 
-	continue_at(cl, write_dirty_finish, dirty_wq);
+	continue_at(cl, write_dirty_finish, system_wq);
 }
 
 static void read_dirty_endio(struct bio *bio, int error)
@@ -369,7 +371,7 @@ static void read_dirty_submit(struct closure *cl)
 
 	closure_bio_submit(&io->bio, cl, &io->dc->disk);
 
-	continue_at(cl, write_dirty, dirty_wq);
+	continue_at(cl, write_dirty, system_wq);
 }
 
 static void read_dirty(struct closure *cl)
@@ -394,12 +396,9 @@ static void read_dirty(struct closure *cl)
 
 		if (delay > 0 &&
 		    (KEY_START(&w->key) != dc->last_read ||
-		     jiffies_to_msecs(delay) > 50)) {
-			w->private = NULL;
-
-			closure_delay(&dc->writeback, delay);
-			continue_at(cl, read_dirty, dirty_wq);
-		}
+		     jiffies_to_msecs(delay) > 50))
+			while (delay)
+				delay = schedule_timeout(delay);
 
 		dc->last_read	= KEY_OFFSET(&w->key);
 
@@ -424,15 +423,10 @@ static void read_dirty(struct closure *cl)
 
 		trace_bcache_writeback(&w->key);
 
-		closure_call(&io->cl, read_dirty_submit, NULL, &dc->disk.cl);
+		down(&dc->in_flight);
+		closure_call(&io->cl, read_dirty_submit, NULL, cl);
 
 		delay = writeback_delay(dc, KEY_SIZE(&w->key));
-
-		atomic_inc(&dc->in_flight);
-
-		if (!closure_wait_event(&dc->writeback_wait, cl,
-					atomic_read(&dc->in_flight) < 64))
-			continue_at(cl, read_dirty, dirty_wq);
 	}
 
 	if (0) {
@@ -442,7 +436,11 @@ err:
 		bch_keybuf_del(&dc->writeback_keys, w);
 	}
 
-	refill_dirty(cl);
+	/*
+	 * Wait for outstanding writeback IOs to finish (and keybuf slots to be
+	 * freed) before refilling again
+	 */
+	continue_at(cl, refill_dirty, dirty_wq);
 }
 
 /* Init */
@@ -484,6 +482,7 @@ void bch_sectors_dirty_init(struct cached_dev *dc)
 
 void bch_cached_dev_writeback_init(struct cached_dev *dc)
 {
+	sema_init(&dc->in_flight, 64);
 	closure_init_unlocked(&dc->writeback);
 	init_rwsem(&dc->writeback_lock);
 
@@ -513,7 +512,7 @@ void bch_writeback_exit(void)
 
 int __init bch_writeback_init(void)
 {
-	dirty_wq = create_singlethread_workqueue("bcache_writeback");
+	dirty_wq = create_workqueue("bcache_workqueue");
 	if (!dirty_wq)
 		return -ENOMEM;
 
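For reference, a sketch of the delay handling the patch introduces in
read_dirty() and writeback_delay(): schedule_timeout() returns the time
remaining if the task wakes early, so the new loop keeps sleeping until the
full ratelimit delay has elapsed, and the computed delay is clamped to HZ
(one second). A userspace analogue under those assumptions, with nanosleep()
playing the role of schedule_timeout() and writeback_sleep() as an
illustrative name:

#include <errno.h>
#include <stdio.h>
#include <time.h>

/* Sleep for the full delay, restarting if interrupted; cap at one second. */
static void writeback_sleep(struct timespec delay)
{
	struct timespec rem;

	if (delay.tv_sec >= 1) {	/* mirrors min_t(uint64_t, ret, HZ) */
		delay.tv_sec = 1;
		delay.tv_nsec = 0;
	}

	/* mirrors: while (delay) delay = schedule_timeout(delay); */
	while (nanosleep(&delay, &rem) == -1 && errno == EINTR)
		delay = rem;
}

int main(void)
{
	struct timespec d = { 0, 250 * 1000 * 1000 };	/* 250ms */

	writeback_sleep(d);
	puts("slept the full ratelimit delay");
	return 0;
}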